From c09848c11d0455933c2f00fda4109efd8130eb66 Mon Sep 17 00:00:00 2001 From: Sandro Wenzel Date: Wed, 22 Jan 2025 11:00:11 +0100 Subject: [PATCH 0001/1914] CPV,PHS: code reduction and fix in digitization --- .../DigitizerWorkflow/src/CPVDigitizerSpec.cxx | 17 ++--------------- Steer/DigitizerWorkflow/src/CPVDigitizerSpec.h | 8 -------- .../src/PHOSDigitizerSpec.cxx | 18 +++--------------- .../DigitizerWorkflow/src/PHOSDigitizerSpec.h | 8 -------- 4 files changed, 5 insertions(+), 46 deletions(-) diff --git a/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.cxx b/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.cxx index 91b39a8f5031e..a950cda6019ec 100644 --- a/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.cxx +++ b/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.cxx @@ -55,20 +55,6 @@ void DigitizerSpec::initDigitizerTask(framework::InitContext& ic) mDeadTime = o2::cpv::CPVSimParams::Instance().mDeadTime; // PHOS dead time (should include readout => mReadoutTime< mDeadTime) } } -// helper function which will be offered as a service -void DigitizerSpec::retrieveHits(const char* brname, - int sourceID, - int entryID) -{ - auto br = mSimChains[sourceID]->GetBranch(brname); - if (!br) { - LOG(error) << "No branch found"; - return; - } - mHits->clear(); - br->SetAddress(&mHits); - br->GetEntry(entryID); -} void DigitizerSpec::updateTimeDependentParams(framework::ProcessingContext& ctx) { @@ -165,7 +151,8 @@ void DigitizerSpec::run(framework::ProcessingContext& pc) // get the hits for this event and this source int source = part->sourceID; int entry = part->entryID; - retrieveHits("CPVHit", source, entry); + mHits->clear(); + context->retrieveHits(mSimChains, "CPVHit", source, entry, mHits); part++; if (part == eventParts[collID].end() && isLastStream) { // last stream, copy digits directly to output vector mDigitizer.processHits(mHits, mDigitsFinal, mDigitsOut, mLabels, collID, source, dt); diff --git a/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.h 
b/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.h index 8d75def27ade4..151087a05138b 100644 --- a/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.h +++ b/Steer/DigitizerWorkflow/src/CPVDigitizerSpec.h @@ -54,14 +54,6 @@ class DigitizerSpec final : public o2::base::BaseDPLDigitizer void run(framework::ProcessingContext& ctx); private: - /// \brief helper function which will be offered as a service - /// \param brname Name of the hit branch - /// \param sourceID ID of the source - /// \param entryID ID of the entry in the source - void retrieveHits(const char* brname, - int sourceID, - int entryID); - void updateTimeDependentParams(framework::ProcessingContext& ctx); float mReadoutTime = 0.; ///< PHOS readout time diff --git a/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.cxx b/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.cxx index da5e299b9eac2..943975c7a3256 100644 --- a/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.cxx +++ b/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.cxx @@ -60,20 +60,6 @@ void DigitizerSpec::initDigitizerTask(framework::InitContext& ic) } mHits = new std::vector(); } -// helper function which will be offered as a service -void DigitizerSpec::retrieveHits(const char* brname, - int sourceID, - int entryID) -{ - auto br = mSimChains[sourceID]->GetBranch(brname); - if (!br) { - LOG(error) << "No branch found"; - return; - } - mHits->clear(); - br->SetAddress(&mHits); - br->GetEntry(entryID); -} void DigitizerSpec::run(framework::ProcessingContext& pc) { @@ -156,7 +142,9 @@ void DigitizerSpec::run(framework::ProcessingContext& pc) // get the hits for this event and this source int source = part->sourceID; int entry = part->entryID; - retrieveHits("PHSHit", source, entry); + mHits->clear(); + context->retrieveHits(mSimChains, "PHSHit", source, entry, mHits); + part++; if (part == eventParts[collID].end() && isLastStream) { // last stream, copy digits directly to output vector mDigitizer.processHits(mHits, mDigitsFinal, mDigitsOut, mLabels, entry, 
source, dt); diff --git a/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.h b/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.h index 03360730a4c19..cf30ad085d717 100644 --- a/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.h +++ b/Steer/DigitizerWorkflow/src/PHOSDigitizerSpec.h @@ -55,14 +55,6 @@ class DigitizerSpec final : public o2::base::BaseDPLDigitizer void run(framework::ProcessingContext& ctx); private: - /// \brief helper function which will be offered as a service - /// \param brname Name of the hit branch - /// \param sourceID ID of the source - /// \param entryID ID of the entry in the source - void retrieveHits(const char* brname, - int sourceID, - int entryID); - float mReadoutTime = 0.; ///< PHOS readout time float mDeadTime = 0.; ///< PHOS dead time bool mInitSimParams = true; ///< To initialize SimParams From 6e582ef4ad23f3307617b9c6f882adc74968a4f5 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 22 Jan 2025 11:23:17 +0100 Subject: [PATCH 0002/1914] DPL: drop obsolete communication method completely This still parses the input for no reason. --- Framework/Core/src/runDataProcessing.cxx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Framework/Core/src/runDataProcessing.cxx b/Framework/Core/src/runDataProcessing.cxx index 28bbde2d474de..66fc2c7b2c3df 100644 --- a/Framework/Core/src/runDataProcessing.cxx +++ b/Framework/Core/src/runDataProcessing.cxx @@ -64,7 +64,6 @@ #include "Framework/DataTakingContext.h" #include "Framework/CommonServices.h" #include "Framework/DefaultsHelpers.h" -#include "ControlServiceHelpers.h" #include "ProcessingPoliciesHelpers.h" #include "DriverServerContext.h" #include "HTTPParser.h" @@ -848,9 +847,7 @@ void processChildrenOutput(uv_loop_t* loop, // TODO: have multiple display modes // TODO: graphical view of the processing? 
assert(infos.size() == controls.size()); - std::match_results match; ParsedMetricMatch metricMatch; - ParsedConfigMatch configMatch; int processed = 0; for (size_t di = 0, de = infos.size(); di < de; ++di) { @@ -881,11 +878,7 @@ void processChildrenOutput(uv_loop_t* loop, // in the GUI. // Then we check if it is part of our Poor man control system // if yes, we execute the associated command. - if (logLevel == LogParsingHelpers::LogLevel::Info && ControlServiceHelpers::parseControl(token, match)) { - throw runtime_error("stdout is not supported anymore as a driver backend. Please use ws://"); - } else if (logLevel == LogParsingHelpers::LogLevel::Info && DeviceConfigHelper::parseConfig(token.substr(16), configMatch)) { - throw runtime_error("stdout is not supported anymore as a driver backend. Please use ws://"); - } else if (!control.quiet && (token.find(control.logFilter) != std::string::npos) && logLevel >= info.logLevel) { + if (!control.quiet && (token.find(control.logFilter) != std::string::npos) && logLevel >= info.logLevel) { assert(info.historyPos >= 0); assert(info.historyPos < info.history.size()); info.history[info.historyPos] = token; From 89fbec2b6f104cbc25058bc4d45c18e219ca9ce7 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 22 Jan 2025 13:38:13 +0100 Subject: [PATCH 0003/1914] DPL Analysis: avoid calculating indexing columns All we need to know is that there is at least one for the constrain to apply. 
--- Framework/Core/include/Framework/ASoA.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 7b0b69ec8941f..e09b2ffd69df1 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1024,6 +1024,9 @@ concept can_bind = requires(T&& t) { { t.B::mColumnIterator }; }; +template +concept has_index = (is_indexing_column || ...); + template struct TableIterator : IP, C... { public: @@ -1031,8 +1034,6 @@ struct TableIterator : IP, C... { using policy_t = IP; using all_columns = framework::pack; using persistent_columns_t = framework::selected_pack; - using indexing_columns_t = framework::selected_pack; - constexpr inline static bool has_index_v = framework::pack_size(indexing_columns_t{}) > 0; using external_index_columns_t = framework::selected_pack; using internal_index_columns_t = framework::selected_pack; using bindings_pack_t = decltype([](framework::pack) -> framework::pack {}(external_index_columns_t{})); // decltype(extractBindings(external_index_columns_t{})); @@ -1040,15 +1041,21 @@ struct TableIterator : IP, C... { TableIterator(arrow::ChunkedArray* columnData[sizeof...(C)], IP&& policy) : IP{policy}, C(columnData[framework::has_type_at_v(all_columns{})])... + { + bind(); + } + + TableIterator(arrow::ChunkedArray* columnData[sizeof...(C)], IP&& policy) + requires(has_index) + : IP{policy}, + C(columnData[framework::has_type_at_v(all_columns{})])... { bind(); // In case we have an index column might need to constrain the actual // number of rows in the view to the range provided by the index. // FIXME: we should really understand what happens to an index when we // have a RowViewFiltered. 
- if constexpr (has_index_v) { - this->limitRange(this->rangeStart(), this->rangeEnd()); - } + this->limitRange(this->rangeStart(), this->rangeEnd()); } TableIterator() = default; @@ -1192,7 +1199,7 @@ struct TableIterator : IP, C... { [this](T*) -> void {}, }; (f(static_cast(nullptr)), ...); - if constexpr (has_index_v) { + if constexpr (has_index) { this->setIndices(this->getIndices()); this->setOffsets(this->getOffsets()); } From 47eb04f031e3346ca7e09cac6ee791defc96b79b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 19 Jan 2025 21:35:05 +0100 Subject: [PATCH 0004/1914] GPU: Remove C++ <17 protections from GPU code --- GPU/Common/GPUCommonAlgorithm.h | 3 +-- GPU/Common/GPUCommonConstants.h | 2 +- GPU/Common/GPUCommonDef.h | 7 ------ GPU/Common/GPUCommonLogger.h | 2 +- GPU/Common/GPUCommonMath.h | 12 +++++----- GPU/GPUTracking/Base/GPUGeneralKernels.h | 6 ++--- GPU/GPUTracking/Base/GPUParam.inc | 2 +- GPU/GPUTracking/Base/GPUProcessor.h | 2 +- .../Base/opencl/GPUReconstructionOCL.cl | 3 --- .../GPUTPCCompressionKernels.cxx | 24 +++++++++---------- .../GPUTPCCompressionKernels.h | 2 +- GPU/GPUTracking/DataTypes/GPUSettings.h | 2 +- .../DataTypes/GPUTPCGMPolynomialField.h | 6 ++--- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 8 +++---- .../Definitions/GPUDefGPUParameters.h | 7 +----- GPU/GPUTracking/Definitions/GPULogging.h | 2 +- GPU/GPUTracking/ITS/GPUITSFitterKernels.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h | 2 +- .../Merger/GPUTPCGlobalDebugSortKernels.h | 2 +- .../Refit/GPUTrackingRefitKernel.h | 2 +- .../SliceTracker/GPUTPCCreateOccupancyMap.h | 2 +- .../SliceTracker/GPUTPCCreateSliceData.h | 2 +- .../SliceTracker/GPUTPCGlobalTracking.h | 4 ++-- .../SliceTracker/GPUTPCNeighboursCleaner.h | 2 +- .../SliceTracker/GPUTPCNeighboursFinder.h | 2 +- .../GPUTPCSectorDebugSortKernels.h | 2 +- .../SliceTracker/GPUTPCSliceData.cxx | 4 ++-- .../SliceTracker/GPUTPCStartHitsFinder.h | 2 +- 
.../SliceTracker/GPUTPCStartHitsSorter.h | 2 +- .../SliceTracker/GPUTPCTrackletConstructor.h | 2 +- .../SliceTracker/GPUTPCTrackletSelector.h | 2 +- .../GPUTPCCFChargeMapFiller.h | 2 +- .../GPUTPCCFCheckPadBaseline.h | 2 +- .../TPCClusterFinder/GPUTPCCFClusterizer.h | 2 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 10 ++++---- .../TPCClusterFinder/GPUTPCCFDecodeZS.h | 4 ++-- .../TPCClusterFinder/GPUTPCCFDeconvolution.h | 2 +- .../TPCClusterFinder/GPUTPCCFGather.h | 2 +- .../GPUTPCCFMCLabelFlattener.h | 2 +- .../GPUTPCCFNoiseSuppression.h | 2 +- .../TPCClusterFinder/GPUTPCCFPeakFinder.h | 2 +- .../GPUTPCCFStreamCompaction.h | 2 +- .../TPCConvert/GPUTPCConvertKernel.h | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 2 +- .../TRDTracking/GPUTRDTrackerKernels.cxx | 5 +--- .../TRDTracking/GPUTRDTrackerKernels.h | 2 +- GPU/GPUTracking/utils/qconfigrtc.h | 4 ---- GPU/GPUTracking/utils/strtag.h | 6 +---- GPU/TPCFastTransformation/Spline2D.h | 2 +- GPU/TPCFastTransformation/Spline2DSpec.h | 2 +- GPU/TPCFastTransformation/SplineSpec.h | 2 +- .../devtools/IrregularSpline2D3D.h | 4 ++-- 54 files changed, 82 insertions(+), 109 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index e5a963b4c2020..dd3cc1925b04a 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -17,8 +17,7 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_GPUCODE) -//&& (!defined __cplusplus || __cplusplus < 201402L) // This would enable to custom search also on the CPU if available by the compiler, but it is not always faster, so we stick to std::sort +#if !defined(GPUCA_GPUCODE) // Could also enable custom search on the CPU, but it is not always faster, so we stick to std::sort #include #define GPUCA_ALGORITHM_STD #endif diff --git a/GPU/Common/GPUCommonConstants.h b/GPU/Common/GPUCommonConstants.h index c6dfedc14ab7e..d209b6a42a794 100644 --- a/GPU/Common/GPUCommonConstants.h 
+++ b/GPU/Common/GPUCommonConstants.h @@ -19,7 +19,7 @@ namespace GPUCA_NAMESPACE::gpu::gpu_common_constants { -static CONSTEXPR const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this now that we use only OpenCL CPP +static constexpr const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this now that we use only OpenCL CPP } #endif diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index 2b3164d16d981..059e011a715d8 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -40,13 +40,6 @@ #endif #endif -// Definitions for C++11 features -#if defined(__cplusplus) && __cplusplus >= 201703L - #define CONSTEXPR constexpr -#else - #define CONSTEXPR -#endif - // Set AliRoot / O2 namespace #if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined(GPUCA_ALIROOT_LIB) || defined (GPUCA_GPUCODE) #define GPUCA_ALIGPUCODE diff --git a/GPU/Common/GPUCommonLogger.h b/GPU/Common/GPUCommonLogger.h index 6818564c048db..ca5a8ff3c3410 100644 --- a/GPU/Common/GPUCommonLogger.h +++ b/GPU/Common/GPUCommonLogger.h @@ -45,7 +45,7 @@ struct DummyLogger { #define LOGP(...) // #define LOGP(...) static_assert(false, "LOGP(...) 
unsupported in GPU code"); -#elif defined(GPUCA_STANDALONE) || defined(GPUCA_ALIROOT_LIB) || (!defined(__cplusplus) || __cplusplus < 201703L) +#elif defined(GPUCA_STANDALONE) || defined(GPUCA_ALIROOT_LIB) #include #include #define LOG(type) std::cout diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index ec1c3d54096a3..27b3be8869b94 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -73,8 +73,8 @@ class GPUCommonMath GPUd() static float Log(float x); GPUd() static float Exp(float x); GPUhdni() static float Copysign(float x, float y); - GPUd() static CONSTEXPR float TwoPi() { return 6.2831853f; } - GPUd() static CONSTEXPR float Pi() { return 3.1415927f; } + GPUd() static constexpr float TwoPi() { return 6.2831853f; } + GPUd() static constexpr float Pi() { return 3.1415927f; } GPUd() static float Round(float x); GPUd() static float Floor(float x); GPUd() static uint32_t Float2UIntReint(const float& x); @@ -143,12 +143,12 @@ class GPUCommonMath GPUd() static float FMulRZ(float a, float b); template - GPUd() CONSTEXPR static T nextMultipleOf(T val); + GPUd() constexpr static T nextMultipleOf(T val); template GPUdi() static float Sum2(float w, Args... 
args) { - if CONSTEXPR (sizeof...(Args) == 0) { + if constexpr (sizeof...(Args) == 0) { return w * w; } else { return w * w + Sum2(args...); @@ -181,9 +181,9 @@ typedef GPUCommonMath CAMath; #endif // clang-format on template -GPUdi() CONSTEXPR T GPUCommonMath::nextMultipleOf(T val) +GPUdi() constexpr T GPUCommonMath::nextMultipleOf(T val) { - if CONSTEXPR (I & (I - 1)) { + if constexpr (I & (I - 1)) { T tmp = val % I; if (tmp) { val += I - tmp; diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 9829fe350fde1..333a53576663b 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -81,7 +81,7 @@ class GPUKernelTemplate }; typedef GPUconstantref() GPUConstantMem processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors; @@ -96,7 +96,7 @@ class GPUKernelTemplate class GPUMemClean16 : public GPUKernelTemplate { public: - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); }; @@ -105,7 +105,7 @@ class GPUMemClean16 : public GPUKernelTemplate class GPUitoa : public GPUKernelTemplate { public: - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t 
nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 0b32067f8980c..357af42276575 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -116,7 +116,7 @@ GPUdi() float GPUParam::GetSystematicClusterErrorC122(float x, float y, uint8_t if (dx > occupancyTotal * rec.tpc.sysClusErrorC12Box) { return 0.f; } - CONSTEXPR float dEdgeInv = 18.f / CAMath::Pi(); + constexpr float dEdgeInv = 18.f / CAMath::Pi(); const float dy = (sector == (GPUCA_NSLICES / 2 + 1) ? 0.5f : -0.5f) * (y / x) * dEdgeInv + 0.5f; const float errC12 = rec.tpc.sysClusErrorC12Norm * occupancyTotal * dy; return errC12 * errC12; diff --git a/GPU/GPUTracking/Base/GPUProcessor.h b/GPU/GPUTracking/Base/GPUProcessor.h index eb635ae210b73..473b6dd903226 100644 --- a/GPU/GPUTracking/Base/GPUProcessor.h +++ b/GPU/GPUTracking/Base/GPUProcessor.h @@ -108,7 +108,7 @@ class GPUProcessor if (basePtr == 0) { basePtr = 1; } - CONSTEXPR const size_t maxAlign = (alignof(S) > alignment) ? alignof(S) : alignment; + constexpr const size_t maxAlign = (alignof(S) > alignment) ? 
alignof(S) : alignment; basePtr += getAlignment(basePtr); S* retVal = (S*)(basePtr); basePtr += nEntries * sizeof(S); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 863cd82cb56eb..4a3cda6c2cddc 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -14,9 +14,6 @@ // clang-format off #define __OPENCL__ -#if defined(__cplusplus) && __cplusplus >= 201703L - #define __OPENCL__ -#endif #define GPUCA_GPUTYPE_OPENCL #ifdef __OPENCL__ diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 15888e14eec04..8f28cbe5563af 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -332,7 +332,7 @@ GPUdi() GPUTPCCompressionGatherKernels::Vec128* GPUTPCCompressionGatherKernels:: template GPUdi() bool GPUTPCCompressionGatherKernels::isAlignedTo(const S* ptr) { - if CONSTEXPR (alignof(S) >= alignof(T)) { + if constexpr (alignof(S) >= alignof(T)) { static_cast(ptr); return true; } else { @@ -343,10 +343,10 @@ GPUdi() bool GPUTPCCompressionGatherKernels::isAlignedTo(const S* ptr) template <> GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy(uint8_t* GPUrestrict() dst, const uint8_t* GPUrestrict() src, uint32_t size, int32_t nThreads, int32_t iThread) { - CONSTEXPR const int32_t vec128Elems = CpyVector::Size; - CONSTEXPR const int32_t vec64Elems = CpyVector::Size; - CONSTEXPR const int32_t vec32Elems = CpyVector::Size; - CONSTEXPR const int32_t vec16Elems = CpyVector::Size; + constexpr const int32_t vec128Elems = CpyVector::Size; + constexpr const int32_t vec64Elems = CpyVector::Size; + constexpr const int32_t vec32Elems = CpyVector::Size; + constexpr const int32_t vec16Elems = CpyVector::Size; if (size >= uint32_t(nThreads * vec128Elems)) { 
compressorMemcpyVectorised(dst, src, size, nThreads, iThread); @@ -364,9 +364,9 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy(uint8_t* template <> GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy(uint16_t* GPUrestrict() dst, const uint16_t* GPUrestrict() src, uint32_t size, int32_t nThreads, int32_t iThread) { - CONSTEXPR const int32_t vec128Elems = CpyVector::Size; - CONSTEXPR const int32_t vec64Elems = CpyVector::Size; - CONSTEXPR const int32_t vec32Elems = CpyVector::Size; + constexpr const int32_t vec128Elems = CpyVector::Size; + constexpr const int32_t vec64Elems = CpyVector::Size; + constexpr const int32_t vec32Elems = CpyVector::Size; if (size >= uint32_t(nThreads * vec128Elems)) { compressorMemcpyVectorised(dst, src, size, nThreads, iThread); @@ -382,8 +382,8 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy(uint16_t template <> GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy(uint32_t* GPUrestrict() dst, const uint32_t* GPUrestrict() src, uint32_t size, int32_t nThreads, int32_t iThread) { - CONSTEXPR const int32_t vec128Elems = CpyVector::Size; - CONSTEXPR const int32_t vec64Elems = CpyVector::Size; + constexpr const int32_t vec128Elems = CpyVector::Size; + constexpr const int32_t vec64Elems = CpyVector::Size; if (size >= uint32_t(nThreads * vec128Elems)) { compressorMemcpyVectorised(dst, src, size, nThreads, iThread); @@ -446,8 +446,8 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpyBuffered(V* buf, T* V* GPUrestrict() dstAligned = nullptr; T* bufT = reinterpret_cast(buf); - CONSTEXPR const int32_t bufSize = GPUCA_WARP_SIZE; - CONSTEXPR const int32_t bufTSize = bufSize * sizeof(V) / sizeof(T); + constexpr const int32_t bufSize = GPUCA_WARP_SIZE; + constexpr const int32_t bufTSize = bufSize * sizeof(V) / sizeof(T); for (uint32_t i = 0; i < nEntries; i++) { uint32_t srcPos = 0; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h 
b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h index 5186b16c49be3..2fc114324830e 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h @@ -27,7 +27,7 @@ namespace GPUCA_NAMESPACE::gpu class GPUTPCCompressionKernels : public GPUKernelTemplate { public: - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCCompression; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCCompression; } enum K : int32_t { step0attached = 0, diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index 499287dc3200d..d4cee937b727f 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -45,7 +45,7 @@ class GPUSettings RejectionStrategyA = 1, RejectionStrategyB = 2 }; - static CONSTEXPR const uint32_t TPC_MAX_TF_TIME_BIN = ((256 * 3564 + 2 * 8 - 2) / 8); + static constexpr const uint32_t TPC_MAX_TF_TIME_BIN = ((256 * 3564 + 2 * 8 - 2) / 8); }; // Settings describing the global run parameters diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h index 88294b2b06c25..19b72c8a7be90 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h @@ -55,9 +55,9 @@ class GPUTPCGMPolynomialField void Print() const; - static CONSTEXPR const int32_t NTPCM = 10; // number of coefficients - static CONSTEXPR const int32_t NTRDM = 20; // number of coefficients for the TRD field - static CONSTEXPR const int32_t NITSM = 10; // number of coefficients for the ITS field + static constexpr const int32_t NTPCM = 10; // number of coefficients + static constexpr const int32_t NTRDM = 20; // number of coefficients for the TRD field + static constexpr const int32_t NITSM = 10; // number of coefficients for the ITS 
field GPUd() static void GetPolynomsTpc(float x, float y, float z, float f[NTPCM]); GPUd() static void GetPolynomsTrd(float x, float y, float z, float f[NTRDM]); diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index fcafa34547828..b77961c11c0d4 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -60,7 +60,7 @@ class GPUTPCGeometry // TODO: Make values constexpr const float mPadHeight[10] = {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 1.5f, 1.5f}; const float mPadWidth[10] = {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}; - static CONSTEXPR float FACTOR_T2Z = 250.f / 512.f; // Used in compression, must remain constant at 250cm, 512 time bins! + static constexpr float FACTOR_T2Z = 250.f / 512.f; // Used in compression, must remain constant at 250cm, 512 time bins! public: GPUd() int32_t GetRegion(int32_t row) const { return mRegion[row]; } @@ -90,7 +90,7 @@ class GPUTPCGeometry // TODO: Make values constexpr const float mPadHeight[3] = {.75f, 1.f, 1.5f}; const float mPadWidth[3] = {.4f, .6f, .6f}; - static CONSTEXPR float FACTOR_T2Z = 250.f / 1024.f; // Used in compression, must remain constant at 250cm, 1024 time bins! + static constexpr float FACTOR_T2Z = 250.f / 1024.f; // Used in compression, must remain constant at 250cm, 1024 time bins! public: GPUd() int32_t GetRegion(int32_t row) const { return (row < 63 ? 0 : row < 63 + 64 ? 
1 : 2); } @@ -102,10 +102,10 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() int32_t EndOROC2() const { return GPUCA_ROW_COUNT; } #endif private: - static CONSTEXPR float FACTOR_Z2T = 1.f / FACTOR_T2Z; + static constexpr float FACTOR_Z2T = 1.f / FACTOR_T2Z; public: - GPUd() static CONSTEXPR float TPCLength() { return 250.f - 0.275f; } + GPUd() static constexpr float TPCLength() { return 250.f - 0.275f; } GPUd() float Row2X(int32_t row) const { return (mX[row]); } GPUd() float PadHeight(int32_t row) const { return (mPadHeight[GetRegion(row)]); } GPUd() float PadHeightByRegion(int32_t region) const { return (mPadHeight[region]); } diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 7cd41e1a4f846..d246f77a50290 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -632,13 +632,8 @@ #define GPUCA_SORT_STARTHITS #endif -#if defined(__cplusplus) && __cplusplus >= 201703L #define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) #define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT -#else -#define GPUCA_NEW_ALIGNMENT -#define GPUCA_OPERATOR_NEW_ALIGNMENT -#endif -// clang-format on + // clang-format on #endif diff --git a/GPU/GPUTracking/Definitions/GPULogging.h b/GPU/GPUTracking/Definitions/GPULogging.h index 79f888501745f..e33c9463a2f48 100644 --- a/GPU/GPUTracking/Definitions/GPULogging.h +++ b/GPU/GPUTracking/Definitions/GPULogging.h @@ -44,7 +44,7 @@ fmt::fprintf(stderr, string "\n", ##__VA_ARGS__); \ throw std::exception(); \ } -#elif defined(GPUCA_STANDALONE) || defined(GPUCA_GPUCODE_DEVICE) || (defined(GPUCA_ALIROOT_LIB) && defined(GPUCA_GPUCODE) && defined(__cplusplus) && __cplusplus < 201703L) +#elif defined(GPUCA_STANDALONE) || defined(GPUCA_GPUCODE_DEVICE) || (defined(GPUCA_ALIROOT_LIB) && defined(GPUCA_GPUCODE)) // For standalone / CUDA / HIP, we just use printf, which should be available // 
Temporarily, we also have to handle CUDA on AliRoot with O2 defaults due to ROOT / CUDA incompatibilities #include diff --git a/GPU/GPUTracking/ITS/GPUITSFitterKernels.h b/GPU/GPUTracking/ITS/GPUITSFitterKernels.h index f4b120564f179..9b3d300fc2bec 100644 --- a/GPU/GPUTracking/ITS/GPUITSFitterKernels.h +++ b/GPU/GPUTracking/ITS/GPUITSFitterKernels.h @@ -30,7 +30,7 @@ class GPUITSTrack; class GPUITSFitterKernels : public GPUKernelTemplate { public: - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::ITSTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::ITSTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index af5f7d177e6cc..931b58d41e21b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -66,7 +66,7 @@ class GPUTPCGMMerger : public GPUProcessor ~GPUTPCGMMerger() = default; GPUTPCGMMerger(const GPUTPCGMMerger&) = delete; const GPUTPCGMMerger& operator=(const GPUTPCGMMerger&) const = delete; - static CONSTEXPR const int32_t NSLICES = GPUCA_NSLICES; //* N slices + static constexpr const int32_t NSLICES = GPUCA_NSLICES; //* N slices struct memory { GPUAtomic(uint32_t) nRetryRefit; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h index 1d5d445a7de67..958d5081baf81 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h @@ -26,7 +26,7 @@ namespace gpu class GPUTPCGMMergerGeneral : public GPUKernelTemplate { public: - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCMerging; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUDataTypes::RecoStep::TPCMerging; } #if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) typedef GPUTPCGMMerger processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h index 4abf4ecae9a7a..e54fb8885091d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h @@ -32,7 +32,7 @@ class GPUTPCGlobalDebugSortKernels : public GPUKernelTemplate globalTracks1 = 2, globalTracks2 = 3, borderTracks = 4 }; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCMerging; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCMerging; } typedef GPUTPCGMMerger processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors.tpcMerger; } diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.h b/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.h index d52a3f3fdedb5..9b99ffb8402c0 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.h +++ b/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.h @@ -24,7 +24,7 @@ namespace o2::gpu class GPUTrackingRefitKernel : public GPUKernelTemplate { public: - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCCompression; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCCompression; } enum K : int32_t { mode0asGPU = 0, diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h b/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h index 86a1a66eeef43..8b96ad8e74183 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h @@ -29,7 +29,7 @@ class GPUTPCCreateOccupancyMap : public GPUKernelTemplate enum K { 
defaultKernel = 0, fill = 0, fold = 1 }; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... args); }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h index 916891c2035ef..2789621a7de5c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h @@ -34,7 +34,7 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h index 367b4314814fe..5dc469e2654f5 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h @@ -32,7 +32,7 @@ class GPUTPCGlobalTracking : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -52,7 +52,7 @@ class GPUTPCGlobalTrackingCopyNumbers : public 
GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h index 23c1e21e87ab0..311fef5204c3d 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h @@ -40,7 +40,7 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h index a121a0f14eb67..882428821ae84 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h @@ -51,7 +51,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h 
b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h index 50a3738501e2e..04001603a2a28 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h @@ -30,7 +30,7 @@ class GPUTPCSectorDebugSortKernels : public GPUKernelTemplate hitData = 0, startHits = 1, sliceTracks = 2 }; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } typedef GPUTPCTracker processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx index 5177c48b6a834..48c490a6f5559 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx @@ -233,7 +233,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSlice][rowIndex]; const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSlice][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSlice][0]); - CONSTEXPR const uint32_t maxN = 1u << (sizeof(calink) < 3 ? (sizeof(calink) * 8) : 24); + constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? 
(sizeof(calink) * 8) : 24); GPUTPCRow& row = mRows[rowIndex]; if (iThread == 0) { row.mFirstHitInBinOffset = CAMath::nextMultipleOf(GetGridSize(RowOffset, rowIndex) + rowIndex * GPUCA_ROWALIGNMENT / sizeof(int32_t)); @@ -318,7 +318,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n GPUbarrier(); const GPUTPCGrid& grid = row.mGrid; const int32_t numberOfBins = grid.N(); - CONSTEXPR const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF; // NOLINT: false warning + constexpr const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF; // NOLINT: false warning if (sizeof(calink) < 4 && numberOfBins >= maxBins) { if (iThread == 0) { mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_BIN_OVERFLOW, iSlice * 1000 + rowIndex, numberOfBins, maxBins); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h index b2b9bfb355fa1..ed49fad8fc6dc 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h @@ -40,7 +40,7 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h index 838fcf7e7d7e1..4c937b0414e30 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h @@ -40,7 +40,7 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker 
processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h index b1ef74b9896c1..a961501207911 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h @@ -99,7 +99,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h index af13b30022e6f..115f0785fc212 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h @@ -44,7 +44,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { 
return processors.tpcTrackers; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h index ffb13dbbb0607..8a0d8089ad1f5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h @@ -49,7 +49,7 @@ class GPUTPCCFChargeMapFiller : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h index 0440121175f3a..1e1b94cd43b74 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h @@ -45,7 +45,7 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h index 928a23db7c025..ff8820efc1d69 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h @@ -51,7 +51,7 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index 5413878421884..daacbc0994295 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ 
b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -300,7 +300,7 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread( { const CfFragment& fragment = clusterer.mPmemory->fragment; - if CONSTEXPR (TPCZSHDRV2::TIGHTLY_PACKED_V3) { + if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) { uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0; @@ -396,7 +396,7 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTBMultiThread( uint32_t adc = 0; - if CONSTEXPR (TPCZSHDRV2::TIGHTLY_PACKED_V3) { + if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) { // Try to access adcData with 4 byte reads instead of 1 byte. // You'd think this would improve performace, but it's actually slower... @@ -668,7 +668,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTB( [[maybe_unused]] const uint8_t* nextPage) { - if CONSTEXPR (DecodeInParallel) { + if constexpr (DecodeInParallel) { return DecodeTBMultiThread(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage); } else { uint16_t nSamplesWritten = 0; @@ -693,7 +693,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( [[maybe_unused]] const uint8_t* nextPage) { #define MAYBE_PAGE_OVERFLOW(pagePtr) \ - if CONSTEXPR (PayloadExtendsToNextPage) { \ + if constexpr (PayloadExtendsToNextPage) { \ if (pagePtr >= payloadEnd && pagePtr < nextPage) { \ ptrdiff_t diff = pagePtr - payloadEnd; \ pagePtr = nextPage; \ @@ -840,7 +840,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( [[maybe_unused]] const uint8_t* nextPage) { #define MAYBE_PAGE_OVERFLOW(pagePtr) \ - if CONSTEXPR (PayloadExtendsToNextPage) { \ + if constexpr (PayloadExtendsToNextPage) { \ if (pagePtr >= payloadEnd && pagePtr < nextPage) { \ ptrdiff_t diff = pagePtr - payloadEnd; \ pagePtr = nextPage; \ diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h index 3d5f4dd4380d4..bf34b78227f19 100644 --- 
a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h @@ -55,7 +55,7 @@ class GPUTPCCFDecodeZS : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } @@ -76,7 +76,7 @@ class GPUTPCCFDecodeZSLinkBase : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h index f3f572646751a..0fb27c07e2825 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h @@ -44,7 +44,7 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h index 681c3c9b5c380..da486741ea62c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h @@ -34,7 +34,7 @@ class GPUTPCCFGather : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h index bb1473ec832df..b1c266cf5fed1 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h +++ 
b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h @@ -46,7 +46,7 @@ class GPUTPCCFMCLabelFlattener : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h index fe518a3a96d39..41f463fd4fe89 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h @@ -48,7 +48,7 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h index c0a263f61eaa6..4aa3574abd3ff 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h @@ -44,7 +44,7 @@ class GPUTPCCFPeakFinder : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h index 2ea7c8012e53f..8b2206da3088c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h @@ -46,7 +46,7 @@ class GPUTPCCFStreamCompaction : public GPUKernelTemplate } #endif - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUDataTypes::RecoStep::TPCClusterFinding; } diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h index cf743d059b3f7..5bfe05de0c631 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h @@ -24,7 +24,7 @@ namespace gpu class GPUTPCConvertKernel : public GPUKernelTemplate { public: - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCConversion; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCConversion; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors); }; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h index 8cde81cec9626..49a6178dbbff1 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h @@ -175,7 +175,7 @@ class GPUTRDGeometry GPUd() int32_t GetRowMax(int32_t layer, int32_t stack, int32_t /* sector */) const { return 0; } GPUd() bool ChamberInGeometry(int32_t det) const { return false; } - static CONSTEXPR const int32_t kNstack = 0; + static constexpr const int32_t kNstack = 0; }; } // namespace gpu } // namespace GPUCA_NAMESPACE diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 96a5547ad28de..d688e2957846d 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -194,7 +194,7 @@ class GPUTRDTracker_t : public GPUProcessor float mAngleToDyC; // parameterization for conversion track angle -> tracklet deflection /// ---- end error parametrization ---- bool mDebugOutput; // store debug output - static CONSTEXPR const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become 
obsolete when tracklet conversion is done outside of the tracker + static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker float mMaxEta; // TPC tracks with higher eta are ignored float mRoadZ; // in z, a constant search road is used float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx index a69bb9394f735..336e54e9efc67 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx @@ -27,10 +27,7 @@ GPUdii() void GPUTRDTrackerKernels::Thread(int32_t nBlocks, int32_t nThreads, in { auto* trdTracker = &processors.getTRDTracker(); #ifndef GPUCA_GPUCODE_DEVICE -#if defined(__cplusplus) && __cplusplus >= 201703L - if constexpr (std::is_same_v) -#endif - { + if constexpr (std::is_same_v) { if (externalInstance) { trdTracker = externalInstance; } diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 79e996ab79c71..4430ce850fb14 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -28,7 +28,7 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate enum K { defaultKernel = 0, gpuVersion = 0, o2Version = 1 }; - GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TRDTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TRDTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; diff --git a/GPU/GPUTracking/utils/qconfigrtc.h b/GPU/GPUTracking/utils/qconfigrtc.h index 
54114cb3846f7..97365a6412c5d 100644 --- a/GPU/GPUTracking/utils/qconfigrtc.h +++ b/GPU/GPUTracking/utils/qconfigrtc.h @@ -30,15 +30,11 @@ template static std::string qConfigPrintRtc(const T& tSrc, bool useConstexpr) { -#if defined(__cplusplus) && __cplusplus >= 201703L std::stringstream out; #define QCONFIG_PRINT_RTC #include "qconfig.h" #undef QCONFIG_PRINT_RTC return out.str(); -#else - throw std::runtime_error("not supported"); -#endif } #define QCONFIG_CONVERT_RTC diff --git a/GPU/GPUTracking/utils/strtag.h b/GPU/GPUTracking/utils/strtag.h index 69e79de004779..24c527ca11a8c 100644 --- a/GPU/GPUTracking/utils/strtag.h +++ b/GPU/GPUTracking/utils/strtag.h @@ -19,11 +19,7 @@ #include template -#if defined(__cplusplus) && __cplusplus >= 201402L -constexpr -#endif - T - qStr2Tag(const char* str) +constexpr T qStr2Tag(const char* str) { if (strlen(str) != sizeof(T)) { throw std::runtime_error("Invalid tag length"); diff --git a/GPU/TPCFastTransformation/Spline2D.h b/GPU/TPCFastTransformation/Spline2D.h index ccd68b029c0f1..64c1b487987fc 100644 --- a/GPU/TPCFastTransformation/Spline2D.h +++ b/GPU/TPCFastTransformation/Spline2D.h @@ -22,7 +22,7 @@ #include "FlatObject.h" #include "GPUCommonDef.h" -#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) #include #include #endif diff --git a/GPU/TPCFastTransformation/Spline2DSpec.h b/GPU/TPCFastTransformation/Spline2DSpec.h index ab8c3cb39754d..b626df3fa7e51 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.h +++ b/GPU/TPCFastTransformation/Spline2DSpec.h @@ -22,7 +22,7 @@ #include "GPUCommonDef.h" #include "SplineUtil.h" -#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) #include #include #endif diff --git 
a/GPU/TPCFastTransformation/SplineSpec.h b/GPU/TPCFastTransformation/SplineSpec.h index d4e64b8dad1f6..f9d3ec0613f64 100644 --- a/GPU/TPCFastTransformation/SplineSpec.h +++ b/GPU/TPCFastTransformation/SplineSpec.h @@ -22,7 +22,7 @@ #include "GPUCommonDef.h" #include "SplineUtil.h" -#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) #include #include #endif diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h index 99550cc12219f..026338b3380fa 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h @@ -21,7 +21,7 @@ #include "FlatObject.h" #include "GPUCommonDef.h" -#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) #include #include #endif @@ -331,7 +331,7 @@ GPUdi() void IrregularSpline2D3D::getSplineVec(const float* correctedData, float // Same as getSpline, but using vectorized calculation. // \param correctedData should be at least 128-bit aligned -#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) && defined(__cplusplus) && __cplusplus >= 201703L +#if !defined(__ROOTCLING__) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_NO_VC) const IrregularSpline1D& gridU = getGridU(); const IrregularSpline1D& gridV = getGridV(); int32_t nu = gridU.getNumberOfKnots(); From e0825624103f968d23a4b9ef12a430c510c6138c Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Tue, 21 Jan 2025 22:24:46 +0100 Subject: [PATCH 0005/1914] GPU: Fix compilation without Vc. 
--- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 9a890202524f0..9b249f9ef77a6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -132,7 +132,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThread maxConsecCharges[localpad] = CAMath::Max(maxConsecCharges[localpad], consecCharges[localpad]); const Charge unpackedCharge = Charge(packedCharge) / Charge(1 << PackedCharge::DecimalBits); - maxCharge[localPadId] = CAMath::Max(maxCharge[localPad], unpackedCharge); + maxCharge[localpad] = CAMath::Max(maxCharge[localpad], unpackedCharge); } else { consecCharges[localpad] = 0; } From 7b2c0211c203a299025626daf9c19386dc164db7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 21 Jan 2025 16:04:28 +0100 Subject: [PATCH 0006/1914] Fix GPUTPCGeometry LinearPad2Y/Y2Pad methods, define biased ones in GPUTPCCompressionTrackModel --- .../DataCompression/GPUTPCCompressionKernels.cxx | 4 ++-- .../DataCompression/GPUTPCCompressionTrackModel.h | 12 ++++++++++++ .../DataCompression/TPCClusterDecompressionCore.inc | 6 +++--- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 8 ++++++++ 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 8f28cbe5563af..27d7058bd8fc2 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -69,7 +69,7 @@ GPUdii() void GPUTPCCompressionKernels::Threadclusters[hit.slice][hit.row][hit.num - clusters->clusterOffset[hit.slice][hit.row]]; float x = param.tpcGeometry.Row2X(hit.row); - float y = 
param.tpcGeometry.LinearPad2Y(hit.slice, hit.row, orgCl.getPad()); + float y = track.LinearPad2Y(hit.slice, orgCl.getPad(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)); float z = param.tpcGeometry.LinearTime2Z(hit.slice, orgCl.getTime()); if (nClustersStored) { if ((hit.slice < GPUCA_NSLICES) ^ (lastSlice < GPUCA_NSLICES)) { @@ -115,7 +115,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread= GPUCA_NSLICES / 2) ? -u : u; + } + + GPUd() float LinearY2Pad(int32_t slice, float y, float padWidth, int8_t npads) const + { + const float u = (slice >= GPUCA_NSLICES / 2) ? -y : y; + return u / padWidth + 0.5f * npads; + } + #endif protected: diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc index 73352182328d5..6ec5b6bfdfe6d 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc @@ -113,7 +113,7 @@ class TPCClusterDecompressionCore timeTmp |= 0xFF000000; } time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset))); - float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y()))); + float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(slice, track.Y(), param.tpcGeometry.PadWidth(row), param.tpcGeometry.NPads(row)))); pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad); time = time & 0xFFFFFF; pad = (uint16_t)pad; @@ -136,7 +136,7 @@ class TPCClusterDecompressionCore pad = cmprClusters.padA[trackIndex]; } const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...); - float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad()); + float y = track.LinearPad2Y(slice, 
cluster.getPad(), param.tpcGeometry.PadWidth(row), param.tpcGeometry.NPads(row)); float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); if (clusterIndex == 0) { zOffset = z; @@ -187,4 +187,4 @@ class TPCClusterDecompressionCore }; } // namespace GPUCA_NAMESPACE::gpu -#endif \ No newline at end of file +#endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index b77961c11c0d4..5d43667f6e92f 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -114,7 +114,11 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() float LinearPad2Y(int32_t slice, int32_t row, float pad) const { +#ifdef GPUCA_TPC_GEOMETRY_O2 + const float u = (pad - 0.5f * (mNPads[row] - 1)) * PadWidth(row); +#else const float u = (pad - 0.5f * mNPads[row]) * PadWidth(row); +#endif return (slice >= GPUCA_NSLICES / 2) ? -u : u; } @@ -127,7 +131,11 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() float LinearY2Pad(int32_t slice, int32_t row, float y) const { const float u = (slice >= GPUCA_NSLICES / 2) ? -y : y; +#ifdef GPUCA_TPC_GEOMETRY_O2 + return u / PadWidth(row) + 0.5f * (mNPads[row] - 1); +#else return u / PadWidth(row) + 0.5f * mNPads[row]; +#endif } GPUd() static float LinearZ2Time(int32_t slice, float z) From b8be78ac66e5bd1c11a2a00d5c8f60c769d2eed3 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 22 Jan 2025 13:09:26 +0100 Subject: [PATCH 0007/1914] DPL: improve getIndexFromLabel Avoids extra string creation. 
--- Framework/Core/include/Framework/ASoA.h | 2 +- Framework/Core/src/ASoA.cxx | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index e09b2ffd69df1..3e900b51a418d 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1626,7 +1626,7 @@ auto select(T const& t, framework::expressions::Filter const& f) return Filtered({t.asArrowTable()}, selectionToVector(framework::expressions::createSelection(t.asArrowTable(), f))); } -arrow::ChunkedArray* getIndexFromLabel(arrow::Table* table, const char* label); +arrow::ChunkedArray* getIndexFromLabel(arrow::Table* table, std::string_view label); template consteval auto base_iter(framework::pack&&) -> TableIterator diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 38a6750a90dbe..a37d0f33891e7 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -123,10 +123,19 @@ std::shared_ptr ArrowHelpers::concatTables(std::vectorschema()->fields().begin(), table->schema()->fields().end(), [&](std::shared_ptr const& f) { - return o2::framework::strToUpper(label) == o2::framework::strToUpper(std::string{f->name()}); + auto caseInsensitiveCompare = [](const std::string_view& str1, const std::string& str2) { + return std::ranges::equal( + str1, str2, + [](char c1, char c2) { + return std::tolower(static_cast(c1)) == + std::tolower(static_cast(c2)); + }); + }; + + return caseInsensitiveCompare(label, f->name()); }); if (field == table->schema()->fields().end()) { o2::framework::throw_error(o2::framework::runtime_error_f("Unable to find column with label %s", label)); From 5efb6e2b38a178b14f2102d667a74fed2c249b3a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 23 Jan 2025 09:18:18 +0100 Subject: [PATCH 0008/1914] DPL Analysis: avoid base_of_template in concepts for column type identification (#13889) --- 
Framework/Core/include/Framework/ASoA.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 3e900b51a418d..8af872a64176d 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -786,27 +786,31 @@ struct Index : o2::soa::IndexColumn> { static constexpr const char* mLabel = "Index"; using type = int64_t; - using bindings_t = typename o2::framework::pack<>; - std::tuple<> boundIterators; std::tuple rowIndices; /// The offsets within larger tables. Currently only /// one level of nesting is supported. std::tuple rowOffsets; }; -template -concept is_indexing_column = requires { - [](o2::soa::Index*) {}(std::declval()); +template +concept is_indexing_column = requires(C& c) { + c.rowIndices; + c.rowOffsets; }; -template -concept is_dynamic_column = framework::base_of_template; +template +concept is_dynamic_column = requires(C& c) { + c.boundIterators; +}; + +template +concept is_marker_column = requires { &C::mark; }; template using is_dynamic_t = std::conditional_t, std::true_type, std::false_type>; template -concept is_column = framework::base_of_template || is_dynamic_column || is_indexing_column || framework::base_of_template; +concept is_column = is_persistent_column || is_dynamic_column || is_indexing_column || is_marker_column; template using is_indexing_t = std::conditional_t, std::true_type, std::false_type>; From 0f1ed00d0b131f31fdc6c58d4a4dcecf2f987e82 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 23 Jan 2025 10:08:41 +0100 Subject: [PATCH 0009/1914] DPL Analysis: Refactor group slicer (#13839) --- .../Core/include/Framework/GroupSlicer.h | 201 ++++++++++-------- 1 file changed, 116 insertions(+), 85 deletions(-) diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index 8fd96fb00156c..64b1d863c59e6 100644 --- 
a/Framework/Core/include/Framework/GroupSlicer.h +++ b/Framework/Core/include/Framework/GroupSlicer.h @@ -22,7 +22,6 @@ namespace o2::framework { - template struct GroupSlicer { using grouping_t = std::decay_t; @@ -46,34 +45,41 @@ struct GroupSlicer { GroupSlicerIterator& operator=(GroupSlicerIterator&&) = default; template + auto splittingFunction(T&&) + { + } + + template + requires(o2::soa::relatedByIndex, std::decay_t>()) auto splittingFunction(T&& table) { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); - if constexpr (o2::soa::relatedByIndex, std::decay_t>()) { - auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = std::make_pair(binding, mIndexColumnName); - if constexpr (!o2::soa::is_smallgroups>) { - if (table.size() == 0) { - return; - } - sliceInfos[index] = mSlices->getCacheFor(bk); - } else { - if (table.tableSize() == 0) { - return; - } - sliceInfosUnsorted[index] = mSlices->getCacheUnsortedFor(bk); + auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); + auto bk = std::make_pair(binding, mIndexColumnName); + if constexpr (!o2::soa::is_smallgroups>) { + if (table.size() == 0) { + return; } + sliceInfos[index] = mSlices->getCacheFor(bk); + } else { + if (table.tableSize() == 0) { + return; + } + sliceInfosUnsorted[index] = mSlices->getCacheUnsortedFor(bk); } } template + auto extractingFunction(T&&) + { + } + + template auto extractingFunction(T&& table) { - if constexpr (soa::is_filtered_table>) { - constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); - selections[index] = &table.getSelectedRows(); - starts[index] = selections[index]->begin(); - } + constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); + selections[index] = &table.getSelectedRows(); + starts[index] = selections[index]->begin(); } GroupSlicerIterator(G& gt, std::tuple& at, ArrowTableSlicingCache& slices) @@ -151,83 +157,108 @@ struct GroupSlicer { return 
std::make_tuple(prepareArgument()...); } - template + template + requires(o2::soa::relatedByIndex, std::decay_t>() && soa::is_filtered_table) auto prepareArgument() { constexpr auto index = framework::has_type_at_v(associated_pack_t{}); auto& originalTable = std::get(*mAt); - - if constexpr (o2::soa::relatedByIndex, std::decay_t>()) { - uint64_t pos; - if constexpr (soa::is_filtered_table>) { - pos = groupSelection[position]; - } else { - pos = position; + uint64_t pos; + if constexpr (soa::is_filtered_table>) { + pos = groupSelection[position]; + } else { + pos = position; + } + // generic split + auto selection = sliceInfosUnsorted[index].getSliceFor(pos); + // intersect selections + o2::soa::SelectionVector s; + if (selections[index]->empty()) { + if (!selection.empty()) { + std::copy(selection.begin(), selection.end(), std::back_inserter(s)); } - - if constexpr (!o2::soa::is_smallgroups>) { - // optimized split - if (originalTable.size() == 0) { - return originalTable; - } - auto oc = sliceInfos[index].getSliceFor(pos); - uint64_t offset = oc.first; - auto count = oc.second; - if constexpr (soa::is_filtered_table>) { - auto groupedElementsTable = originalTable.asArrowTable()->Slice(offset, count); - if (count == 0) { - return std::decay_t{{groupedElementsTable}, soa::SelectionVector{}}; - } - - // for each grouping element we need to slice the selection vector - auto start_iterator = std::lower_bound(starts[index], selections[index]->end(), offset); - auto stop_iterator = std::lower_bound(start_iterator, selections[index]->end(), offset + count); - starts[index] = stop_iterator; - soa::SelectionVector slicedSelection{start_iterator, stop_iterator}; - std::transform(slicedSelection.begin(), slicedSelection.end(), slicedSelection.begin(), - [&offset](int64_t idx) { - return idx - static_cast(offset); - }); - - std::decay_t typedTable{{groupedElementsTable}, std::move(slicedSelection), offset}; - typedTable.bindInternalIndicesTo(&originalTable); - return 
typedTable; - - } else { - auto groupedElementsTable = originalTable.rawSlice(offset, offset + count - 1); - groupedElementsTable.bindInternalIndicesTo(&originalTable); - return groupedElementsTable; - } - } else { - // generic split - if constexpr (soa::is_filtered_table>) { - auto selection = sliceInfosUnsorted[index].getSliceFor(pos); - // intersect selections - o2::soa::SelectionVector s; - if (selections[index]->empty()) { - if (!selection.empty()) { - std::copy(selection.begin(), selection.end(), std::back_inserter(s)); - } - } else { - if (!selection.empty()) { - if constexpr (std::decay_t::applyFilters) { - std::set_intersection(selection.begin(), selection.end(), selections[index]->begin(), selections[index]->end(), std::back_inserter(s)); - } else { - std::copy(selection.begin(), selection.end(), std::back_inserter(s)); - } - } - } - std::decay_t typedTable{{originalTable.asArrowTable()}, std::move(s)}; - typedTable.bindInternalIndicesTo(&originalTable); - return typedTable; + } else { + if (!selection.empty()) { + if constexpr (std::decay_t::applyFilters) { + std::set_intersection(selection.begin(), selection.end(), selections[index]->begin(), selections[index]->end(), std::back_inserter(s)); } else { - throw runtime_error("Unsorted grouped table needs to be used with soa::SmallGroups<>"); + std::copy(selection.begin(), selection.end(), std::back_inserter(s)); } } + } + std::decay_t typedTable{{originalTable.asArrowTable()}, std::move(s)}; + typedTable.bindInternalIndicesTo(&originalTable); + return typedTable; + } + + template + requires(o2::soa::relatedByIndex, std::decay_t>() && !soa::is_smallgroups) + auto prepareArgument() + { + constexpr auto index = framework::has_type_at_v(associated_pack_t{}); + auto& originalTable = std::get(*mAt); + if (originalTable.size() == 0) { + return originalTable; + } + uint64_t pos; + if constexpr (soa::is_filtered_table>) { + pos = groupSelection[position]; } else { - static_assert(!o2::soa::is_smallgroups>, 
"SmallGroups used with a table that is not related by index to the gouping table"); + pos = position; + } + // optimized split + auto oc = sliceInfos[index].getSliceFor(pos); + uint64_t offset = oc.first; + auto count = oc.second; + auto groupedElementsTable = originalTable.asArrowTable()->Slice(offset, count); + if (count == 0) { + return std::decay_t{{groupedElementsTable}, soa::SelectionVector{}}; + } + + // for each grouping element we need to slice the selection vector + auto start_iterator = std::lower_bound(starts[index], selections[index]->end(), offset); + auto stop_iterator = std::lower_bound(start_iterator, selections[index]->end(), offset + count); + starts[index] = stop_iterator; + soa::SelectionVector slicedSelection{start_iterator, stop_iterator}; + std::transform(slicedSelection.begin(), slicedSelection.end(), slicedSelection.begin(), + [&offset](int64_t idx) { + return idx - static_cast(offset); + }); + + std::decay_t typedTable{{groupedElementsTable}, std::move(slicedSelection), offset}; + typedTable.bindInternalIndicesTo(&originalTable); + return typedTable; + } + + template + requires(o2::soa::relatedByIndex, std::decay_t>() && !soa::is_smallgroups && !soa::is_filtered_table) + auto prepareArgument() + { + constexpr auto index = framework::has_type_at_v(associated_pack_t{}); + auto& originalTable = std::get(*mAt); + if (originalTable.size() == 0) { return originalTable; } + uint64_t pos; + if constexpr (soa::is_filtered_table>) { + pos = groupSelection[position]; + } else { + pos = position; + } + // optimized split + auto oc = sliceInfos[index].getSliceFor(pos); + uint64_t offset = oc.first; + auto count = oc.second; + auto groupedElementsTable = originalTable.rawSlice(offset, offset + count - 1); + groupedElementsTable.bindInternalIndicesTo(&originalTable); + return groupedElementsTable; + } + + template + requires(!o2::soa::relatedByIndex, std::decay_t>() && !soa::is_smallgroups) + auto prepareArgument() + { + return std::get(*mAt); } 
std::string mIndexColumnName; From dacf73581a5da48278cb03043bc85dbd1aef1e7d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 23 Jan 2025 10:12:18 +0100 Subject: [PATCH 0010/1914] DPL Analysis: refactor inputsFromArgs, update concepts (#13847) --- .../Core/include/Framework/AnalysisTask.h | 122 ++++++++++++------ .../Core/include/Framework/Configurable.h | 4 + 2 files changed, 83 insertions(+), 43 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index cf0bba20e74ea..5d18a31dcb955 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -59,7 +59,7 @@ template static constexpr bool is_enumeration_v> = true; template -concept is_enumeration = is_enumeration_v; +concept is_enumeration = is_enumeration_v>; // Helper struct which builds a DataProcessorSpec from // the contents of an AnalysisTask... @@ -140,48 +140,84 @@ struct AnalysisDataProcessorBuilder { DataSpecUtils::updateInputList(inputs, InputSpec{o2::aod::label(), o2::aod::origin(), aod::description(o2::aod::signature()), R.version, Lifetime::Timeframe, inputMetadata}); } - template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) requires(std::is_lvalue_reference_v&&...) 
+ /// helpers to append expression information for a single argument + template + requires(!soa::is_filtered_table>) + static void addExpression(int, uint32_t, std::vector&) { - // update grouping cache - if constexpr (soa::is_iterator>>>) { - addGroupingCandidates(bk, bku); - } + } + + template + static void addExpression(int ai, uint32_t hash, std::vector& eInfos) + { + auto fields = soa::createFieldsFromColumns(typename std::decay_t::persistent_columns_t{}); + eInfos.emplace_back(ai, hash, std::decay_t::hashes(), std::make_shared(fields)); + } + + template + static void addExpression(int ai, uint32_t hash, std::vector& eInfos) + { + addExpression::parent_t>(ai, hash, eInfos); + } + + /// helpers to append InputSpec for a single argument + template + static void addInput(const char* name, bool value, std::vector& inputs) + { + [&name, &value, &inputs] refs, size_t... Is>(std::index_sequence) mutable { + (addOriginalRef(name, value, inputs), ...); + }.template operator()::originals>(std::make_index_sequence::originals.size()>()); + } + + template + static void addInput(const char* name, bool value, std::vector& inputs) + { + addInput::parent_t>(name, value, inputs); + } - // populate input list and expression infos + /// helper to append the inputs and expression information for normalized arguments + template + static void addInputsAndExpressions(uint32_t hash, const char* name, bool value, std::vector& inputs, std::vector& eInfos) + { int ai = -1; - constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); - ([&name, &value, &eInfos, &inputs, &hash, &ai]() mutable { + ([&ai, &hash, &eInfos, &name, &value, &inputs]() mutable { ++ai; - using T = std::decay_t; - if constexpr (is_enumeration) { - std::vector inputMetadata; - // FIXME: for the moment we do not support begin, end and step. 
- DataSpecUtils::updateInputList(inputs, InputSpec{"enumeration", "DPL", "ENUM", 0, Lifetime::Enumeration, inputMetadata}); - } else { - // populate expression infos - if constexpr (soa::is_filtered_table) { - auto fields = soa::createFieldsFromColumns(typename T::persistent_columns_t{}); - eInfos.emplace_back(ai, hash, T::hashes(), std::make_shared(fields)); - } else if constexpr (soa::is_filtered_iterator) { - auto fields = soa::createFieldsFromColumns(typename T::parent_t::persistent_columns_t{}); - eInfos.emplace_back(ai, hash, T::parent_t::hashes(), std::make_shared(fields)); - } - // add inputs from the originals - auto adder = [&name, &value, &inputs] refs, size_t... Is>(std::index_sequence) mutable { - (addOriginalRef(name, value, inputs), ...); - }; - if constexpr (soa::is_table || soa::is_filtered_table) { - adder.template operator()(std::make_index_sequence()); - } else if constexpr (soa::is_iterator || soa::is_filtered_iterator) { - adder.template operator()(std::make_index_sequence()); - } - } - return true; - }() && + using T = std::decay_t; + addExpression(ai, hash, eInfos); + addInput(name, value, inputs); + }(), ...); } + /// helper to parse the process arguments + /// 1. enumeration (must be the only argument) + template + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) + { + std::vector inputMetadata; + // FIXME: for the moment we do not support begin, end and step. + DataSpecUtils::updateInputList(inputs, InputSpec{"enumeration", "DPL", "ENUM", 0, Lifetime::Enumeration, inputMetadata}); + } + + /// 2. 
grouping case - 1st argument is an iterator + template + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) + requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) + { + addGroupingCandidates(bk, bku); + constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); + addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); + } + + /// 3. generic case + template + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) + requires(std::is_lvalue_reference_v && ...) + { + constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); + addInputsAndExpressions(hash, name, value, inputs, eInfos); + } + template static auto extractTableFromRecord(InputRecord& record) { @@ -498,19 +534,19 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) homogeneous_apply_refs([&inputs](auto& x) { return ConditionManager>::appendCondition(inputs, x); }, *task.get()); /// parse process functions defined by corresponding configurables - if constexpr (requires { AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); }) { + if constexpr (requires { &T::process; }) { AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); } homogeneous_apply_refs( - [name = name_str, &expressionInfos, &inputs, &bindingsKeys, &bindingsKeysUnsorted](auto& x) { - using D = std::decay_t; - if constexpr (base_of_template) { + overloaded{ + [name = name_str, &expressionInfos, &inputs, &bindingsKeys, &bindingsKeysUnsorted](framework::is_process_configurable auto& x) mutable { // this pushes (argumentIndex,processHash,schemaPtr,nullptr) into expressionInfos for arguments that are Filtered/filtered_iterators AnalysisDataProcessorBuilder::inputsFromArgs(x.process, (name + "/" + x.name).c_str(), x.value, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); return true; - } - return false; - }, + }, + [](auto&) { + return false; + }}, *task.get()); // add preslice declarations to slicing cache definition diff --git a/Framework/Core/include/Framework/Configurable.h b/Framework/Core/include/Framework/Configurable.h index f1167adb5ebdd..88e50cf3c7c26 100644 --- a/Framework/Core/include/Framework/Configurable.h +++ b/Framework/Core/include/Framework/Configurable.h @@ -11,6 +11,7 @@ #ifndef O2_FRAMEWORK_CONFIGURABLE_H_ #define O2_FRAMEWORK_CONFIGURABLE_H_ #include "Framework/ConfigurableKinds.h" +#include "Framework/Traits.h" #include #include namespace o2::framework @@ -95,6 +96,9 @@ struct ProcessConfigurable : Configurable { (As...); }; +template +concept is_process_configurable = base_of_template; + #define PROCESS_SWITCH(_Class_, _Name_, _Help_, _Default_) \ 
decltype(ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}) do##_Name_ = ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}; #define PROCESS_SWITCH_FULL(_Class_, _Method_, _Name_, _Help_, _Default_) \ From 611600b358eb3719fe6dde1fba47e930211c2661 Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Thu, 23 Jan 2025 10:57:15 +0100 Subject: [PATCH 0011/1914] Trigger implementation in hybrid generator (#13873) * Trigger implementation in hybrid generator * Added hybrid trigger example --- Generators/include/Generators/Generator.h | 2 +- .../include/Generators/GeneratorHybrid.h | 5 + Generators/src/GeneratorFactory.cxx | 77 ++++++------- Generators/src/GeneratorHybrid.cxx | 103 +++++++++++++++++- run/SimExamples/Hybrid_trigger/README.md | 16 +++ .../Hybrid_trigger/hybridconfig.json | 32 ++++++ run/SimExamples/Hybrid_trigger/runo2sim.sh | 60 ++++++++++ run/SimExamples/Hybrid_trigger/trigger.macro | 45 ++++++++ 8 files changed, 299 insertions(+), 41 deletions(-) create mode 100644 run/SimExamples/Hybrid_trigger/README.md create mode 100644 run/SimExamples/Hybrid_trigger/hybridconfig.json create mode 100755 run/SimExamples/Hybrid_trigger/runo2sim.sh create mode 100644 run/SimExamples/Hybrid_trigger/trigger.macro diff --git a/Generators/include/Generators/Generator.h b/Generators/include/Generators/Generator.h index 6da1a0e837cc8..7181bcbc6682d 100644 --- a/Generators/include/Generators/Generator.h +++ b/Generators/include/Generators/Generator.h @@ -74,6 +74,7 @@ class Generator : public FairGenerator virtual Bool_t generateEvent() = 0; // generates event (in structure internal to generator) virtual Bool_t importParticles() = 0; // fills the mParticles vector (transfer from generator state) virtual void updateHeader(o2::dataformats::MCEventHeader* eventHeader) {}; + Bool_t triggerEvent(); /** setters **/ void setMomentumUnit(double val) { mMomentumUnit = val; }; @@ -106,7 +107,6 @@ class Generator : public FairGenerator /** internal methods 
**/ Bool_t addTracks(FairPrimaryGenerator* primGen); Bool_t boostEvent(); - Bool_t triggerEvent(); /** to handle cocktail constituents **/ void addSubGenerator(int subGeneratorId, std::string const& subGeneratorDescription); diff --git a/Generators/include/Generators/GeneratorHybrid.h b/Generators/include/Generators/GeneratorHybrid.h index b0993c4fd82e2..21f669776d944 100644 --- a/Generators/include/Generators/GeneratorHybrid.h +++ b/Generators/include/Generators/GeneratorHybrid.h @@ -103,6 +103,11 @@ class GeneratorHybrid : public Generator bool mCocktailMode = false; std::vector> mGroups; + // Trigger configuration + std::vector mTriggerModes; // trigger mode for each generator + std::vector> mTriggerMacros; // trigger macros for each generator (multiple triggers for each generator possible) + std::vector> mTriggerFuncs; // trigger functions for each generator (multiple triggers for each generator possible) + // Create a task arena with a specified number of threads std::thread mTBBTaskPoolRunner; tbb::concurrent_bounded_queue mInputTaskQueue; diff --git a/Generators/src/GeneratorFactory.cxx b/Generators/src/GeneratorFactory.cxx index dc2f4f2159b4d..5db1354a12908 100644 --- a/Generators/src/GeneratorFactory.cxx +++ b/Generators/src/GeneratorFactory.cxx @@ -293,48 +293,51 @@ void GeneratorFactory::setPrimaryGenerator(o2::conf::SimConfig const& conf, Fair } /** triggers **/ + // to be set via GeneratorFactory only if generator is not hybrid + // external settings via JSON are supported in the latter Trigger trigger = nullptr; DeepTrigger deeptrigger = nullptr; - - auto trgconfig = conf.getTrigger(); - if (trgconfig.empty()) { - return; - } else if (trgconfig.compare("particle") == 0) { - trigger = TriggerParticle(TriggerParticleParam::Instance()); - } else if (trgconfig.compare("external") == 0) { - // external trigger via configuration macro - auto& params = TriggerExternalParam::Instance(); - LOG(info) << "Setting up external trigger with following parameters"; - 
LOG(info) << params; - auto external_trigger_filename = params.fileName; - auto external_trigger_func = params.funcName; - trigger = o2::conf::GetFromMacro(external_trigger_filename, external_trigger_func, "o2::eventgen::Trigger", "trigger"); - if (!trigger) { - LOG(info) << "Trying to retrieve a \'o2::eventgen::DeepTrigger\' type" << std::endl; - deeptrigger = o2::conf::GetFromMacro(external_trigger_filename, external_trigger_func, "o2::eventgen::DeepTrigger", "deeptrigger"); - } - if (!trigger && !deeptrigger) { - LOG(fatal) << "Failed to retrieve \'external trigger\': problem with configuration "; - } - } else { - LOG(fatal) << "Invalid trigger"; - } - - /** add trigger to generators **/ - auto generators = primGen->GetListOfGenerators(); - for (int igen = 0; igen < generators->GetEntries(); ++igen) { - auto generator = dynamic_cast(generators->At(igen)); - if (!generator) { - LOG(fatal) << "request to add a trigger to an unsupported generator"; + if (!(genconfig.compare("hybrid") == 0)) { + auto trgconfig = conf.getTrigger(); + if (trgconfig.empty()) { return; + } else if (trgconfig.compare("particle") == 0) { + trigger = TriggerParticle(TriggerParticleParam::Instance()); + } else if (trgconfig.compare("external") == 0) { + // external trigger via configuration macro + auto& params = TriggerExternalParam::Instance(); + LOG(info) << "Setting up external trigger with following parameters"; + LOG(info) << params; + auto external_trigger_filename = params.fileName; + auto external_trigger_func = params.funcName; + trigger = o2::conf::GetFromMacro(external_trigger_filename, external_trigger_func, "o2::eventgen::Trigger", "trigger"); + if (!trigger) { + LOG(info) << "Trying to retrieve a \'o2::eventgen::DeepTrigger\' type" << std::endl; + deeptrigger = o2::conf::GetFromMacro(external_trigger_filename, external_trigger_func, "o2::eventgen::DeepTrigger", "deeptrigger"); + } + if (!trigger && !deeptrigger) { + LOG(fatal) << "Failed to retrieve \'external trigger\': 
problem with configuration "; + } + } else { + LOG(fatal) << "Invalid trigger"; } - generator->setTriggerMode(o2::eventgen::Generator::kTriggerOR); - if (trigger) { - generator->addTrigger(trigger); - } - if (deeptrigger) { - generator->addDeepTrigger(deeptrigger); + + /** add trigger to generators **/ + auto generators = primGen->GetListOfGenerators(); + for (int igen = 0; igen < generators->GetEntries(); ++igen) { + auto generator = dynamic_cast(generators->At(igen)); + if (!generator) { + LOG(fatal) << "request to add a trigger to an unsupported generator"; + return; + } + generator->setTriggerMode(o2::eventgen::Generator::kTriggerOR); + if (trigger) { + generator->addTrigger(trigger); + } + if (deeptrigger) { + generator->addDeepTrigger(deeptrigger); + } } } } diff --git a/Generators/src/GeneratorHybrid.cxx b/Generators/src/GeneratorHybrid.cxx index af6f2bea03052..03a78eb852eb6 100644 --- a/Generators/src/GeneratorHybrid.cxx +++ b/Generators/src/GeneratorHybrid.cxx @@ -183,6 +183,37 @@ Bool_t GeneratorHybrid::Init() } gens[count]->Init(); // TODO: move this to multi-threaded addSubGenerator(count, gen); + if (mTriggerModes[count] != o2::eventgen::Generator::kTriggerOFF) { + gens[count]->setTriggerMode(mTriggerModes[count]); + LOG(info) << "Setting Trigger mode of generator " << gen << " to: " << mTriggerModes[count]; + o2::eventgen::Trigger trigger = nullptr; + o2::eventgen::DeepTrigger deeptrigger = nullptr; + for (int trg = 0; trg < mTriggerMacros[count].size(); trg++) { + if (mTriggerMacros[count][trg].empty() || mTriggerFuncs[count][trg].empty()) { + continue; + } + LOG(info) << "Setting trigger " << trg << " of generator " << gen << " with following parameters"; + LOG(info) << "Macro filename: " << mTriggerMacros[count][trg]; + LOG(info) << "Function name: " << mTriggerFuncs[count][trg]; + trigger = o2::conf::GetFromMacro(mTriggerMacros[count][trg], mTriggerFuncs[count][trg], "o2::eventgen::Trigger", "trigger"); + if (!trigger) { + LOG(info) << "Trying to 
retrieve a \'o2::eventgen::DeepTrigger\' type"; + deeptrigger = o2::conf::GetFromMacro(mTriggerMacros[count][trg], mTriggerFuncs[count][trg], "o2::eventgen::DeepTrigger", "deeptrigger"); + } + if (!trigger && !deeptrigger) { + LOG(warn) << "Failed to retrieve \'external trigger\': problem with configuration"; + LOG(warn) << "Trigger " << trg << " of generator " << gen << " will not be included"; + continue; + } else { + LOG(info) << "Trigger " << trg << " of generator " << gen << " successfully set"; + } + if (trigger) { + gens[count]->addTrigger(trigger); + } else { + gens[count]->addDeepTrigger(deeptrigger); + } + } + } count++; } if (mRandomize) { @@ -240,9 +271,13 @@ Bool_t GeneratorHybrid::Init() // mGenIsInitialized[task] = true; // } } - generator->clearParticles(); - generator->generateEvent(); - generator->importParticles(); + bool isTriggered = false; + while (!isTriggered) { + generator->clearParticles(); + generator->generateEvent(); + generator->importParticles(); + isTriggered = generator->triggerEvent(); + } LOG(debug) << "eventgen finished for task " << task; if (!mStopFlag) { if (mGenerationMode == GenMode::kParallel) { @@ -450,6 +485,68 @@ Bool_t GeneratorHybrid::confSetter(const auto& gen) mConfigs.push_back(""); } } + if (gen.HasMember("triggers")) { + const auto& trigger = gen["triggers"]; + auto trigger_specs = [this, &trigger]() { + mTriggerMacros.push_back({}); + mTriggerFuncs.push_back({}); + if (trigger.HasMember("specs")) { + for (auto& spec : trigger["specs"].GetArray()) { + if (spec.HasMember("macro")) { + const auto& macro = spec["macro"].GetString(); + if (!(strcmp(macro, "") == 0)) { + mTriggerMacros.back().push_back(macro); + } else { + mTriggerMacros.back().push_back(""); + } + } else { + mTriggerMacros.back().push_back(""); + } + if (spec.HasMember("function")) { + const auto& function = spec["function"].GetString(); + if (!(strcmp(function, "") == 0)) { + mTriggerFuncs.back().push_back(function); + } else { + 
mTriggerFuncs.back().push_back(""); + } + } else { + mTriggerFuncs.back().push_back(""); + } + } + } else { + mTriggerMacros.back().push_back(""); + mTriggerFuncs.back().push_back(""); + } + }; + if (trigger.HasMember("mode")) { + const auto& trmode = trigger["mode"].GetString(); + if (strcmp(trmode, "or") == 0) { + mTriggerModes.push_back(o2::eventgen::Generator::kTriggerOR); + trigger_specs(); + } else if (strcmp(trmode, "and") == 0) { + mTriggerModes.push_back(o2::eventgen::Generator::kTriggerAND); + trigger_specs(); + } else if (strcmp(trmode, "off") == 0) { + mTriggerModes.push_back(o2::eventgen::Generator::kTriggerOFF); + mTriggerMacros.push_back({""}); + mTriggerFuncs.push_back({""}); + } else { + LOG(warn) << "Wrong trigger mode provided for generator " << name << ", keeping trigger OFF"; + mTriggerModes.push_back(o2::eventgen::Generator::kTriggerOFF); + mTriggerMacros.push_back({""}); + mTriggerFuncs.push_back({""}); + } + } else { + LOG(warn) << "No trigger mode provided for generator " << name << ", turning trigger OFF"; + mTriggerModes.push_back(o2::eventgen::Generator::kTriggerOFF); + mTriggerMacros.push_back({""}); + mTriggerFuncs.push_back({""}); + } + } else { + mTriggerModes.push_back(o2::eventgen::Generator::kTriggerOFF); + mTriggerMacros.push_back({""}); + mTriggerFuncs.push_back({""}); + } return true; } diff --git a/run/SimExamples/Hybrid_trigger/README.md b/run/SimExamples/Hybrid_trigger/README.md new file mode 100644 index 0000000000000..21ccde29dece5 --- /dev/null +++ b/run/SimExamples/Hybrid_trigger/README.md @@ -0,0 +1,16 @@ + + +The usage of the Hybrid generator with the o2-sim is presented in this short manual. +All the other generators are implemented as sub-generators and they can be called thanks to a +JSON file, fed to o2-sim via the GeneratorHybrid.configFile parameter. The O2sim package needs to be loaded in order to use this example. 
+ +The example can be run automatically using the runo2sim.sh script, which contains most of the +available generators in O2. The JSON template can be generated using the ${O2DPG_ROOT}/MC/bin/o2_hybrid_gen.py script. To use this example the user can simply copy the entire Hybrid example folder and execute the script after giving it execution permissions (`chmod +x runo2sim.sh`). + +# Files description + +- **runo2sim.sh** → allows to use the hybrid generator example +- **hybridconfig.json** → example JSON file for the hybrid generator configuration +- **example.optns** → options file to be used in EPOS4 implemented as subgenerator in this example (the .optns must be available in the current working directory) \ No newline at end of file diff --git a/run/SimExamples/Hybrid_trigger/hybridconfig.json b/run/SimExamples/Hybrid_trigger/hybridconfig.json new file mode 100644 index 0000000000000..cc565ecec0256 --- /dev/null +++ b/run/SimExamples/Hybrid_trigger/hybridconfig.json @@ -0,0 +1,32 @@ +{ + "generators": [ + { + "cocktail": [ + { + "name": "pythia8hi", + "triggers": { + "mode": "or", + "specs": [ + { + "macro": "${PWD}/trigger.macro", + "function": "trigger_impactb_pythia8(0.,5.)" + } + ] + }, + "config": "" + }, + { + "name": "external", + "config": { + "fileName": "${O2DPG_MC_CONFIG_ROOT}/MC/config/PWGDQ/external/generator/GeneratorPromptCharmonia.C", + "funcName": "GeneratorParamPromptJpsiToElectronEvtGen_pp13TeV()", + "iniFile": "" + } + } + ] + } + ], + "fractions": [ + 1 + ] +} \ No newline at end of file diff --git a/run/SimExamples/Hybrid_trigger/runo2sim.sh b/run/SimExamples/Hybrid_trigger/runo2sim.sh new file mode 100755 index 0000000000000..75880a396e8e5 --- /dev/null +++ b/run/SimExamples/Hybrid_trigger/runo2sim.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# +# Hybrid generator simulation example with triggers and cocktail: +# the simulation is configured using a JSON file (hybridconfig.json in this folder), whose +# template can be generated using the 
script ${O2DPG_ROOT}/MC/bin/o2_hybrid_gen.py. +# Trigger is taken from the trigger.macro and it's a simple impact parameter selection for +# heavy ion collisions +set -x +if [ ! "${O2DPG_ROOT}" ]; then + echo "This needs O2DPG loaded; alienv enter ..." + exit 1 +fi + +[ ! "${O2_ROOT}" ] && echo "Error: This needs O2 loaded" && exit 2 + +NEV=1 +more="" +JOBS=2 + +usage() +{ + cat </dev/stderr + exit 3 + ;; + esac + shift +done + +# Starting simulation with Hybrid generator +${O2_ROOT}/bin/o2-sim --noGeant -j $JOBS --field ccdb --vertexMode kCCDB --run 300000 --configKeyValues "MFTBase.buildAlignment=true;GeneratorHybrid.configFile=$PWD/hybridconfig.json;GeneratorHybrid.randomize=false;${more}" -g hybrid -o genevents --timestamp 1546300800000 --seed 836302859 -n $NEV \ No newline at end of file diff --git a/run/SimExamples/Hybrid_trigger/trigger.macro b/run/SimExamples/Hybrid_trigger/trigger.macro new file mode 100644 index 0000000000000..ad187dcdc7e45 --- /dev/null +++ b/run/SimExamples/Hybrid_trigger/trigger.macro @@ -0,0 +1,45 @@ +#include "Generators/Trigger.h" +#include "TParticle.h" +#include + +// a very simple trigger example, examining generated particles +o2::eventgen::Trigger trigger() +{ + // + return [](const std::vector& particles) -> bool { + std::cout << "Running trigger on event with size " << particles.size() << "\n"; + if (particles.size() > 10000) { + return true; + } + return false; + }; +} + +#include "Pythia8/Pythia.h" +#include "Pythia8/HIInfo.h" +#include +// a deep trigger example, looking into the internal generator state +o2::eventgen::DeepTrigger + trigger_impactb_pythia8(double bmin = 5., double bmax = 10.) 
+{ + return [bmin, bmax](void* interface, std::string name) -> bool { + if (!name.compare("pythia8")) { + auto py8 = reinterpret_cast(interface); +#if PYTHIA_VERSION_INTEGER < 8300 + auto hiinfo = py8->info.hiinfo; +#else + auto hiinfo = py8->info.hiInfo; +#endif + if (!hiinfo) { + LOG(fatal) << "Cannot define impact parameter: is \'pythia8\' running in heavy-ion mode?"; + } + auto b = hiinfo->b(); + auto selected = (b > bmin && b < bmax); + LOG(info) << "Impact parameter = " << b << " fm: " << (selected ? "selected" : "rejected"); + return selected; + } else { + LOG(fatal) << "Cannot define impact parameter for generator interface \'" << name << "\'"; + } + return false; + }; +} From 912f396d51dcdc1fd4075e1ba9c6b4f1a1007068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Tich=C3=A1k?= <53997499+justonedev1@users.noreply.github.com> Date: Thu, 23 Jan 2025 15:21:44 +0100 Subject: [PATCH 0012/1914] added merging of TCanvas to mergers (#13876) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michal Tichák --- Utilities/Mergers/CMakeLists.txt | 2 +- Utilities/Mergers/src/MergerAlgorithm.cxx | 79 ++++++++++++++- Utilities/Mergers/test/test_Algorithm.cxx | 114 +++++++++++++++++++++- 3 files changed, 191 insertions(+), 4 deletions(-) diff --git a/Utilities/Mergers/CMakeLists.txt b/Utilities/Mergers/CMakeLists.txt index 43a22dd395996..0e77e62cccc07 100644 --- a/Utilities/Mergers/CMakeLists.txt +++ b/Utilities/Mergers/CMakeLists.txt @@ -15,7 +15,7 @@ o2_add_library(Mergers SOURCES src/FullHistoryMerger.cxx src/IntegratingMerger.cxx src/Mergeable.cxx src/MergerAlgorithm.cxx src/MergerBuilder.cxx src/MergerInfrastructureBuilder.cxx src/ObjectStore.cxx - PUBLIC_LINK_LIBRARIES O2::Framework AliceO2::InfoLogger) + PUBLIC_LINK_LIBRARIES O2::Framework AliceO2::InfoLogger ROOT::Gpad) o2_target_root_dictionary( Mergers diff --git a/Utilities/Mergers/src/MergerAlgorithm.cxx b/Utilities/Mergers/src/MergerAlgorithm.cxx index 
a873f550d8450..a3be493d8e34e 100644 --- a/Utilities/Mergers/src/MergerAlgorithm.cxx +++ b/Utilities/Mergers/src/MergerAlgorithm.cxx @@ -16,9 +16,9 @@ #include "Mergers/MergerAlgorithm.h" -#include "Framework/Logger.h" #include "Mergers/MergeInterface.h" #include "Mergers/ObjectStore.h" +#include "Framework/Logger.h" #include #include @@ -28,7 +28,12 @@ #include #include #include +#include #include +#include +#include +#include +#include namespace o2::mergers::algorithm { @@ -43,6 +48,53 @@ size_t estimateTreeSize(TTree* tree) return totalSize; } +// Mergeable objects are kept as primitives in TCanvas object in underlying TPad. +// TPad is a linked list of primitives of any type (https://root.cern.ch/doc/master/classTPad.html) +// including other TPads. So in order to collect all mergeable objects from TCanvas +// we need to recursively transverse whole TPad structure. +auto collectUnderlyingObjects(TCanvas* canvas) -> std::vector +{ + auto collectFromTPad = [](TPad* pad, std::vector& objects, const auto& collectFromTPad) { + if (!pad) { + return; + } + auto* primitives = pad->GetListOfPrimitives(); + for (int i = 0; i < primitives->GetSize(); ++i) { + auto* primitive = primitives->At(i); + if (auto* primitivePad = dynamic_cast(primitive)) { + collectFromTPad(primitivePad, objects, collectFromTPad); + } else { + objects.push_back(primitive); + } + } + }; + + std::vector collectedObjects; + collectFromTPad(canvas, collectedObjects, collectFromTPad); + + return collectedObjects; +} + +struct MatchedCollectedObjects { + MatchedCollectedObjects(TObject* t, TObject* o) : target(t), other(o) {} + + TObject* target; + TObject* other; +}; + +auto matchCollectedToPairs(const std::vector& targetObjects, const std::vector otherObjects) -> std::vector +{ + std::vector matchedObjects; + matchedObjects.reserve(std::max(targetObjects.size(), otherObjects.size())); + for (const auto& targetObject : targetObjects) { + if (const auto found_it = std::ranges::find_if(otherObjects, 
[&targetObject](TObject* obj) { return std::string_view(targetObject->GetName()) == std::string_view(obj->GetName()); }); + found_it != otherObjects.end()) { + matchedObjects.emplace_back(targetObject, *found_it); + } + } + return matchedObjects; +} + void merge(TObject* const target, TObject* const other) { if (target == nullptr) { @@ -82,6 +134,29 @@ void merge(TObject* const target, TObject* const other) } } delete otherIterator; + } else if (auto targetCanvas = dynamic_cast(target)) { + + auto otherCanvas = dynamic_cast(other); + if (otherCanvas == nullptr) { + throw std::runtime_error(std::string("The target object '") + target->GetName() + + "' is a TCanvas, while the other object '" + other->GetName() + "' is not."); + } + + const auto targetObjects = collectUnderlyingObjects(targetCanvas); + const auto otherObjects = collectUnderlyingObjects(otherCanvas); + if (targetObjects.size() != otherObjects.size()) { + throw std::runtime_error(std::string("Trying to merge canvas: ") + targetCanvas->GetName() + " and canvas " + otherObjects.size() + "but contents are not the same"); + } + + const auto matched = matchCollectedToPairs(targetObjects, otherObjects); + if (targetObjects.size() != matched.size()) { + throw std::runtime_error(std::string("Trying to merge canvas: ") + targetCanvas->GetName() + " and canvas " + otherObjects.size() + "but contents are not the same"); + } + + for (const auto& [targetObject, otherObject] : matched) { + merge(targetObject, otherObject); + } + } else { Long64_t errorCode = 0; TObjArray otherCollection; @@ -169,4 +244,4 @@ void deleteTCollections(TObject* obj) } } -} // namespace o2::mergers::algorithm \ No newline at end of file +} // namespace o2::mergers::algorithm diff --git a/Utilities/Mergers/test/test_Algorithm.cxx b/Utilities/Mergers/test/test_Algorithm.cxx index 237d017e6b30b..f087254e00d05 100644 --- a/Utilities/Mergers/test/test_Algorithm.cxx +++ b/Utilities/Mergers/test/test_Algorithm.cxx @@ -14,9 +14,9 @@ /// /// 
\author Piotr Konopka, piotr.jan.konopka@cern.ch -#include #include #include +#include #define BOOST_TEST_MODULE Test Utilities MergerAlgorithm #define BOOST_TEST_MAIN #define BOOST_TEST_DYN_LINK @@ -39,6 +39,7 @@ #include #include #include +#include // using namespace o2::framework; using namespace o2::mergers; @@ -305,6 +306,117 @@ BOOST_AUTO_TEST_CASE(MergerCollection) delete target; } +TCanvas* createCanvas(std::string name, std::string title, std::vector>& histograms) +{ + auto canvas = new TCanvas(name.c_str(), title.c_str(), 100, 100); + canvas->Divide(histograms.size(), 1); + for (size_t i = 1; const auto& hist : histograms) { + canvas->cd(i); + hist->Draw(); + ++i; + } + return canvas; +} + +auto collectUnderlyingObjects(TCanvas* canvas) -> std::vector +{ + auto collectFromTPad = [](TPad* pad, std::vector& objects, const auto& collectFromTPad) { + if (!pad) { + return; + } + auto* primitives = pad->GetListOfPrimitives(); + for (int i = 0; i < primitives->GetSize(); ++i) { + auto* primitive = primitives->At(i); + if (auto* primitivePad = dynamic_cast(primitive)) { + collectFromTPad(primitivePad, objects, collectFromTPad); + } else { + objects.push_back(primitive); + } + } + }; + + std::vector collectedObjects; + collectFromTPad(canvas, collectedObjects, collectFromTPad); + + return collectedObjects; +} + +BOOST_AUTO_TEST_CASE(MergerTCanvas) +{ + // working example + { + std::vector> histsC1{ + std::make_shared("th1", "obj1", bins, min, max), + std::make_shared("th2", "obj2", bins, min, max), + }; + histsC1[0]->Fill(5); + histsC1[1]->Fill(2); + BOOST_CHECK_EQUAL(histsC1[0]->GetBinContent(histsC1[0]->FindBin(5)), 1); + BOOST_CHECK_EQUAL(histsC1[1]->GetBinContent(histsC1[1]->FindBin(2)), 1); + + std::vector> histsC2{ + std::make_shared("th1", "obj1", bins, min, max), + std::make_shared("th2", "obj2", bins, min, max), + }; + + histsC2[0]->Fill(5); + histsC2[1]->Fill(2); + BOOST_CHECK_EQUAL(histsC2[0]->GetBinContent(histsC2[0]->FindBin(5)), 1); + 
BOOST_CHECK_EQUAL(histsC2[1]->GetBinContent(histsC2[1]->FindBin(2)), 1); + + auto targetCanvas = createCanvas("c1", "test title 1", histsC1); + auto otherCanvas = createCanvas("c2", "test title 2", histsC2); + + algorithm::merge(targetCanvas, otherCanvas); + + auto targetObjects = collectUnderlyingObjects(targetCanvas); + + BOOST_CHECK_EQUAL(targetObjects.size(), 2); + for (const auto& object : targetObjects) { + auto th = static_cast(object); + if (std::string(th->GetName()) == "th1") { + BOOST_CHECK_EQUAL(th->GetBinContent(th->FindBin(5)), 2); + } + if (std::string(th->GetName()) == "th2") { + BOOST_CHECK_EQUAL(th->GetBinContent(th->FindBin(2)), 2); + } + } + } + + // throw because we try to merge canvases with different number of underlying items + { + std::vector> histsC1{ + std::make_shared("th1", "obj1", bins, min, max), + std::make_shared("th2", "obj2", bins, min, max), + }; + + std::vector> histsC2{ + std::make_shared("th1", "obj1", bins, min, max), + }; + + auto targetCanvas = createCanvas("c1", "test title 1", histsC1); + auto otherCanvas = createCanvas("c2", "test title 2", histsC2); + + BOOST_CHECK_THROW(algorithm::merge(targetCanvas, otherCanvas), std::runtime_error); + } + + // throw because we try to merge canvases with different underlying items + { + std::vector> histsC1{ + std::make_shared("th1", "obj1", bins, min, max), + }; + + std::vector> histsC2{ + std::make_shared("th2", "obj2", bins, min, max), + }; + + auto targetCanvas = createCanvas("c1", "test title 1", histsC1); + auto otherCanvas = createCanvas("c2", "test title 2", histsC2); + + BOOST_CHECK_THROW(algorithm::merge(targetCanvas, otherCanvas), std::runtime_error); + } +} + BOOST_AUTO_TEST_CASE(Deleting) { TObjArray* main = new TObjArray(); From d9c4e190f849c8051bdd53e2db0008cb2c7d613c Mon Sep 17 00:00:00 2001 From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Thu, 23 Jan 2025 19:39:03 +0100 Subject: [PATCH 0013/1914] Disable 
test_Generator_test_GeneratorPythia8Param (#13893) Currently broken in osx_ARM64 --- Generators/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Generators/CMakeLists.txt b/Generators/CMakeLists.txt index d909b3e604887..02caa63df0d43 100644 --- a/Generators/CMakeLists.txt +++ b/Generators/CMakeLists.txt @@ -130,11 +130,11 @@ if(doBuildSimulation) LABELS generator PUBLIC_LINK_LIBRARIES O2::Generators) - o2_add_test(GeneratorPythia8Param NAME test_Generator_test_GeneratorPythia8Param - SOURCES test/test_GeneratorPythia8Param.cxx - COMPONENT_NAME Generator - LABELS generator - PUBLIC_LINK_LIBRARIES O2::Generators) + # o2_add_test(GeneratorPythia8Param NAME test_Generator_test_GeneratorPythia8Param + # SOURCES test/test_GeneratorPythia8Param.cxx + # COMPONENT_NAME Generator + # LABELS generator + # PUBLIC_LINK_LIBRARIES O2::Generators) endif() From 95d419d26ee904ab66d4709fa69eb051fc17c04b Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Thu, 23 Jan 2025 19:26:35 +0100 Subject: [PATCH 0014/1914] Base: allow to create alignment for individual detectors other than ITS Signed-off-by: Felix Schlepper --- Detectors/Base/src/GRPGeomHelper.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/Base/src/GRPGeomHelper.cxx b/Detectors/Base/src/GRPGeomHelper.cxx index 2a76e52b3679e..e7e5248493548 100644 --- a/Detectors/Base/src/GRPGeomHelper.cxx +++ b/Detectors/Base/src/GRPGeomHelper.cxx @@ -235,7 +235,7 @@ void GRPGeomHelper::checkUpdates(ProcessingContext& pc) for (auto id = DetID::First; id <= DetID::Last; id++) { std::string binding = fmt::format("align{}", DetID::getName(id)); if (pc.inputs().getPos(binding.c_str()) < 0) { - return; + continue; } else { pc.inputs().get*>(binding); } From 31358c851d1e6eb610115abd9f851a395c3c9efc Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 22 Jan 2025 19:40:12 +0100 Subject: [PATCH 0015/1914] add TPC occupancy est. 
to SVStudy output --- .../include/GlobalTrackingStudy/SVStudy.h | 2 +- .../study/src/SVStudy.cxx | 83 +++++++++++++++++-- .../study/src/TrackingStudy.cxx | 32 ++++++- .../study/src/sv-study-workflow.cxx | 7 +- 4 files changed, 112 insertions(+), 12 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/SVStudy.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/SVStudy.h index 9c9453215c9a0..d54513cb07a60 100644 --- a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/SVStudy.h +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/SVStudy.h @@ -22,7 +22,7 @@ namespace o2::svstudy { /// create a processor spec -o2::framework::DataProcessorSpec getSVStudySpec(o2::dataformats::GlobalTrackID::mask_t srcTracks, bool useMC); +o2::framework::DataProcessorSpec getSVStudySpec(o2::dataformats::GlobalTrackID::mask_t srcTracks, o2::dataformats::GlobalTrackID::mask_t srcCls, bool useMC); } // namespace o2::svstudy diff --git a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx index 17b33c86e61ad..12a883ec991f6 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx @@ -44,6 +44,12 @@ #include "DCAFitter/DCAFitterN.h" #include "MathUtils/fit.h" #include "GlobalTrackingStudy/V0Ext.h" +#include "GPUO2InterfaceConfiguration.h" +// #include "GPUSettingsO2.h" +#include "GPUParam.h" +#include "GPUParam.inc" +#include "GPUO2InterfaceRefit.h" +#include "GPUO2InterfaceUtils.h" namespace o2::svstudy { @@ -64,8 +70,8 @@ using timeEst = o2::dataformats::TimeStampWithError; class SVStudySpec : public Task { public: - SVStudySpec(std::shared_ptr dr, std::shared_ptr gr, GTrackID::mask_t src, bool useMC) - : mDataRequest(dr), mGGCCDBRequest(gr), mTracksSrc(src), mUseMC(useMC) {} + SVStudySpec(std::shared_ptr dr, std::shared_ptr gr, GTrackID::mask_t src, bool useTPCCl, 
bool useMC) + : mDataRequest(dr), mGGCCDBRequest(gr), mTracksSrc(src), mUseTPCCl(useTPCCl), mUseMC(useMC) {} ~SVStudySpec() final = default; void init(InitContext& ic) final; void run(ProcessingContext& pc) final; @@ -83,11 +89,18 @@ class SVStudySpec : public Task std::unique_ptr mDBGOut; float mSelK0 = -1; bool mRefit = false; + bool mUseTPCCl = false; float mMaxEta = 0.8; float mBz = 0; + int mNHBPerTF = 0; + int mNTPCOccBinLength = 0; ///< TPC occ. histo bin length in TBs + float mNTPCOccBinLengthInv; + float mTPCTBinMUSInv = 0.f; GTrackID::mask_t mTracksSrc{}; o2::vertexing::DCAFitterN<2> mFitterV0; + std::vector mTBinClOccAft, mTBinClOccBef; std::unique_ptr mcReader; // reader of MC information + std::shared_ptr mParam = nullptr; }; void SVStudySpec::init(InitContext& ic) @@ -107,6 +120,48 @@ void SVStudySpec::run(ProcessingContext& pc) o2::globaltracking::RecoContainer recoData; recoData.collectData(pc, *mDataRequest.get()); // select tracks of needed type, with minimal cuts, the real selected will be done in the vertexer updateTimeDependentParams(pc); // Make sure this is called after recoData.collectData, which may load some conditions + + size_t occupancyMapSizeBytes = o2::gpu::GPUO2InterfaceRefit::fillOccupancyMapGetSize(mNHBPerTF, mParam.get()); + gsl::span TPCRefitterOccMap = recoData.occupancyMapTPC; + o2::gpu::GPUO2InterfaceUtils::paramUseExternalOccupancyMap(mParam.get(), mNHBPerTF, TPCRefitterOccMap.data(), occupancyMapSizeBytes); + + mTBinClOccBef.resize(1); + mTBinClOccAft.resize(1); + if (recoData.inputsTPCclusters && mUseTPCCl) { + mNTPCOccBinLength = mParam->rec.tpc.occupancyMapTimeBins; + mTBinClOccBef.clear(); + mTBinClOccAft.clear(); + // prepare TPC occupancy data + if (mNTPCOccBinLength > 1 && recoData.occupancyMapTPC.size()) { + mNTPCOccBinLengthInv = 1. 
/ mNTPCOccBinLength; + int nTPCBins = mNHBPerTF * o2::constants::lhc::LHCMaxBunches / 8, ninteg = 0; + int nTPCOccBins = nTPCBins * mNTPCOccBinLengthInv, sumBins = std::max(1, int(o2::constants::lhc::LHCMaxBunches / 8 * mNTPCOccBinLengthInv)); + mTBinClOccAft.resize(nTPCOccBins); + mTBinClOccBef.resize(nTPCOccBins); + float sm = 0., tb = 0.5 * mNTPCOccBinLength; + std::vector mltHistTB(nTPCOccBins); + for (int i = 0; i < nTPCOccBins; i++) { + mltHistTB[i] = mParam->GetUnscaledMult(tb); + tb += mNTPCOccBinLength; + } + for (int i = nTPCOccBins; i--;) { + sm += mltHistTB[i]; + if (i + sumBins < nTPCOccBins) { + sm -= mltHistTB[i + sumBins]; + } + mTBinClOccAft[i] = sm; + } + sm = 0; + for (int i = 0; i < nTPCOccBins; i++) { + sm += mltHistTB[i]; + if (i - sumBins > 0) { + sm -= mltHistTB[i - sumBins]; + } + mTBinClOccBef[i] = sm; + } + } + } + process(recoData); } @@ -133,6 +188,12 @@ void SVStudySpec::updateTimeDependentParams(ProcessingContext& pc) mFitterV0.setMaxStep(svparam.maxStep); mFitterV0.setMaxSnp(svparam.maxSnp); mFitterV0.setMinXSeed(svparam.minXSeed); + + mNHBPerTF = o2::base::GRPGeomHelper::instance().getGRPECS()->getNHBFPerTF(); + if (!mParam) { + // for occupancy estimator + mParam = o2::gpu::GPUO2InterfaceUtils::getFullParamShared(0.f, mNHBPerTF); + } } mBz = o2::base::Propagator::Instance()->getNominalBz(); mFitterV0.setBz(mBz); @@ -268,8 +329,13 @@ void SVStudySpec::process(o2::globaltracking::RecoContainer& recoData) } if (v0extVec.size()) { const auto& pv = recoData.getPrimaryVertex(pvID); + float tpcOccBef = 0., tpcOccAft = 0.; + int tb = pv.getTimeStamp().getTimeStamp() * mTPCTBinMUSInv * mNTPCOccBinLengthInv; + tpcOccBef = tb < 0 ? mTBinClOccBef[0] : (tb >= mTBinClOccBef.size() ? mTBinClOccBef.back() : mTBinClOccBef[tb]); + tpcOccAft = tb < 0 ? mTBinClOccAft[0] : (tb >= mTBinClOccAft.size() ? 
mTBinClOccAft.back() : mTBinClOccAft[tb]); + (*mDBGOut) << "v0" - << "orbit=" << recoData.startIR.orbit << "tfID=" << tfID + << "orbit=" << recoData.startIR.orbit << "tfID=" << tfID << "tpcOccBef=" << tpcOccBef << "tpcOccAft=" << tpcOccAft << "v0Ext=" << v0extVec << "pv=" << pv << "\n"; @@ -334,29 +400,30 @@ void SVStudySpec::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) } } -DataProcessorSpec getSVStudySpec(GTrackID::mask_t srcTracks, bool useMC) +DataProcessorSpec getSVStudySpec(GTrackID::mask_t srcTracks, GTrackID::mask_t srcCls, bool useMC) { std::vector outputs; auto dataRequest = std::make_shared(); dataRequest->requestTracks(srcTracks, useMC); + dataRequest->requestClusters(srcCls, false); dataRequest->requestPrimaryVertices(useMC); dataRequest->requestSecondaryVertices(useMC); dataRequest->inputs.emplace_back("meanvtx", "GLO", "MEANVERTEX", 0, Lifetime::Condition, ccdbParamSpec("GLO/Calib/MeanVertex", {}, 1)); - auto ggRequest = std::make_shared(false, // orbitResetTime - false, // GRPECS=true + auto ggRequest = std::make_shared(true, // orbitResetTime + true, // GRPECS=true false, // GRPLHCIF true, // GRPMagField true, // askMatLUT o2::base::GRPGeomRequest::None, // geometry dataRequest->inputs, true); - + bool useTPCcl = srcCls[GTrackID::TPC]; return DataProcessorSpec{ "sv-study", dataRequest->inputs, outputs, - AlgorithmSpec{adaptFromTask(dataRequest, ggRequest, srcTracks, useMC)}, + AlgorithmSpec{adaptFromTask(dataRequest, ggRequest, srcTracks, useTPCcl, useMC)}, Options{ {"refit", VariantType::Bool, false, {"refit SVertices"}}, {"sel-k0", VariantType::Float, -1.f, {"If positive, select K0s with this mass margin"}}, diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index c6345b128f562..b3ef78bd2eabf 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -49,6 +49,7 @@ 
#include "GPUParam.inc" #include "Steer/MCKinematicsReader.h" #include "MathUtils/fit.h" +#include namespace o2::trackstudy { @@ -93,7 +94,8 @@ class TrackingStudySpec : public Task std::unique_ptr mDBGOut; std::unique_ptr mDBGOutVtx; std::unique_ptr mTPCRefitter; ///< TPC refitter used for TPC tracks refit during the reconstruction - std::vector mTBinClOccAft, mTBinClOccBef; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength + std::vector mTBinClOccAft, mTBinClOccBef, mTBinClOccWgh; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength + std::unique_ptr mOccWghFun; float mITSROFrameLengthMUS = 0.f; float mTPCTBinMUS = 0.f; // TPC bin in microseconds float mTPCTBinMUSInv = 0.f; @@ -139,6 +141,10 @@ void TrackingStudySpec::init(InitContext& ic) mDCAYFormula = ic.options().get("dcay-vs-pt"); mDCAZFormula = ic.options().get("dcaz-vs-pt"); mDoPairsCorr = ic.options().get("pair-correlations"); + auto str = ic.options().get("occ-weight-fun"); + if (!str.empty()) { + mOccWghFun = std::make_unique("occFun", str.c_str(), -100., 100.); + } } void TrackingStudySpec::run(ProcessingContext& pc) @@ -154,7 +160,9 @@ void TrackingStudySpec::run(ProcessingContext& pc) mNTPCOccBinLength = mTPCRefitter->getParam()->rec.tpc.occupancyMapTimeBins; mTBinClOccBef.clear(); mTBinClOccAft.clear(); + mTBinClOccWgh.clear(); } + // prepare TPC occupancy data if (mNTPCOccBinLength > 1 && recoData.occupancyMapTPC.size()) { mNTPCOccBinLengthInv = 1. 
/ mNTPCOccBinLength; @@ -162,8 +170,27 @@ void TrackingStudySpec::run(ProcessingContext& pc) int nTPCOccBins = nTPCBins * mNTPCOccBinLengthInv, sumBins = std::max(1, int(o2::constants::lhc::LHCMaxBunches / 8 * mNTPCOccBinLengthInv)); mTBinClOccAft.resize(nTPCOccBins); mTBinClOccBef.resize(nTPCOccBins); - std::vector mltHistTB(nTPCOccBins); float sm = 0., tb = 0.5 * mNTPCOccBinLength; + /* // at the moment not used + if (mOccWghFun) { + mTBinClOccWgh.resize(nTPCBins); + float occBin2MUS = 8 * o2::constants::lhc::LHCBunchSpacingMUS; + int covWghTB = TMath::NInt(100./occBin2MUS); // coverage of weighted occ. in TBins + for (int i = 0; i < nTPCBins; i++) { + sm = 0.; + for (int j=-covWghTB;j=nTPCBins) { + continue; + } + sm += mOccWghFun->Eval(j*occBin2MUS)*mTPCRefitter->getParam()->GetUnscaledMult(j+i); + } + mTBinClOccWgh[i] = sm; + } + } else { + mTBinClOccWgh.resize(1); + } + */ + std::vector mltHistTB(nTPCOccBins); for (int i = 0; i < nTPCOccBins; i++) { mltHistTB[i] = mTPCRefitter->getParam()->GetUnscaledMult(tb); tb += mNTPCOccBinLength; @@ -719,6 +746,7 @@ DataProcessorSpec getTrackingStudySpec(GTrackID::mask_t srcTracks, GTrackID::mas {"min-pt", VariantType::Float, 0.1f, {"Cut on track pT"}}, {"with-its-only", VariantType::Bool, false, {"Store tracks with ITS only"}}, {"pair-correlations", VariantType::Bool, false, {"Do pairs correlation"}}, + {"occ-weight-fun", VariantType::String, "(x>=-40&&x<-5) ? (1./1225*pow(x+40,2)) : ((x>-5&&x<15) ? 1. : ((x>=15&&x<40) ? (-0.4/25*x+1.24 ) : ( (x>40&&x<100) ? 
-0.4/60*x+0.6+0.8/3 : 0)))", {"Occupancy weighting f-n vs time in musec"}}, {"min-x-prop", VariantType::Float, 100.f, {"track should be propagated to this X at least"}}, }; o2::tpc::VDriftHelper::requestCCDBInputs(dataRequest->inputs); diff --git a/Detectors/GlobalTrackingWorkflow/study/src/sv-study-workflow.cxx b/Detectors/GlobalTrackingWorkflow/study/src/sv-study-workflow.cxx index fba5e67452f1f..7e104b82f4854 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/sv-study-workflow.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/sv-study-workflow.cxx @@ -39,6 +39,7 @@ void customize(std::vector& workflowOptions) {"disable-mc", o2::framework::VariantType::Bool, false, {"disable MC propagation"}}, {"track-sources", VariantType::String, std::string{GID::ALL}, {"comma-separated list of track sources to use"}}, {"disable-root-input", VariantType::Bool, false, {"disable root-files input reader"}}, + {"ignore-tpc-occ", VariantType::Bool, false, {"do not fill TPC occupancy (needs TPC clusters)"}}, {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings ..."}}}; o2::raw::HBFUtilsInitializer::addConfigOption(options); std::swap(workflowOptions, options); @@ -61,10 +62,14 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) GID::mask_t srcTrc = allowedSourcesTrc & GID::getSourcesMask(configcontext.options().get("track-sources")); GID::mask_t srcCls{}; + bool fillTPCOcc = !configcontext.options().get("ignore-tpc-occ"); + if (fillTPCOcc) { + srcCls = srcCls | GID::getSourcesMask("TPC"); + } o2::globaltracking::InputHelper::addInputSpecs(configcontext, specs, srcCls, srcTrc, srcTrc, useMC); o2::globaltracking::InputHelper::addInputSpecsPVertex(configcontext, specs, useMC); // P-vertex is always needed o2::globaltracking::InputHelper::addInputSpecsSVertex(configcontext, specs); // S-vertex is always needed - specs.emplace_back(o2::svstudy::getSVStudySpec(srcTrc, useMC)); + 
specs.emplace_back(o2::svstudy::getSVStudySpec(srcTrc, srcCls, useMC)); // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit o2::raw::HBFUtilsInitializer hbfIni(configcontext, specs); From dd0844686536c6bb014bb0a097705cc2f30a9535 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Tich=C3=A1k?= <53997499+justonedev1@users.noreply.github.com> Date: Thu, 23 Jan 2025 23:15:40 +0100 Subject: [PATCH 0016/1914] QC: add TCanvas into mergeable checks (#13897) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Michal Tichák --- Utilities/Mergers/include/Mergers/Mergeable.h | 2 +- Utilities/Mergers/src/Mergeable.cxx | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Utilities/Mergers/include/Mergers/Mergeable.h b/Utilities/Mergers/include/Mergers/Mergeable.h index 12facadb455ea..60bbf9748bb2a 100644 --- a/Utilities/Mergers/include/Mergers/Mergeable.h +++ b/Utilities/Mergers/include/Mergers/Mergeable.h @@ -42,7 +42,7 @@ constexpr bool IsDerivedFrom = (std::derived_from || ...); // // \parameter T type to be restricted template -concept Mergeable = IsDerivedFrom, mergers::MergeInterface, TCollection, TH1, TTree, TGraph, TEfficiency, THnBase>; +concept Mergeable = IsDerivedFrom, mergers::MergeInterface, TCollection, TH1, TTree, TGraph, TEfficiency, THnBase, TCanvas>; // \brief runtime check whether TObject is mergeable bool isMergeable(TObject* obj); diff --git a/Utilities/Mergers/src/Mergeable.cxx b/Utilities/Mergers/src/Mergeable.cxx index 4963240025e1b..b35602b62fbc2 100644 --- a/Utilities/Mergers/src/Mergeable.cxx +++ b/Utilities/Mergers/src/Mergeable.cxx @@ -9,6 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
+#include #include #include #include @@ -30,7 +31,8 @@ bool isMergeable(TObject* obj) obj->InheritsFrom(THnBase::Class()) || obj->InheritsFrom(TTree::Class()) || obj->InheritsFrom(TGraph::Class()) || - obj->InheritsFrom(TEfficiency::Class()); + obj->InheritsFrom(TEfficiency::Class()) || + obj->InheritsFrom(TCanvas::Class()); } } // namespace o2::mergers From 0d3b460442b8d47f0e78a873327dc46bcfaedbc3 Mon Sep 17 00:00:00 2001 From: glromane <95305986+glromane@users.noreply.github.com> Date: Fri, 24 Jan 2025 09:18:48 +0100 Subject: [PATCH 0017/1914] Propagate list of tables also in case of derived data (#13885) --- Framework/AnalysisSupport/src/Plugin.cxx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Framework/AnalysisSupport/src/Plugin.cxx b/Framework/AnalysisSupport/src/Plugin.cxx index 52435375d7e9e..e3a39761e8049 100644 --- a/Framework/AnalysisSupport/src/Plugin.cxx +++ b/Framework/AnalysisSupport/src/Plugin.cxx @@ -155,12 +155,14 @@ struct DiscoverMetadataInAOD : o2::framework::ConfigDiscoveryPlugin { LOGP(fatal, "Couldn't open file \"{}\"!", filename); } std::vector results = readMetadata(currentFile); + const bool metaDataEmpty = results.empty(); + auto tables = getListOfTables(currentFile); + if (tables.empty() == false) { + results.push_back(ConfigParamSpec{"aod-metadata-tables", VariantType::ArrayString, tables, {"Tables in first AOD"}}); + } + // Found metadata already in the main file. 
- if (!results.empty()) { - auto tables = getListOfTables(currentFile); - if (tables.empty() == false) { - results.push_back(ConfigParamSpec{"aod-metadata-tables", VariantType::ArrayString, tables, {"Tables in first AOD"}}); - } + if (!metaDataEmpty) { results.push_back(ConfigParamSpec{"aod-metadata-source", VariantType::String, filename, {"File from which the metadata was extracted."}}); return results; } From d399beee3464cc4ca97cdd3f9c475f3411d7bcad Mon Sep 17 00:00:00 2001 From: nicolovalle <35177278+nicolovalle@users.noreply.github.com> Date: Fri, 24 Jan 2025 17:27:37 +0100 Subject: [PATCH 0018/1914] ITS - Dead Map Workflow allows for saving single chips (#13898) --- .../workflow/include/ITSMFTWorkflow/DeadMapBuilderSpec.h | 1 + .../ITSMFT/common/workflow/src/DeadMapBuilderSpec.cxx | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/DeadMapBuilderSpec.h b/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/DeadMapBuilderSpec.h index 7bce60d172222..2a15c332ecde1 100644 --- a/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/DeadMapBuilderSpec.h +++ b/Detectors/ITSMFT/common/workflow/include/ITSMFTWorkflow/DeadMapBuilderSpec.h @@ -83,6 +83,7 @@ class ITSMFTDeadMapBuilder : public Task bool mRunMFT = false; bool mDoLocalOutput = false; bool mSkipStaticMap = false; + bool mNoGroupITSLanes = false; uint16_t N_CHIPS; uint16_t N_CHIPS_ITSIB = o2::itsmft::ChipMappingITS::getNChips(0); int mTFLength = 32; // TODO find utility for proper value -- o2::base::GRPGeomHelper::getNHBFPerTF() returns 128 see https://github.com/AliceO2Group/AliceO2/blob/051b56f9f136e7977e83f5d26d922db9bd6ecef5/Detectors/Base/src/GRPGeomHelper.cxx#L233 and correct also default option is getSpec diff --git a/Detectors/ITSMFT/common/workflow/src/DeadMapBuilderSpec.cxx b/Detectors/ITSMFT/common/workflow/src/DeadMapBuilderSpec.cxx index c97c3440afcc3..8f249136c54c0 100644 --- 
a/Detectors/ITSMFT/common/workflow/src/DeadMapBuilderSpec.cxx +++ b/Detectors/ITSMFT/common/workflow/src/DeadMapBuilderSpec.cxx @@ -63,6 +63,7 @@ void ITSMFTDeadMapBuilder::init(InitContext& ic) mLocalOutputDir = ic.options().get("output-dir"); mSkipStaticMap = ic.options().get("skip-static-map"); + mNoGroupITSLanes = ic.options().get("no-group-its-lanes"); isEnded = false; mTimeStart = o2::ccdb::getCurrentTimestamp(); @@ -245,16 +246,15 @@ void ITSMFTDeadMapBuilder::run(ProcessingContext& pc) } } - // do AND operation before unmasking the full ITS lane - + // Save status of single chips in static map before unmasking the full ITS lane if (!mSkipStaticMap) { for (size_t el = 0; el < mStaticChipStatus.size(); el++) { mStaticChipStatus[el] = mStaticChipStatus[el] || ChipStatus[el]; } } - // for ITS, declaring dead only chips belonging to lane with no hits - if (!mRunMFT) { + // for ITS, if requested: declaring dead only chips belonging to lanes with no alive chips + if (!mRunMFT && !mNoGroupITSLanes) { for (uint16_t el = N_CHIPS_ITSIB; el < ChipStatus.size(); el++) { if (ChipStatus.at(el)) { std::vector chipincable = getChipIDsOnSameCable(el); @@ -441,6 +441,7 @@ DataProcessorSpec getITSMFTDeadMapBuilderSpec(std::string datasource, bool doMFT {"tf-sampling-history-size", VariantType::Int, 1000, {"Do not check if new TF is contained in a window that is older than N steps."}}, {"tf-length", VariantType::Int, 32, {"Orbits per TF."}}, {"skip-static-map", VariantType::Bool, false, {"Do not fill static part of the map."}}, + {"no-group-its-lanes", VariantType::Bool, false, {"Do not group ITS OB chips into lanes."}}, {"ccdb-url", VariantType::String, "", {"CCDB url. 
Ignored if endOfStream is processed."}}, {"outfile", VariantType::String, objectname_default, {"ROOT object file name."}}, {"local-output", VariantType::Bool, false, {"Save ROOT tree file locally."}}, From fb7b17ced1db4831d9cec75a790977c12cf11148 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Sat, 25 Jan 2025 15:34:19 +0100 Subject: [PATCH 0019/1914] ITS::gpu: Update track selection logics to the state of the art (#13816) (#13899) Add processNeighbours GPU kernel and handler Update Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt Fix second iteration Move the whole processNeighbours on GPU --- .../TrackParametrization.h | 4 +- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 2 + .../GPU/ITStrackingGPU/TrackingKernels.h | 10 +- .../ITS/tracking/GPU/cuda/CMakeLists.txt | 2 +- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 63 +---- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 253 ++++++++++-------- 6 files changed, 157 insertions(+), 177 deletions(-) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index a988c96168170..a51ec3b7010a7 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -160,7 +160,7 @@ class TrackParametrization GPUd() value_t getZ() const; GPUd() value_t getSnp() const; GPUd() value_t getTgl() const; - GPUd() value_t getQ2Pt() const; + GPUhd() value_t getQ2Pt() const; GPUd() value_t getCharge2Pt() const; GPUd() int getAbsCharge() const; GPUd() PID getPID() const; @@ -357,7 +357,7 @@ GPUdi() auto TrackParametrization::getTgl() const -> value_t //____________________________________________________________ template -GPUdi() auto TrackParametrization::getQ2Pt() const -> value_t +GPUhdi() auto TrackParametrization::getQ2Pt() const -> value_t { return mP[kQ2Pt]; } diff --git 
a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 4ac22607a580b..066bef7631415 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -116,6 +116,7 @@ class TimeFrameGPU : public TimeFrame int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; } gsl::span getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; } gpuPair* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; } + std::array& getDeviceNeighboursAll() { return mNeighboursDevice; } int* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; } int** getDeviceNeighboursArray() { return mNeighboursDeviceArray; } TrackingFrameInfo* getDeviceTrackingFrameInfo(const int); @@ -142,6 +143,7 @@ class TimeFrameGPU : public TimeFrame // Host-specific getters gsl::span getNTracklets() { return mNTracklets; } gsl::span getNCells() { return mNCells; } + std::array& getArrayNCells() { return mNCells; } // Host-available device getters gsl::span getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; } diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index f50a11a83805f..78636d00788bf 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -186,19 +186,17 @@ void processNeighboursHandler(const int startLayer, const int startLevel, CellSeed** allCellSeeds, CellSeed* currentCellSeeds, - const unsigned int nCurrentCells, + std::array& nCells, const unsigned char** usedClusters, - int* neighbours, + std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, + std::vector& seedsHost, const float bz, const float 
MaxChi2ClusterAttachment, + const float maxChi2NDF, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, - const std::vector& lastCellIdHost, // temporary host vector - const std::vector& lastCellSeedHost, // temporary host vector - std::vector& updatedCellIdHost, // temporary host vector - std::vector& updatedCellSeedHost, // temporary host vector const int nBlocks, const int nThreads); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt index 3cdb107e07438..e2fc1f1388ad0 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt @@ -13,7 +13,7 @@ if(CUDA_ENABLED) find_package(CUDAToolkit) message(STATUS "Building ITS CUDA tracker") -# add_compile_options(-O0 -g -lineinfo -fPIC) +add_compile_options(-O0 -g -lineinfo -fPIC) # add_compile_definitions(ITS_MEASURE_GPU_TIME) o2_add_library(ITStrackingCUDA SOURCES ClusterLinesGPU.cu diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 395aab3a470ac..4821ebb636f54 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -205,9 +205,6 @@ void TrackerTraitsGPU::computeCellsHybrid(const int iteration) conf.nBlocks, conf.nThreads); } - // Needed for processNeighbours() which is still on CPU. 
- mTimeFrameGPU->downloadCellsDevice(); - mTimeFrameGPU->downloadCellsLUTDevice(); } template @@ -221,11 +218,11 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].clear(); mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].resize(nextLayerCellsNum, 0); - if (mTimeFrameGPU->getCells()[iLayer + 1].empty() || - mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) { - mTimeFrameGPU->getCellsNeighbours()[iLayer].clear(); - continue; - } + // if (mTimeFrameGPU->getCells()[iLayer + 1].empty() || + // mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) { + // mTimeFrameGPU->getCellsNeighbours()[iLayer].clear(); + // continue; + // } mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum); countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), @@ -267,7 +264,6 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) cellsNeighboursLayer[iLayer].size()); } mTimeFrameGPU->createNeighboursDeviceArray(); - mTimeFrameGPU->downloadCellsDevice(); mTimeFrameGPU->unregisterRest(); }; @@ -289,55 +285,21 @@ void TrackerTraitsGPU::findRoads(const int iteration) startLevel, mTimeFrameGPU->getDeviceArrayCells(), mTimeFrameGPU->getDeviceCells()[startLayer], - mTimeFrameGPU->getNCells()[startLayer], + mTimeFrameGPU->getArrayNCells(), mTimeFrameGPU->getDeviceArrayUsedClusters(), - mTimeFrameGPU->getDeviceNeighbours(startLayer - 1), + mTimeFrameGPU->getDeviceNeighboursAll(), mTimeFrameGPU->getDeviceNeighboursLUTs(), mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), + trackSeeds, mBz, - mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment + mTrkParams[0].MaxChi2ClusterAttachment, + mTrkParams[0].MaxChi2NDF, mTimeFrameGPU->getDevicePropagator(), mCorrType, - lastCellId, // temporary host vector - lastCellSeed, // temporary host vector - updatedCellId, // temporary host vectors - updatedCellSeed, // temporary host vectors conf.nBlocks, conf.nThreads); - - int level = 
startLevel; - for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { - lastCellSeed.swap(updatedCellSeed); - lastCellId.swap(updatedCellId); - std::vector().swap(updatedCellSeed); /// tame the memory peaks - updatedCellId.clear(); - processNeighboursHandler(iLayer, - --level, - mTimeFrameGPU->getDeviceArrayCells(), - mTimeFrameGPU->getDeviceCells()[iLayer], - mTimeFrameGPU->getNCells()[iLayer], - mTimeFrameGPU->getDeviceArrayUsedClusters(), - mTimeFrameGPU->getDeviceNeighbours(iLayer - 1), - mTimeFrameGPU->getDeviceNeighboursLUTs(), - mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(), - mBz, - mTrkParams[0].MaxChi2ClusterAttachment, // float maxChi2ClusterAttachment - mTimeFrameGPU->getDevicePropagator(), - mCorrType, - lastCellId, // temporary host vector - lastCellSeed, // temporary host vector - updatedCellId, // temporary host vectors - updatedCellSeed, // temporary host vectors - conf.nBlocks, - conf.nThreads); - } - for (auto& seed : updatedCellSeed) { - if (seed.getQ2Pt() > 1.e3 || seed.getChi2() > mTrkParams[0].MaxChi2NDF * ((startLevel + 2) * 2 - 5)) { - continue; - } - trackSeeds.push_back(seed); - } } + // fixme: I don't want to move tracks back and forth, but I need a way to use a thrust::allocator that is aware of our managed memory. 
if (!trackSeeds.size()) { LOGP(info, "No track seeds found, skipping track finding"); continue; @@ -362,9 +324,6 @@ void TrackerTraitsGPU::findRoads(const int iteration) mTimeFrameGPU->downloadTrackITSExtDevice(trackSeeds); auto& tracks = mTimeFrameGPU->getTrackITSExt(); - std::sort(tracks.begin(), tracks.end(), [](const TrackITSExt& a, const TrackITSExt& b) { - return a.getChi2() < b.getChi2(); - }); for (auto& track : tracks) { if (!track.getChi2()) { diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 19edef6c40346..10459cf800b6c 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -232,6 +232,24 @@ struct is_valid_pair { } }; +struct seed_selector { + float maxQ2Pt; + float maxChi2; + + GPUhd() seed_selector(float maxQ2Pt, float maxChi2) : maxQ2Pt(maxQ2Pt), maxChi2(maxChi2) {} + GPUhd() bool operator()(const CellSeed& seed) const + { + return !(seed.getQ2Pt() > maxQ2Pt || seed.getChi2() > maxChi2); + } +}; + +struct compare_track_chi2 { + GPUhd() bool operator()(const TrackITSExt& a, const TrackITSExt& b) const + { + return a.getChi2() < b.getChi2(); + } +}; + GPUd() gpuSpan getPrimaryVertices(const int rof, const int* roframesPV, const int nROF, @@ -596,7 +614,7 @@ GPUg() void processNeighboursKernel(const int layer, int* neighboursLUT, const TrackingFrameInfo** foundTrackingFrameInfo, const float bz, - const float MaxChi2ClusterAttachment, + const float maxChi2ClusterAttachment, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType) { @@ -650,7 +668,7 @@ GPUg() void processNeighboursKernel(const int layer, } auto predChi2{seed.getPredictedChi2Quiet(trHit.positionTrackingFrame, trHit.covarianceTrackingFrame)}; - if ((predChi2 > MaxChi2ClusterAttachment) || predChi2 < 0.f) { + if ((predChi2 > maxChi2ClusterAttachment) || predChi2 < 0.f) { continue; } 
seed.setChi2(seed.getChi2() + predChi2); @@ -1172,149 +1190,152 @@ void processNeighboursHandler(const int startLayer, const int startLevel, CellSeed** allCellSeeds, CellSeed* currentCellSeeds, - const unsigned int nCurrentCells, + std::array& nCells, const unsigned char** usedClusters, - int* neighbours, + std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, + std::vector& seedsHost, const float bz, - const float MaxChi2ClusterAttachment, + const float maxChi2ClusterAttachment, + const float maxChi2NDF, const o2::base::Propagator* propagator, const o2::base::PropagatorF::MatCorrType matCorrType, - const std::vector& lastCellIdHost, // temporary host vector - const std::vector& lastCellSeedHost, // temporary host vector - std::vector& updatedCellIdHost, // temporary host vector - std::vector& updatedCellSeedHost, // temporary host vector const int nBlocks, const int nThreads) { - thrust::device_vector foundSeedsTable(nCurrentCells + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. TODO: fix this. - thrust::device_vector lastCellIds(lastCellIdHost); - thrust::device_vector lastCellSeed(lastCellSeedHost); + thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. TODO: fix this. + // thrust::device_vector lastCellIds(lastCellIdHost); + // thrust::device_vector lastCellSeed(lastCellSeedHost); + thrust::device_vector lastCellId, updatedCellId; + thrust::device_vector lastCellSeed, updatedCellSeed; gpu::processNeighboursKernel<<>>(startLayer, startLevel, allCellSeeds, - lastCellIdHost.empty() ? currentCellSeeds : thrust::raw_pointer_cast(&lastCellSeed[0]), // lastCellSeeds - lastCellIdHost.empty() ? nullptr : thrust::raw_pointer_cast(&lastCellIds[0]), // lastCellIds, - lastCellIdHost.empty() ? 
nCurrentCells : lastCellSeedHost.size(), - nullptr, // updatedCellSeeds, - nullptr, // updatedCellsIds, - thrust::raw_pointer_cast(&foundSeedsTable[0]), // auxiliary only in GPU code to compute the number of cells per iteration - usedClusters, // Used clusters - neighbours, + currentCellSeeds, + nullptr, + nCells[startLayer], + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], neighboursDeviceLUTs[startLayer - 1], foundTrackingFrameInfo, bz, - MaxChi2ClusterAttachment, + maxChi2ClusterAttachment, propagator, matCorrType); - void *d_temp_storage = nullptr, *d_temp_storage_2 = nullptr; - size_t temp_storage_bytes = 0, temp_storage_bytes_2 = 0; - gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + gpuCheckError(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage temp_storage_bytes, // temp_storage_bytes thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCurrentCells + 1, // num_items - 0)); // NOLINT: failure in clang-tidy + nCells[startLayer] + 1, // num_items + 0)); // NOLINT: this is the offset of the sum, not a pointer discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes)); gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage temp_storage_bytes, // temp_storage_bytes thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCurrentCells + 1, // num_items - 0)); // NOLINT: failure in clang-tidy + nCells[startLayer] + 1, // num_items + 0)); // NOLINT: this is the offset of the sum, not a pointer - thrust::device_vector updatedCellIds(foundSeedsTable.back()) /*, lastCellIds(foundSeedsTable.back())*/; - thrust::device_vector updatedCellSeeds(foundSeedsTable.back()) /*, lastCellSeeds(foundSeedsTable.back())*/; + updatedCellId.resize(foundSeedsTable.back()); 
+ updatedCellSeed.resize(foundSeedsTable.back()); gpu::processNeighboursKernel<<>>(startLayer, startLevel, allCellSeeds, - lastCellIdHost.empty() ? currentCellSeeds : thrust::raw_pointer_cast(&lastCellSeed[0]), // lastCellSeeds - lastCellIdHost.empty() ? nullptr : thrust::raw_pointer_cast(&lastCellIds[0]), // lastCellIds, - lastCellIdHost.empty() ? nCurrentCells : lastCellSeedHost.size(), - thrust::raw_pointer_cast(&updatedCellSeeds[0]), // updatedCellSeeds - thrust::raw_pointer_cast(&updatedCellIds[0]), // updatedCellsIds - thrust::raw_pointer_cast(&foundSeedsTable[0]), // auxiliary only in GPU code to compute the number of cells per iteration - usedClusters, // Used clusters - neighbours, + currentCellSeeds, + nullptr, + nCells[startLayer], + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], neighboursDeviceLUTs[startLayer - 1], foundTrackingFrameInfo, bz, - MaxChi2ClusterAttachment, + maxChi2ClusterAttachment, propagator, matCorrType); - - // Temporary copyback to host to validate the kernel - updatedCellIdHost.resize(updatedCellIds.size()); - updatedCellSeedHost.resize(updatedCellSeeds.size()); - thrust::copy(updatedCellIds.begin(), updatedCellIds.end(), updatedCellIdHost.begin()); - thrust::copy(updatedCellSeeds.begin(), updatedCellSeeds.end(), updatedCellSeedHost.begin()); - - // int level = startLevel; - // for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { - // --level; - // lastCellSeeds.swap(updatedCellSeeds); - // lastCellIds.swap(updatedCellIds); - // foundSeedsTable.resize(lastCellSeeds.size() + 1); - // thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); - - // gpu::processNeighboursKernel<<<1, 1>>>(iLayer, - // level, - // allCellSeeds, - // thrust::raw_pointer_cast(&lastCellSeeds[0]), - // thrust::raw_pointer_cast(&lastCellIds[0]), // currentCellIds, - // lastCellSeeds.size(), - // 
nullptr, // updatedCellSeeds, - // nullptr, // updatedCellsIds, - // thrust::raw_pointer_cast(&foundSeedsTable[0]), // auxiliary only in GPU code to compute the number of cells per iteration - // usedClusters, // Used clusters - // neighbours, - // neighboursDeviceLUTs[iLayer - 1], - // foundTrackingFrameInfo, - // bz, - // MaxChi2ClusterAttachment, - // propagator, - // matCorrType); - - // gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - // temp_storage_bytes, // temp_storage_bytes - // thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - // thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - // foundSeedsTable.size(), // num_items - // 0)); - // discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - // gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - // temp_storage_bytes_2, // temp_storage_bytes - // thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - // thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - // foundSeedsTable.size(), // num_items - // 0)); - // updatedCellIds.resize(foundSeedsTable.back(), 0); - // updatedCellSeeds.resize(foundSeedsTable.back(), CellSeed()); - - // gpu::processNeighboursKernel<<<1, 1>>>(iLayer, - // level, - // allCellSeeds, - // thrust::raw_pointer_cast(&lastCellSeeds[0]), - // thrust::raw_pointer_cast(&lastCellIds[0]), // currentCellIds, - // lastCellSeeds.size(), - // thrust::raw_pointer_cast(&updatedCellSeeds[0]), // updatedCellSeeds - // thrust::raw_pointer_cast(&updatedCellIds[0]), // updatedCellsIds - // thrust::raw_pointer_cast(&foundSeedsTable[0]), // auxiliary only in GPU code to compute the number of cells per iteration - // usedClusters, // Used clusters - // neighbours, - // neighboursDeviceLUTs[iLayer - 1], - // foundTrackingFrameInfo, - // bz, - // MaxChi2ClusterAttachment, - // propagator, - // matCorrType); - // gpu::printCellSeeds<<<1, 1>>>(thrust::raw_pointer_cast(&updatedCellSeeds[0]), updatedCellSeeds.size()); - 
// } - + auto t1 = updatedCellSeed.size(); gpuCheckError(cudaFree(d_temp_storage)); - gpuCheckError(cudaFree(d_temp_storage_2)); + int level = startLevel; + for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { + temp_storage_bytes = 0; + lastCellSeed.swap(updatedCellSeed); + lastCellId.swap(updatedCellId); + thrust::device_vector().swap(updatedCellSeed); + thrust::device_vector().swap(updatedCellId); + auto lastCellSeedSize{lastCellSeed.size()}; + foundSeedsTable.resize(nCells[iLayer] + 1); + thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); + --level; + gpu::processNeighboursKernel<<>>(iLayer, + level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpuCheckError(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage + temp_storage_bytes, // temp_storage_bytes + thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in + thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out + nCells[iLayer] + 1, // num_items + 0)); // NOLINT: this is the offset of the sum, not a pointer + discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage + temp_storage_bytes, // temp_storage_bytes + thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in + thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out + nCells[iLayer] + 1, // num_items + 0)); // NOLINT: this is the offset of the sum, not a pointer + auto foundSeeds{foundSeedsTable.back()}; + updatedCellId.resize(foundSeeds); + thrust::fill(updatedCellId.begin(), updatedCellId.end(), 0); + updatedCellSeed.resize(foundSeeds); + thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); + 
+ gpu::processNeighboursKernel<<>>(iLayer, + level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpuCheckError(cudaFree(d_temp_storage)); + } + thrust::device_vector outSeeds(updatedCellSeed.size()); + auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); + auto s{end - outSeeds.begin()}; + std::vector outSeedsHost(s); + thrust::copy(updatedCellSeed.begin(), updatedCellSeed.begin() + s, outSeedsHost.begin()); + seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end()); } void trackSeedHandler(CellSeed* trackSeeds, @@ -1344,7 +1365,9 @@ void trackSeedHandler(CellSeed* trackSeeds, maxChi2NDF, // float propagator, // const o2::base::Propagator* matCorrType); // o2::base::PropagatorF::MatCorrType + thrust::device_ptr tr_ptr(tracks); + thrust::sort(tr_ptr, tr_ptr + nSeeds, gpu::compare_track_chi2()); gpuCheckError(cudaPeekAtLastError()); gpuCheckError(cudaDeviceSynchronize()); } @@ -1414,19 +1437,17 @@ template void processNeighboursHandler<7>(const int startLayer, const int startLevel, CellSeed** allCellSeeds, CellSeed* currentCellSeeds, - const unsigned int nCurrentCells, + std::array& nCells, const unsigned char** usedClusters, - int* neighbours, + std::array& neighbours, gsl::span neighboursDeviceLUTs, const TrackingFrameInfo** foundTrackingFrameInfo, + std::vector& seedsHost, const float bz, - const float MaxChi2ClusterAttachment, + const float maxChi2ClusterAttachment, + const float maxChi2NDF, const o2::base::Propagator* propagator, const 
o2::base::PropagatorF::MatCorrType matCorrType, - const std::vector& lastCellIdHost, // temporary host vector - const std::vector& lastCellSeedHost, // temporary host vector - std::vector& updatedCellIdHost, // temporary host vector - std::vector& updatedCellSeedHost, // temporary host vector const int nBlocks, const int nThreads); } // namespace o2::its \ No newline at end of file From d26f229e99ae557143abc674d5a76516b9b8d009 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 24 Jan 2025 16:48:45 +0100 Subject: [PATCH 0020/1914] Push CTF/SELIRFRAMES/0 even if run was not matched --- Detectors/CTF/workflow/CMakeLists.txt | 2 ++ Detectors/CTF/workflow/src/CTFReaderSpec.cxx | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Detectors/CTF/workflow/CMakeLists.txt b/Detectors/CTF/workflow/CMakeLists.txt index b4fefa894263c..f7adeaed991c0 100644 --- a/Detectors/CTF/workflow/CMakeLists.txt +++ b/Detectors/CTF/workflow/CMakeLists.txt @@ -9,6 +9,8 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
+#add_compile_options(-O0 -g -fPIC) + o2_add_library(CTFWorkflow SOURCES src/CTFWriterSpec.cxx src/CTFReaderSpec.cxx diff --git a/Detectors/CTF/workflow/src/CTFReaderSpec.cxx b/Detectors/CTF/workflow/src/CTFReaderSpec.cxx index bcf3b5d975b74..aadc059ecd4fa 100644 --- a/Detectors/CTF/workflow/src/CTFReaderSpec.cxx +++ b/Detectors/CTF/workflow/src/CTFReaderSpec.cxx @@ -102,6 +102,7 @@ class CTFReaderSpec : public o2::framework::Task std::unique_ptr mCTFTree; bool mRunning = false; bool mUseLocalTFCounter = false; + bool mIFRamesOut = false; int mConvRunTimeRangesToOrbits = -1; // not defined yet int mCTFCounter = 0; int mCTFCounterAcc = 0; @@ -172,9 +173,11 @@ void CTFReaderSpec::init(InitContext& ic) const auto& hbfu = o2::raw::HBFUtils::Instance(); mTFLength = hbfu.nHBFPerTF; LOGP(info, "IRFrames will be selected from {}, assumed TF length: {} HBF", mInput.fileIRFrames, mTFLength); + mIFRamesOut = true; } if (!mInput.fileRunTimeSpans.empty()) { loadRunTimeSpans(mInput.fileRunTimeSpans); + mIFRamesOut = true; } } @@ -418,11 +421,11 @@ bool CTFReaderSpec::processTF(ProcessingContext& pc) runTimeRangesToIRFrameSelector(timingInfo); } mRunNumberPrev = timingInfo.runNumber; - + gsl::span irSpan{}; if (mIRFrameSelector.isSet()) { o2::InteractionRecord ir0(0, timingInfo.firstTForbit); o2::InteractionRecord ir1(o2::constants::lhc::LHCMaxBunches - 1, timingInfo.firstTForbit < 0xffffffff - (mTFLength - 1) ? timingInfo.firstTForbit + (mTFLength - 1) : 0xffffffff); - auto irSpan = mIRFrameSelector.getMatchingFrames({ir0, ir1}); + irSpan = mIRFrameSelector.getMatchingFrames({ir0, ir1}); bool acc = true; if (mInput.skipSkimmedOutTF) { acc = (irSpan.size() > 0) ? 
!mInput.invertIRFramesSelection : mInput.invertIRFramesSelection; @@ -435,13 +438,14 @@ bool CTFReaderSpec::processTF(ProcessingContext& pc) if (mInput.checkTFLimitBeforeReading) { limiter.check(pc, mInput.tfRateLimit, mInput.minSHM); } - auto outVec = pc.outputs().make>(OutputRef{"selIRFrames"}, irSpan.begin(), irSpan.end()); } else { if (mInput.checkTFLimitBeforeReading) { limiter.check(pc, mInput.tfRateLimit, mInput.minSHM); } } - + if (mIFRamesOut) { + auto outVec = pc.outputs().make>(OutputRef{"selIRFrames"}, irSpan.begin(), irSpan.end()); + } // send CTF Header pc.outputs().snapshot({"header", mInput.subspec}, ctfHeader); From f946c53d5c80133b855a8c251fa44379931c6df4 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 28 Jan 2025 10:39:44 +0100 Subject: [PATCH 0021/1914] DPL: avoid asserting the workflow is empty (#13904) This is a property that depends on user input, so an empty workflow is actually possible and should be handled. This currently breaks if the empty workflow is provided in debug mode. --- Framework/Core/src/DeviceSpecHelpers.cxx | 4 ++++ Framework/Core/src/WorkflowHelpers.cxx | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/DeviceSpecHelpers.cxx b/Framework/Core/src/DeviceSpecHelpers.cxx index f2644ed66ba08..ec0a40e44ac31 100644 --- a/Framework/Core/src/DeviceSpecHelpers.cxx +++ b/Framework/Core/src/DeviceSpecHelpers.cxx @@ -1118,6 +1118,10 @@ void DeviceSpecHelpers::dataProcessorSpecs2DeviceSpecs(const WorkflowSpec& workf { // Always check for validity of the workflow before instanciating it DeviceSpecHelpers::validate(workflow); + // In case the workflow is empty, we simply do not need to instanciate any device. 
+ if (workflow.empty()) { + return; + } std::vector availableForwardsInfo; std::vector logicalEdges; std::vector connections; diff --git a/Framework/Core/src/WorkflowHelpers.cxx b/Framework/Core/src/WorkflowHelpers.cxx index 597f3d32856c1..b18b559fe99fb 100644 --- a/Framework/Core/src/WorkflowHelpers.cxx +++ b/Framework/Core/src/WorkflowHelpers.cxx @@ -756,7 +756,10 @@ void WorkflowHelpers::constructGraph(const WorkflowSpec& workflow, std::vector& outputs, std::vector& forwardedInputsInfo) { - assert(!workflow.empty()); + // In case the workflow is empty, we do not have anything to do. + if (workflow.empty()) { + return; + } // This is the state. Oif is the iterator I use for the searches. std::vector availableOutputsInfo; From f2e7956bbfe9bdd661acbb25cdef722773513ddc Mon Sep 17 00:00:00 2001 From: czhang Date: Mon, 27 Jan 2025 15:27:12 +0000 Subject: [PATCH 0022/1914] Add termination for record writer to save record file correctly --- Detectors/MUON/MCH/Align/src/Aligner.cxx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Detectors/MUON/MCH/Align/src/Aligner.cxx b/Detectors/MUON/MCH/Align/src/Aligner.cxx index 71bafad5b9ff3..a19b7f602cdb8 100644 --- a/Detectors/MUON/MCH/Align/src/Aligner.cxx +++ b/Detectors/MUON/MCH/Align/src/Aligner.cxx @@ -359,14 +359,17 @@ void Aligner::init(TString DataRecFName, TString ConsRecFName) void Aligner::terminate() { fInitialized = kFALSE; - LOG(info) << "Closing Evaluation TFile"; if (fDoEvaluation) { + LOG(info) << "Closing Evaluation TFile"; if (fTFile && fTTree) { fTFile->cd(); fTTree->Write(); fTFile->Close(); } } + if (!fDisableRecordWriter) { + mRecordWriter->terminate(); + } } //_____________________________________________________ From 8baefd989c16b9d926a2a69efe985909cbaa06d4 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 28 Jan 2025 14:26:08 +0100 Subject: [PATCH 0023/1914] DPL Analysis: out of line HistogramRegistry population methods (#13906) --- 
.../include/Framework/HistogramRegistry.h | 72 ++++++++----------- Framework/Core/src/HistogramRegistry.cxx | 70 ++++++++++++++++++ 2 files changed, 101 insertions(+), 41 deletions(-) diff --git a/Framework/Core/include/Framework/HistogramRegistry.h b/Framework/Core/include/Framework/HistogramRegistry.h index 0801064b6f4cc..9f272be38da0c 100644 --- a/Framework/Core/include/Framework/HistogramRegistry.h +++ b/Framework/Core/include/Framework/HistogramRegistry.h @@ -22,10 +22,12 @@ #include "Framework/SerializationMethods.h" #include "Framework/TableBuilder.h" #include "Framework/RuntimeError.h" +#include "StepTHn.h" #include #include #include +#include #include #include @@ -403,28 +405,6 @@ constexpr HistogramRegistry::HistName::HistName(const ConstStr& hashed { } -template -std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2) -{ - auto histVariant = add(name, title, histConfigSpec, callSumw2); - if (auto histPtr = std::get_if>(&histVariant)) { - return *histPtr; - } else { - throw runtime_error_f(R"(Histogram type specified in add<>("%s") does not match the actual type of the histogram!)", name); - } -} - -template -std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2) -{ - auto histVariant = add(name, title, histType, axes, callSumw2); - if (auto histPtr = std::get_if>(&histVariant)) { - return *histPtr; - } else { - throw runtime_error_f(R"(Histogram type specified in add<>("%s") does not match the actual type of the histogram!)", name); - } -} - template std::shared_ptr HistogramRegistry::add(const std::string& name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2) { @@ -447,25 +427,6 @@ std::shared_ptr HistogramRegistry::operator()(const HistName& histName) return get(histName); } -template -HistPtr HistogramRegistry::insertClone(const 
HistName& histName, const std::shared_ptr originalHist) -{ - validateHistName(histName.str, histName.hash); - for (auto i = 0u; i < MAX_REGISTRY_SIZE; ++i) { - TObject* rawPtr = nullptr; - std::visit([&](const auto& sharedPtr) { rawPtr = sharedPtr.get(); }, mRegistryValue[imask(histName.idx + i)]); - if (!rawPtr) { - registerName(histName.str); - mRegistryKey[imask(histName.idx + i)] = histName.hash; - mRegistryValue[imask(histName.idx + i)] = std::shared_ptr(static_cast(originalHist->Clone(histName.str))); - lookup += i; - return mRegistryValue[imask(histName.idx + i)]; - } - } - LOGF(fatal, R"(Internal array of HistogramRegistry "%s" is full.)", mName); - return HistPtr(); -} - template uint32_t HistogramRegistry::getHistIndex(const T& histName) { @@ -491,6 +452,35 @@ extern template void HistogramRegistry::fill(const HistName& histName, double); extern template void HistogramRegistry::fill(const HistName& histName, float); extern template void HistogramRegistry::fill(const HistName& histName, int); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +extern template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); + +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, 
const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const 
HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +extern template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); + template void HistogramRegistry::fill(const HistName& histName, const T& table, const o2::framework::expressions::Filter& filter) { diff --git a/Framework/Core/src/HistogramRegistry.cxx b/Framework/Core/src/HistogramRegistry.cxx index 0836e72ffa935..0a0cc1fc3a690 100644 --- a/Framework/Core/src/HistogramRegistry.cxx +++ b/Framework/Core/src/HistogramRegistry.cxx @@ -414,4 +414,74 @@ void HistFiller::badHistogramFill(char const* name) LOGF(fatal, "The number of arguments in fill function called for histogram %s is incompatible with histogram dimensions.", name); } +template +HistPtr HistogramRegistry::insertClone(const HistName& histName, const std::shared_ptr originalHist) +{ + validateHistName(histName.str, histName.hash); + for (auto i = 0u; i < MAX_REGISTRY_SIZE; ++i) { + TObject* rawPtr = nullptr; + std::visit([&](const auto& sharedPtr) { rawPtr = sharedPtr.get(); }, mRegistryValue[imask(histName.idx + i)]); + if (!rawPtr) { + registerName(histName.str); + mRegistryKey[imask(histName.idx + i)] = histName.hash; + mRegistryValue[imask(histName.idx + i)] = 
std::shared_ptr(static_cast(originalHist->Clone(histName.str))); + lookup += i; + return mRegistryValue[imask(histName.idx + i)]; + } + } + LOGF(fatal, R"(Internal array of HistogramRegistry "%s" is full.)", mName); + return HistPtr(); +} + +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); +template HistPtr HistogramRegistry::insertClone(const HistName&, const std::shared_ptr); + +template +std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2) +{ + auto histVariant = add(name, title, histConfigSpec, callSumw2); + if (auto histPtr = std::get_if>(&histVariant)) { + return *histPtr; + } else { + throw runtime_error_f(R"(Histogram type specified in add<>("%s") does not match the actual type of the histogram!)", name); + } +} + +template +std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2) +{ + auto histVariant = add(name, title, histType, axes, callSumw2); + if (auto histPtr = std::get_if>(&histVariant)) { + return *histPtr; + } else { + throw runtime_error_f(R"(Histogram type specified in add<>("%s") does not match the actual type of the histogram!)", name); + } +} + +template std::shared_ptr HistogramRegistry::add(char 
const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool 
callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, const HistogramConfigSpec& histConfigSpec, bool callSumw2); +template std::shared_ptr HistogramRegistry::add(char const* const name, char const* const title, HistType histType, const std::vector& axes, bool callSumw2); + } // namespace o2::framework From 53ed758f32eaf659b7a9dc0a7c701fcd300a7544 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 28 Jan 2025 14:26:35 +0100 Subject: [PATCH 0024/1914] DPL Analysis: remove unnecessary instances of selected_pack (#13892) --- Framework/Core/include/Framework/ASoA.h | 115 ++++++++++++------ .../Core/include/Framework/Configurable.h | 5 +- .../include/Framework/GroupedCombinations.h | 22 +++- 3 files changed, 101 insertions(+), 41 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 8af872a64176d..8ef7ed9539ec7 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -208,8 +208,6 @@ template struct TableMetadata { using columns = framework::pack; using persistent_columns_t = framework::selected_pack; - using external_index_columns_t = framework::selected_pack; - using internal_index_columns_t = framework::selected_pack; template static consteval std::array getMap(framework::pack) @@ -806,9 +804,6 @@ concept is_dynamic_column = requires(C& c) { template concept is_marker_column = requires { &C::mark; }; -template -using is_dynamic_t = 
std::conditional_t, std::true_type, std::false_type>; - template concept is_column = is_persistent_column || is_dynamic_column || is_indexing_column || is_marker_column; @@ -1031,6 +1026,17 @@ concept can_bind = requires(T&& t) { template concept has_index = (is_indexing_column || ...); +template + requires(!is_self_index_column) +consteval auto getBinding() -> typename C::binding_t +{ +} + +template +consteval auto getBinding() -> void +{ +} + template struct TableIterator : IP, C... { public: @@ -1038,9 +1044,9 @@ struct TableIterator : IP, C... { using policy_t = IP; using all_columns = framework::pack; using persistent_columns_t = framework::selected_pack; - using external_index_columns_t = framework::selected_pack; - using internal_index_columns_t = framework::selected_pack; - using bindings_pack_t = decltype([](framework::pack) -> framework::pack {}(external_index_columns_t{})); // decltype(extractBindings(external_index_columns_t{})); + using bindings_pack_t = decltype([](framework::pack) { + return framework::pack())...>{}; + }(all_columns{})); TableIterator(arrow::ChunkedArray* columnData[sizeof...(C)], IP&& policy) : IP{policy}, @@ -1133,7 +1139,13 @@ struct TableIterator : IP, C... { template void doSetCurrentIndex(framework::pack, TA* current) { - (CL::setCurrent(current), ...); + (framework::overloaded{ + [¤t, this] + requires(!is_self_index_column) + () { CI::setCurrent(current); }, + []() {}} + .template operator()(), + ...); } template @@ -1145,24 +1157,40 @@ struct TableIterator : IP, C... 
{ template auto getIndexBindingsImpl(framework::pack) const { - return std::vector{static_cast(*this).getCurrentRaw()...}; + std::vector result; + (framework::overloaded{ + [this, &result] + requires(!is_self_index_column) + () mutable { + result.emplace_back(CI::getCurrentRaw()); + }, + []() {}} + .template operator()(), + ...); + return result; } auto getIndexBindings() const { - return getIndexBindingsImpl(external_index_columns_t{}); + return getIndexBindingsImpl(all_columns{}); } template void bindExternalIndices(TA*... current) { - (doSetCurrentIndex(external_index_columns_t{}, current), ...); + (doSetCurrentIndex(all_columns{}, current), ...); } template void doSetCurrentIndexRaw(framework::pack p, std::vector&& ptrs) { - (Cs::setCurrentRaw(ptrs[framework::has_type_at_v(p)]), ...); + (framework::overloaded{ + [&ptrs, p, this] + requires(!is_self_index_column) + () { CI::setCurrentRaw(ptrs[framework::has_type_at_v(p)]); }, + []() {}} + .template operator()(), + ...); } template @@ -1170,18 +1198,22 @@ struct TableIterator : IP, C... 
{ { o2::soa::Binding b; b.bind(ptr); - (Cs::setCurrentRaw(b), ...); + (framework::overloaded{ + [&ptr, &b, this]() { CI::setCurrentRaw(b); }, + []() {}} + .template operator()(), + ...); } void bindExternalIndicesRaw(std::vector&& ptrs) { - doSetCurrentIndexRaw(external_index_columns_t{}, std::forward>(ptrs)); + doSetCurrentIndexRaw(all_columns{}, std::forward>(ptrs)); } template void bindInternalIndices(I const* table) { - doSetCurrentInternal(internal_index_columns_t{}, table); + doSetCurrentInternal(all_columns{}, table); } private: @@ -1365,25 +1397,37 @@ static constexpr std::string getLabelFromTypeForKey(std::string const& key) template consteval static bool hasIndexTo(framework::pack&&) { - return (o2::soa::is_binding_compatible_v() || ...); + return (framework::overloaded{ + [] + requires(!is_self_index_column) + () { return o2::soa::is_binding_compatible_v(); }, + []() { return false; }} + .template operator()() || + ...); } template consteval static bool hasSortedIndexTo(framework::pack&&) { - return ((C::sorted && o2::soa::is_binding_compatible_v()) || ...); + return (framework::overloaded{ + [] + requires(!is_self_index_column) + () { return (CI::sorted && o2::soa::is_binding_compatible_v()); }, + []() {}} + .template operator()() || + ...); } template consteval static bool relatedByIndex() { - return hasIndexTo(typename Z::table_t::external_index_columns_t{}); + return hasIndexTo(typename Z::table_t::columns_t{}); } template consteval static bool relatedBySortedIndex() { - return hasSortedIndexTo(typename Z::table_t::external_index_columns_t{}); + return hasSortedIndexTo(typename Z::table_t::columns_t{}); } } // namespace o2::soa @@ -1726,16 +1770,13 @@ class Table using persistent_columns_t = decltype([](framework::pack&&) -> framework::selected_pack {}(columns_t{})); using column_types = decltype([](framework::pack) -> framework::pack {}(persistent_columns_t{})); - using external_index_columns_t = decltype([](framework::pack&&) -> 
framework::selected_pack {}(columns_t{})); - using internal_index_columns_t = decltype([](framework::pack&&) -> framework::selected_pack {}(columns_t{})); template using base_iterator = decltype(base_iter(columns_t{})); template struct TableIteratorBase : base_iterator { using columns_t = typename Parent::columns_t; - using external_index_columns_t = typename Parent::external_index_columns_t; - using bindings_pack_t = decltype([](framework::pack) -> framework::pack {}(external_index_columns_t{})); + using bindings_pack_t = typename base_iterator::bindings_pack_t; // static constexpr const std::array originals{T::ref...}; static constexpr auto originals = Parent::originals; using policy_t = IP; @@ -1828,7 +1869,7 @@ class Table using decayed = std::decay_t; if constexpr (framework::has_type(bindings_pack_t{})) { // index to another table constexpr auto idx = framework::has_type_at_v(bindings_pack_t{}); - return framework::pack_element_t::getId(); + return framework::pack_element_t::getId(); } else if constexpr (std::same_as) { // self index return this->globalIndex(); } else if constexpr (is_indexing_column) { // soa::Index<> @@ -1838,20 +1879,17 @@ class Table } } - template + template auto getDynamicColumn() const { - using decayed = std::decay_t; - static_assert(is_dynamic_t(), "Requested column is not a dynamic column"); - return static_cast(*this).template getDynamicValue(); + return static_cast>(*this).template getDynamicValue(); } template + requires(is_dynamic_column || is_persistent_column) auto getValue() const { - using COL = std::decay_t; - static_assert(is_dynamic_t() || soa::is_persistent_column, "Should be persistent or dynamic column with no argument that has a return type convertable to float"); - return static_cast(static_cast(*this).get()); + return static_cast(static_cast>(*this).get()); } template @@ -2056,13 +2094,17 @@ class Table void bindInternalIndicesExplicit(o2::soa::Binding binding) { - 
doBindInternalIndicesExplicit(internal_index_columns_t{}, binding); + doBindInternalIndicesExplicit(columns_t{}, binding); } template void doBindInternalIndicesExplicit(framework::pack, o2::soa::Binding binding) { - (static_cast(mBegin).setCurrentRaw(binding), ...); + (framework::overloaded{ + [this, &binding]() { static_cast(mBegin).setCurrentRaw(binding); }, + []() {}} + .template operator()(), + ...); } void bindExternalIndicesRaw(std::vector&& ptrs) @@ -2079,7 +2121,7 @@ class Table template void copyIndexBindings(T& dest) const { - doCopyIndexBindings(external_index_columns_t{}, dest); + doCopyIndexBindings(columns_t{}, dest); } auto select(framework::expressions::Filter const& f) const @@ -3298,7 +3340,6 @@ class FilteredBase : public T using T::originals; using columns_t = typename T::columns_t; using persistent_columns_t = typename T::persistent_columns_t; - using external_index_columns_t = typename T::external_index_columns_t; using iterator = T::template iterator_template_o; using unfiltered_iterator = T::template iterator_template_o; @@ -3444,7 +3485,7 @@ class FilteredBase : public T template void copyIndexBindings(T1& dest) const { - doCopyIndexBindings(external_index_columns_t{}, dest); + doCopyIndexBindings(columns_t{}, dest); } template diff --git a/Framework/Core/include/Framework/Configurable.h b/Framework/Core/include/Framework/Configurable.h index 88e50cf3c7c26..930c37e700105 100644 --- a/Framework/Core/include/Framework/Configurable.h +++ b/Framework/Core/include/Framework/Configurable.h @@ -83,6 +83,9 @@ struct Configurable : IP { template using MutableConfigurable = Configurable>; +template +concept is_configurable = requires(T& t) { &T::operator typename T::type; }; + using ConfigurableAxis = Configurable, ConfigParamKind::kAxisSpec, ConfigurablePolicyConst, ConfigParamKind::kAxisSpec>>; template @@ -97,7 +100,7 @@ struct ProcessConfigurable : Configurable { }; template -concept is_process_configurable = base_of_template; +concept 
is_process_configurable = is_configurable && requires(T& t) { t.process; }; #define PROCESS_SWITCH(_Class_, _Name_, _Help_, _Default_) \ decltype(ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}) do##_Name_ = ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}; diff --git a/Framework/Core/include/Framework/GroupedCombinations.h b/Framework/Core/include/Framework/GroupedCombinations.h index 21d8384e3aa6e..9f450489ac50f 100644 --- a/Framework/Core/include/Framework/GroupedCombinations.h +++ b/Framework/Core/include/Framework/GroupedCombinations.h @@ -34,14 +34,30 @@ auto interleaveTuples(std::tuple& t1, std::tuple& t2) return interleaveTuplesImpl(t1, t2, std::index_sequence_for()); } +template + requires(!soa::is_self_index_column) +consteval auto isIndexTo() +{ + if constexpr (o2::soa::is_binding_compatible_v()) { + return std::true_type{}; + } else { + return std::false_type{}; + } +} + +template +consteval auto isIndexTo() +{ + return std::false_type{}; +} + template -using is_index_to_g_t = typename std::conditional(), std::true_type, std::false_type>::type; +using is_index_to_g_t = decltype(isIndexTo()); template expressions::BindingNode getMatchingIndexNode() { - using external_index_columns_pack = typename A::external_index_columns_t; - using selected_indices_t = selected_pack_multicondition, external_index_columns_pack>; + using selected_indices_t = selected_pack_multicondition, typename A::columns_t>; static_assert(pack_size(selected_indices_t{}) == 1, "No matching index column from associated to grouping"); using index_column_t = pack_head_t; return expressions::BindingNode{index_column_t::mLabel, o2::framework::TypeIdHelpers::uniqueId(), expressions::selectArrowType()}; From 1640a6274e0b4eb1ecf25d0a418cc3aae0a849ea Mon Sep 17 00:00:00 2001 From: swenzel Date: Tue, 28 Jan 2025 14:37:14 +0100 Subject: [PATCH 0025/1914] DigitizationContext: Prevent a segfault in QED filling --- 
DataFormats/simulation/src/DigitizationContext.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/DataFormats/simulation/src/DigitizationContext.cxx b/DataFormats/simulation/src/DigitizationContext.cxx index 975458c41fcb3..e875cb61951a9 100644 --- a/DataFormats/simulation/src/DigitizationContext.cxx +++ b/DataFormats/simulation/src/DigitizationContext.cxx @@ -289,6 +289,11 @@ DigitizationContext* DigitizationContext::loadFromFile(std::string_view filename void DigitizationContext::fillQED(std::string_view QEDprefix, int max_events, double qedrate) { + if (mEventRecords.size() <= 1) { + // nothing to do + return; + } + o2::steer::InteractionSampler qedInteractionSampler; qedInteractionSampler.setBunchFilling(mBCFilling); From 0a74715ccddebbf7bc2cf23152f9884e3c5adccd Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 28 Jan 2025 11:34:42 +0100 Subject: [PATCH 0026/1914] Add to matching debug info TPC inermost cl. row/pad --- .../include/GlobalTracking/MatchTPCITS.h | 6 +++- Detectors/GlobalTracking/src/MatchTPCITS.cxx | 32 ++++++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/Detectors/GlobalTracking/include/GlobalTracking/MatchTPCITS.h b/Detectors/GlobalTracking/include/GlobalTracking/MatchTPCITS.h index 8a8dbcb8fa9ae..00f2fc157a5ec 100644 --- a/Detectors/GlobalTracking/include/GlobalTracking/MatchTPCITS.h +++ b/Detectors/GlobalTracking/include/GlobalTracking/MatchTPCITS.h @@ -50,6 +50,7 @@ #include "ITSReconstruction/RecoGeomHelper.h" #include "TPCFastTransform.h" #include "GPUO2InterfaceRefit.h" +#include "GPUTPCGeometry.h" #include "GlobalTracking/MatchTPCITSParams.h" #include "DataFormatsITSMFT/TopologyDictionary.h" #include "DataFormatsITSMFT/TrkClusRef.h" @@ -132,6 +133,8 @@ struct TrackLocTPC : public o2::track::TrackParCov { int sourceID = 0; ///< TPC track origin in o2::dataformats::GlobalTrackID gid{}; // global track source ID (TPC track may be part of it) int matchID = MinusOne; ///< entry (non if MinusOne) of its 
matchTPC struct in the mMatchesTPC + uint8_t lowestRow = -1; + uint8_t padFromEdge = -1; Constraint_t constraint{Constrained}; float getCorrectedTime(float dt) const // return time0 corrected for extra drift (to match certain Z) @@ -143,7 +146,7 @@ struct TrackLocTPC : public o2::track::TrackParCov { return constraint == Constrained ? 0.f : (constraint == ASide ? dt : -dt); } - ClassDefNV(TrackLocTPC, 2); + ClassDefNV(TrackLocTPC, 3); }; ///< ITS track outward parameters propagated to reference X, with time bracket and index of @@ -738,6 +741,7 @@ class MatchTPCITS static constexpr float MaxSnp = 0.9; // max snp of ITS or TPC track at xRef to be matched static constexpr float MaxTgp = 2.064; // max tg corresponting to MaxSnp = MaxSnp/std::sqrt(1.-MaxSnp^2) static constexpr float MinTBToCleanCache = 600.; // keep in AB ITS cluster refs cache at most this number of TPC bins + static const o2::gpu::GPUTPCGeometry TPCGeometry; enum TimerIDs { SWTot, SWPrepITS, diff --git a/Detectors/GlobalTracking/src/MatchTPCITS.cxx b/Detectors/GlobalTracking/src/MatchTPCITS.cxx index eb2a2212edb30..436a12df51352 100644 --- a/Detectors/GlobalTracking/src/MatchTPCITS.cxx +++ b/Detectors/GlobalTracking/src/MatchTPCITS.cxx @@ -9,6 +9,13 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
+#include "GPUO2Interface.h" // Needed for propper settings in GPUParam.h +#include "GPUParam.h" +#include "GPUParam.inc" +#ifdef WITH_OPENMP +#include +#endif + #include #include #include @@ -50,13 +57,6 @@ #include "ITS3Reconstruction/IOUtils.h" #endif -#include "GPUO2Interface.h" // Needed for propper settings in GPUParam.h -#include "GPUParam.h" -#include "GPUParam.inc" -#ifdef WITH_OPENMP -#include -#endif - using namespace o2::globaltracking; using MatrixDSym4 = ROOT::Math::SMatrix>; @@ -68,6 +68,8 @@ constexpr float MatchTPCITS::Tan70, MatchTPCITS::Cos70I2, MatchTPCITS::MaxSnp, M LinksPoolMT* TPCABSeed::gLinksPool = nullptr; +const o2::gpu::GPUTPCGeometry MatchTPCITS::TPCGeometry{}; + //______________________________________________ MatchTPCITS::MatchTPCITS() = default; @@ -428,6 +430,12 @@ int MatchTPCITS::addTPCSeed(const o2::track::TrackParCov& _tr, float t0, float t if (clRow > mParams->askMinTPCRow[clSect]) { return -9; } + const auto& clus = mTPCClusterIdxStruct->clusters[clSect][clRow][clIdx]; + uint8_t padFromEdge = uint8_t(clus.getPad()); + if (padFromEdge > TPCGeometry.NPads(clRow) / 2) { + padFromEdge = TPCGeometry.NPads(clRow) - 1 - padFromEdge; + } + // create working copy of track param bool extConstrained = srcGID.getSource() != GTrackID::TPC; if (extConstrained) { @@ -442,6 +450,8 @@ int MatchTPCITS::addTPCSeed(const o2::track::TrackParCov& _tr, float t0, float t tpcID, srcGID, MinusOne, + clRow, + padFromEdge, (extConstrained || tpcOrig.hasBothSidesClusters()) ? TrackLocTPC::Constrained : (tpcOrig.hasASideClustersOnly() ? 
TrackLocTPC::ASide : TrackLocTPC::CSide)}); // propagate to matching Xref const auto& trackTune = TrackTuneParams::Instance(); @@ -2872,7 +2882,7 @@ void MatchTPCITS::dumpTPCOrig(bool acc, int tpcIndex) ///< fill debug tree for TPC original tracks (passing pT cut) mTimer[SWDBG].Start(false); const auto& tpcOrig = mTPCTracksArray[tpcIndex]; - uint8_t clSect = 0, clRow = 0, prevRow = 0xff; + uint8_t clSect = 0, clRow = 0, prevRow = 0xff, padFromEdge = -1; uint32_t clIdx = 0; int nshared = 0; std::array shMap{}; @@ -2888,6 +2898,11 @@ void MatchTPCITS::dumpTPCOrig(bool acc, int tpcIndex) prevRawShared = true; } } + const auto& clus = mTPCClusterIdxStruct->clusters[clSect][clRow][clIdx]; + padFromEdge = uint8_t(clus.getPad()); + if (padFromEdge > TPCGeometry.NPads(clRow) / 2) { + padFromEdge = TPCGeometry.NPads(clRow) - 1 - padFromEdge; + } int tb = tpcOrig.getTime0() * mNTPCOccBinLengthInv; float mltTPC = tb < 0 ? mTBinClOcc[0] : (tb >= mTBinClOcc.size() ? mTBinClOcc.back() : mTBinClOcc[tb]); (*mDBGOut) << "tpcOrig" @@ -2900,6 +2915,7 @@ void MatchTPCITS::dumpTPCOrig(bool acc, int tpcIndex) << "time0=" << tpcOrig.getTime0() << "trc=" << ((o2::track::TrackParCov&)tpcOrig) << "minRow=" << clRow + << "padFromEdge=" << padFromEdge << "multTPC=" << mltTPC; if (mMCTruthON) { (*mDBGOut) << "tpcOrig" From e7179fcb15cb0b9852f3abc22b4ebaa883d55cb2 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 29 Jan 2025 08:41:50 +0100 Subject: [PATCH 0027/1914] ITS-GPU: Cleanup for some host code (#13907) * Cleanup * Fix nCells printout --- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 12 ++++- .../GPU/ITStrackingGPU/TrackingKernels.h | 26 +++++----- .../ITS/tracking/GPU/cuda/TimeFrameGPU.cu | 14 +++++- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 48 ++++++++----------- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 46 +++++++++--------- 5 files changed, 81 insertions(+), 65 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h 
b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 066bef7631415..100e49def0d50 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h @@ -77,7 +77,8 @@ class TimeFrameGPU : public TimeFrame void createCellsDevice(); void createCellsLUTDevice(); void createNeighboursIndexTablesDevice(); - void createNeighboursDevice(const unsigned int& layer, std::vector>& neighbours); + void createNeighboursDevice(const unsigned int layer, const unsigned int nNeighbours); + void createNeighboursDevice(const unsigned int layer, std::vector>& neighbours); void createNeighboursLUTDevice(const int, const unsigned int); void createNeighboursDeviceArray(); void createTrackITSExtDevice(std::vector&); @@ -151,6 +152,9 @@ class TimeFrameGPU : public TimeFrame gsl::span getDeviceTracklet() { return mTrackletsDevice; } gsl::span getDeviceCells() { return mCellsDevice; } + // Overridden getters + int getNumberOfCells() const; + private: void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations bool mHostRegistered = false; @@ -252,6 +256,12 @@ inline std::vector TimeFrameGPU::getClusterSizes() return sizes; } +template +inline int TimeFrameGPU::getNumberOfCells() const +{ + return std::accumulate(mNCells.begin(), mNCells.end(), 0); +} + } // namespace gpu } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index 78636d00788bf..720867ddaba29 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -148,19 +148,19 @@ void computeCellsHandler(const Cluster** sortedClusters, const int nBlocks, const int nThreads); -void countCellNeighboursHandler(CellSeed** cellsLayersDevice, - int* neighboursLUTs, - int** 
cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const float maxChi2ClusterAttachment, - const float bz, - const int layerIndex, - const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, - const int nBlocks, - const int nThreads); +unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, + int* neighboursLUTs, + int** cellsLUTs, + gpuPair* cellNeighbours, + int* neighboursIndexTable, + const float maxChi2ClusterAttachment, + const float bz, + const int layerIndex, + const unsigned int nCells, + const unsigned int nCellsNext, + const int maxCellNeighbours, + const int nBlocks, + const int nThreads); void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, int* neighboursLUTs, diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu index fd067b9930fd0..b1aa55f533c34 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu @@ -406,7 +406,19 @@ void TimeFrameGPU::loadTrackSeedsDevice(std::vector& seeds) } template -void TimeFrameGPU::createNeighboursDevice(const unsigned int& layer, std::vector>& neighbours) +void TimeFrameGPU::createNeighboursDevice(const unsigned int layer, const unsigned int nNeighbours) +{ + START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "reserving neighbours"); + LOGP(debug, "gpu-allocation: reserving {} neighbours (pairs), for {} MB.", nNeighbours, nNeighbours * sizeof(gpuPair) / MB); + allocMemAsync(reinterpret_cast(&mNeighbourPairsDevice[layer]), nNeighbours * sizeof(gpuPair), &(mGpuStreams[0]), getExtAllocator()); + checkGPUError(cudaMemsetAsync(mNeighbourPairsDevice[layer], -1, nNeighbours * sizeof(gpuPair), mGpuStreams[0].get())); + LOGP(debug, "gpu-allocation: reserving {} neighbours, for {} MB.", nNeighbours, nNeighbours * sizeof(gpuPair) / MB); + allocMemAsync(reinterpret_cast(&mNeighboursDevice[layer]), nNeighbours * 
sizeof(int), &(mGpuStreams[0]), getExtAllocator()); + STOP_GPU_STREAM_TIMER(mGpuStreams[0].get()); +} + +template +void TimeFrameGPU::createNeighboursDevice(const unsigned int layer, std::vector>& neighbours) { START_GPU_STREAM_TIMER(mGpuStreams[0].get(), "reserving neighbours"); mCellsNeighbours[layer].clear(); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 4821ebb636f54..3c65faddcff71 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -212,36 +212,30 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) { mTimeFrameGPU->createNeighboursIndexTablesDevice(); auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); - std::vector>> cellsNeighboursLayer(mTrkParams[iteration].CellsPerRoad() - 1); for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) { const int nextLayerCellsNum{static_cast(mTimeFrameGPU->getNCells()[iLayer + 1])}; - mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].clear(); - mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].resize(nextLayerCellsNum, 0); - // if (mTimeFrameGPU->getCells()[iLayer + 1].empty() || - // mTimeFrameGPU->getCellsLookupTable()[iLayer].empty()) { - // mTimeFrameGPU->getCellsNeighbours()[iLayer].clear(); - // continue; - // } + if (!nextLayerCellsNum) { + continue; + } mTimeFrameGPU->createNeighboursLUTDevice(iLayer, nextLayerCellsNum); - countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), - mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here. 
- mTimeFrameGPU->getDeviceArrayCellsLUT(), - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), - mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), - mTrkParams[0].MaxChi2ClusterAttachment, - mBz, - iLayer, - mTimeFrameGPU->getNCells()[iLayer], - nextLayerCellsNum, - 1e2, - conf.nBlocks, - conf.nThreads); - mTimeFrameGPU->downloadNeighboursLUTDevice(mTimeFrameGPU->getCellsNeighboursLUT()[iLayer], iLayer); - // Get the number of found cells from LUT - cellsNeighboursLayer[iLayer].resize(mTimeFrameGPU->getCellsNeighboursLUT()[iLayer].back()); - mTimeFrameGPU->createNeighboursDevice(iLayer, cellsNeighboursLayer[iLayer]); + unsigned int nNeigh = countCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), + mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), // LUT is initialised here. + mTimeFrameGPU->getDeviceArrayCellsLUT(), + mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), + mTimeFrameGPU->getDeviceNeighboursIndexTables(iLayer), + mTrkParams[0].MaxChi2ClusterAttachment, + mBz, + iLayer, + mTimeFrameGPU->getNCells()[iLayer], + nextLayerCellsNum, + 1e2, + conf.nBlocks, + conf.nThreads); + + mTimeFrameGPU->createNeighboursDevice(iLayer, nNeigh); + computeCellNeighboursHandler(mTimeFrameGPU->getDeviceArrayCells(), mTimeFrameGPU->getDeviceNeighboursLUT(iLayer), mTimeFrameGPU->getDeviceArrayCellsLUT(), @@ -255,13 +249,11 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) 1e2, conf.nBlocks, conf.nThreads); - mTimeFrameGPU->getCellsNeighbours()[iLayer].clear(); - mTimeFrameGPU->getCellsNeighbours()[iLayer].reserve(cellsNeighboursLayer[iLayer].size()); filterCellNeighboursHandler(mTimeFrameGPU->getCellsNeighbours()[iLayer], mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), mTimeFrameGPU->getDeviceNeighbours(iLayer), - cellsNeighboursLayer[iLayer].size()); + nNeigh); } mTimeFrameGPU->createNeighboursDeviceArray(); mTimeFrameGPU->unregisterRest(); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu 
b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 10459cf800b6c..4fa7913c10e82 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -1066,19 +1066,19 @@ void computeCellsHandler( nSigmaCut); // const float } -void countCellNeighboursHandler(CellSeed** cellsLayersDevice, - int* neighboursLUT, - int** cellsLUTs, - gpuPair* cellNeighbours, - int* neighboursIndexTable, - const float maxChi2ClusterAttachment, - const float bz, - const int layerIndex, - const unsigned int nCells, - const unsigned int nCellsNext, - const int maxCellNeighbours, - const int nBlocks, - const int nThreads) +unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, + int* neighboursLUT, + int** cellsLUTs, + gpuPair* cellNeighbours, + int* neighboursIndexTable, + const float maxChi2ClusterAttachment, + const float bz, + const int layerIndex, + const unsigned int nCells, + const unsigned int nCellsNext, + const int maxCellNeighbours, + const int nBlocks, + const int nThreads) { gpu::computeLayerCellNeighboursKernel<<>>( cellsLayersDevice, @@ -1091,8 +1091,7 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, layerIndex, nCells, maxCellNeighbours); - // gpuCheckError(cudaPeekAtLastError()); - // gpuCheckError(cudaDeviceSynchronize()); + void *d_temp_storage = nullptr, *d_temp_storage_2 = nullptr; size_t temp_storage_bytes = 0, temp_storage_bytes_2 = 0; gpuCheckError(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage @@ -1102,17 +1101,19 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, nCellsNext)); // num_items discardResult(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - gpuCheckError(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items + gpuCheckError(cub::DeviceScan::InclusiveSum(d_temp_storage, // 
d_temp_storage + temp_storage_bytes, // temp_storage_bytes + neighboursLUT, // d_in + neighboursLUT, // d_out + nCellsNext)); // num_items + gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage temp_storage_bytes_2, // temp_storage_bytes neighboursIndexTable, // d_in neighboursIndexTable, // d_out nCells + 1, // num_items 0)); // NOLINT: this is the offset of the sum, not a pointer + discardResult(cudaMalloc(&d_temp_storage_2, temp_storage_bytes_2)); gpuCheckError(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage temp_storage_bytes_2, // temp_storage_bytes @@ -1120,10 +1121,11 @@ void countCellNeighboursHandler(CellSeed** cellsLayersDevice, neighboursIndexTable, // d_out nCells + 1, // num_items 0)); // NOLINT: this is the offset of the sum, not a pointer + unsigned int nNeighbours; + gpuCheckError(cudaMemcpy(&nNeighbours, &neighboursLUT[nCellsNext - 1], sizeof(unsigned int), cudaMemcpyDeviceToHost)); gpuCheckError(cudaFree(d_temp_storage)); gpuCheckError(cudaFree(d_temp_storage_2)); - gpuCheckError(cudaPeekAtLastError()); - gpuCheckError(cudaDeviceSynchronize()); + return nNeighbours; } void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, From dff469f9af9f2a8665b7680015402ba4b5e1637b Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 28 Jan 2025 14:35:16 +0100 Subject: [PATCH 0028/1914] Fix fake/correct status for barrel tracks The fMcMask bit 15 (fake global track label or TOF_label != TPC_label) was wrong since original TOF cluster label (set in the reconstruction) was compared with TPC remapped label prepared for AOD storage. In fact, we don't need to consider separately the global_label.isFake and TOF-TPC mismatch: TOF is the last detector in the matching process and the global track label is determined by the TPC track label. Hence, if the TOF match is present but its cluster is not contributed by the TPC track, the global label isFake will be necessarily true, and vice versa.
Also, the status of bit 13 (flagging ITS-TPC mismatch) was covering only track-to-track matches but not those from the afterburner. Now settings of fakeness relies on the isFake status from the reconstruction. --- .../GlobalTracking/src/RecoContainer.cxx | 2 +- .../AODProducerWorkflowSpec.h | 2 - Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 56 +++++-------------- 3 files changed, 15 insertions(+), 45 deletions(-) diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index 60c18b966abed..c26de2bfda896 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -1447,7 +1447,7 @@ RecoContainer::GlobalIDSet RecoContainer::getSingleDetectorRefs(GTrackID gidx) c table[GTrackID::MCH] = parent0.getMCHRef(); table[GTrackID::MID] = parent0.getMIDRef(); } - return std::move(table); + return table; } //________________________________________________________ diff --git a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h index 241846f1a9270..ae866b2006e7f 100644 --- a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h +++ b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h @@ -482,8 +482,6 @@ class AODProducerWorkflowDPL : public Task // using -1 as dummies for AOD struct MCLabels { uint32_t labelID = -1; - uint32_t labelITS = -1; - uint32_t labelTPC = -1; uint16_t labelMask = 0; uint8_t fwdLabelMask = 0; }; diff --git a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx index 6083052eb1168..c8e3209cc0e29 100644 --- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx +++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx @@ -109,6 +109,7 @@ using PVertex = o2::dataformats::PrimaryVertex; using GIndex = o2::dataformats::VtxTrackIndex; using DataRequest = 
o2::globaltracking::DataRequest; using GID = o2::dataformats::GlobalTrackID; +using DetID = o2::detectors::DetID; using SMatrix55Sym = ROOT::Math::SMatrix>; namespace o2::aodproducer @@ -1058,9 +1059,9 @@ void AODProducerWorkflowDPL::fillMCTrackLabelsTable(MCTrackLabelCursorType& mcTr int vertexId) { // labelMask (temporary) usage: - // bit 13 -- ITS/TPC or TPC/TOF labels are not equal + // bit 13 -- ITS/TPC with ITS label (track of AB tracklet) different from TPC // bit 14 -- isNoise() == true - // bit 15 -- isFake() == true + // bit 15 -- isFake() == true (defined by the fakeness of the top level global track, i.e. if TOF is present, fake means that the track of the TPC label does not contribute to TOF cluster) // labelID = -1 -- label is not set for (int src = GIndex::NSources; src--;) { @@ -1084,7 +1085,7 @@ void AODProducerWorkflowDPL::fillMCTrackLabelsTable(MCTrackLabelCursorType& mcTr if (GIndex::includesSource(src, mInputSources)) { auto mcTruth = data.getTrackMCLabel(trackIndex); - MCLabels labelHolder; + MCLabels labelHolder{}; if ((src == GIndex::Source::MFT) || (src == GIndex::Source::MFTMCH) || (src == GIndex::Source::MCH) || (src == GIndex::Source::MCHMID)) { // treating mft and fwd labels separately if (!needToStore(src == GIndex::Source::MFT ? 
mGIDToTableMFTID : mGIDToTableFwdID)) { continue; @@ -1110,51 +1111,22 @@ void AODProducerWorkflowDPL::fillMCTrackLabelsTable(MCTrackLabelCursorType& mcTr continue; } if (mcTruth.isValid()) { // if not set, -1 will be stored - labelHolder.labelID = (mToStore[mcTruth.getSourceID()][mcTruth.getEventID()])[mcTruth.getTrackID()]; - } - // treating possible mismatches and fakes for global tracks - auto contributorsGID = data.getSingleDetectorRefs(trackIndex); - bool isSetTPC = contributorsGID[GIndex::Source::TPC].isIndexSet(); - bool isSetITS = contributorsGID[GIndex::Source::ITS].isIndexSet(); - bool isSetTOF = contributorsGID[GIndex::Source::TOF].isIndexSet(); - bool isTOFFake = true; - if (isSetTPC && (isSetITS || isSetTOF)) { - auto mcTruthTPC = data.getTrackMCLabel(contributorsGID[GIndex::Source::TPC]); - if (mcTruthTPC.isValid()) { - labelHolder.labelTPC = (mToStore[mcTruthTPC.getSourceID()][mcTruthTPC.getEventID()])[mcTruthTPC.getTrackID()]; - labelHolder.labelID = labelHolder.labelTPC; - } - if (isSetITS) { - auto mcTruthITS = data.getTrackMCLabel(contributorsGID[GIndex::Source::ITS]); - if (mcTruthITS.isValid()) { - labelHolder.labelITS = (mToStore[mcTruthITS.getSourceID()][mcTruthITS.getEventID()])[mcTruthITS.getTrackID()]; - } - if (labelHolder.labelITS != labelHolder.labelTPC) { - LOG(debug) << "ITS-TPC MCTruth: labelIDs do not match at " << trackIndex.getIndex() << ", src = " << src; - labelHolder.labelMask |= (0x1 << 13); - } + labelHolder.labelID = (mToStore[mcTruth.getSourceID()][mcTruth.getEventID()])[mcTruth.getTrackID()]; // defined by TPC if it contributes, otherwise: by ITS + if (mcTruth.isFake()) { + labelHolder.labelMask |= (0x1 << 15); } - if (isSetTOF) { - const auto& labelsTOF = data.getTOFClustersMCLabels()->getLabels(contributorsGID[GIndex::Source::TOF]); - for (auto& mcLabel : labelsTOF) { - if (!mcLabel.isValid()) { - continue; - } - if (mcLabel == labelHolder.labelTPC) { - isTOFFake = false; - break; + if 
(trackIndex.includesDet(DetID::TPC) && trackIndex.getSource() != GIndex::Source::TPC) { // this is global track + auto contributorsGID = data.getSingleDetectorRefs(trackIndex); + if (contributorsGID[GIndex::Source::ITSTPC].isIndexSet()) { // there is a match to ITS tracks or ITSAB tracklet! + if (data.getTrackMCLabel(contributorsGID[GIndex::Source::ITSTPC]).isFake()) { + labelHolder.labelMask |= (0x1 << 13); } } } - } - if (mcTruth.isFake() || (isSetTOF && isTOFFake)) { - labelHolder.labelMask |= (0x1 << 15); - } - if (mcTruth.isNoise()) { + } else if (mcTruth.isNoise()) { labelHolder.labelMask |= (0x1 << 14); } - mcTrackLabelCursor(labelHolder.labelID, - labelHolder.labelMask); + mcTrackLabelCursor(labelHolder.labelID, labelHolder.labelMask); } } } From f6ce9aecb14845880e7d649e8ff91d095611a6a1 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 29 Jan 2025 09:37:14 +0100 Subject: [PATCH 0029/1914] Revert "DPL Analysis: remove unnecessary instances of selected_pack (#13892)" (#13913) This reverts commit 53ed758f32eaf659b7a9dc0a7c701fcd300a7544. 
--- Framework/Core/include/Framework/ASoA.h | 115 ++++++------------ .../Core/include/Framework/Configurable.h | 5 +- .../include/Framework/GroupedCombinations.h | 22 +--- 3 files changed, 41 insertions(+), 101 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 8ef7ed9539ec7..8af872a64176d 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -208,6 +208,8 @@ template struct TableMetadata { using columns = framework::pack; using persistent_columns_t = framework::selected_pack; + using external_index_columns_t = framework::selected_pack; + using internal_index_columns_t = framework::selected_pack; template static consteval std::array getMap(framework::pack) @@ -804,6 +806,9 @@ concept is_dynamic_column = requires(C& c) { template concept is_marker_column = requires { &C::mark; }; +template +using is_dynamic_t = std::conditional_t, std::true_type, std::false_type>; + template concept is_column = is_persistent_column || is_dynamic_column || is_indexing_column || is_marker_column; @@ -1026,17 +1031,6 @@ concept can_bind = requires(T&& t) { template concept has_index = (is_indexing_column || ...); -template - requires(!is_self_index_column) -consteval auto getBinding() -> typename C::binding_t -{ -} - -template -consteval auto getBinding() -> void -{ -} - template struct TableIterator : IP, C... { public: @@ -1044,9 +1038,9 @@ struct TableIterator : IP, C... 
{ using policy_t = IP; using all_columns = framework::pack; using persistent_columns_t = framework::selected_pack; - using bindings_pack_t = decltype([](framework::pack) { - return framework::pack())...>{}; - }(all_columns{})); + using external_index_columns_t = framework::selected_pack; + using internal_index_columns_t = framework::selected_pack; + using bindings_pack_t = decltype([](framework::pack) -> framework::pack {}(external_index_columns_t{})); // decltype(extractBindings(external_index_columns_t{})); TableIterator(arrow::ChunkedArray* columnData[sizeof...(C)], IP&& policy) : IP{policy}, @@ -1139,13 +1133,7 @@ struct TableIterator : IP, C... { template void doSetCurrentIndex(framework::pack, TA* current) { - (framework::overloaded{ - [¤t, this] - requires(!is_self_index_column) - () { CI::setCurrent(current); }, - []() {}} - .template operator()(), - ...); + (CL::setCurrent(current), ...); } template @@ -1157,40 +1145,24 @@ struct TableIterator : IP, C... { template auto getIndexBindingsImpl(framework::pack) const { - std::vector result; - (framework::overloaded{ - [this, &result] - requires(!is_self_index_column) - () mutable { - result.emplace_back(CI::getCurrentRaw()); - }, - []() {}} - .template operator()(), - ...); - return result; + return std::vector{static_cast(*this).getCurrentRaw()...}; } auto getIndexBindings() const { - return getIndexBindingsImpl(all_columns{}); + return getIndexBindingsImpl(external_index_columns_t{}); } template void bindExternalIndices(TA*... 
current) { - (doSetCurrentIndex(all_columns{}, current), ...); + (doSetCurrentIndex(external_index_columns_t{}, current), ...); } template void doSetCurrentIndexRaw(framework::pack p, std::vector&& ptrs) { - (framework::overloaded{ - [&ptrs, p, this] - requires(!is_self_index_column) - () { CI::setCurrentRaw(ptrs[framework::has_type_at_v(p)]); }, - []() {}} - .template operator()(), - ...); + (Cs::setCurrentRaw(ptrs[framework::has_type_at_v(p)]), ...); } template @@ -1198,22 +1170,18 @@ struct TableIterator : IP, C... { { o2::soa::Binding b; b.bind(ptr); - (framework::overloaded{ - [&ptr, &b, this]() { CI::setCurrentRaw(b); }, - []() {}} - .template operator()(), - ...); + (Cs::setCurrentRaw(b), ...); } void bindExternalIndicesRaw(std::vector&& ptrs) { - doSetCurrentIndexRaw(all_columns{}, std::forward>(ptrs)); + doSetCurrentIndexRaw(external_index_columns_t{}, std::forward>(ptrs)); } template void bindInternalIndices(I const* table) { - doSetCurrentInternal(all_columns{}, table); + doSetCurrentInternal(internal_index_columns_t{}, table); } private: @@ -1397,37 +1365,25 @@ static constexpr std::string getLabelFromTypeForKey(std::string const& key) template consteval static bool hasIndexTo(framework::pack&&) { - return (framework::overloaded{ - [] - requires(!is_self_index_column) - () { return o2::soa::is_binding_compatible_v(); }, - []() { return false; }} - .template operator()() || - ...); + return (o2::soa::is_binding_compatible_v() || ...); } template consteval static bool hasSortedIndexTo(framework::pack&&) { - return (framework::overloaded{ - [] - requires(!is_self_index_column) - () { return (CI::sorted && o2::soa::is_binding_compatible_v()); }, - []() {}} - .template operator()() || - ...); + return ((C::sorted && o2::soa::is_binding_compatible_v()) || ...); } template consteval static bool relatedByIndex() { - return hasIndexTo(typename Z::table_t::columns_t{}); + return hasIndexTo(typename Z::table_t::external_index_columns_t{}); } template consteval 
static bool relatedBySortedIndex() { - return hasSortedIndexTo(typename Z::table_t::columns_t{}); + return hasSortedIndexTo(typename Z::table_t::external_index_columns_t{}); } } // namespace o2::soa @@ -1770,13 +1726,16 @@ class Table using persistent_columns_t = decltype([](framework::pack&&) -> framework::selected_pack {}(columns_t{})); using column_types = decltype([](framework::pack) -> framework::pack {}(persistent_columns_t{})); + using external_index_columns_t = decltype([](framework::pack&&) -> framework::selected_pack {}(columns_t{})); + using internal_index_columns_t = decltype([](framework::pack&&) -> framework::selected_pack {}(columns_t{})); template using base_iterator = decltype(base_iter(columns_t{})); template struct TableIteratorBase : base_iterator { using columns_t = typename Parent::columns_t; - using bindings_pack_t = typename base_iterator::bindings_pack_t; + using external_index_columns_t = typename Parent::external_index_columns_t; + using bindings_pack_t = decltype([](framework::pack) -> framework::pack {}(external_index_columns_t{})); // static constexpr const std::array originals{T::ref...}; static constexpr auto originals = Parent::originals; using policy_t = IP; @@ -1869,7 +1828,7 @@ class Table using decayed = std::decay_t; if constexpr (framework::has_type(bindings_pack_t{})) { // index to another table constexpr auto idx = framework::has_type_at_v(bindings_pack_t{}); - return framework::pack_element_t::getId(); + return framework::pack_element_t::getId(); } else if constexpr (std::same_as) { // self index return this->globalIndex(); } else if constexpr (is_indexing_column) { // soa::Index<> @@ -1879,17 +1838,20 @@ class Table } } - template + template auto getDynamicColumn() const { - return static_cast>(*this).template getDynamicValue(); + using decayed = std::decay_t; + static_assert(is_dynamic_t(), "Requested column is not a dynamic column"); + return static_cast(*this).template getDynamicValue(); } template - 
requires(is_dynamic_column || is_persistent_column) auto getValue() const { - return static_cast(static_cast>(*this).get()); + using COL = std::decay_t; + static_assert(is_dynamic_t() || soa::is_persistent_column, "Should be persistent or dynamic column with no argument that has a return type convertable to float"); + return static_cast(static_cast(*this).get()); } template @@ -2094,17 +2056,13 @@ class Table void bindInternalIndicesExplicit(o2::soa::Binding binding) { - doBindInternalIndicesExplicit(columns_t{}, binding); + doBindInternalIndicesExplicit(internal_index_columns_t{}, binding); } template void doBindInternalIndicesExplicit(framework::pack, o2::soa::Binding binding) { - (framework::overloaded{ - [this, &binding]() { static_cast(mBegin).setCurrentRaw(binding); }, - []() {}} - .template operator()(), - ...); + (static_cast(mBegin).setCurrentRaw(binding), ...); } void bindExternalIndicesRaw(std::vector&& ptrs) @@ -2121,7 +2079,7 @@ class Table template void copyIndexBindings(T& dest) const { - doCopyIndexBindings(columns_t{}, dest); + doCopyIndexBindings(external_index_columns_t{}, dest); } auto select(framework::expressions::Filter const& f) const @@ -3340,6 +3298,7 @@ class FilteredBase : public T using T::originals; using columns_t = typename T::columns_t; using persistent_columns_t = typename T::persistent_columns_t; + using external_index_columns_t = typename T::external_index_columns_t; using iterator = T::template iterator_template_o; using unfiltered_iterator = T::template iterator_template_o; @@ -3485,7 +3444,7 @@ class FilteredBase : public T template void copyIndexBindings(T1& dest) const { - doCopyIndexBindings(columns_t{}, dest); + doCopyIndexBindings(external_index_columns_t{}, dest); } template diff --git a/Framework/Core/include/Framework/Configurable.h b/Framework/Core/include/Framework/Configurable.h index 930c37e700105..88e50cf3c7c26 100644 --- a/Framework/Core/include/Framework/Configurable.h +++ 
b/Framework/Core/include/Framework/Configurable.h @@ -83,9 +83,6 @@ struct Configurable : IP { template using MutableConfigurable = Configurable>; -template -concept is_configurable = requires(T& t) { &T::operator typename T::type; }; - using ConfigurableAxis = Configurable, ConfigParamKind::kAxisSpec, ConfigurablePolicyConst, ConfigParamKind::kAxisSpec>>; template @@ -100,7 +97,7 @@ struct ProcessConfigurable : Configurable { }; template -concept is_process_configurable = is_configurable && requires(T& t) { t.process; }; +concept is_process_configurable = base_of_template; #define PROCESS_SWITCH(_Class_, _Name_, _Help_, _Default_) \ decltype(ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}) do##_Name_ = ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}; diff --git a/Framework/Core/include/Framework/GroupedCombinations.h b/Framework/Core/include/Framework/GroupedCombinations.h index 9f450489ac50f..21d8384e3aa6e 100644 --- a/Framework/Core/include/Framework/GroupedCombinations.h +++ b/Framework/Core/include/Framework/GroupedCombinations.h @@ -34,30 +34,14 @@ auto interleaveTuples(std::tuple& t1, std::tuple& t2) return interleaveTuplesImpl(t1, t2, std::index_sequence_for()); } -template - requires(!soa::is_self_index_column) -consteval auto isIndexTo() -{ - if constexpr (o2::soa::is_binding_compatible_v()) { - return std::true_type{}; - } else { - return std::false_type{}; - } -} - -template -consteval auto isIndexTo() -{ - return std::false_type{}; -} - template -using is_index_to_g_t = decltype(isIndexTo()); +using is_index_to_g_t = typename std::conditional(), std::true_type, std::false_type>::type; template expressions::BindingNode getMatchingIndexNode() { - using selected_indices_t = selected_pack_multicondition, typename A::columns_t>; + using external_index_columns_pack = typename A::external_index_columns_t; + using selected_indices_t = selected_pack_multicondition, external_index_columns_pack>; 
static_assert(pack_size(selected_indices_t{}) == 1, "No matching index column from associated to grouping"); using index_column_t = pack_head_t; return expressions::BindingNode{index_column_t::mLabel, o2::framework::TypeIdHelpers::uniqueId(), expressions::selectArrowType()}; From 20eb9d23bc6cfa09b0b9fd0d438269689197f9c7 Mon Sep 17 00:00:00 2001 From: noferini <9963644+noferini@users.noreply.github.com> Date: Thu, 9 Jan 2025 19:31:08 +0100 Subject: [PATCH 0030/1914] improve TOF infos in TPCtimeseries --- .../ReconstructionDataFormats/MatchInfoTOF.h | 11 +++ .../include/TPCWorkflow/TPCTimeSeriesSpec.h | 2 +- .../TPC/workflow/src/TPCTimeSeriesSpec.cxx | 80 +++++++++++++++++-- .../TPC/workflow/src/tpc-time-series.cxx | 4 +- prodtests/full-system-test/calib-workflow.sh | 2 +- 5 files changed, 90 insertions(+), 9 deletions(-) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/MatchInfoTOF.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/MatchInfoTOF.h index 3b872374dd35a..1816e8604c0be 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/MatchInfoTOF.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/MatchInfoTOF.h @@ -75,6 +75,17 @@ class MatchInfoTOF double getT0true() const { return mT0true; } void setT0true(double val) { mT0true = val; } + enum QualityFlags { isMultiHitX = 0x1 << 0, + isMultiHitZ = 0x1 << 1, + badDy = 0x1 << 2, + isMultiStrip = 0x1 << 3, + isNotInPad = 0x1 << 4, + chiGT3 = 0x1 << 5, + chiGT5 = 0x1 << 6, + hasT0sameBC = 0x1 << 7, + hasT0_1BCbefore = 0x1 << 8, + hasT0_2BCbefore = 0x1 << 9 }; + private: int mIdLocal; // track id in sector of the pair track-TOFcluster float mChi2; // chi2 of the pair track-TOFcluster diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h index d7da0b9acb343..3a61472d4bd8a 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h +++ 
b/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h @@ -23,7 +23,7 @@ namespace tpc static constexpr header::DataDescription getDataDescriptionTimeSeries() { return header::DataDescription{"TIMESERIES"}; } static constexpr header::DataDescription getDataDescriptionTPCTimeSeriesTFId() { return header::DataDescription{"ITPCTSTFID"}; } -o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, o2::dataformats::GlobalTrackID::mask_t src); +o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, o2::dataformats::GlobalTrackID::mask_t src, bool useft0 = false); } // end namespace tpc } // end namespace o2 diff --git a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx index c34cc0b7e9863..b2bf6d824ab86 100644 --- a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx +++ b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx @@ -45,6 +45,7 @@ #include "TROOT.h" #include "ReconstructionDataFormats/MatchInfoTOF.h" #include "DataFormatsTOF/Cluster.h" +#include "DataFormatsFT0/RecPoints.h" using namespace o2::globaltracking; using GTrackID = o2::dataformats::GlobalTrackID; @@ -206,25 +207,86 @@ class TPCTimeSeries : public Task indicesITSTPC[tracksITSTPC[i].getRefTPC().getIndex()] = {i, idxVtx}; } - std::vector> idxTPCTrackToTOFCluster; // store for each tpc track index the index to the TOF cluster + std::vector> idxTPCTrackToTOFCluster; // store for each tpc track index the index to the TOF cluster // get matches to TOF in case skimmed data is produced if (mUnbinnedWriter) { // getLTIntegralOut(), ///< L,TOF integral calculated during the propagation // getSignal() mSignal = 0.0; ///< TOF time in ps o2::track::TrackLTIntegral defLT; - idxTPCTrackToTOFCluster = std::vector>(tracksTPC.size(), {-1, -999, -999, defLT, 
0, 0}); + idxTPCTrackToTOFCluster = std::vector>(tracksTPC.size(), {-1, -999, -999, defLT, 0, 0, 0}); const std::vector> tofMatches{recoData.getTPCTOFMatches(), recoData.getTPCTRDTOFMatches(), recoData.getITSTPCTOFMatches(), recoData.getITSTPCTRDTOFMatches()}; + const auto& ft0rec = recoData.getFT0RecPoints(); + // fill available FT0-AC event times vs BClong + std::map t0array; + for (const auto& t0 : ft0rec) { + if (!(t0.isValidTime(1) && t0.isValidTime(2))) { // skip if !(A & C) + continue; + } + + auto bclong = t0.mIntRecord.differenceInBC(recoData.startIR); + if (t0array.find(bclong) == t0array.end()) { // add if it doesn't exist + t0array.emplace(std::make_pair(bclong, t0.getCollisionTime(0))); + } + } + + static const double BC_TIME_INPS_INV = 1E-3 / o2::constants::lhc::LHCBunchSpacingNS; + // loop over ITS-TPC-TRD-TOF and ITS-TPC-TOF tracks an store for each ITS-TPC track the TOF track index for (const auto& tofMatch : tofMatches) { for (const auto& tpctofmatch : tofMatch) { auto refTPC = recoData.getTPCContributorGID(tpctofmatch.getTrackRef()); if (refTPC.isIndexSet()) { o2::track::TrackLTIntegral ltIntegral = tpctofmatch.getLTIntegralOut(); - double signal = tpctofmatch.getSignal(); + ULong64_t bclongtof = (tpctofmatch.getSignal() - 10000) * BC_TIME_INPS_INV; + double t0 = 0; // bclongtof * o2::constants::lhc::LHCBunchSpacingNS * 1E3; // if you want to subtract also the BC uncomment this part (-> tofsignal can be a float) + unsigned int mask = 0; + if (!(t0array.find(bclongtof) == t0array.end())) { // subtract FT0-AC if it exists in the same BC + t0 += t0array.find(bclongtof)->second; + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::hasT0sameBC; // 8th bit if FT0-AC in same BC + } + + double signal = tpctofmatch.getSignal() - t0; float deltaT = tpctofmatch.getDeltaT(); - idxTPCTrackToTOFCluster[refTPC] = {tpctofmatch.getIdxTOFCl(), tpctofmatch.getDXatTOF(), tpctofmatch.getDZatTOF(), ltIntegral, signal, deltaT}; + + float dy = 
tpctofmatch.getDYatTOF(); // residual orthogonal to the strip (it should be close to zero) + bool isMultiHitZ = tpctofmatch.getHitPatternUpDown(); + bool isMultiHitX = tpctofmatch.getHitPatternLeftRight(); + bool isMultiStripMatch = tpctofmatch.getChi2() < 1E-9; + float chi2 = tpctofmatch.getChi2(); + bool hasT0_1BCbefore = (t0array.find(bclongtof - 1) != t0array.end()); + bool hasT0_2BCbefore = (t0array.find(bclongtof - 2) != t0array.end()); + + if (isMultiHitX) { // 1st bit on if multiple hits along X + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::isMultiHitX; + } + if (isMultiHitZ) { // 2nd bit on if multiple hits along Z + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::isMultiHitZ; + } + if (fabs(dy) > 0.5) { // 3rd bit on if Y-residual too large + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::badDy; + } + if (isMultiStripMatch) { // 4th bit on if two strips fired + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::isMultiStrip; + } + if (chi2 > 1E-4) { // 5th bit on if chi2 > 1E-4 -> not inside the pad + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::isNotInPad; + } + if (chi2 > 3) { // 6th bit on if chi2 > 3 + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::chiGT3; + } + if (chi2 > 5) { // 7th bit on if chi2 > 5 + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::chiGT5; + } + if (hasT0_1BCbefore) { // 9th bit if FT0-AC also BC before + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::hasT0_1BCbefore; + } + if (hasT0_2BCbefore) { // 10th bit if FT0-AC also 2BCs before -- NOTE(review): this branch sets hasT0_1BCbefore; presumably hasT0_2BCbefore was intended, confirm + mask |= o2::dataformats::MatchInfoTOF::QualityFlags::hasT0_1BCbefore; + } + + idxTPCTrackToTOFCluster[refTPC] = {tpctofmatch.getIdxTOFCl(), tpctofmatch.getDXatTOF(), tpctofmatch.getDZatTOF(), ltIntegral, signal, deltaT, mask}; } } } @@ -1055,7 +1117,7 @@ class TPCTimeSeries : public Task return isGoodTrack; } - void fillDCA(const gsl::span tracksTPC, const gsl::span tracksITSTPC, const gsl::span vertices, const int iTrk, const int iThread, const
std::unordered_map>& indicesITSTPC, const gsl::span tracksITS, const std::vector>& idxTPCTrackToTOFCluster, const gsl::span tofClusters) + void fillDCA(const gsl::span tracksTPC, const gsl::span tracksITSTPC, const gsl::span vertices, const int iTrk, const int iThread, const std::unordered_map>& indicesITSTPC, const gsl::span tracksITS, const std::vector>& idxTPCTrackToTOFCluster, const gsl::span tofClusters) { const auto& trackFull = tracksTPC[iTrk]; const bool isGoodTrack = checkTrack(trackFull); @@ -1444,6 +1506,7 @@ class TPCTimeSeries : public Task << "mDeltaTTOFTPC=" << std::get<5>(idxTPCTrackToTOFCluster[iTrk]) /// delta T- TPC TOF << "vertexTime=" << vertexTime /// time stamp assigned to the vertex << "trackTime0=" << trackTime0 /// time stamp assigned to the track + << "TOFmask=" << std::get<6>(idxTPCTrackToTOFCluster[iTrk]) /// delta T- TPC TOF // TPC delta param << "deltaTPCParamInOutTgl=" << deltaTPCParamInOutTgl << "deltaTPCParamInOutQPt=" << deltaTPCParamInOutQPt @@ -1751,7 +1814,7 @@ class TPCTimeSeries : public Task } }; -o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, GTrackID::mask_t src) +o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, GTrackID::mask_t src, bool useft0) { auto dataRequest = std::make_shared(); bool useMC = false; @@ -1759,6 +1822,11 @@ o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, srcTracks.set(GTrackID::TPC); // TPC must be always there dataRequest->requestTracks(srcTracks, useMC); dataRequest->requestClusters(GTrackID::getSourcesMask("TPC"), useMC); + + if (useft0) { + dataRequest->requestFT0RecPoints(false); + } + bool tpcOnly = srcTracks == GTrackID::getSourcesMask("TPC"); if (!tpcOnly) { dataRequest->requestPrimaryVertices(useMC); diff --git 
a/Detectors/TPC/workflow/src/tpc-time-series.cxx b/Detectors/TPC/workflow/src/tpc-time-series.cxx index f7bcf00cb27ea..65345fbfefb10 100644 --- a/Detectors/TPC/workflow/src/tpc-time-series.cxx +++ b/Detectors/TPC/workflow/src/tpc-time-series.cxx @@ -29,6 +29,7 @@ void customize(std::vector& workflowOptions) {"disable-root-output", VariantType::Bool, false, {"disable root-files output writers"}}, {"enable-unbinned-root-output", VariantType::Bool, false, {"writing out unbinned track data"}}, {"track-sources", VariantType::String, std::string{o2::dataformats::GlobalTrackID::ALL}, {"comma-separated list of sources to use"}}, + {"use-ft0", VariantType::Bool, false, {"enable FT0 rec-points"}}, {"material-type", VariantType::Int, 2, {"Type for the material budget during track propagation: 0=None, 1=Geo, 2=LUT"}}}; std::swap(workflowOptions, options); } @@ -43,7 +44,8 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) const bool enableUnbinnedWriter = config.options().get("enable-unbinned-root-output"); auto src = o2::dataformats::GlobalTrackID::getSourcesMask(config.options().get("track-sources")); auto materialType = static_cast(config.options().get("material-type")); - workflow.emplace_back(o2::tpc::getTPCTimeSeriesSpec(disableWriter, materialType, enableUnbinnedWriter, src)); + const bool useft0 = config.options().get("use-ft0"); + workflow.emplace_back(o2::tpc::getTPCTimeSeriesSpec(disableWriter, materialType, enableUnbinnedWriter, src, useft0)); if (!disableWriter) { workflow.emplace_back(o2::tpc::getTPCTimeSeriesWriterSpec()); } diff --git a/prodtests/full-system-test/calib-workflow.sh b/prodtests/full-system-test/calib-workflow.sh index 5687a7e121ed7..16a5209ed3514 100755 --- a/prodtests/full-system-test/calib-workflow.sh +++ b/prodtests/full-system-test/calib-workflow.sh @@ -54,7 +54,7 @@ if [[ $CALIB_ASYNC_EXTRACTTPCCURRENTS == 1 ]]; then add_W o2-tpc-integrate-cluster-workflow "${CONFIG_CTPTPC}" fi if [[ $CALIB_ASYNC_EXTRACTTIMESERIES == 1 ]] ; 
then - CONFIG_TPCTIMESERIES= + CONFIG_TPCTIMESERIES=" --use-ft0" : ${CALIB_ASYNC_SAMPLINGFACTORTIMESERIES:=0.001} if [[ ! -z ${CALIB_ASYNC_ENABLEUNBINNEDTIMESERIES:-} ]]; then CONFIG_TPCTIMESERIES+=" --enable-unbinned-root-output --sample-unbinned-tsallis --threads ${TPCTIMESERIES_THREADS:-1}" From d1f1fe9884c8f7a9006b02c29b027fa72f58d4f5 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 29 Jan 2025 13:29:23 +0100 Subject: [PATCH 0031/1914] DPL: drop duplicated code (#13914) --- Framework/Core/src/DPLMonitoringBackend.cxx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Framework/Core/src/DPLMonitoringBackend.cxx b/Framework/Core/src/DPLMonitoringBackend.cxx index 354c9fceef963..5fc0ff99703ba 100644 --- a/Framework/Core/src/DPLMonitoringBackend.cxx +++ b/Framework/Core/src/DPLMonitoringBackend.cxx @@ -13,20 +13,13 @@ #include "Framework/DriverClient.h" #include "Framework/ServiceRegistry.h" #include "Framework/RuntimeError.h" +#include "Framework/VariantHelpers.h" #include #include namespace o2::framework { -template -struct overloaded : Ts... { - using Ts::operator()...; -}; -template -overloaded(Ts...) 
-> overloaded; - - DPLMonitoringBackend::DPLMonitoringBackend(ServiceRegistryRef registry) : mRegistry{registry} { From 933c8d01e667ea5816f9b6e7b40d1c83e5494aea Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 27 Aug 2024 13:39:59 +0200 Subject: [PATCH 0032/1914] COMMON: Fix flat idx (signed flip) Signed-off-by: Felix Schlepper --- Common/MathUtils/include/MathUtils/LegendrePols.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Common/MathUtils/include/MathUtils/LegendrePols.h b/Common/MathUtils/include/MathUtils/LegendrePols.h index dca71c1275962..d5e6a259573f4 100644 --- a/Common/MathUtils/include/MathUtils/LegendrePols.h +++ b/Common/MathUtils/include/MathUtils/LegendrePols.h @@ -200,7 +200,7 @@ class Legendre2DPolynominal final : public TNamed, boost::math::legendre_p(i - j, y); } - inline int getFlatIdx(int i, int j) const { return i * (i - 1) / 2 + j; } + inline int getFlatIdx(int i, int j) const { return i * (i + 1) / 2 + j; } unsigned int fOrder{0}; std::vector fParams; From 7d4b3dc05325fef68bb47cebe6287627197efc38 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 29 Jan 2025 01:06:52 +0100 Subject: [PATCH 0033/1914] Move XRD debug settings to copy shell command To avoid interference of ALIENPY_DEBUG and XRD_LOGLEVEL set for CTF copy failures debugging with CCDB fetches from alien, which pollute stdout. 
--- Common/Utils/src/FileFetcher.cxx | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Common/Utils/src/FileFetcher.cxx b/Common/Utils/src/FileFetcher.cxx index 048402712ccd7..32c51ac704d4b 100644 --- a/Common/Utils/src/FileFetcher.cxx +++ b/Common/Utils/src/FileFetcher.cxx @@ -329,6 +329,7 @@ bool FileFetcher::copyFile(size_t id) bool aliencpMode = false; std::string uuid{}; std::vector logsToClean; + std::string dbgset{}; if (mCopyCmd.find("alien") != std::string::npos) { if (!gGrid && !TGrid::Connect("alien://")) { LOG(error) << "Copy command refers to alien but connection to Grid failed"; @@ -339,18 +340,18 @@ bool FileFetcher::copyFile(size_t id) c = '_'; } } - gSystem->Setenv("ALIENPY_DEBUG", "1"); - logsToClean.push_back(fmt::format("log_alienpy_{}.txt", uuid)); - gSystem->Setenv("ALIENPY_DEBUG_FILE", logsToClean.back().c_str()); - gSystem->Setenv("XRD_LOGLEVEL", "Dump"); - logsToClean.push_back(fmt::format("log_xrd_{}.txt", uuid)); - gSystem->Setenv("XRD_LOGFILE", logsToClean.back().c_str()); - LOGP(info, "debug log files for {}: ALIENPY_DEBUG_FILE={} XRD_LOGFILE={}", mInputFiles[id].getOrigName(), - getenv("ALIENPY_DEBUG_FILE") ? getenv("ALIENPY_DEBUG_FILE") : "", - getenv("XRD_LOGFILE") ? 
getenv("XRD_LOGFILE") : ""); + if (!(getenv("ALIENPY_DEBUG") && std::stoi(getenv("ALIENPY_DEBUG")) == 1)) { + logsToClean.push_back(fmt::format("log_alienpy_{}.txt", uuid)); + dbgset += fmt::format("ALIENPY_DEBUG=1 ALIENPY_DEBUG_FILE={} ", logsToClean.back()); + } + if (!(getenv("XRD_LOGLEVEL") && strcmp(getenv("XRD_LOGLEVEL"), "Dump") == 0)) { + logsToClean.push_back(fmt::format("log_xrd_{}.txt", uuid)); + dbgset += fmt::format("XRD_LOGLEVEL=Dump XRD_LOGFILE={} ", logsToClean.back()); + } + LOGP(debug, "debug setting for for {}: {}", mInputFiles[id].getOrigName(), dbgset); } auto realCmd = std::regex_replace(std::regex_replace(mCopyCmd, std::regex(R"(\?src)"), mInputFiles[id].getOrigName()), std::regex(R"(\?dst)"), mInputFiles[id].getLocalName()); - auto fullCmd = fmt::format(R"(sh -c "{}" >> {} 2>&1)", realCmd, mCopyCmdLogFile); + auto fullCmd = fmt::format(R"(sh -c "{}{}" >> {} 2>&1)", dbgset, realCmd, mCopyCmdLogFile); LOG(info) << "Executing " << fullCmd; const auto sysRet = gSystem->Exec(fullCmd.c_str()); if (sysRet != 0) { From b00bfe599fb0e569d609e9b21e2ea360080c0f95 Mon Sep 17 00:00:00 2001 From: Maximiliano Puccio Date: Thu, 30 Jan 2025 10:00:44 +0100 Subject: [PATCH 0034/1914] Optionally add time of FIT channels to AO2D (#13857) Add output spec --- .../AODProducerWorkflowSpec.h | 8 ++- Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 65 ++++++++++++++----- Detectors/AOD/src/aod-producer-workflow.cxx | 4 +- .../include/Framework/AnalysisDataModel.h | 20 ++++++ 4 files changed, 78 insertions(+), 19 deletions(-) diff --git a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h index ae866b2006e7f..959aed28cab6b 100644 --- a/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h +++ b/Detectors/AOD/include/AODProducerWorkflow/AODProducerWorkflowSpec.h @@ -215,7 +215,7 @@ enum struct AODProducerStreamerFlags : uint8_t { class AODProducerWorkflowDPL : public Task { 
public: - AODProducerWorkflowDPL(GID::mask_t src, std::shared_ptr dataRequest, std::shared_ptr gr, bool enableSV, bool useMC = true) : mUseMC(useMC), mEnableSV(enableSV), mInputSources(src), mDataRequest(dataRequest), mGGCCDBRequest(gr) {} + AODProducerWorkflowDPL(GID::mask_t src, std::shared_ptr dataRequest, std::shared_ptr gr, bool enableSV, bool useMC = true, bool enableFITextra = false) : mUseMC(useMC), mEnableSV(enableSV), mEnableFITextra(enableFITextra), mInputSources(src), mDataRequest(dataRequest), mGGCCDBRequest(gr) {} ~AODProducerWorkflowDPL() override = default; void init(InitContext& ic) final; void run(ProcessingContext& pc) final; @@ -254,6 +254,7 @@ class AODProducerWorkflowDPL : public Task int mNThreads = 1; bool mUseMC = true; bool mEnableSV = true; // enable secondary vertices + bool mEnableFITextra = false; bool mFieldON = false; const float cSpeed = 0.029979246f; // speed of light in TOF units @@ -370,8 +371,11 @@ class AODProducerWorkflowDPL : public Task uint32_t mMuonCl = 0xFFFFFF00; // 15 bits uint32_t mMuonClErr = 0xFFFF0000; // 7 bits uint32_t mV0Time = 0xFFFFF000; // 11 bits + uint32_t mV0ChannelTime = 0xFFFFFF00; // 15 bits uint32_t mFDDTime = 0xFFFFF000; // 11 bits + uint32_t mFDDChannelTime = 0xFFFFFF00; // 15 bits uint32_t mT0Time = 0xFFFFFF00; // 15 bits + uint32_t mT0ChannelTime = 0xFFFFFFF0; // 19 bits uint32_t mV0Amplitude = 0xFFFFF000; // 11 bits uint32_t mFDDAmplitude = 0xFFFFF000; // 11 bits uint32_t mT0Amplitude = 0xFFFFF000; // 11 bits @@ -668,7 +672,7 @@ class AODProducerWorkflowDPL : public Task }; /// create a processor spec -framework::DataProcessorSpec getAODProducerWorkflowSpec(GID::mask_t src, bool enableSV, bool enableST, bool useMC, bool CTPConfigPerRun); +framework::DataProcessorSpec getAODProducerWorkflowSpec(GID::mask_t src, bool enableSV, bool enableST, bool useMC, bool CTPConfigPerRun, bool enableFITextra); // helper interface for calo cells to "befriend" emcal and phos cells class CellHelper diff --git 
a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx index c8e3209cc0e29..9f0a64f96b876 100644 --- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx +++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx @@ -1743,8 +1743,11 @@ void AODProducerWorkflowDPL::init(InitContext& ic) mMuonCl = 0xFFFFFFFF; mMuonClErr = 0xFFFFFFFF; mV0Time = 0xFFFFFFFF; + mV0ChannelTime = 0xFFFFFFFF; mFDDTime = 0xFFFFFFFF; + mFDDChannelTime = 0xFFFFFFFF; mT0Time = 0xFFFFFFFF; + mT0ChannelTime = 0xFFFFFFFF; mV0Amplitude = 0xFFFFFFFF; mFDDAmplitude = 0xFFFFFFFF; mT0Amplitude = 0xFFFFFFFF; @@ -1829,8 +1832,11 @@ void AODProducerWorkflowDPL::run(ProcessingContext& pc) auto trackedV0Cursor = createTableCursor(pc); auto tracked3BodyCurs = createTableCursor(pc); auto fddCursor = createTableCursor(pc); + auto fddExtraCursor = createTableCursor(pc); auto ft0Cursor = createTableCursor(pc); + auto ft0ExtraCursor = createTableCursor(pc); auto fv0aCursor = createTableCursor(pc); + auto fv0aExtraCursor = createTableCursor(pc); auto fwdTracksCursor = createTableCursor(pc); auto fwdTracksCovCursor = createTableCursor(pc); auto fwdTrkClsCursor = createTableCursor(pc); @@ -1897,16 +1903,18 @@ void AODProducerWorkflowDPL::run(ProcessingContext& pc) tfNumber = mTFNumber; } - std::vector aAmplitudes; + std::vector aAmplitudes, aTimes; std::vector aChannels; fv0aCursor.reserve(fv0RecPoints.size()); for (auto& fv0RecPoint : fv0RecPoints) { aAmplitudes.clear(); aChannels.clear(); + aTimes.clear(); const auto channelData = fv0RecPoint.getBunchChannelData(fv0ChData); for (auto& channel : channelData) { if (channel.charge > 0) { aAmplitudes.push_back(truncateFloatFraction(channel.charge, mV0Amplitude)); + aTimes.push_back(truncateFloatFraction(channel.time * 1.E-3, mV0ChannelTime)); aChannels.push_back(channel.channel); } } @@ -1923,6 +1931,11 @@ void AODProducerWorkflowDPL::run(ProcessingContext& pc) aChannels, truncateFloatFraction(fv0RecPoint.getCollisionGlobalMeanTime() * 
1E-3, mV0Time), // ps to ns fv0RecPoint.getTrigger().getTriggersignals()); + + if (mEnableFITextra) { + fv0aExtraCursor(bcID, + aTimes); + } } std::vector zdcEnergy, zdcAmplitudes, zdcTime; @@ -2026,25 +2039,17 @@ void AODProducerWorkflowDPL::run(ProcessingContext& pc) [](const std::vector& left, const std::vector& right) { return (left[0] < right[0]); }); // vector of FDD amplitudes - int16_t aFDDAmplitudesA[8] = {0u}; - int16_t aFDDAmplitudesC[8] = {0u}; + int16_t aFDDAmplitudesA[8] = {0u}, aFDDAmplitudesC[8] = {0u}; + float aFDDTimesA[8] = {0.f}, aFDDTimesC[8] = {0.f}; // filling FDD table fddCursor.reserve(fddRecPoints.size()); for (const auto& fddRecPoint : fddRecPoints) { for (int i = 0; i < 8; i++) { aFDDAmplitudesA[i] = 0; aFDDAmplitudesC[i] = 0; + aFDDTimesA[i] = 0.f; + aFDDTimesC[i] = 0.f; } - - const auto channelData = fddRecPoint.getBunchChannelData(fddChData); - for (const auto& channel : channelData) { - if (channel.mPMNumber < 8) { - aFDDAmplitudesC[channel.mPMNumber] = channel.mChargeADC; // amplitude - } else { - aFDDAmplitudesA[channel.mPMNumber - 8] = channel.mChargeADC; // amplitude - } - } - uint64_t globalBC = fddRecPoint.getInteractionRecord().toLong(); uint64_t bc = globalBC; auto item = bcsMap.find(bc); @@ -2054,21 +2059,39 @@ void AODProducerWorkflowDPL::run(ProcessingContext& pc) } else { LOG(fatal) << "Error: could not find a corresponding BC ID for a FDD rec. 
point; BC = " << bc; } + const auto channelData = fddRecPoint.getBunchChannelData(fddChData); + for (const auto& channel : channelData) { + if (channel.mPMNumber < 8) { + aFDDAmplitudesC[channel.mPMNumber] = channel.mChargeADC; // amplitude + aFDDTimesC[channel.mPMNumber] = truncateFloatFraction(channel.mTime * 1E-3, mFDDChannelTime); // time + } else { + aFDDAmplitudesA[channel.mPMNumber - 8] = channel.mChargeADC; // amplitude + aFDDTimesA[channel.mPMNumber - 8] = truncateFloatFraction(channel.mTime * 1E-3, mFDDChannelTime); // time + } + } + fddCursor(bcID, aFDDAmplitudesA, aFDDAmplitudesC, truncateFloatFraction(fddRecPoint.getCollisionTimeA() * 1E-3, mFDDTime), // ps to ns truncateFloatFraction(fddRecPoint.getCollisionTimeC() * 1E-3, mFDDTime), // ps to ns fddRecPoint.getTrigger().getTriggersignals()); + if (mEnableFITextra) { + fddExtraCursor(bcID, + aFDDTimesA, + aFDDTimesC); + } } // filling FT0 table - std::vector aAmplitudesA, aAmplitudesC; + std::vector aAmplitudesA, aAmplitudesC, aTimesA, aTimesC; std::vector aChannelsA, aChannelsC; ft0Cursor.reserve(ft0RecPoints.size()); for (auto& ft0RecPoint : ft0RecPoints) { aAmplitudesA.clear(); aAmplitudesC.clear(); + aTimesA.clear(); + aTimesC.clear(); aChannelsA.clear(); aChannelsC.clear(); const auto channelData = ft0RecPoint.getBunchChannelData(ft0ChData); @@ -2079,9 +2102,11 @@ void AODProducerWorkflowDPL::run(ProcessingContext& pc) if (channel.ChId < nFT0ChannelsAside) { aChannelsA.push_back(channel.ChId); aAmplitudesA.push_back(truncateFloatFraction(channel.QTCAmpl, mT0Amplitude)); + aTimesA.push_back(truncateFloatFraction(channel.CFDTime * 1E-3, mT0ChannelTime)); } else { aChannelsC.push_back(channel.ChId - nFT0ChannelsAside); aAmplitudesC.push_back(truncateFloatFraction(channel.QTCAmpl, mT0Amplitude)); + aTimesC.push_back(truncateFloatFraction(channel.CFDTime * 1E-3, mT0ChannelTime)); } } } @@ -2102,6 +2127,11 @@ void AODProducerWorkflowDPL::run(ProcessingContext& pc) 
truncateFloatFraction(ft0RecPoint.getCollisionTimeA() * 1E-3, mT0Time), // ps to ns truncateFloatFraction(ft0RecPoint.getCollisionTimeC() * 1E-3, mT0Time), // ps to ns ft0RecPoint.getTrigger().getTriggersignals()); + if (mEnableFITextra) { + ft0ExtraCursor(bcID, + aTimesA, + aTimesC); + } } if (mUseMC) { @@ -3073,7 +3103,7 @@ void AODProducerWorkflowDPL::endOfStream(EndOfStreamContext& /*ec*/) mStreamer.reset(); } -DataProcessorSpec getAODProducerWorkflowSpec(GID::mask_t src, bool enableSV, bool enableStrangenessTracking, bool useMC, bool CTPConfigPerRun) +DataProcessorSpec getAODProducerWorkflowSpec(GID::mask_t src, bool enableSV, bool enableStrangenessTracking, bool useMC, bool CTPConfigPerRun, bool enableFITextra) { auto dataRequest = std::make_shared(); dataRequest->inputs.emplace_back("ctpconfig", "CTP", "CTPCONFIG", 0, Lifetime::Condition, ccdbParamSpec("CTP/Config/Config", CTPConfigPerRun)); @@ -3133,8 +3163,11 @@ DataProcessorSpec getAODProducerWorkflowSpec(GID::mask_t src, bool enableSV, boo OutputForTable::spec(), OutputForTable::spec(), OutputForTable::spec(), + OutputForTable::spec(), OutputForTable::spec(), + OutputForTable::spec(), OutputForTable::spec(), + OutputForTable::spec(), OutputForTable::spec(), OutputForTable::spec(), OutputForTable::spec(), @@ -3184,7 +3217,7 @@ DataProcessorSpec getAODProducerWorkflowSpec(GID::mask_t src, bool enableSV, boo "aod-producer-workflow", dataRequest->inputs, outputs, - AlgorithmSpec{adaptFromTask(src, dataRequest, ggRequest, enableSV, useMC)}, + AlgorithmSpec{adaptFromTask(src, dataRequest, ggRequest, enableSV, useMC, enableFITextra)}, Options{ ConfigParamSpec{"run-number", VariantType::Int64, -1L, {"The run-number. 
If left default we try to get it from DPL header."}}, ConfigParamSpec{"aod-timeframe-id", VariantType::Int64, -1L, {"Set timeframe number"}}, diff --git a/Detectors/AOD/src/aod-producer-workflow.cxx b/Detectors/AOD/src/aod-producer-workflow.cxx index 1f39f11218be3..81e178642e403 100644 --- a/Detectors/AOD/src/aod-producer-workflow.cxx +++ b/Detectors/AOD/src/aod-producer-workflow.cxx @@ -37,6 +37,7 @@ void customize(std::vector& workflowOptions) {"disable-mc", o2::framework::VariantType::Bool, false, {"disable MC propagation"}}, {"disable-secondary-vertices", o2::framework::VariantType::Bool, false, {"disable filling secondary vertices"}}, {"disable-strangeness-tracker", o2::framework::VariantType::Bool, false, {"disable filling strangeness tracking"}}, + {"enable-FIT-extra", o2::framework::VariantType::Bool, false, {"enable FIT extra output"}}, {"info-sources", VariantType::String, std::string{GID::ALL}, {"comma-separated list of sources to use"}}, {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings ..."}}, {"combine-source-devices", o2::framework::VariantType::Bool, false, {"merge DPL source devices"}}, @@ -54,6 +55,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) bool enableSV = !configcontext.options().get("disable-secondary-vertices"); bool enableST = !configcontext.options().get("disable-strangeness-tracker"); bool ctpcfgperrun = !configcontext.options().get("ctpconfig-run-independent"); + bool enableFITextra = configcontext.options().get("enable-FIT-extra"); GID::mask_t allowedSrc = GID::getSourcesMask("ITS,MFT,MCH,MID,MCH-MID,TPC,TRD,ITS-TPC,TPC-TOF,TPC-TRD,ITS-TPC-TOF,ITS-TPC-TRD,TPC-TRD-TOF,ITS-TPC-TRD-TOF,MFT-MCH,FT0,FV0,FDD,ZDC,EMC,CTP,PHS,CPV,HMP"); GID::mask_t src = allowedSrc & GID::getSourcesMask(configcontext.options().get("info-sources")); @@ -64,7 +66,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) } WorkflowSpec specs; - 
specs.emplace_back(o2::aodproducer::getAODProducerWorkflowSpec(src, enableSV, enableST, useMC, ctpcfgperrun)); + specs.emplace_back(o2::aodproducer::getAODProducerWorkflowSpec(src, enableSV, enableST, useMC, ctpcfgperrun, enableFITextra)); auto srcCls = src & ~(GID::getSourceMask(GID::MCH) | GID::getSourceMask(GID::MID)); // Don't read global MID and MCH clusters (those attached to tracks are always read) auto srcMtc = src; diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index 1096e151c019e..8c9f323f3dcc6 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -1467,6 +1467,7 @@ namespace fv0a { DECLARE_SOA_INDEX_COLUMN(BC, bc); //! BC index DECLARE_SOA_COLUMN(Amplitude, amplitude, std::vector); //! Amplitudes of non-zero channels. The channel IDs are given in Channel (at the same index) +DECLARE_SOA_COLUMN(TimeFV0A, timeFV0A, std::vector); //! Time of non-zero channels. The channel IDs are given in Channel (at the same index). Only for the FITExtra table DECLARE_SOA_COLUMN(Channel, channel, std::vector); //! Channel IDs which had non-zero amplitudes. There are at maximum 48 channels. DECLARE_SOA_COLUMN(Time, time, float); //! Time in ns DECLARE_SOA_COLUMN(TriggerMask, triggerMask, uint8_t); //! @@ -1476,6 +1477,10 @@ DECLARE_SOA_TABLE(FV0As, "AOD", "FV0A", //! o2::soa::Index<>, fv0a::BCId, fv0a::Amplitude, fv0a::Channel, fv0a::Time, fv0a::TriggerMask); using FV0A = FV0As::iterator; +DECLARE_SOA_TABLE(FV0AsExtra, "AOD", "FV0AEXTRA", //! FV0AsExtra table + o2::soa::Index<>, fv0a::BCId, fv0a::TimeFV0A); +using FV0AExtra = FV0AsExtra::iterator; + // V0C table for Run2 only namespace fv0c { @@ -1493,8 +1498,10 @@ namespace ft0 { DECLARE_SOA_INDEX_COLUMN(BC, bc); //! BC index DECLARE_SOA_COLUMN(AmplitudeA, amplitudeA, std::vector); //! Amplitudes of non-zero channels on the A-side. 
The channel IDs are given in ChannelA (at the same index) +DECLARE_SOA_COLUMN(TimeFT0A, timeFT0A, std::vector); //! Time of non-zero channels on the A-side. The channel IDs are given in ChannelA (at the same index). Only for the FITExtra table DECLARE_SOA_COLUMN(ChannelA, channelA, std::vector); //! Channel IDs on the A side which had non-zero amplitudes. There are at maximum 96 channels. DECLARE_SOA_COLUMN(AmplitudeC, amplitudeC, std::vector); //! Amplitudes of non-zero channels on the C-side. The channel IDs are given in ChannelC (at the same index) +DECLARE_SOA_COLUMN(TimeFT0C, timeFT0C, std::vector); //! Time of non-zero channels on the C-side. The channel IDs are given in ChannelC (at the same index). Only for the FITExtra table DECLARE_SOA_COLUMN(ChannelC, channelC, std::vector); //! Channel IDs on the C side which had non-zero amplitudes. There are at maximum 112 channels. DECLARE_SOA_COLUMN(TimeA, timeA, float); //! Average A-side time DECLARE_SOA_COLUMN(TimeC, timeC, float); //! Average C-side time @@ -1538,6 +1545,11 @@ DECLARE_SOA_TABLE(FT0s, "AOD", "FT0", //! ft0::SumAmpA, ft0::SumAmpC); using FT0 = FT0s::iterator; +DECLARE_SOA_TABLE(FT0sExtra, "AOD", "FT0EXTRA", //! FT0sExtra table + o2::soa::Index<>, ft0::BCId, + ft0::TimeFT0A, ft0::TimeFT0C); +using FT0Extra = FT0sExtra::iterator; + namespace fdd { DECLARE_SOA_INDEX_COLUMN(BC, bc); //! BC index @@ -1547,6 +1559,9 @@ DECLARE_SOA_COLUMN(AmplitudeC, amplitudeC, float[4]); //! Amplitude in adjacent DECLARE_SOA_COLUMN(ChargeA, chargeA, int16_t[8]); //! Amplitude per channel A-side DECLARE_SOA_COLUMN(ChargeC, chargeC, int16_t[8]); //! Amplitude per channel C-side +DECLARE_SOA_COLUMN(TimeFDDA, timeFDDA, float[8]); //! Time per channel A-side, only for the FITExtra table +DECLARE_SOA_COLUMN(TimeFDDC, timeFDDC, float[8]); //! Time per channel C-side, only for the FITExtra table + DECLARE_SOA_COLUMN(TimeA, timeA, float); //! DECLARE_SOA_COLUMN(TimeC, timeC, float); //! 
DECLARE_SOA_COLUMN(TriggerMask, triggerMask, uint8_t); //! @@ -1568,6 +1583,11 @@ DECLARE_SOA_TABLE_VERSIONED(FDDs_001, "AOD", "FDD", 1, //! FDD table, version 00 using FDDs = FDDs_001; //! this defines the current default version using FDD = FDDs::iterator; +DECLARE_SOA_TABLE(FDDsExtra, "AOD", "FDDEXTRA", //! FDDsExtra table + o2::soa::Index<>, fdd::BCId, + fdd::TimeFDDA, fdd::TimeFDDC); +using FDDExtra = FDDsExtra::iterator; + namespace v0 { DECLARE_SOA_INDEX_COLUMN_FULL(PosTrack, posTrack, int, Tracks, "_Pos"); //! Positive track From e24ee8831d27a4ff217fdb733bcd4066d3a5d8d6 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 30 Jan 2025 15:47:26 +0100 Subject: [PATCH 0035/1914] DPL: implement distinction between physical and dataset schema (#13917) This will come handy to do zero copy, actually. --- Framework/AnalysisSupport/src/TTreePlugin.cxx | 205 ++++++++++-------- Framework/Core/test/test_Root2ArrowTable.cxx | 52 ++++- 2 files changed, 164 insertions(+), 93 deletions(-) diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index e84a053d58d60..abc08526815cc 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -14,6 +14,8 @@ #include "Framework/Signpost.h" #include "Framework/Endian.h" #include +#include +#include #include #include #include @@ -23,6 +25,8 @@ #include #include #include +#include +#include O2_DECLARE_DYNAMIC_LOG(root_arrow_fs); @@ -91,6 +95,7 @@ arrow::Result SingleTreeFileSystem::GetFileInfo(std::string return result; } +// A fragment which holds a tree class TTreeFileFragment : public arrow::dataset::FileFragment { public: @@ -101,6 +106,13 @@ class TTreeFileFragment : public arrow::dataset::FileFragment : FileFragment(std::move(source), std::move(format), std::move(partition_expression), std::move(physical_schema)) { } + + std::unique_ptr& GetTree() + { + auto topFs = 
std::dynamic_pointer_cast(source().filesystem()); + auto treeFs = std::dynamic_pointer_cast(topFs->GetSubFilesystem(source())); + return treeFs->GetTree(source()); + } }; class TTreeFileFormat : public arrow::dataset::FileFormat @@ -158,9 +170,9 @@ class TTreeFileFormat : public arrow::dataset::FileFormat class TTreeOutputStream : public arrow::io::OutputStream { public: - // Using a pointer means that the tree itself is owned by another + // Using a pointer means that the tree itself is owned by another // class - TTreeOutputStream(TTree *, std::string branchPrefix); + TTreeOutputStream(TTree*, std::string branchPrefix); arrow::Status Close() override; @@ -245,33 +257,70 @@ struct TTreeObjectReadingImplementation : public RootArrowFactoryPlugin { } }; +struct BranchFieldMapping { + int mainBranchIdx; + int vlaIdx; + int datasetFieldIdx; +}; + arrow::Result TTreeFileFormat::ScanBatchesAsync( const std::shared_ptr& options, const std::shared_ptr& fragment) const { - // Get the fragment as a TTreeFragment. This might be PART of a TTree. - auto treeFragment = std::dynamic_pointer_cast(fragment); // This is the schema we want to read auto dataset_schema = options->dataset_schema; - auto generator = [pool = options->pool, treeFragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, + auto generator = [pool = options->pool, fragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, &totalUncompressedSize = mTotUncompressedSize]() -> arrow::Future> { - auto schema = treeFragment->format()->Inspect(treeFragment->source()); - std::vector> columns; std::vector> fields = dataset_schema->fields(); - auto physical_schema = *treeFragment->ReadPhysicalSchema(); + auto physical_schema = *fragment->ReadPhysicalSchema(); + + auto fs = std::dynamic_pointer_cast(fragment->source().filesystem()); + // Actually get the TTree from the ROOT file. 
+ auto treeFs = std::dynamic_pointer_cast(fs->GetSubFilesystem(fragment->source())); + + if (dataset_schema->num_fields() > physical_schema->num_fields()) { + throw runtime_error_f("One TTree must have all the fields requested in a table"); + } + + // Register physical fields into the cache + std::vector mappings; + + for (int fi = 0; fi < dataset_schema->num_fields(); ++fi) { + auto dataset_field = dataset_schema->field(fi); + int physicalFieldIdx = physical_schema->GetFieldIndex(dataset_field->name()); + + if (physicalFieldIdx < 0) { + throw runtime_error_f("Cannot find physical field associated to %s", dataset_field->name().c_str()); + } + if (physicalFieldIdx > 1 && physical_schema->field(physicalFieldIdx - 1)->name().ends_with("_size")) { + mappings.push_back({physicalFieldIdx, physicalFieldIdx - 1, fi}); + } else { + mappings.push_back({physicalFieldIdx, -1, fi}); + } + } + + auto& tree = treeFs->GetTree(fragment->source()); + tree->SetCacheSize(25000000); + auto branches = tree->GetListOfBranches(); + for (auto& mapping : mappings) { + tree->AddBranchToCache((TBranch*)branches->At(mapping.mainBranchIdx), false); + if (mapping.vlaIdx != -1) { + tree->AddBranchToCache((TBranch*)branches->At(mapping.vlaIdx), false); + } + } + tree->StopCacheLearningPhase(); static TBufferFile buffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; - auto containerFS = std::dynamic_pointer_cast(treeFragment->source().filesystem()); - auto fs = std::dynamic_pointer_cast(containerFS->GetSubFilesystem(treeFragment->source())); int64_t rows = -1; - auto& tree = fs->GetTree(treeFragment->source()); - for (auto& field : fields) { + for (size_t mi = 0; mi < mappings.size(); ++mi) { + BranchFieldMapping mapping = mappings[mi]; // The field actually on disk - auto physicalField = physical_schema->GetFieldByName(field->name()); - TBranch* branch = tree->GetBranch(physicalField->name().c_str()); + auto datasetField = dataset_schema->field(mapping.datasetFieldIdx); + auto physicalField = 
physical_schema->field(mapping.mainBranchIdx); + auto* branch = (TBranch*)branches->At(mapping.mainBranchIdx); assert(branch); buffer.Reset(); auto totalEntries = branch->GetEntries(); @@ -284,12 +333,12 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( arrow::Status status; int readEntries = 0; std::shared_ptr array; - auto listType = std::dynamic_pointer_cast(physicalField->type()); - if (physicalField->type() == arrow::boolean() || - (listType && physicalField->type()->field(0)->type() == arrow::boolean())) { + auto listType = std::dynamic_pointer_cast(datasetField->type()); + if (datasetField->type() == arrow::boolean() || + (listType && datasetField->type()->field(0)->type() == arrow::boolean())) { if (listType) { std::unique_ptr builder = nullptr; - auto status = arrow::MakeBuilder(pool, physicalField->type()->field(0)->type(), &builder); + auto status = arrow::MakeBuilder(pool, datasetField->type()->field(0)->type(), &builder); if (!status.ok()) { throw runtime_error("Cannot create value builder"); } @@ -316,7 +365,7 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( } } else if (listType == nullptr) { std::unique_ptr builder = nullptr; - auto status = arrow::MakeBuilder(pool, physicalField->type(), &builder); + auto status = arrow::MakeBuilder(pool, datasetField->type(), &builder); if (!status.ok()) { throw runtime_error("Cannot create builder"); } @@ -340,8 +389,6 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( } } } else { - // other types: use serialized read to build arrays directly. - auto typeSize = physicalField->type()->byte_width(); // This is needed for branches which have not been persisted. auto bytes = branch->GetTotBytes(); auto branchSize = bytes ? 
bytes : 1000000; @@ -349,7 +396,7 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( if (!result.ok()) { throw runtime_error("Cannot allocate values buffer"); } - std::shared_ptr arrowValuesBuffer = std::move(result).ValueUnsafe(); + std::shared_ptr arrowValuesBuffer = result.MoveValueUnsafe(); auto ptr = arrowValuesBuffer->mutable_data(); if (ptr == nullptr) { throw runtime_error("Invalid buffer"); @@ -363,23 +410,14 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( std::span offsets; int size = 0; uint32_t totalSize = 0; - TBranch* mSizeBranch = nullptr; - int64_t listSize = 1; - if (auto fixedSizeList = std::dynamic_pointer_cast(physicalField->type())) { - listSize = fixedSizeList->list_size(); - typeSize = fixedSizeList->field(0)->type()->byte_width(); - } else if (auto vlaListType = std::dynamic_pointer_cast(physicalField->type())) { - listSize = -1; - typeSize = vlaListType->field(0)->type()->byte_width(); - } - if (listSize == -1) { - mSizeBranch = branch->GetTree()->GetBranch((std::string{branch->GetName()} + "_size").c_str()); + if (mapping.vlaIdx != -1) { + auto* mSizeBranch = (TBranch*)branches->At(mapping.vlaIdx); offsetBuffer = std::make_unique(TBuffer::EMode::kWrite, 4 * 1024 * 1024); result = arrow::AllocateResizableBuffer((totalEntries + 1) * (int64_t)sizeof(int), pool); if (!result.ok()) { throw runtime_error("Cannot allocate offset buffer"); } - arrowOffsetBuffer = std::move(result).ValueUnsafe(); + arrowOffsetBuffer = result.MoveValueUnsafe(); unsigned char* ptrOffset = arrowOffsetBuffer->mutable_data(); auto* tPtrOffset = reinterpret_cast(ptrOffset); offsets = std::span{tPtrOffset, tPtrOffset + totalEntries + 1}; @@ -398,9 +436,19 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( readEntries = 0; } + int typeSize = physicalField->type()->byte_width(); + int64_t listSize = 1; + if (auto fixedSizeList = std::dynamic_pointer_cast(datasetField->type())) { + listSize = fixedSizeList->list_size(); + typeSize = 
physicalField->type()->field(0)->type()->byte_width(); + } else if (mapping.vlaIdx != -1) { + typeSize = physicalField->type()->field(0)->type()->byte_width(); + listSize = -1; + } + while (readEntries < totalEntries) { auto readLast = branch->GetBulkRead().GetEntriesSerialized(readEntries, buffer); - if (listSize == -1) { + if (mapping.vlaIdx != -1) { size = offsets[readEntries + readLast] - offsets[readEntries]; } else { size = readLast * listSize; @@ -412,18 +460,15 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( if (listSize >= 1) { totalSize = readEntries * listSize; } - std::shared_ptr varray; - switch (listSize) { - case -1: - varray = std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(physicalField->type(), readEntries, arrowOffsetBuffer, varray); - break; - case 1: - array = std::make_shared(physicalField->type(), readEntries, arrowValuesBuffer); - break; - default: - varray = std::make_shared(physicalField->type()->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(physicalField->type(), readEntries, varray); + if (listSize == 1) { + array = std::make_shared(datasetField->type(), readEntries, arrowValuesBuffer); + } else { + auto varray = std::make_shared(datasetField->type()->field(0)->type(), totalSize, arrowValuesBuffer); + if (mapping.vlaIdx != -1) { + array = std::make_shared(datasetField->type(), readEntries, arrowOffsetBuffer, varray); + } else { + array = std::make_shared(datasetField->type(), readEntries, varray); + } } } @@ -534,9 +579,12 @@ auto arrowTypeFromROOT(EDataType type, int size) } } +// This is a datatype for branches which implies +struct RootTransientIndexType : arrow::ExtensionType { +}; + arrow::Result> TTreeFileFormat::Inspect(const arrow::dataset::FileSource& source) const { - arrow::Schema schema{{}}; auto fs = std::dynamic_pointer_cast(source.filesystem()); // Actually get the TTree from the ROOT file. 
auto treeFs = std::dynamic_pointer_cast(fs->GetSubFilesystem(source)); @@ -548,51 +596,37 @@ arrow::Result> TTreeFileFormat::Inspect(const arr auto branches = tree->GetListOfBranches(); auto n = branches->GetEntries(); - std::vector branchInfos; + std::vector> fields; + + bool prevIsSize = false; for (auto i = 0; i < n; ++i) { auto branch = static_cast(branches->At(i)); - auto name = std::string{branch->GetName()}; - auto pos = name.find("_size"); - if (pos != std::string::npos) { - name.erase(pos); - branchInfos.emplace_back(BranchInfo{name, (TBranch*)nullptr, true}); + std::string name = branch->GetName(); + if (prevIsSize && fields.back()->name() != name + "_size") { + throw runtime_error_f("Unexpected layout for VLA container %s.", branch->GetName()); + } + + if (name.ends_with("_size")) { + fields.emplace_back(std::make_shared(name, arrow::int32())); + prevIsSize = true; } else { - auto lookup = std::find_if(branchInfos.begin(), branchInfos.end(), [&](BranchInfo const& bi) { - return bi.name == name; - }); - if (lookup == branchInfos.end()) { - branchInfos.emplace_back(BranchInfo{name, branch, false}); + static TClass* cls; + EDataType type; + branch->GetExpectedType(cls, type); + + if (prevIsSize) { + fields.emplace_back(std::make_shared(name, arrowTypeFromROOT(type, -1))); } else { - lookup->ptr = branch; + auto listSize = static_cast(branch->GetListOfLeaves()->At(0))->GetLenStatic(); + fields.emplace_back(std::make_shared(name, arrowTypeFromROOT(type, listSize))); } + prevIsSize = false; } } - std::vector> fields; - tree->SetCacheSize(25000000); - for (auto& bi : branchInfos) { - static TClass* cls; - EDataType type; - bi.ptr->GetExpectedType(cls, type); - auto listSize = -1; - if (!bi.mVLA) { - listSize = static_cast(bi.ptr->GetListOfLeaves()->At(0))->GetLenStatic(); - } - auto field = std::make_shared(bi.ptr->GetName(), arrowTypeFromROOT(type, listSize)); - fields.push_back(field); - - tree->AddBranchToCache(bi.ptr); - if (strncmp(bi.ptr->GetName(), 
"fIndexArray", strlen("fIndexArray")) == 0) { - std::string sizeBranchName = bi.ptr->GetName(); - sizeBranchName += "_size"; - auto* sizeBranch = (TBranch*)tree->GetBranch(sizeBranchName.c_str()); - if (sizeBranch) { - tree->AddBranchToCache(sizeBranch); - } - } + if (fields.back()->name().ends_with("_size")) { + throw runtime_error_f("Missing values for VLA indices %s.", fields.back()->name().c_str()); } - tree->StopCacheLearningPhase(); - return std::make_shared(fields); } @@ -601,9 +635,8 @@ arrow::Result> TTreeFileFormat::Ma arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, std::shared_ptr physical_schema) { - std::shared_ptr format = std::make_shared(mTotCompressedSize, mTotUncompressedSize); - auto fragment = std::make_shared(std::move(source), std::move(format), + auto fragment = std::make_shared(std::move(source), std::dynamic_pointer_cast(shared_from_this()), std::move(partition_expression), std::move(physical_schema)); return std::dynamic_pointer_cast(fragment); diff --git a/Framework/Core/test/test_Root2ArrowTable.cxx b/Framework/Core/test/test_Root2ArrowTable.cxx index 04a8d91303f0e..ebc854d1d6dc0 100644 --- a/Framework/Core/test/test_Root2ArrowTable.cxx +++ b/Framework/Core/test/test_Root2ArrowTable.cxx @@ -384,6 +384,24 @@ bool validateSchema(std::shared_ptr schema) return true; } +bool validatePhysicalSchema(std::shared_ptr schema) +{ + REQUIRE(schema->num_fields() == 12); + REQUIRE(schema->field(0)->type()->id() == arrow::float32()->id()); + REQUIRE(schema->field(1)->type()->id() == arrow::float32()->id()); + REQUIRE(schema->field(2)->type()->id() == arrow::float32()->id()); + REQUIRE(schema->field(3)->type()->id() == arrow::float64()->id()); + REQUIRE(schema->field(4)->type()->id() == arrow::int32()->id()); + REQUIRE(schema->field(5)->type()->id() == arrow::fixed_size_list(arrow::float32(), 3)->id()); + REQUIRE(schema->field(6)->type()->id() == arrow::fixed_size_list(arrow::int32(), 2)->id()); + 
REQUIRE(schema->field(7)->type()->id() == arrow::boolean()->id()); + REQUIRE(schema->field(8)->type()->id() == arrow::fixed_size_list(arrow::boolean(), 2)->id()); + REQUIRE(schema->field(9)->type()->id() == arrow::int32()->id()); + REQUIRE(schema->field(10)->type()->id() == arrow::list(arrow::int32())->id()); + REQUIRE(schema->field(11)->type()->id() == arrow::int8()->id()); + return true; +} + TEST_CASE("RootTree2Dataset") { using namespace o2::framework; @@ -502,12 +520,22 @@ TEST_CASE("RootTree2Dataset") arrow::dataset::FileSource source("DF_2/tracks", fs); REQUIRE(format->IsSupported(source) == true); - auto schemaOpt = format->Inspect(source); - REQUIRE(schemaOpt.ok()); - auto schema = *schemaOpt; + auto physicalSchema = format->Inspect(source); + REQUIRE(physicalSchema.ok()); + REQUIRE(validatePhysicalSchema(*physicalSchema)); + // Create the dataset schema rather than using the physical one + std::vector> fields; + for (auto& field : (*(physicalSchema))->fields()) { + if (field->name().ends_with("_size")) { + continue; + } + fields.push_back(field); + } + std::shared_ptr schema = std::make_shared(fields); + validateSchema(schema); - auto fragment = format->MakeFragment(source, {}, schema); + auto fragment = format->MakeFragment(source, {}, *physicalSchema); REQUIRE(fragment.ok()); auto options = std::make_shared(); options->dataset_schema = schema; @@ -545,12 +573,22 @@ TEST_CASE("RootTree2Dataset") auto schemaOptWritten = format->Inspect(source); REQUIRE(schemaOptWritten.ok()); auto schemaWritten = *schemaOptWritten; - REQUIRE(validateSchema(schemaWritten)); - auto fragmentWritten = format->MakeFragment(source, {}, schema); + REQUIRE(validatePhysicalSchema(schemaWritten)); + std::vector> fields; + for (auto& field : schemaWritten->fields()) { + if (field->name().ends_with("_size")) { + continue; + } + fields.push_back(field); + } + std::shared_ptr schema = std::make_shared(fields); + REQUIRE(validateSchema(schema)); + + auto fragmentWritten = 
format->MakeFragment(source, {}, *physicalSchema); REQUIRE(fragmentWritten.ok()); auto optionsWritten = std::make_shared(); - options->dataset_schema = schemaWritten; + options->dataset_schema = schema; auto scannerWritten = format->ScanBatchesAsync(optionsWritten, *fragment); REQUIRE(scannerWritten.ok()); auto batchesWritten = (*scanner)(); From 03ea86a1e06e6fdccf3004e1efb1adb9135af0b9 Mon Sep 17 00:00:00 2001 From: Daniel Battistini <60930860+danielbattistini@users.noreply.github.com> Date: Thu, 30 Jan 2025 15:56:43 +0100 Subject: [PATCH 0036/1914] ALICE 3: Add staggered staves to OT (#13910) * Improve modularity * Add Staggered layers * Configure middle and outer layers separately * [Fix] Correct double-width staves for middle layers * Add overlap between the modules * rename config variable * Please consider the following formatting changes * Update Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx * Update Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx * Update Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx * Update Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx --------- Co-authored-by: ALICE Action Bot Co-authored-by: Matteo Concas --- .../TRK/base/include/TRKBase/TRKBaseParam.h | 4 +- .../include/TRKSimulation/TRKLayer.h | 3 + .../ALICE3/TRK/simulation/src/Detector.cxx | 15 +- .../ALICE3/TRK/simulation/src/TRKLayer.cxx | 169 ++++++++++++++---- 4 files changed, 154 insertions(+), 37 deletions(-) diff --git a/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h b/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h index 6c655571b3e4e..9ea4bd1072d91 100644 --- a/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h +++ b/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h @@ -23,12 +23,14 @@ namespace trk enum eLayout { kCylinder = 0, kTurboStaves, + kStaggered, }; struct TRKBaseParam : public o2::conf::ConfigurableParamHelper { std::string configFile = ""; float 
serviceTubeX0 = 0.02f; // X0 Al2O3 - eLayout layout = kCylinder; // Type of segmentation of the layers into staves + eLayout layoutML = kCylinder; // Type of segmentation for the middle layers + eLayout layoutOL = kCylinder; // Type of segmentation for the outer layers O2ParamDef(TRKBaseParam, "TRKBase"); }; diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h index 2ddf38352ae8c..ef355ec36ce2f 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h @@ -39,6 +39,9 @@ class TRKLayer auto getNumber() const { return mLayerNumber; } auto getName() const { return mLayerName; } + TGeoVolume* createSensor(std::string type, double width = -1); + TGeoVolume* createChip(std::string type, double width = -1); + TGeoVolume* createStave(std::string type, double width = -1); void createLayer(TGeoVolume* motherVolume); private: diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx index 9e69a3bd8a88f..b9015ce578caf 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx @@ -116,9 +116,18 @@ void Detector::buildTRKNewVacuumVessel() mLayers.emplace_back(10, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(10)}, 80.f, 258.f, 100.e-3); auto& trkPars = TRKBaseParam::Instance(); - mLayers[8].setLayout(trkPars.layout); - mLayers[9].setLayout(trkPars.layout); - mLayers[10].setLayout(trkPars.layout); + + // Middle layers + mLayers[3].setLayout(trkPars.layoutML); + mLayers[4].setLayout(trkPars.layoutML); + mLayers[5].setLayout(trkPars.layoutML); + mLayers[6].setLayout(trkPars.layoutML); + + // Outer tracker + mLayers[7].setLayout(trkPars.layoutOL); + mLayers[8].setLayout(trkPars.layoutOL); + 
mLayers[9].setLayout(trkPars.layoutOL); + mLayers[10].setLayout(trkPars.layoutOL); } void Detector::configFromFile(std::string fileName) diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx index 0d7930c77bb49..e6b00f6e96425 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx @@ -41,6 +41,108 @@ TRKLayer::TRKLayer(int layerNumber, std::string layerName, float rInn, float zLe LOGP(info, "Creating layer: id: {} rInner: {} rOuter: {} zLength: {} x2X0: {}", mLayerNumber, mInnerRadius, mOuterRadius, mZ, mX2X0); } +TGeoVolume* TRKLayer::createSensor(std::string type, double width) +{ + TGeoMedium* medSi = gGeoManager->GetMedium("TRK_SILICON$"); + std::string sensName = Form("%s%d", GeometryTGeo::getTRKSensorPattern(), this->mLayerNumber); + + TGeoShape* sensor; + + if (type == "cylinder") { + sensor = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); + } else if (type == "flat") { + if (width < 0) { + LOGP(fatal, "Attempting to create sensor with invalid width"); + } + sensor = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); + } else { + LOGP(fatal, "Sensor of type '{}' is not implemented", type); + } + + TGeoVolume* sensVol = new TGeoVolume(sensName.c_str(), sensor, medSi); + sensVol->SetLineColor(kYellow); + + return sensVol; +}; + +TGeoVolume* TRKLayer::createChip(std::string type, double width) +{ + TGeoMedium* medSi = gGeoManager->GetMedium("TRK_SILICON$"); + std::string chipName = o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mLayerNumber); + + TGeoShape* chip; + TGeoVolume* sensVol; + + if (type == "cylinder") { + chip = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); + sensVol = createSensor("cylinder"); + } else if (type == "flat") { + if (width < 0) { + LOGP(fatal, "Attempting to create chip with invalid width"); + } + chip = new TGeoBBox(width / 2, 
mChipThickness / 2, mZ / 2); + sensVol = createSensor("flat", width); + } else { + LOGP(fatal, "Sensor of type '{}' is not implemented", type); + } + + TGeoVolume* chipVol = new TGeoVolume(chipName.c_str(), chip, medSi); + LOGP(info, "Inserting {} in {} ", sensVol->GetName(), chipVol->GetName()); + chipVol->AddNode(sensVol, 1, nullptr); + chipVol->SetLineColor(kYellow); + + return chipVol; +} + +TGeoVolume* TRKLayer::createStave(std::string type, double width) +{ + TGeoMedium* medAir = gGeoManager->GetMedium("TRK_AIR$"); + std::string staveName = o2::trk::GeometryTGeo::getTRKStavePattern() + std::to_string(mLayerNumber); + + TGeoShape* stave; + TGeoVolume* staveVol; + TGeoVolume* chipVol; + + if (type == "cylinder") { + stave = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); + chipVol = createChip("cylinder"); + staveVol = new TGeoVolume(staveName.c_str(), stave, medAir); + LOGP(info, "Inserting {} in {} ", chipVol->GetName(), staveVol->GetName()); + staveVol->AddNode(chipVol, 1, nullptr); + } else if (type == "flat") { + if (width < 0) { + LOGP(fatal, "Attempting to create stave with invalid width"); + } + stave = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); + chipVol = createChip("flat", width); + staveVol = new TGeoVolume(staveName.c_str(), stave, medAir); + LOGP(info, "Inserting {} in {} ", chipVol->GetName(), staveVol->GetName()); + staveVol->AddNode(chipVol, 1, nullptr); + } else if (type == "staggered") { + double width = mModuleWidth * 2; // Each stave has two modules (based on the LOI design) + stave = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); + TGeoVolume* chipVolLeft = createChip("flat", mModuleWidth); + TGeoVolume* chipVolRight = createChip("flat", mModuleWidth); + staveVol = new TGeoVolume(staveName.c_str(), stave, medAir); + + TGeoCombiTrans* transLeft = new TGeoCombiTrans(); + transLeft->SetTranslation(-mModuleWidth / 2 + 0.05, 0, 0); // 1mm overlap between the modules + LOGP(info, "Inserting {} in {} ", 
chipVolLeft->GetName(), staveVol->GetName()); + staveVol->AddNode(chipVolLeft, 0, transLeft); + + TGeoCombiTrans* transRight = new TGeoCombiTrans(); + transRight->SetTranslation(mModuleWidth / 2 - 0.05, 0.2, 0); + LOGP(info, "Inserting {} in {} ", chipVolRight->GetName(), staveVol->GetName()); + staveVol->AddNode(chipVolRight, 1, transRight); + } else { + LOGP(fatal, "Chip of type '{}' is not implemented", type); + } + + staveVol->SetLineColor(kYellow); + + return staveVol; +} + void TRKLayer::createLayer(TGeoVolume* motherVolume) { TGeoMedium* medSi = gGeoManager->GetMedium("TRK_SILICON$"); @@ -56,28 +158,16 @@ void TRKLayer::createLayer(TGeoVolume* motherVolume) layerVol->SetLineColor(kYellow); if (mLayout == eLayout::kCylinder) { - TGeoTube* stave = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); - TGeoTube* chip = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); - TGeoTube* sensor = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); - - TGeoVolume* sensVol = new TGeoVolume(sensName.c_str(), sensor, medSi); - sensVol->SetLineColor(kYellow); - TGeoVolume* chipVol = new TGeoVolume(chipName.c_str(), chip, medSi); - chipVol->SetLineColor(kYellow); - TGeoVolume* staveVol = new TGeoVolume(staveName.c_str(), stave, medSi); - staveVol->SetLineColor(kYellow); - - LOGP(info, "Inserting {} in {} ", sensVol->GetName(), chipVol->GetName()); - chipVol->AddNode(sensVol, 1, nullptr); - - LOGP(info, "Inserting {} in {} ", chipVol->GetName(), staveVol->GetName()); - staveVol->AddNode(chipVol, 1, nullptr); - + auto staveVol = createStave("cylinder"); LOGP(info, "Inserting {} in {} ", staveVol->GetName(), layerVol->GetName()); layerVol->AddNode(staveVol, 1, nullptr); } else if (mLayout == eLayout::kTurboStaves) { // Compute the number of staves - double width = mModuleWidth * 2; // Each stave has two modules (based on the LOI design) + double width = mModuleWidth; // Each stave has two modules (based on the LOI design) + if 
(mInnerRadius > 25) { + width *= 2; // Outer layers have two modules per stave + } + int nStaves = (int)std::ceil(mInnerRadius * 2 * TMath::Pi() / width); nStaves += nStaves % 2; // Require an even number of staves @@ -91,29 +181,42 @@ void TRKLayer::createLayer(TGeoVolume* motherVolume) LOGP(info, "Creating a layer with {} staves and {} mm overlap", nStaves, overlap * 10); for (int iStave = 0; iStave < nStaves; iStave++) { - TGeoBBox* sensor = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); - TGeoBBox* chip = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); - TGeoBBox* stave = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); - - TGeoVolume* sensVol = new TGeoVolume(sensName.c_str(), sensor, medSi); - sensVol->SetLineColor(kYellow); - TGeoVolume* chipVol = new TGeoVolume(chipName.c_str(), chip, medSi); - chipVol->SetLineColor(kYellow); - TGeoVolume* staveVol = new TGeoVolume(staveName.c_str(), stave, medSi); - staveVol->SetLineColor(kYellow); + TGeoVolume* staveVol = createStave("flat", width); // Put the staves in the correct position and orientation TGeoCombiTrans* trans = new TGeoCombiTrans(); double theta = 360. * iStave / nStaves; - TGeoRotation* rot = new TGeoRotation("rot", theta + 90 + 2, 0, 0); + TGeoRotation* rot = new TGeoRotation("rot", theta + 90 + 3, 0, 0); trans->SetRotation(rot); trans->SetTranslation(mInnerRadius * std::cos(2. 
* TMath::Pi() * iStave / nStaves), mInnerRadius * std::sin(2 * TMath::Pi() * iStave / nStaves), 0); - LOGP(info, "Inserting {} in {} ", sensVol->GetName(), chipVol->GetName()); - chipVol->AddNode(sensVol, 1, nullptr); + LOGP(info, "Inserting {} in {} ", staveVol->GetName(), layerVol->GetName()); + layerVol->AddNode(staveVol, iStave, trans); + } + } else if (mLayout == kStaggered) { + // Compute the number of staves + double width = mModuleWidth * 2; // Each stave has two modules (based on the LOI design) + int nStaves = (int)std::ceil(mInnerRadius * 2 * TMath::Pi() / width); + nStaves += nStaves % 2; // Require an even number of staves - LOGP(info, "Inserting {} in {} ", chipVol->GetName(), staveVol->GetName()); - staveVol->AddNode(chipVol, 1, nullptr); + // Compute the size of the overlap region + double theta = 2 * TMath::Pi() / nStaves; + double theta1 = std::atan(width / 2 / mInnerRadius); + double st = std::sin(theta); + double ct = std::cos(theta); + double theta2 = std::atan((mInnerRadius * st - width / 2 * ct) / (mInnerRadius * ct + width / 2 * st)); + double overlap = (theta1 - theta2) * mInnerRadius; + LOGP(info, "Creating a layer with {} staves and {} mm overlap", nStaves, overlap * 10); + + for (int iStave = 0; iStave < nStaves; iStave++) { + TGeoVolume* staveVol = createStave("staggered"); + + // Put the staves in the correct position and orientation + TGeoCombiTrans* trans = new TGeoCombiTrans(); + double theta = 360. * iStave / nStaves; + TGeoRotation* rot = new TGeoRotation("rot", theta + 90, 0, 0); + trans->SetRotation(rot); + trans->SetTranslation(mInnerRadius * std::cos(2. 
* TMath::Pi() * iStave / nStaves), mInnerRadius * std::sin(2 * TMath::Pi() * iStave / nStaves), 0); LOGP(info, "Inserting {} in {} ", staveVol->GetName(), layerVol->GetName()); layerVol->AddNode(staveVol, iStave, trans); From 9dee03c9422892ec16a21b7b0b618d3580d20cf4 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 30 Jan 2025 18:58:34 +0100 Subject: [PATCH 0037/1914] DPL: optimise mapping between FairLogger and InfoLogger (#13919) --- Framework/DataTakingSupport/src/Plugin.cxx | 105 ++++++++++++--------- 1 file changed, 60 insertions(+), 45 deletions(-) diff --git a/Framework/DataTakingSupport/src/Plugin.cxx b/Framework/DataTakingSupport/src/Plugin.cxx index f719fd5ef7d1f..e80e3f359f1be 100644 --- a/Framework/DataTakingSupport/src/Plugin.cxx +++ b/Framework/DataTakingSupport/src/Plugin.cxx @@ -16,6 +16,7 @@ #include "Framework/RawDeviceService.h" #include "Framework/DeviceSpec.h" #include +#include #include #include #include @@ -66,51 +67,65 @@ auto createInfoLoggerSinkHelper(InfoLogger* logger, InfoLoggerContext* ctx) InfoLogger::InfoLogger::Severity severity = InfoLogger::Severity::Undefined; int level = InfoLogger::undefinedMessageOption.level; - if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::nolog)) { - // discard - return; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::fatal)) { - severity = InfoLogger::Severity::Fatal; - level = 1; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::error)) { - severity = InfoLogger::Severity::Error; - level = 2; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::alarm)) { - severity = InfoLogger::Severity::Warning; - level = 6; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::important)) { - severity = InfoLogger::Severity::Info; - level = 7; - } else if (metadata.severity_name == 
fair::Logger::SeverityName(fair::Severity::warn)) { - severity = InfoLogger::Severity::Warning; - level = 11; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::state)) { - severity = InfoLogger::Severity::Info; - level = 12; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::info)) { - severity = InfoLogger::Severity::Info; - level = 13; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::debug)) { - severity = InfoLogger::Severity::Debug; - level = 14; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::detail)) { - severity = InfoLogger::Severity::Debug; - level = 15; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::debug1)) { - severity = InfoLogger::Severity::Debug; - level = 16; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::debug2)) { - severity = InfoLogger::Severity::Debug; - level = 17; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::debug3)) { - severity = InfoLogger::Severity::Debug; - level = 18; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::debug4)) { - severity = InfoLogger::Severity::Debug; - level = 19; - } else if (metadata.severity_name == fair::Logger::SeverityName(fair::Severity::trace)) { - severity = InfoLogger::Severity::Debug; - level = 21; + switch (metadata.severity) { + case fair::Severity::fatal: + severity = InfoLogger::Severity::Fatal; + level = 1; + break; + case fair::Severity::error: + severity = InfoLogger::Severity::Error; + level = 2; + break; + case fair::Severity::alarm: + severity = InfoLogger::Severity::Warning; + level = 6; + break; + case fair::Severity::important: + severity = InfoLogger::Severity::Info; + level = 7; + break; + case fair::Severity::warn: + severity = InfoLogger::Severity::Warning; + level = 11; + break; + case fair::Severity::state: + severity = 
InfoLogger::Severity::Info; + level = 12; + break; + case fair::Severity::info: + severity = InfoLogger::Severity::Info; + level = 13; + break; + case fair::Severity::debug: + severity = InfoLogger::Severity::Debug; + level = 14; + break; + case fair::Severity::detail: + severity = InfoLogger::Severity::Debug; + level = 15; + break; + case fair::Severity::debug1: + severity = InfoLogger::Severity::Debug; + level = 16; + break; + case fair::Severity::debug2: + severity = InfoLogger::Severity::Debug; + level = 17; + break; + case fair::Severity::debug3: + severity = InfoLogger::Severity::Debug; + level = 18; + break; + case fair::Severity::debug4: + severity = InfoLogger::Severity::Debug; + level = 19; + break; + case fair::Severity::trace: + severity = InfoLogger::Severity::Debug; + level = 21; + break; + case fair::Severity::nolog: // discard + return; } InfoLogger::InfoLoggerMessageOption opt = { From ef2b3c1e1b8ec9702db7293cc66f84bb7f608b70 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 30 Jan 2025 19:00:12 +0100 Subject: [PATCH 0038/1914] Signpost for TableTreeHelpers (#13918) --- Framework/Core/src/TableTreeHelpers.cxx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/TableTreeHelpers.cxx b/Framework/Core/src/TableTreeHelpers.cxx index d0fdd0ced5779..2f23c07aea451 100644 --- a/Framework/Core/src/TableTreeHelpers.cxx +++ b/Framework/Core/src/TableTreeHelpers.cxx @@ -11,6 +11,7 @@ #include "Framework/TableTreeHelpers.h" #include "Framework/Logger.h" #include "Framework/Endian.h" +#include "Framework/Signpost.h" #include "arrow/type_traits.h" #include @@ -21,6 +22,9 @@ #include #include + +O2_DECLARE_DYNAMIC_LOG(tabletree_helpers); + namespace TableTreeHelpers { static constexpr char const* sizeBranchSuffix = "_size"; @@ -134,6 +138,7 @@ BranchToColumn::BranchToColumn(TBranch* branch, bool VLA, std::string name, EDat std::pair, std::shared_ptr> 
BranchToColumn::read(TBuffer* buffer) { + O2_SIGNPOST_ID_FROM_POINTER(sid, tabletree_helpers, buffer); auto totalEntries = mBranch->GetEntries(); arrow::Status status; int readEntries = 0; @@ -170,7 +175,9 @@ std::pair, std::shared_ptr> B } } else { // other types: use serialized read to build arrays directly + size_t branchSize = mBranch->GetTotBytes(); auto&& result = arrow::AllocateResizableBuffer(mBranch->GetTotBytes(), mPool); + O2_SIGNPOST_EVENT_EMIT(tabletree_helpers, sid, "BranchToColumn", "Allocating %ld bytes for %{public}s", branchSize, mBranch->GetName()); if (!result.ok()) { throw runtime_error("Cannot allocate values buffer"); } @@ -526,17 +533,20 @@ void TreeToTable::setLabel(const char* label) mTableLabel = label; } -void TreeToTable::fill(TTree*) +void TreeToTable::fill(TTree*tree) { std::vector> columns; std::vector> fields; static TBufferFile buffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; + O2_SIGNPOST_ID_FROM_POINTER(sid, tabletree_helpers, &buffer); + O2_SIGNPOST_START(tabletree_helpers, sid, "TreeToTable", "Filling %{public}s", tree->GetName()); for (auto& reader : mBranchReaders) { buffer.Reset(); auto arrayAndField = reader->read(&buffer); columns.push_back(arrayAndField.first); fields.push_back(arrayAndField.second); } + O2_SIGNPOST_END(tabletree_helpers, sid, "TreeToTable", "Done filling."); auto schema = std::make_shared(fields, std::make_shared(std::vector{std::string{"label"}}, std::vector{mTableLabel})); mTable = arrow::Table::Make(schema, columns); From c1509f893ccad8a2cabd741ff43b476faac160ac Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 31 Jan 2025 12:14:03 +0100 Subject: [PATCH 0039/1914] GLO: QC: Add 3d plot and widen eta cuts (#13692) * GLO: QC: Add eta/phi/pt matching eff plot to QC * GLO: QC: set reasonable defaults and steering * GLO: QC: minor cleanup (removal of some headers and other stuff) Signed-off-by: Felix Schlepper --------- Signed-off-by: Felix Schlepper --- .../include/GLOQC/ITSTPCMatchingQCParams.h | 
30 +-- Detectors/GLOQC/include/GLOQC/MatchITSTPCQC.h | 81 +++--- Detectors/GLOQC/src/MatchITSTPCQC.cxx | 253 ++++++++++-------- .../include/GlobalTracking/TrackCuts.h | 2 +- .../qc/src/ITSTPCMatchingQCSpec.cxx | 1 + 5 files changed, 202 insertions(+), 165 deletions(-) diff --git a/Detectors/GLOQC/include/GLOQC/ITSTPCMatchingQCParams.h b/Detectors/GLOQC/include/GLOQC/ITSTPCMatchingQCParams.h index 6bd88a9be64c0..024497b1b918e 100644 --- a/Detectors/GLOQC/include/GLOQC/ITSTPCMatchingQCParams.h +++ b/Detectors/GLOQC/include/GLOQC/ITSTPCMatchingQCParams.h @@ -17,26 +17,26 @@ #include "CommonUtils/ConfigurableParam.h" #include "CommonUtils/ConfigurableParamHelper.h" -namespace o2 -{ -namespace gloqc +namespace o2::gloqc { // There are configurable params for TPC-ITS matching struct ITSTPCMatchingQCParams : public o2::conf::ConfigurableParamHelper { - float minPtITSCut = 0.f; - float etaITSCut = 1e10f; + int nBinsPt = 100; + float minPtITSCut = 0.1; + float etaITSCut = 1.4; int32_t minNITSClustersCut = 0; - int32_t maxChi2PerClusterITS = 100000; - float minPtTPCCut = 0.1f; - float etaTPCCut = 0.9f; + float maxChi2PerClusterITS = 1e10; + float minPtTPCCut = 0.1; + float etaTPCCut = 1.4; int32_t minNTPCClustersCut = 60; - float minDCACut = 100.f; - float minDCACutY = 10.f; - float minPtCut = 0.f; - float maxPtCut = 1e10f; - float etaCut = 1.e10f; + float minDCACut = 100.; + float minDCACutY = 10.; + float minPtCut = 0.1; + float maxPtCut = 20; + float etaCut = 1.4; + float etaNo0Cut = 0.05; float cutK0Mass = 0.05f; float maxEtaK0 = 0.8f; float K0Scaling = 1.f; @@ -52,7 +52,7 @@ struct ITSTPCMatchingQCParams : public o2::conf::ConfigurableParamHelperstartPublishing(m1OverPtDen[i]); publisher->startPublishing(mFractionITSTPCmatch1OverPt[i]); + // 3D eta/phi/pt + publisher->startPublishing(mEtaPhiPtNum[i]); + publisher->startPublishing(mEtaPhiPtDen[i]); + if (mUseTrkPID) { // Vs Tracking PID hypothesis for (int j = 0; j < o2::track::PID::NIDs; ++j) { // Pt @@ -253,14 +262,14 
@@ class MatchITSTPCQC // ITS track void setMinPtITSCut(float v) { mPtITSCut = v; }; void setEtaITSCut(float v) { mEtaITSCut = v; }; // TODO: define 2 different values for min and max (**) - void setMinNClustersITS(float v) { mMinNClustersITS = v; } + void setMinNClustersITS(int v) { mMinNClustersITS = v; } void setMaxChi2PerClusterITS(float v) { mMaxChi2PerClusterITS = v; } // TO DO: define an agreed way to implement the setter for ITS matching (min. # layers, which layers) // [...] --> exploit the method TrackCuts::setRequireHitsInITSLayers(...) // TPC track void setMinPtTPCCut(float v) { mPtTPCCut = v; }; void setEtaTPCCut(float v) { mEtaTPCCut = v; }; // TODO: define 2 different values for min and max (***) - void setMinNTPCClustersCut(float v) { mNTPCClustersCut = v; } + void setMinNTPCClustersCut(int v) { mNTPCClustersCut = v; } void setMinDCAtoBeamPipeCut(std::array v) { setMinDCAtoBeamPipeDistanceCut(v[0]); @@ -269,9 +278,11 @@ class MatchITSTPCQC void setMinDCAtoBeamPipeDistanceCut(float v) { mDCATPCCut = v; } void setMinDCAtoBeamPipeYCut(float v) { mDCATPCCutY = v; } // ITS-TPC kinematics + void setNBinsPt(int v) { mPtBins = v; } void setPtCut(float v) { mPtCut = v; } void setMaxPtCut(float v) { mPtMaxCut = v; } - void setEtaCut(float v) { mEtaCut = v; } // TODO: define 2 different values for min and max (*) + void setEtaCut(float v) { mEtaCut = v; } + void setEtaNo0Cut(float v) { mEtaNo0Cut = v; } // K0 void setMaxK0Eta(float v) { mMaxEtaK0 = v; } @@ -286,33 +297,11 @@ class MatchITSTPCQC void setK0Scaling(float v) { mK0Scaling = v; } float getK0Scaling() const { return mK0Scaling; } void setK0MaxDCA(float v) { mK0MaxDCA = v; } - float getK0MaxDCA() { return mK0MaxDCA; } + float getK0MaxDCA() const { return mK0MaxDCA; } void setK0MinCosPA(float v) { mK0MinCosPA = v; } float getK0MinCosPA() const { return mK0MinCosPA; } - void printParams() - { - LOG(info) << "minPtITSCut = " << mPtITSCut; - LOG(info) << "etaITSCut = " << mEtaITSCut; - LOG(info) << 
"minNITSClustersCut = " << mMinNClustersITS; - LOG(info) << "maxChi2PerClusterITS = " << mMaxChi2PerClusterITS; - LOG(info) << "minPtTPCCut = " << mPtTPCCut; - LOG(info) << "etaTPCCut = " << mEtaTPCCut; - LOG(info) << "minNTPCClustersCut = " << mNTPCClustersCut; - LOG(info) << "minDCACut = " << mDCATPCCut; - LOG(info) << "minDCACutY = " << mDCATPCCutY; - LOG(info) << "minPtCut = " << mPtCut; - LOG(info) << "maxPtCut = " << mPtMaxCut; - LOG(info) << "etaCut = " << mEtaCut; - LOG(info) << "cutK0Mass = " << mCutK0Mass; - LOG(info) << "maxEtaK0 = " << mMaxEtaK0; - LOG(info) << "minTPCOccpp = " << mMinTPCOccpp; - LOG(info) << "maxTPCOccpp = " << mMaxTPCOccpp; - LOG(info) << "nBinsTPCOccpp = " << mNBinsTPCOccpp; - LOG(info) << "minTPCOccPbPb = " << mMinTPCOccPbPb; - LOG(info) << "maxTPCOccPbPb = " << mMaxTPCOccPbPb; - LOG(info) << "nBinsTPCOccPbPb = " << mNBinsTPCOccPbPb; - } + void printParams() const; private: std::shared_ptr mDataRequest; @@ -406,8 +395,12 @@ class MatchITSTPCQC TH1D* m1OverPtPhysPrimNum[matchType::SIZE] = {}; TH1D* m1OverPtPhysPrimDen[matchType::SIZE] = {}; TEfficiency* mFractionITSTPCmatchPhysPrim1OverPt[matchType::SIZE] = {}; + // 3D Efficiency in eta/phi/pt + TH3F* mEtaPhiPtNum[matchType::SIZE] = {}; + TH3F* mEtaPhiPtDen[matchType::SIZE] = {}; - void setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden, bool is2D = false); + template + void setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden); int mNTPCSelectedTracks = 0; int mNITSSelectedTracks = 0; @@ -415,11 +408,11 @@ class MatchITSTPCQC // cut values // ITS track - float mPtITSCut = 0.f; // min pT for ITS track - float mEtaITSCut = 1e10f; // eta window for ITS track --> TODO: define 2 different values for min and max (**) - int mMinNClustersITS = 0; // min number of ITS clusters - float mMaxChi2PerClusterITS{1e10f}; // max its fit chi2 per ITS cluster - std::vector>> mRequiredITSHits{}; // vector of ITS requirements (minNRequiredHits in specific requiredLayers) + float mPtITSCut = 0.1f; // 
min pT for ITS track + float mEtaITSCut = 1.4f; // eta window for ITS track --> TODO: define 2 different values for min and max (**) + int mMinNClustersITS = 0; // min number of ITS clusters + float mMaxChi2PerClusterITS{1e10f}; // max its fit chi2 per ITS cluster + std::vector>> mRequiredITSHits; // vector of ITS requirements (minNRequiredHits in specific requiredLayers) // TPC track float mPtTPCCut = 0.1f; // min pT for TPC track float mEtaTPCCut = 1.4f; // eta window for TPC track --> TODO: define 2 different values for min and max (***) @@ -427,10 +420,12 @@ class MatchITSTPCQC float mDCATPCCut = 100.f; // max DCA 3D to PV for TPC track float mDCATPCCutY = 10.f; // max DCA xy to PV for TPC track // ITS-TPC kinematics + int mPtBins = 100; float mPtCut = 0.1f; - float mPtMaxCut = 1e10f; - float mEtaCut = 1e10f; // 1e10f as defaults of Detectors/GlobalTracking/include/GlobalTracking/TrackCuts.h - // TODO: define 2 different values for min and max (*) + float mPtMaxCut = 20; + float mEtaCut = 1.4f; + float mEtaNo0Cut = 0.05f; + // TODO: define 2 different values for min and max (*) // for V0s o2::vertexing::DCAFitterN<2> mFitterV0; @@ -447,7 +442,7 @@ class MatchITSTPCQC std::shared_ptr mParam = nullptr; int mNHBPerTF = 0; int mNTPCOccBinLength = 0; ///< TPC occ. 
histo bin length in TBs - float mNTPCOccBinLengthInv; + float mNTPCOccBinLengthInv{}; std::vector mTBinClOcc; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting from the TB = i*mNTPCOccBinLength gsl::span mTPCRefitterOccMap; ///< externally set TPC clusters occupancy map bool mIsHI = false; @@ -462,7 +457,7 @@ class MatchITSTPCQC float mK0MaxDCA = 0.01; // max DCA to select the K0 float mK0MinCosPA = 0.995; // min cosPA to select the K0 - ClassDefNV(MatchITSTPCQC, 3); + ClassDefNV(MatchITSTPCQC, 4); }; } // namespace gloqc } // namespace o2 diff --git a/Detectors/GLOQC/src/MatchITSTPCQC.cxx b/Detectors/GLOQC/src/MatchITSTPCQC.cxx index 6e14f9eb16c8b..121df5e134817 100644 --- a/Detectors/GLOQC/src/MatchITSTPCQC.cxx +++ b/Detectors/GLOQC/src/MatchITSTPCQC.cxx @@ -9,28 +9,24 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include + #include "GLOQC/MatchITSTPCQC.h" #include "ReconstructionDataFormats/TrackTPCITS.h" #include "DataFormatsTPC/TrackTPC.h" -#include "Framework/InputSpec.h" -#include "ReconstructionDataFormats/TrackParametrization.h" #include "DetectorsBase/Propagator.h" #include "SimulationDataFormat/MCUtils.h" -#include -#include "TGraphAsymmErrors.h" #include "GlobalTracking/TrackCuts.h" #include +#include #include "ReconstructionDataFormats/PrimaryVertex.h" #include "ReconstructionDataFormats/V0.h" -// #include "GlobalTrackingStudy/V0Ext.h" #include "DetectorsVertexing/SVertexerParams.h" #include "Framework/InputRecord.h" #include "Framework/TimingInfo.h" #include "GPUO2InterfaceUtils.h" #include "CommonConstants/LHCConstants.h" -#include "DataFormatsTPC/Constants.h" #include "DetectorsCommonDataFormats/DetID.h" - #include "GPUO2InterfaceRefit.h" using namespace o2::gloqc; @@ -40,7 +36,6 @@ using DetID = o2::detectors::DetID; MatchITSTPCQC::~MatchITSTPCQC() { - deleteHistograms(); } @@ -116,6 +111,10 @@ void MatchITSTPCQC::deleteHistograms() 
delete m1OverPtPhysPrimNum[i]; delete m1OverPtPhysPrimDen[i]; delete mFractionITSTPCmatchPhysPrim1OverPt[i]; + + // 3D eta/phi/pt + delete mEtaPhiPtNum[i]; + delete mEtaPhiPtDen[i]; } // Residuals @@ -171,6 +170,10 @@ void MatchITSTPCQC::reset() m1OverPtNum[i]->Reset(); m1OverPtDen[i]->Reset(); + // 3D eta/phi/pt + mEtaPhiPtNum[i]->Reset(); + mEtaPhiPtDen[i]->Reset(); + if (mUseTrkPID) { // Vs Tracking PID hypothesis for (int j = 0; j < o2::track::PID::NIDs; ++j) { // Pt @@ -224,12 +227,12 @@ bool MatchITSTPCQC::init() { LOGP(debug, "Creating Variable Binning"); std::array title{"TPC", "ITS"}; - std::array etaSel{"", ", |eta| < 0.9"}; + std::array etaSel{Form(", |#eta| < %.1f", mEtaTPCCut), Form(", |#eta| < %.1f", mEtaCut)}; std::array maxNCls{156, 7}; // log binning for pT - const Int_t nbinsPt = 100; - const Double_t xminPt = 0.01; - const Double_t xmaxPt = 20; + const Int_t nbinsPt = mPtBins; + const Double_t xminPt = (mPtCut > 0) ? mPtCut : 0.01; + const Double_t xmaxPt = mPtMaxCut; Double_t* xbinsPt = new Double_t[nbinsPt + 1]; Double_t xlogminPt = TMath::Log10(xminPt); Double_t xlogmaxPt = TMath::Log10(xmaxPt); @@ -243,24 +246,24 @@ bool MatchITSTPCQC::init() // Data and MC for (int i = 0; i < matchType::SIZE; ++i) { // Pt - mPtNum[i] = new TH1D(Form("mPtNum_%s", title[i].c_str()), Form("Pt distribution of ITSTPC matched tracks, wrt %s tracks %s; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f); + mPtNum[i] = new TH1D(Form("mPtNum_%s", title[i].c_str()), Form("Pt distribution of ITSTPC matched tracks, wrt %s tracks %s; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut); mPtNum[i]->Sumw2(); mPtNum[i]->SetOption("logy"); mPtNum[i]->GetYaxis()->SetTitleOffset(1.4); - mPtDen[i] = new TH1D(Form("mPtDen_%s", title[i].c_str()), Form("Pt distribution of %s tracks %s; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f); + mPtDen[i] = new TH1D(Form("mPtDen_%s", title[i].c_str()), Form("Pt 
distribution of %s tracks %s; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut); mPtDen[i]->Sumw2(); mPtDen[i]->SetOption("logy"); mPtDen[i]->GetYaxis()->SetTitleOffset(1.4); - mFractionITSTPCmatch[i] = new TEfficiency(Form("mFractionITSTPCmatch_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks wrt %s tracks vs Pt %s; Pt [GeV/c]; Eff", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f); - mPtNum_noEta0[i] = new TH1D(Form("mPtNum_noEta0_%s", title[i].c_str()), Form("Pt distribution of ITSTPC matched tracks without |eta| < 0.05, wrt %s tracks %s; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f); + mFractionITSTPCmatch[i] = new TEfficiency(Form("mFractionITSTPCmatch_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks wrt %s tracks vs Pt %s; Pt [GeV/c]; Eff", title[i].c_str(), etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut); + mPtNum_noEta0[i] = new TH1D(Form("mPtNum_noEta0_%s", title[i].c_str()), Form("Pt distribution of ITSTPC matched tracks without |eta| < %.2f, wrt %s tracks %s; Pt [GeV/c]; dNdPt", mEtaNo0Cut, title[i].c_str(), etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut); mPtNum_noEta0[i]->Sumw2(); mPtNum_noEta0[i]->SetOption("logy"); mPtNum_noEta0[i]->GetYaxis()->SetTitleOffset(1.4); - mPtDen_noEta0[i] = new TH1D(Form("mPtDen_noEta0_%s", title[i].c_str()), Form("Pt distribution of %s tracks without |eta| < 0.05 %s; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f); + mPtDen_noEta0[i] = new TH1D(Form("mPtDen_noEta0_%s", title[i].c_str()), Form("Pt distribution of %s tracks without |eta| < %.2f %s; Pt [GeV/c]; dNdPt", title[i].c_str(), mEtaNo0Cut, etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut); mPtDen_noEta0[i]->Sumw2(); mPtDen_noEta0[i]->SetOption("logy"); mPtDen_noEta0[i]->GetYaxis()->SetTitleOffset(1.4); - mFractionITSTPCmatch_noEta0[i] = new TEfficiency(Form("mFractionITSTPCmatch_noEta0_%s", title[i].c_str()), Form("Fraction of ITSTPC matched 
tracks wrt %s tracks vs Pt without |eta| < 0.05 %s; Pt [GeV/c]; Eff", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f); + mFractionITSTPCmatch_noEta0[i] = new TEfficiency(Form("mFractionITSTPCmatch_noEta0_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks wrt %s tracks vs Pt without |eta| < %.2f %s; Pt [GeV/c]; Eff", title[i].c_str(), mEtaNo0Cut, etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut); // Phi mPhiNum[i] = new TH1F(Form("mPhiNum_%s", title[i].c_str()), Form("Phi distribution of ITSTPC matched tracks, wrt %s tracks %s; Phi [rad]; dNdPhi", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 2 * TMath::Pi()); @@ -268,11 +271,11 @@ bool MatchITSTPCQC::init() mPhiDen[i] = new TH1F(Form("mPhiDen_%s", title[i].c_str()), Form("Phi distribution of %s tracks %s; Phi [rad]; dNdPhi", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 2 * TMath::Pi()); mPhiDen[i]->Sumw2(); mFractionITSTPCmatchPhi[i] = new TEfficiency(Form("mFractionITSTPCmatchPhi_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs Phi wrt %s tracks %s; Phi [rad]; Eff", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 2 * TMath::Pi()); - mPhiVsPtNum[i] = new TH2F(Form("mPhiVsPtNum_%s", title[i].c_str()), Form("Phi vs Pt distribution of ITSTPC matched tracks wrt %s %s; #it{p}_{T} [GeV#it{c}]; Phi [rad]; dNdPhi", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f, 100, 0.f, 2 * TMath::Pi()); + mPhiVsPtNum[i] = new TH2F(Form("mPhiVsPtNum_%s", title[i].c_str()), Form("Phi vs Pt distribution of ITSTPC matched tracks wrt %s %s; #it{p}_{T} [GeV#it{c}]; Phi [rad]; dNdPhi", title[i].c_str(), etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 100, 0.f, 2 * TMath::Pi()); mPhiVsPtNum[i]->Sumw2(); - mPhiVsPtDen[i] = new TH2F(Form("mPhiVsPtDen_%s", title[i].c_str()), Form("Phi vs Pt distribution of %s tracks %s; #it{p}_{T} [GeV#it{c}]; Phi [rad]; dNdPhi", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f, 100, 0.f, 2 * TMath::Pi()); + mPhiVsPtDen[i] = new TH2F(Form("mPhiVsPtDen_%s", 
title[i].c_str()), Form("Phi vs Pt distribution of %s tracks %s; #it{p}_{T} [GeV#it{c}]; Phi [rad]; dNdPhi", title[i].c_str(), etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 100, 0.f, 2 * TMath::Pi()); mPhiVsPtDen[i]->Sumw2(); - mFractionITSTPCmatchPhiVsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchPhiVsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks wrt %s tracks %s, Phi vs Pt; #it{p}_{T} [GeV#it{c}]; Phi [rad]; Eff", title[i].c_str(), etaSel[i].c_str()), 100, 0.f, 20.f, 100, 0.f, 2 * TMath::Pi()); + mFractionITSTPCmatchPhiVsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchPhiVsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks wrt %s tracks %s, Phi vs Pt; #it{p}_{T} [GeV#it{c}]; Phi [rad]; Eff", title[i].c_str(), etaSel[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 100, 0.f, 2 * TMath::Pi()); // Eta mEtaNum[i] = new TH1F(Form("mEtaNum_%s", title[i].c_str()), Form("Eta distribution of ITSTPC matched tracks, wrt %s tracks; Eta; dNdEta", title[i].c_str()), 100, -2.f, 2.f); @@ -282,25 +285,25 @@ bool MatchITSTPCQC::init() mEtaDen[i]->Sumw2(); mEtaDen[i]->GetYaxis()->SetTitleOffset(1.4); mFractionITSTPCmatchEta[i] = new TEfficiency(Form("mFractionITSTPCmatchEta_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks , wrt %s tracks, vs Eta; Eta; Eff", title[i].c_str()), 100, -2.f, 2.f); - mEtaVsPtNum[i] = new TH2F(Form("mEtaVsPtNum_%s", title[i].c_str()), Form("Eta vs Pt distribution of ITSTPC matched tracks, wrt %s tracks; #it{p}_{T} [GeV#it{c}]; Eta", title[i].c_str()), 100, 0.f, 20.f, 100, -2.f, 2.f); + mEtaVsPtNum[i] = new TH2F(Form("mEtaVsPtNum_%s", title[i].c_str()), Form("Eta vs Pt distribution of ITSTPC matched tracks, wrt %s tracks; #it{p}_{T} [GeV#it{c}]; Eta", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 100, -2.f, 2.f); mEtaVsPtNum[i]->Sumw2(); - mEtaVsPtDen[i] = new TH2F(Form("mEtaVsPtDen_%s", title[i].c_str()), Form("Eta vs Pt distribution of %s tracks; #it{p}_{T} [GeV#it{c}]; Eta", title[i].c_str()), 100, 
0.f, 20.f, 100, -2.f, 2.f); + mEtaVsPtDen[i] = new TH2F(Form("mEtaVsPtDen_%s", title[i].c_str()), Form("Eta vs Pt distribution of %s tracks; #it{p}_{T} [GeV#it{c}]; Eta", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 100, -2.f, 2.f); mEtaVsPtDen[i]->Sumw2(); - mFractionITSTPCmatchEtaVsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchEtaVsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks, wrt %s tracks, Eta vs Pt; #it{p}_{T} [GeV#it{c}]; Eta; Eff", title[i].c_str()), 100, 0.f, 20.f, 100, -2.f, 2.f); + mFractionITSTPCmatchEtaVsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchEtaVsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks, wrt %s tracks, Eta vs Pt; #it{p}_{T} [GeV#it{c}]; Eta; Eff", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 100, -2.f, 2.f); // Clusters - mClsVsPtNum[i] = new TH2F(Form("mClsVsPtNum_%s", title[i].c_str()), Form("#Clusters vs Pt distribution of ITSTPC matched tracks, wrt %s tracks; #it{p}_{T} [GeV#it{c}]; #Clusters", title[i].c_str()), 100, 0.f, 20.f, maxNCls[i], 0, maxNCls[i]); + mClsVsPtNum[i] = new TH2F(Form("mClsVsPtNum_%s", title[i].c_str()), Form("#Clusters vs Pt distribution of ITSTPC matched tracks, wrt %s tracks; #it{p}_{T} [GeV#it{c}]; #Clusters", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, maxNCls[i], 0, maxNCls[i]); mClsVsPtNum[i]->Sumw2(); - mClsVsPtDen[i] = new TH2F(Form("mClsVsPtDen_%s", title[i].c_str()), Form("#Clusters vs Pt distribution of %s tracks; #it{p}_{T} [GeV#it{c}]; #Clusters", title[i].c_str()), 100, 0.f, 20.f, maxNCls[i], 0, maxNCls[i]); + mClsVsPtDen[i] = new TH2F(Form("mClsVsPtDen_%s", title[i].c_str()), Form("#Clusters vs Pt distribution of %s tracks; #it{p}_{T} [GeV#it{c}]; #Clusters", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, maxNCls[i], 0, maxNCls[i]); mClsVsPtDen[i]->Sumw2(); - mFractionITSTPCmatchClsVsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchClsVsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks, wrt %s tracks, #Clusters vs Pt; 
#it{p}_{T} [GeV#it{c}]; #Clusters; Eff", title[i].c_str()), 100, 0.f, 20.f, maxNCls[i], 0, maxNCls[i]); + mFractionITSTPCmatchClsVsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchClsVsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks, wrt %s tracks, #Clusters vs Pt; #it{p}_{T} [GeV#it{c}]; #Clusters; Eff", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, maxNCls[i], 0, maxNCls[i]); // Chi2 - mChi2VsPtNum[i] = new TH2F(Form("mChi2VsPtNum_%s", title[i].c_str()), Form("Chi2 vs Pt distribution of ITSTPC matched tracks, wrt %s tracks; #it{p}_{T} [GeV#it{c}]; Chi2", title[i].c_str()), 100, 0.f, 20.f, 200, 0, 300); + mChi2VsPtNum[i] = new TH2F(Form("mChi2VsPtNum_%s", title[i].c_str()), Form("Chi2 vs Pt distribution of ITSTPC matched tracks, wrt %s tracks; #it{p}_{T} [GeV#it{c}]; Chi2", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 200, 0, 300); mChi2VsPtNum[i]->Sumw2(); - mChi2VsPtDen[i] = new TH2F(Form("mChi2VsPtDen_%s", title[i].c_str()), Form("Chi2 vs Pt distribution of %s tracks; #it{p}_{T} [GeV#it{c}]; Chi2", title[i].c_str()), 100, 0.f, 20.f, 200, 0, 300); + mChi2VsPtDen[i] = new TH2F(Form("mChi2VsPtDen_%s", title[i].c_str()), Form("Chi2 vs Pt distribution of %s tracks; #it{p}_{T} [GeV#it{c}]; Chi2", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 200, 0, 300); mChi2VsPtDen[i]->Sumw2(); - mFractionITSTPCmatchChi2VsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchChi2VsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks, wrt %s tracks, Chi2 vs Pt; #it{p}_{T} [GeV#it{c}]; Chi2; Eff", title[i].c_str()), 100, 0.f, 20.f, 200, 0, 300); + mFractionITSTPCmatchChi2VsPt[i] = new TEfficiency(Form("mFractionITSTPCmatchChi2VsPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks, wrt %s tracks, Chi2 vs Pt; #it{p}_{T} [GeV#it{c}]; Chi2; Eff", title[i].c_str()), mPtBins, mPtCut, mPtMaxCut, 200, 0, 300); // 1/pt m1OverPtNum[i] = new TH1D(Form("m1OverPtNum_%s", title[i].c_str()), Form("1/Pt distribution of matched tracks, wrt %s tracks 
%s; 1/Pt [c/GeV]; dNdPt", title[i].c_str(), etaSel[i].c_str()), 100, -20.f, 20.f); @@ -309,14 +312,20 @@ bool MatchITSTPCQC::init() m1OverPtDen[i]->Sumw2(); mFractionITSTPCmatch1OverPt[i] = new TEfficiency(Form("mFractionITSTPCmatch1OverPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs 1/Pt, wrt %s tracks %s; 1/Pt [c/GeV]; Eff", title[i].c_str(), etaSel[i].c_str()), 100, -20.f, 20.f); + // 3d eta/phi/pt + mEtaPhiPtNum[i] = new TH3F(Form("mEtaPhiPtNum_%s", title[i].c_str()), Form("Numerator #eta vs #varphi vs #it{p}_{T}, wrt %s;#eta %s;#varphi;#it{p}_{T} [GeV#it{c}];Entries", title[i].c_str(), etaSel[i].c_str()), 100, -2., 2., 100, 0., 2 * TMath::Pi(), 100, 0.01, 20.); + mEtaPhiPtNum[i]->Sumw2(); + mEtaPhiPtDen[i] = new TH3F(Form("mEtaPhiPtDen_%s", title[i].c_str()), Form("Denominator #eta vs #varphi vs #it{p}_{T}, wrt %s;#eta %s;#varphi;#it{p}_{T} [GeV#it{c}];Entries", title[i].c_str(), etaSel[i].c_str()), 100, -2., 2., 100, 0., 2 * TMath::Pi(), 100, 0.01, 20.); + mEtaPhiPtDen[i]->Sumw2(); + if (mUseTrkPID) { // Vs Tracking PID hypothesis for (int j = 0; j < o2::track::PID::NIDs; ++j) { // Pt - mPtNumVsTrkPID[i][j] = new TH1D(Form("mPtNumVsTrkPID_%s_PID%i", title[i].c_str(), j), Form("Pt distribution of ITSTPC matched tracks, wrt %s tracks %s, TrkPID %i; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str(), j), 100, 0.f, 20.f); + mPtNumVsTrkPID[i][j] = new TH1D(Form("mPtNumVsTrkPID_%s_PID%i", title[i].c_str(), j), Form("Pt distribution of ITSTPC matched tracks, wrt %s tracks %s, TrkPID %i; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str(), j), mPtBins, mPtCut, mPtMaxCut); mPtNumVsTrkPID[i][j]->Sumw2(); - mPtDenVsTrkPID[i][j] = new TH1D(Form("mPtDenVsTrkPID_%s_PID%i", title[i].c_str(), j), Form("Pt distribution of %s tracks %s, TrkPID %i; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str(), j), 100, 0.f, 20.f); + mPtDenVsTrkPID[i][j] = new TH1D(Form("mPtDenVsTrkPID_%s_PID%i", title[i].c_str(), j), Form("Pt distribution of %s tracks 
%s, TrkPID %i; Pt [GeV/c]; dNdPt", title[i].c_str(), etaSel[i].c_str(), j), mPtBins, mPtCut, mPtMaxCut); mPtDenVsTrkPID[i][j]->Sumw2(); - mFractionITSTPCmatchPtVsTrkPID[i][j] = new TEfficiency(Form("mFractionITSTPCmatchPtVsTrkPID_%s_PID%i", title[i].c_str(), j), Form("Fraction of ITSTPC matched tracks wrt %s tracks vs Pt %s, TrkPID %i; Pt [GeV/c]; Eff", title[i].c_str(), etaSel[i].c_str(), j), 100, 0.f, 20.f); + mFractionITSTPCmatchPtVsTrkPID[i][j] = new TEfficiency(Form("mFractionITSTPCmatchPtVsTrkPID_%s_PID%i", title[i].c_str(), j), Form("Fraction of ITSTPC matched tracks wrt %s tracks vs Pt %s, TrkPID %i; Pt [GeV/c]; Eff", title[i].c_str(), etaSel[i].c_str(), j), mPtBins, mPtCut, mPtMaxCut); // Phi mPhiNumVsTrkPID[i][j] = new TH1D(Form("mPhiNumVsTrkPID_%s_PID%i", title[i].c_str(), j), Form("Phi distribution of ITSTPC matched tracks, wrt %s tracks %s, TrkPID %i; Phi [rad]; dNdPhi", title[i].c_str(), etaSel[i].c_str(), j), 100, 0.f, 2 * TMath::Pi()); @@ -335,7 +344,7 @@ bool MatchITSTPCQC::init() } } - mResidualPt = new TH2F("mResidualPt", "Residuals of ITS-TPC matching in #it{p}_{T}; #it{p}_{T}^{ITS-TPC} [GeV/c]; #it{p}_{T}^{ITS-TPC} - #it{p}_{T}^{TPC} [GeV/c]", 100, 0.f, 20.f, 100, -1.f, 1.f); + mResidualPt = new TH2F("mResidualPt", "Residuals of ITS-TPC matching in #it{p}_{T}; #it{p}_{T}^{ITS-TPC} [GeV/c]; #it{p}_{T}^{ITS-TPC} - #it{p}_{T}^{TPC} [GeV/c]", mPtBins, mPtCut, mPtMaxCut, 100, -1.f, 1.f); mResidualPhi = new TH2F("mResidualPhi", "Residuals of ITS-TPC matching in #it{#phi}; #it{#phi}^{ITS-TPC} [rad]; #it{#phi}^{ITS-TPC} - #it{#phi}^{TPC} [rad]", 100, 0.f, 2 * TMath::Pi(), 100, -1.f, 1.f); mResidualEta = new TH2F("mResidualEta", "Residuals of ITS-TPC matching in #it{#eta}; #it{#eta}^{ITS-TPC}; #it{#eta}^{ITS-TPC} - #it{#eta}^{TPC}", 100, -2.f, 2.f, 100, -1.f, 1.f); mChi2Matching = new TH1F("mChi2Matching", "Chi2 of matching; chi2", 200, 0, 300); @@ -377,11 +386,11 @@ bool MatchITSTPCQC::init() mPhiPhysPrimDen[i]->Sumw2(); mFractionITSTPCmatchPhysPrim[i] 
= new TEfficiency(Form("mFractionITSTPCmatchPhysPrim_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs Pt (physical primary), wrt %s tracks %s; Pt [GeV/c]; Eff", title[i].c_str(), etaSel[i].c_str()), nbinsPt, xbinsPt); - m1OverPtPhysPrimNum[i] = new TH1D(Form("m1OverPtPhysPrimNum_%s", title[i].c_str()), Form("1/Pt distribution of matched tracks (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 100, -20.f, 20.f); + m1OverPtPhysPrimNum[i] = new TH1D(Form("m1OverPtPhysPrimNum_%s", title[i].c_str()), Form("1/Pt distribution of matched tracks (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -1. / mPtCut, 1. / mPtCut); m1OverPtPhysPrimNum[i]->Sumw2(); - m1OverPtPhysPrimDen[i] = new TH1D(Form("m1OverPtPhysPrimDen_%s", title[i].c_str()), Form("1/PtPt distribution of %s tracks (physical primary) %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 100, -20.f, 20.f); + m1OverPtPhysPrimDen[i] = new TH1D(Form("m1OverPtPhysPrimDen_%s", title[i].c_str()), Form("1/PtPt distribution of %s tracks (physical primary) %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -1. / mPtCut, 1. / mPtCut); m1OverPtPhysPrimDen[i]->Sumw2(); - mFractionITSTPCmatchPhysPrim1OverPt[i] = new TEfficiency(Form("mFractionITSTPCmatchPhysPrim1OverPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs 1/Pt (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; Eff", title[i].c_str(), etaSel[i].c_str()), 100, -20.f, 20.f); + mFractionITSTPCmatchPhysPrim1OverPt[i] = new TEfficiency(Form("mFractionITSTPCmatchPhysPrim1OverPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs 1/Pt (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; Eff", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -1. / mPtCut, 1. 
/ mPtCut); } } @@ -452,7 +461,7 @@ void MatchITSTPCQC::initDataRequest() mSrc &= mAllowedSources; - if (mSrc[GID::Source::ITSTPC] == 0 || mSrc[GID::Source::TPC] == 0 || mSrc[GID::Source::ITS] == 0) { + if (!mSrc[GID::Source::ITSTPC] || !mSrc[GID::Source::TPC] || !mSrc[GID::Source::ITS]) { LOG(fatal) << "We cannot do ITSTPC QC, some sources are missing, check sources in " << mSrc; } @@ -503,7 +512,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) } static int evCount = 0; - mRecoCont.collectData(ctx, *mDataRequest.get()); + mRecoCont.collectData(ctx, *mDataRequest); mTPCTracks = mRecoCont.getTPCTracks(); mITSTracks = mRecoCont.getITSTracks(); mITSTPCTracks = mRecoCont.getTPCITSTracks(); @@ -566,7 +575,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) if (trk.getRefITS().getSource() != GID::ITS) { continue; } - if (isTPCTrackSelectedEntry[idxTrkTpc] == true) { + if (isTPCTrackSelectedEntry[idxTrkTpc]) { auto lbl = mRecoCont.getTrackMCLabel({(unsigned int)(itrk), GID::Source::ITSTPC}); if (!lbl.isValid()) { continue; @@ -577,9 +586,9 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) const std::vector& pcontainer = mcReader.getTracks(source, event); const o2::MCTrack& p = pcontainer[lbl.getTrackID()]; if (MCTrackNavigator::isPhysicalPrimary(p, pcontainer)) { - mMapLabels[matchType::TPC].insert({lbl, {itrk, true}}); + mMapLabels[matchType::TPC].insert({lbl, {.mIdx = itrk, .mIsPhysicalPrimary = true}}); } else { - mMapLabels[matchType::TPC].insert({lbl, {itrk, false}}); + mMapLabels[matchType::TPC].insert({lbl, {.mIdx = itrk, .mIsPhysicalPrimary = false}}); } } else { // winner (if more tracks have the same label) has the highest pt @@ -589,7 +598,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) } } auto idxTrkIts = trk.getRefITS().getIndex(); - if (isITSTrackSelectedEntry[idxTrkIts] == true) { + if (isITSTrackSelectedEntry[idxTrkIts]) { auto lbl = mRecoCont.getTrackMCLabel({(unsigned int)(itrk), 
GID::Source::ITSTPC}); if (!lbl.isValid()) { continue; @@ -600,9 +609,9 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) const std::vector& pcontainer = mcReader.getTracks(source, event); const o2::MCTrack& p = pcontainer[lbl.getTrackID()]; if (MCTrackNavigator::isPhysicalPrimary(p, pcontainer)) { - mMapLabels[matchType::ITS].insert({lbl, {itrk, true}}); + mMapLabels[matchType::ITS].insert({lbl, {.mIdx = itrk, .mIsPhysicalPrimary = true}}); } else { - mMapLabels[matchType::ITS].insert({lbl, {itrk, false}}); + mMapLabels[matchType::ITS].insert({lbl, {.mIdx = itrk, .mIsPhysicalPrimary = false}}); } } else { // winner (if more tracks have the same label) has the highest pt @@ -624,27 +633,29 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) trkDen = mTPCTracks[trk.getRefTPC()]; } else { trkDen = mITSTracks[trk.getRefITS()]; - if (std::abs(trkDen.getEta()) > 0.9) { + if (std::abs(trkDen.getEta()) > mEtaITSCut) { // ITS track outside |eta | < 0.9, we don't fill pt, nor phi , nor phi vs pt histos isEtaITSOk = false; } } if (isEtaITSOk) { mPtNum[i]->Fill(trkDen.getPt()); - if (std::abs(trkDen.getEta()) > 0.05) { + if (std::abs(trkDen.getEta()) > mEtaNo0Cut) { mPtNum_noEta0[i]->Fill(trkDen.getPt()); } mPhiNum[i]->Fill(trkDen.getPhi()); mPhiVsPtNum[i]->Fill(trkDen.getPt(), trkDen.getPhi()); m1OverPtNum[i]->Fill(trkDen.getSign() * trkDen.getPtInv()); + mEtaPhiPtNum[i]->Fill(trkDen.getEta(), trkDen.getPhi(), trkDen.getPt()); // we fill also the denominator mPtDen[i]->Fill(trkDen.getPt()); - if (std::abs(trkDen.getEta()) > 0.05) { + if (std::abs(trkDen.getEta()) > mEtaNo0Cut) { mPtDen_noEta0[i]->Fill(trkDen.getPt()); } mPhiDen[i]->Fill(trkDen.getPhi()); mPhiVsPtDen[i]->Fill(trkDen.getPt(), trkDen.getPhi()); m1OverPtDen[i]->Fill(trkDen.getSign() * trkDen.getPtInv()); + mEtaPhiPtDen[i]->Fill(trkDen.getEta(), trkDen.getPhi(), trkDen.getPt()); if (mUseTrkPID) { // Vs Tracking PID hypothesis mPtNumVsTrkPID[i][trkDen.getPID()]->Fill(trkDen.getPt()); 
mPhiNumVsTrkPID[i][trkDen.getPID()]->Fill(trkDen.getPhi()); @@ -708,13 +719,13 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) std::array title{"TPC", "ITS"}; for (int i = 0; i < matchType::SIZE; ++i) { o2::track::TrackParCov trkRef; - int idxTrkRef; + unsigned int idxTrkRef{0}; bool fillHisto = false; bool isEtaITSOk = true; if (i == matchType::TPC) { trkRef = mTPCTracks[trk.getRefTPC()]; idxTrkRef = trk.getRefTPC().getIndex(); - if (isTPCTrackSelectedEntry[idxTrkRef] == true) { + if (isTPCTrackSelectedEntry[idxTrkRef]) { fillHisto = true; ++mNITSTPCSelectedTracks[i]; } @@ -730,25 +741,25 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) } trkRef = mITSTracks[trk.getRefITS()]; LOG(debug) << "Checking track (ITS) with id " << idxTrkRef << " for ITSTPC track " << iITSTPC << " and pt = " << trkRef.getPt(); - if (isITSTrackSelectedEntry[idxTrkRef] == true) { + if (isITSTrackSelectedEntry[idxTrkRef]) { LOG(debug) << "Track was selected (ITS), with id " << idxTrkRef << " for ITSTPC track " << iITSTPC << " , we keep it in the numerator, pt = " << trkRef.getPt(); fillHisto = true; ++mNITSTPCSelectedTracks[i]; } else { LOG(debug) << "Track was not selected (ITS), with id " << idxTrkRef << " for ITSTPC track " << iITSTPC << " , we don't keep it in the numerator, pt = " << trkRef.getPt(); } - if (std::abs(trkRef.getEta()) > 0.9) { + if (std::abs(trkRef.getEta()) > mEtaITSCut) { // ITS track outside |eta | < 0.9, we don't fill pt, nor phi , nor phi vs pt histos isEtaITSOk = false; LOG(debug) << "Track (ITS), with id " << idxTrkRef << " for ITSTPC track " << iITSTPC << " will be discarded when filling pt of phi related histograms, since eta = " << trkRef.getEta() << " , we don't keep it in the numerator, pt = " << trkRef.getPt(); } } - if (fillHisto == true) { + if (fillHisto) { if (!mUseMC) { LOG(debug) << "Filling num (" << title[i] << ") with track with id " << idxTrkRef << " for ITSTPC track " << iITSTPC << " with pt = " << trkRef.getPt(); 
if (isEtaITSOk) { mPtNum[i]->Fill(trkRef.getPt()); - if (std::abs(trkRef.getEta()) > 0.05) { + if (std::abs(trkRef.getEta()) > mEtaNo0Cut) { mPtNum_noEta0[i]->Fill(trkRef.getPt()); } mPhiNum[i]->Fill(trkRef.getPhi()); @@ -758,6 +769,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) } mPhiVsPtNum[i]->Fill(trkRef.getPt(), trkRef.getPhi()); m1OverPtNum[i]->Fill(trkRef.getSign() * trkRef.getPtInv()); + mEtaPhiPtNum[i]->Fill(trkRef.getEta(), trkRef.getPhi(), trkRef.getPt()); } mEtaNum[i]->Fill(trkRef.getEta()); if (mUseTrkPID) { // Vs Tracking PID hypothesis @@ -782,7 +794,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) mChi2Refit->Fill(trk.getChi2Refit()); mTimeResVsPt->Fill(trkRef.getPt(), trk.getTimeMUS().getTimeStampError()); math_utils::Point3D v{}; - std::array dca; + std::array dca{-999, -999}; if (trkRef.propagateParamToDCA(v, mBz, &dca)) { mDCAr->Fill(dca[0]); if (!mUseMC) { @@ -807,7 +819,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) // track with the highest number of TPC clusters for (int itrk = 0; itrk < static_cast(mTPCTracks.size()); ++itrk) { auto const& trk = mTPCTracks[itrk]; - if (isTPCTrackSelectedEntry[itrk] == true) { + if (isTPCTrackSelectedEntry[itrk]) { auto lbl = mRecoCont.getTrackMCLabel({(unsigned int)(itrk), GID::Source::TPC}); if (!lbl.isValid()) { continue; @@ -839,7 +851,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) // track with the highest number of ITS clusters for (int itrk = 0; itrk < static_cast(mITSTracks.size()); ++itrk) { auto const& trk = mITSTracks[itrk]; - if (isITSTrackSelectedEntry[itrk] == true) { + if (isITSTrackSelectedEntry[itrk]) { auto lbl = mRecoCont.getTrackMCLabel({(unsigned int)(itrk), GID::Source::ITS}); if (!lbl.isValid()) { continue; @@ -873,11 +885,12 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) for (auto const& el : mMapRefLabels[matchType::TPC]) { auto const& trk = mTPCTracks[el.second.mIdx]; 
mPtDen[matchType::TPC]->Fill(trk.getPt()); - if (std::abs(trk.getEta()) > 0.05) { + if (std::abs(trk.getEta()) > mEtaNo0Cut) { mPtDen_noEta0[matchType::TPC]->Fill(trk.getPt()); } mPhiDen[matchType::TPC]->Fill(trk.getPhi()); mPhiVsPtDen[matchType::TPC]->Fill(trk.getPt(), trk.getPhi()); + mEtaPhiPtDen[matchType::TPC]->Fill(trk.getEta(), trk.getPhi(), trk.getPt()); mEtaDen[matchType::TPC]->Fill(trk.getEta()); mEtaVsPtDen[matchType::TPC]->Fill(trk.getPt(), trk.getEta()); m1OverPtDen[matchType::TPC]->Fill(trk.getSign() * trk.getPtInv()); @@ -898,13 +911,14 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) } for (auto const& el : mMapRefLabels[matchType::ITS]) { auto const& trk = mITSTracks[el.second.mIdx]; - if (std::abs(trk.getEta()) < 0.9) { + if (std::abs(trk.getEta()) < mEtaITSCut) { mPtDen[matchType::ITS]->Fill(trk.getPt()); - if (std::abs(trk.getEta()) > 0.05) { + if (std::abs(trk.getEta()) > mEtaNo0Cut) { mPtDen_noEta0[matchType::ITS]->Fill(trk.getPt()); } mPhiDen[matchType::ITS]->Fill(trk.getPhi()); mPhiVsPtDen[matchType::ITS]->Fill(trk.getPt(), trk.getPhi()); + mEtaPhiPtDen[matchType::ITS]->Fill(trk.getEta(), trk.getPhi(), trk.getPt()); m1OverPtDen[matchType::ITS]->Fill(trk.getSign() * trk.getPtInv()); } mEtaDen[matchType::ITS]->Fill(trk.getEta()); @@ -912,7 +926,7 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) mClsVsPtDen[matchType::ITS]->Fill(trk.getPt(), trk.getNClusters()); mChi2VsPtDen[matchType::ITS]->Fill(trk.getPt(), trk.getChi2()); if (el.second.mIsPhysicalPrimary) { - if (std::abs(trk.getEta()) < 0.9) { + if (std::abs(trk.getEta()) < mEtaITSCut) { mPtPhysPrimDen[matchType::ITS]->Fill(trk.getPt()); mPhiPhysPrimDen[matchType::ITS]->Fill(trk.getPhi()); m1OverPtPhysPrimDen[matchType::ITS]->Fill(trk.getSign() * trk.getPtInv()); @@ -925,16 +939,17 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) // if we are in data, we loop over all tracks (no check on the label) for (size_t itrk = 0; itrk < 
mTPCTracks.size(); ++itrk) { auto const& trk = mTPCTracks[itrk]; - if (isTPCTrackSelectedEntry[itrk] == true) { + if (isTPCTrackSelectedEntry[itrk]) { LOG(debug) << "Filling den (TPC) with track with pt = " << trk.getPt(); mPtDen[matchType::TPC]->Fill(trk.getPt()); - if (std::abs(trk.getEta()) > 0.05) { + if (std::abs(trk.getEta()) > mEtaNo0Cut) { mPtDen_noEta0[matchType::TPC]->Fill(trk.getPt()); } else { LOG(debug) << "Track (ITS) " << itrk << " with pt = " << trk.getPt() << " and eta = " << trk.getEta() << " not used for den pt, phi, phi vs pt, 1.pt histos"; } mPhiDen[matchType::TPC]->Fill(trk.getPhi()); mPhiVsPtDen[matchType::TPC]->Fill(trk.getPt(), trk.getPhi()); + mEtaPhiPtDen[matchType::TPC]->Fill(trk.getEta(), trk.getPhi(), trk.getPt()); mEtaDen[matchType::TPC]->Fill(trk.getEta()); mEtaVsPtDen[matchType::TPC]->Fill(trk.getPt(), trk.getEta()); m1OverPtDen[matchType::TPC]->Fill(trk.getSign() * trk.getPtInv()); @@ -951,15 +966,16 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) for (size_t itrk = 0; itrk < mITSTracks.size(); ++itrk) { auto const& trk = mITSTracks[itrk]; LOG(debug) << "Checking den for track (ITS) " << itrk << " with pt " << trk.getPt() << " and eta = " << trk.getEta(); - if (isITSTrackSelectedEntry[itrk] == true) { - if (std::abs(trk.getEta()) < 0.9) { + if (isITSTrackSelectedEntry[itrk]) { + if (std::abs(trk.getEta()) < mEtaITSCut) { LOG(debug) << "Filling den for track (ITS) " << itrk << " with pt = " << trk.getPt() << " and eta = " << trk.getEta(); mPtDen[matchType::ITS]->Fill(trk.getPt()); - if (std::abs(trk.getEta()) > 0.05) { + if (std::abs(trk.getEta()) > mEtaNo0Cut) { mPtDen_noEta0[matchType::ITS]->Fill(trk.getPt()); } mPhiDen[matchType::ITS]->Fill(trk.getPhi()); mPhiVsPtDen[matchType::ITS]->Fill(trk.getPt(), trk.getPhi()); + mEtaPhiPtDen[matchType::ITS]->Fill(trk.getEta(), trk.getPhi(), trk.getPt()); m1OverPtDen[matchType::ITS]->Fill(trk.getSign() * trk.getPtInv()); } else { LOG(debug) << "Track (ITS) " << itrk << " 
with pt = " << trk.getPt() << " and eta = " << trk.getEta() << " not used for num pt, phi, phi vs pt, 1.pt histos"; @@ -1189,39 +1205,17 @@ void MatchITSTPCQC::finalize() // first we use denominators and nominators to set the TEfficiency; later they are scaled - // some checks + // filling the efficiency for (int ti = 0; ti < matchType::SIZE; ++ti) { - for (int i = 0; i < mPtDen[ti]->GetNbinsX(); ++i) { - if (mPtDen[ti]->GetBinContent(i + 1) < mPtNum[ti]->GetBinContent(i + 1)) { - LOG(error) << title[ti] << ": bin " << i + 1 << " in [" << mPtNum[ti]->GetBinLowEdge(i + 1) << " , " << mPtNum[ti]->GetBinLowEdge(i + 1) + mPtNum[ti]->GetBinWidth(i + 1) << "]: mPtDen[i] = " << mPtDen[ti]->GetBinContent(i + 1) << ", mPtNum[i] = " << mPtNum[ti]->GetBinContent(i + 1); - } - } - for (int i = 0; i < mPtDen_noEta0[ti]->GetNbinsX(); ++i) { - if (mPtDen_noEta0[ti]->GetBinContent(i + 1) < mPtNum_noEta0[ti]->GetBinContent(i + 1)) { - LOG(error) << title[ti] << ": bin " << i + 1 << " in [" << mPtNum_noEta0[ti]->GetBinLowEdge(i + 1) << " , " << mPtNum_noEta0[ti]->GetBinLowEdge(i + 1) + mPtNum_noEta0[ti]->GetBinWidth(i + 1) << "]: mPtDen_noEta0[i] = " << mPtDen_noEta0[ti]->GetBinContent(i + 1) << ", mPtNum_noEta0[i] = " << mPtNum_noEta0[ti]->GetBinContent(i + 1); - } - } - for (int i = 0; i < mPhiDen[ti]->GetNbinsX(); ++i) { - if (mPhiDen[ti]->GetBinContent(i + 1) < mPhiNum[ti]->GetBinContent(i + 1)) { - LOG(error) << title[ti] << ": bin " << i + 1 << " in [" << mPhiNum[ti]->GetBinLowEdge(i + 1) << " , " << mPhiNum[ti]->GetBinLowEdge(i + 1) + mPhiNum[ti]->GetBinWidth(i + 1) << "]: mPhiDen[i] = " << mPhiDen[ti]->GetBinContent(i + 1) << ", mPhiNum[i] = " << mPhiNum[ti]->GetBinContent(i + 1); - } - } - for (int i = 0; i < mEtaDen[ti]->GetNbinsX(); ++i) { - if (mEtaDen[ti]->GetBinContent(i + 1) < mEtaNum[ti]->GetBinContent(i + 1)) { - LOG(error) << title[ti] << ": bin " << i + 1 << " in [" << mEtaNum[ti]->GetBinLowEdge(i + 1) << " , " << mEtaNum[ti]->GetBinLowEdge(i + 1) + 
mEtaNum[ti]->GetBinWidth(i + 1) << "]: mEtaDen[i] = " << mEtaDen[ti]->GetBinContent(i + 1) << ", mEtaNum[i] = " << mEtaNum[ti]->GetBinContent(i + 1); - } - } - - // filling the efficiency setEfficiency(mFractionITSTPCmatch[ti], mPtNum[ti], mPtDen[ti]); setEfficiency(mFractionITSTPCmatch_noEta0[ti], mPtNum_noEta0[ti], mPtDen_noEta0[ti]); setEfficiency(mFractionITSTPCmatchPhi[ti], mPhiNum[ti], mPhiDen[ti]); setEfficiency(mFractionITSTPCmatchEta[ti], mEtaNum[ti], mEtaDen[ti]); - setEfficiency(mFractionITSTPCmatchPhiVsPt[ti], mPhiVsPtNum[ti], mPhiVsPtDen[ti], true); - setEfficiency(mFractionITSTPCmatchEtaVsPt[ti], mEtaVsPtNum[ti], mEtaVsPtDen[ti], true); + setEfficiency<2>(mFractionITSTPCmatchPhiVsPt[ti], mPhiVsPtNum[ti], mPhiVsPtDen[ti]); + setEfficiency<2>(mFractionITSTPCmatchEtaVsPt[ti], mEtaVsPtNum[ti], mEtaVsPtDen[ti]); setEfficiency(mFractionITSTPCmatch1OverPt[ti], m1OverPtNum[ti], m1OverPtDen[ti]); - setEfficiency(mFractionITSTPCmatchClsVsPt[ti], mClsVsPtNum[ti], mClsVsPtDen[ti], true); - setEfficiency(mFractionITSTPCmatchChi2VsPt[ti], mChi2VsPtNum[ti], mChi2VsPtDen[ti], true); + setEfficiency<2>(mFractionITSTPCmatchClsVsPt[ti], mClsVsPtNum[ti], mClsVsPtDen[ti]); + setEfficiency<2>(mFractionITSTPCmatchChi2VsPt[ti], mChi2VsPtNum[ti], mChi2VsPtDen[ti]); if (mUseTrkPID) { // Vs Tracking PID hypothesis for (int j = 0; j < o2::track::PID::NIDs; ++j) { setEfficiency(mFractionITSTPCmatchPtVsTrkPID[ti][j], mPtNumVsTrkPID[ti][j], mPtDenVsTrkPID[ti][j]); @@ -1236,7 +1230,7 @@ void MatchITSTPCQC::finalize() setEfficiency(mFractionITSTPCmatchPhysPrim1OverPt[ti], m1OverPtPhysPrimNum[ti], m1OverPtPhysPrimDen[ti]); } } - setEfficiency(mFractionITSTPCmatchDCArVsPt, mDCArVsPtNum, mDCArVsPtDen, true); + setEfficiency<2>(mFractionITSTPCmatchDCArVsPt, mDCArVsPtNum, mDCArVsPtDen); /* mPtTPC->Scale(scaleFactTPC); mPt->Scale(scaleFactITSTPC); @@ -1256,9 +1250,10 @@ void MatchITSTPCQC::finalize() } //__________________________________________________________ - -void 
MatchITSTPCQC::setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden, bool is2D) +template +void MatchITSTPCQC::setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden) { + // Trivial check if we initalized if (eff == nullptr) { LOG(fatal) << "Cannot get TEfficiency object "; } @@ -1270,11 +1265,11 @@ void MatchITSTPCQC::setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden, bool i } // we need to force to replace the total histogram, otherwise it will compare it to the previous passed one, and it might get an error of inconsistency in the bin contents - if constexpr (false) { // checking + if constexpr (DEBUG) { // checking bool bad{false}; - LOG(debug) << "Setting efficiency " << eff->GetName() << " from " << hnum->GetName() << " and " << hden->GetName(); - LOG(debug) << "Num " << hnum->GetName() << " " << hnum->GetNbinsX() << " " << hnum->GetNbinsY() << " with " << hnum->GetEntries() << " entries"; - LOG(debug) << "Den " << hden->GetName() << " " << hden->GetNbinsX() << " " << hden->GetNbinsY() << " with " << hden->GetEntries() << " entries"; + LOG(info) << "Setting efficiency " << eff->GetName() << " from " << hnum->GetName() << " and " << hden->GetName(); + LOG(info) << "Num " << hnum->GetName() << " " << hnum->GetNbinsX() << " " << hnum->GetNbinsY() << " with " << hnum->GetEntries() << " entries"; + LOG(info) << "Den " << hden->GetName() << " " << hden->GetNbinsX() << " " << hden->GetNbinsY() << " with " << hden->GetEntries() << " entries"; if (hnum->GetDimension() != hden->GetDimension()) { LOGP(warning, "Histograms have different dimensions (num={} to den={})", hnum->GetDimension(), hden->GetDimension()); bad = true; @@ -1283,14 +1278,18 @@ void MatchITSTPCQC::setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden, bool i LOGP(warning, "Histograms do not have a compatible binning"); bad = true; } - if (!is2D) { + if constexpr (DIM == 3) { for (int i = 1; i <= hden->GetNbinsX(); i++) { - if (hden->GetBinContent(i) < hnum->GetBinContent(i)) { - LOG(warning) << "bin 
" << i << " den: " << hden->GetBinContent(i) << " < num: " << hnum->GetBinContent(i) << " should be the opposite"; - bad = true; + for (int j = 1; j <= hden->GetNbinsY(); j++) { + for (int k = 1; k <= hden->GetNbinsZ(); k++) { + if (hden->GetBinContent(i, j, k) < hnum->GetBinContent(i, j, k)) { + LOGP(warning, "bin {}/{}/{} -> den: {} < num: {}", i, j, k, hden->GetBinContent(i, j, k), hnum->GetBinContent(i, j, k)); + bad = true; + } + } } } - } else { + } else if constexpr (DIM == 2) { for (int i = 1; i <= hden->GetNbinsX(); i++) { for (int j = 1; j <= hden->GetNbinsY(); j++) { if (hden->GetBinContent(i, j) < hnum->GetBinContent(i, j)) { @@ -1299,9 +1298,19 @@ void MatchITSTPCQC::setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden, bool i } } } + } else { + for (int i = 1; i <= hden->GetNbinsX(); i++) { + if (hden->GetBinContent(i) < hnum->GetBinContent(i)) { + LOG(warning) << "bin " << i << " den: " << hden->GetBinContent(i) << " < num: " << hnum->GetBinContent(i) << " should be the opposite"; + bad = true; + } + } } if (bad) { + LOG(info) << " `--> Histogram is bad!"; return; + } else { + LOG(info) << " `--> Histogram is good!"; } } // we need to force to replace the total histogram, otherwise it will compare it to the previous passed one, and it might get an error of inconsistency in the bin contents @@ -1311,7 +1320,9 @@ void MatchITSTPCQC::setEfficiency(TEfficiency* eff, TH1* hnum, TH1* hden, bool i if (!eff->SetPassedHistogram(*hnum, "")) { LOG(fatal) << "Something went wrong when defining the efficiency numerator " << eff->GetName() << " from " << hnum->GetName(); } - if (is2D) { + if constexpr (DIM == 3) { + eff->SetTitle(Form("%s;%s;%s;%s;%s", eff->GetTitle(), hnum->GetXaxis()->GetTitle(), hnum->GetYaxis()->GetTitle(), hnum->GetZaxis()->GetTitle(), "Efficiency")); + } else if constexpr (DIM == 2) { eff->SetTitle(Form("%s;%s;%s;%s", eff->GetTitle(), hnum->GetXaxis()->GetTitle(), hnum->GetYaxis()->GetTitle(), "Efficiency")); } else { 
eff->SetTitle(Form("%s;%s;%s", eff->GetTitle(), hnum->GetXaxis()->GetTitle(), "Efficiency")); @@ -1392,6 +1403,9 @@ void MatchITSTPCQC::getHistos(TObjArray& objar) objar.Add(m1OverPtPhysPrimNum[i]); objar.Add(m1OverPtPhysPrimDen[i]); objar.Add(mFractionITSTPCmatchPhysPrim1OverPt[i]); + + objar.Add(mEtaPhiPtNum[i]); + objar.Add(mEtaPhiPtDen[i]); } objar.Add(mChi2Matching); objar.Add(mChi2Refit); @@ -1408,3 +1422,30 @@ void MatchITSTPCQC::getHistos(TObjArray& objar) objar.Add(mK0MassVsPtVsOccpp); objar.Add(mK0MassVsPtVsOccPbPb); } + +void MatchITSTPCQC::printParams() const +{ + LOG(info) << "MatchITSTPCQC parameters:"; + LOG(info) << " - minPtBins = " << mPtBins; + LOG(info) << " - minPtITSCut = " << mPtITSCut; + LOG(info) << " - etaITSCut = " << mEtaITSCut; + LOG(info) << " - minNITSClustersCut = " << mMinNClustersITS; + LOG(info) << " - maxChi2PerClusterITS = " << mMaxChi2PerClusterITS; + LOG(info) << " - minPtTPCCut = " << mPtTPCCut; + LOG(info) << " - etaTPCCut = " << mEtaTPCCut; + LOG(info) << " - minNTPCClustersCut = " << mNTPCClustersCut; + LOG(info) << " - mEtaNo0Cut = " << mEtaNo0Cut; + LOG(info) << " - minDCACut = " << mDCATPCCut; + LOG(info) << " - minDCACutY = " << mDCATPCCutY; + LOG(info) << " - minPtCut = " << mPtCut; + LOG(info) << " - maxPtCut = " << mPtMaxCut; + LOG(info) << " - etaCut = " << mEtaCut; + LOG(info) << " - cutK0Mass = " << mCutK0Mass; + LOG(info) << " - maxEtaK0 = " << mMaxEtaK0; + LOG(info) << " - minTPCOccpp = " << mMinTPCOccpp; + LOG(info) << " - maxTPCOccpp = " << mMaxTPCOccpp; + LOG(info) << " - nBinsTPCOccpp = " << mNBinsTPCOccpp; + LOG(info) << " - minTPCOccPbPb = " << mMinTPCOccPbPb; + LOG(info) << " - maxTPCOccPbPb = " << mMaxTPCOccPbPb; + LOG(info) << " - nBinsTPCOccPbPb = " << mNBinsTPCOccPbPb; +} diff --git a/Detectors/GlobalTracking/include/GlobalTracking/TrackCuts.h b/Detectors/GlobalTracking/include/GlobalTracking/TrackCuts.h index 215e5e8a72f63..eaafcca527d7d 100644 --- 
a/Detectors/GlobalTracking/include/GlobalTracking/TrackCuts.h +++ b/Detectors/GlobalTracking/include/GlobalTracking/TrackCuts.h @@ -56,7 +56,7 @@ class TrackCuts /// ITS void setMinPtITSCut(float value) { mPtITSCut = value; } void setEtaITSCut(float value) { mEtaITSCut = value; } - void setMinNClustersITS(float value) { mMinNClustersITS = value; } + void setMinNClustersITS(int32_t value) { mMinNClustersITS = value; } void setMaxChi2PerClusterITS(float value) { mMaxChi2PerClusterITS = value; } void setRequireHitsInITSLayers(int8_t minNRequiredHits, std::set requiredLayers) { diff --git a/Detectors/GlobalTrackingWorkflow/qc/src/ITSTPCMatchingQCSpec.cxx b/Detectors/GlobalTrackingWorkflow/qc/src/ITSTPCMatchingQCSpec.cxx index 2f9ca99d29a79..db61300c4cf60 100644 --- a/Detectors/GlobalTrackingWorkflow/qc/src/ITSTPCMatchingQCSpec.cxx +++ b/Detectors/GlobalTrackingWorkflow/qc/src/ITSTPCMatchingQCSpec.cxx @@ -49,6 +49,7 @@ void ITSTPCMatchingQCDevice::init(InitContext& /*ic*/) mMatchITSTPCQC->setPtCut(params.minPtCut); mMatchITSTPCQC->setMaxPtCut(params.maxPtCut); mMatchITSTPCQC->setEtaCut(params.etaCut); + mMatchITSTPCQC->setEtaNo0Cut(params.etaNo0Cut); mMatchITSTPCQC->setCutK0Mass(params.cutK0Mass); mMatchITSTPCQC->setMaxK0Eta(params.maxEtaK0); mMatchITSTPCQC->setK0Scaling(params.K0Scaling); From edf695d1ef5d0793d4270a361826081ce3c58d7b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 19 Jan 2025 22:39:54 +0100 Subject: [PATCH 0040/1914] GPU: Remove all AliRoot code --- .../TPC/include/DataFormatsTPC/PIDResponse.h | 2 - .../TRD/include/DataFormatsTRD/Tracklet64.h | 2 +- Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 2 +- .../include/GlobalTracking/TrackMethods.h | 2 +- Detectors/GlobalTracking/src/MatchTPCITS.cxx | 2 +- .../study/src/TrackingStudy.cxx | 2 +- .../studies/include/ITSStudies/TrackCuts.h | 2 +- .../studies/include/ITSStudies/TrackMethods.h | 2 +- .../TPCCalibration/CorrectionMapsLoader.h | 2 +- .../TPC/calibration/src/CalculatedEdx.cxx | 2 +- 
Detectors/TPC/calibration/src/DigitAdd.cxx | 6 +- Detectors/TPC/calibration/src/TrackDump.cxx | 10 +- .../TPC/monitor/src/SimpleEventDisplayGUI.cxx | 2 +- Detectors/TPC/workflow/src/TPCRefitter.cxx | 2 +- GPU/CMakeLists.txt | 7 +- GPU/Common/CMakeLists.txt | 23 - GPU/Common/GPUCommonAlgorithm.h | 16 +- GPU/Common/GPUCommonAlgorithmThrust.h | 4 +- GPU/Common/GPUCommonConstants.h | 2 +- GPU/Common/GPUCommonDef.h | 10 +- GPU/Common/GPUCommonDefAPI.h | 4 +- GPU/Common/GPUCommonLogger.h | 2 +- GPU/Common/GPUCommonMath.h | 4 +- GPU/Common/GPUCommonTransform3D.h | 4 +- GPU/GPUTracking/Base/GPUConstantMem.h | 27 +- GPU/GPUTracking/Base/GPUGeneralKernels.cxx | 2 +- GPU/GPUTracking/Base/GPUGeneralKernels.h | 4 +- GPU/GPUTracking/Base/GPUKernelDebugOutput.cxx | 2 +- GPU/GPUTracking/Base/GPUKernelDebugOutput.h | 4 +- GPU/GPUTracking/Base/GPUMemoryResource.cxx | 2 +- GPU/GPUTracking/Base/GPUMemoryResource.h | 4 +- GPU/GPUTracking/Base/GPUParam.cxx | 86 +-- GPU/GPUTracking/Base/GPUParam.h | 5 +- GPU/GPUTracking/Base/GPUParam.inc | 4 +- GPU/GPUTracking/Base/GPUParamRTC.h | 4 +- GPU/GPUTracking/Base/GPUProcessor.cxx | 2 +- GPU/GPUTracking/Base/GPUProcessor.h | 4 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 6 +- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 4 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 4 +- .../Base/GPUReconstructionConvert.cxx | 2 +- .../Base/GPUReconstructionConvert.h | 4 +- .../Base/GPUReconstructionDeviceBase.cxx | 9 +- .../Base/GPUReconstructionDeviceBase.h | 4 +- .../Base/GPUReconstructionHelpers.h | 4 +- .../Base/GPUReconstructionIncludes.h | 5 - ...ReconstructionIncludesDeviceAll.template.h | 6 +- .../GPUReconstructionKernelList.template.h | 8 - .../Base/GPUReconstructionKernels.h | 4 +- .../Base/GPUReconstructionLibrary.cxx | 2 +- .../Base/GPUReconstructionTimeframe.cxx | 4 +- .../Base/GPUReconstructionTimeframe.h | 4 +- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 183 ++--- 
GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h | 6 +- .../Base/cuda/GPUReconstructionCUDA.cu | 6 +- .../Base/cuda/GPUReconstructionCUDA.h | 8 +- .../GPUReconstructionCUDAExternalProvider.cu | 2 +- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 6 +- .../cuda/GPUReconstructionCUDAInternals.h | 4 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 172 ++--- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 19 - .../Base/opencl/GPUReconstructionOCL.cxx | 2 +- .../Base/opencl/GPUReconstructionOCL.h | 8 +- .../opencl/GPUReconstructionOCLInternals.h | 4 +- GPU/GPUTracking/CMakeLists.txt | 134 +--- .../AliHLTTPCClusterStatComponent.cxx | 718 ------------------ .../AliHLTTPCClusterStatComponent.h | 98 --- .../DataCompression/GPUTPCClusterRejection.h | 4 +- .../GPUTPCClusterStatistics.cxx | 2 +- .../DataCompression/GPUTPCClusterStatistics.h | 4 +- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../DataCompression/GPUTPCCompression.h | 4 +- .../GPUTPCCompressionKernels.cxx | 2 +- .../GPUTPCCompressionKernels.h | 4 +- .../GPUTPCCompressionTrackModel.cxx | 3 +- .../GPUTPCCompressionTrackModel.h | 4 +- .../DataCompression/GPUTPCDecompression.cxx | 2 +- .../DataCompression/GPUTPCDecompression.h | 4 +- .../GPUTPCDecompressionKernels.cxx | 2 +- .../GPUTPCDecompressionKernels.h | 4 +- .../TPCClusterDecompressionCore.inc | 4 +- .../TPCClusterDecompressor.cxx | 2 +- .../DataCompression/TPCClusterDecompressor.h | 4 +- .../DataTypes/CalibdEdxContainer.cxx | 2 +- .../DataTypes/CalibdEdxContainer.h | 2 - .../CalibdEdxTrackTopologySpline.cxx | 2 +- GPU/GPUTracking/DataTypes/GPUConfigDump.cxx | 2 +- GPU/GPUTracking/DataTypes/GPUConfigDump.h | 4 +- GPU/GPUTracking/DataTypes/GPUDataTypes.cxx | 2 +- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 8 +- GPU/GPUTracking/DataTypes/GPUHostDataTypes.h | 4 +- .../DataTypes/GPUMemorySizeScalers.cxx | 2 +- .../DataTypes/GPUMemorySizeScalers.h | 4 +- .../DataTypes/GPUNewCalibValues.cxx | 2 +- 
GPU/GPUTracking/DataTypes/GPUNewCalibValues.h | 4 +- GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h | 4 +- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 4 +- GPU/GPUTracking/DataTypes/GPUSettings.h | 4 +- .../DataTypes/GPUTPCClusterOccupancyMap.cxx | 2 +- .../DataTypes/GPUTPCClusterOccupancyMap.h | 4 +- .../DataTypes/GPUTPCGMMergedTrackHit.h | 8 +- .../DataTypes/GPUTPCGMPolynomialField.cxx | 4 +- .../DataTypes/GPUTPCGMPolynomialField.h | 4 +- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 4 +- GPU/GPUTracking/DataTypes/GPUTRDDef.h | 33 +- .../DataTypes/GPUTRDInterfaceO2Track.h | 8 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx | 9 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.h | 16 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.inc | 54 +- GPU/GPUTracking/DataTypes/GPUTRDTrackO2.cxx | 2 +- GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h | 4 +- GPU/GPUTracking/DataTypes/GPUdEdxInfo.h | 4 +- GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx | 2 +- GPU/GPUTracking/DataTypes/TPCPadBitMap.h | 4 +- GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx | 2 +- GPU/GPUTracking/DataTypes/TPCPadGainCalib.h | 4 +- .../DataTypes/TPCZSLinkMapping.cxx | 2 +- GPU/GPUTracking/DataTypes/TPCZSLinkMapping.h | 4 +- GPU/GPUTracking/Debug/GPUROOTDump.h | 4 +- GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx | 2 +- GPU/GPUTracking/Debug/GPUROOTDumpCore.h | 4 +- .../Definitions/GPUDefConstantsAndSettings.h | 8 +- GPU/GPUTracking/Definitions/GPULogging.h | 35 +- GPU/GPUTracking/Definitions/GPUSettingsList.h | 14 +- .../Definitions/clusterFinderDefs.h | 4 +- GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h | 49 -- .../Global/AliHLTGPUDumpComponent.cxx | 492 ------------ .../Global/AliHLTGPUDumpComponent.h | 75 -- GPU/GPUTracking/Global/GPUChain.cxx | 2 +- GPU/GPUTracking/Global/GPUChain.h | 4 +- GPU/GPUTracking/Global/GPUChainITS.cxx | 2 +- GPU/GPUTracking/Global/GPUChainITS.h | 4 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 8 +- GPU/GPUTracking/Global/GPUChainTracking.h | 5 +- .../Global/GPUChainTrackingClusterizer.cxx | 4 +- 
.../Global/GPUChainTrackingCompression.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 2 +- GPU/GPUTracking/Global/GPUChainTrackingDefs.h | 4 +- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 2 +- .../Global/GPUChainTrackingMerger.cxx | 2 +- .../Global/GPUChainTrackingRefit.cxx | 2 +- .../Global/GPUChainTrackingSliceTracker.cxx | 2 +- .../Global/GPUChainTrackingTRD.cxx | 4 +- .../Global/GPUChainTrackingTransformation.cxx | 2 +- GPU/GPUTracking/Global/GPUErrors.cxx | 2 +- GPU/GPUTracking/Global/GPUErrors.h | 4 +- .../Global/GPUTrackingInputProvider.cxx | 2 +- .../Global/GPUTrackingInputProvider.h | 4 +- GPU/GPUTracking/ITS/GPUITSFitter.cxx | 2 +- GPU/GPUTracking/ITS/GPUITSFitter.h | 4 +- GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx | 2 +- GPU/GPUTracking/ITS/GPUITSFitterKernels.h | 4 +- GPU/GPUTracking/ITS/GPUITSTrack.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 18 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h | 46 +- GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h | 4 +- .../Merger/GPUTPCGMPhysicalTrackModel.cxx | 2 +- .../Merger/GPUTPCGMPhysicalTrackModel.h | 4 +- .../Merger/GPUTPCGMPolynomialFieldManager.cxx | 604 +-------------- .../Merger/GPUTPCGMPolynomialFieldManager.h | 28 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 59 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 9 +- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 2 +- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 2 +- .../Merger/GPUTPCGlobalDebugSortKernels.h | 4 +- .../Merger/GPUTPCGlobalMergerComponent.cxx | 575 -------------- 
.../Merger/GPUTPCGlobalMergerComponent.h | 147 ---- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- .../Refit/GPUTrackingRefitKernel.cxx | 2 +- .../SliceTracker/GPUTPCBaseTrackParam.h | 4 +- .../SliceTracker/GPUTPCClusterData.h | 4 +- .../SliceTracker/GPUTPCCreateOccupancyMap.cxx | 2 +- .../SliceTracker/GPUTPCCreateOccupancyMap.h | 4 +- .../SliceTracker/GPUTPCCreateSliceData.cxx | 2 +- .../SliceTracker/GPUTPCCreateSliceData.h | 4 +- GPU/GPUTracking/SliceTracker/GPUTPCDef.h | 4 +- .../SliceTracker/GPUTPCGlobalTracking.cxx | 2 +- .../SliceTracker/GPUTPCGlobalTracking.h | 4 +- GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCGrid.h | 4 +- GPU/GPUTracking/SliceTracker/GPUTPCHit.h | 4 +- GPU/GPUTracking/SliceTracker/GPUTPCHitId.h | 4 +- GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h | 4 +- .../SliceTracker/GPUTPCNeighboursCleaner.cxx | 2 +- .../SliceTracker/GPUTPCNeighboursCleaner.h | 4 +- .../SliceTracker/GPUTPCNeighboursFinder.cxx | 2 +- .../SliceTracker/GPUTPCNeighboursFinder.h | 4 +- GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCRow.h | 4 +- .../GPUTPCSectorDebugSortKernels.cxx | 2 +- .../GPUTPCSectorDebugSortKernels.h | 4 +- .../SliceTracker/GPUTPCSliceData.cxx | 2 +- .../SliceTracker/GPUTPCSliceData.h | 4 +- .../SliceTracker/GPUTPCSliceOutCluster.h | 4 +- .../SliceTracker/GPUTPCSliceOutput.cxx | 2 +- .../SliceTracker/GPUTPCSliceOutput.h | 4 +- .../SliceTracker/GPUTPCStartHitsFinder.cxx | 2 +- .../SliceTracker/GPUTPCStartHitsFinder.h | 4 +- .../SliceTracker/GPUTPCStartHitsSorter.cxx | 2 +- .../SliceTracker/GPUTPCStartHitsSorter.h | 4 +- GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCTrack.h | 4 +- .../SliceTracker/GPUTPCTrackLinearisation.h | 4 +- .../SliceTracker/GPUTPCTrackParam.cxx | 2 +- .../SliceTracker/GPUTPCTrackParam.h | 4 +- .../SliceTracker/GPUTPCTracker.cxx | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCTracker.h | 4 +- 
.../SliceTracker/GPUTPCTrackerComponent.cxx | 709 ----------------- .../SliceTracker/GPUTPCTrackerComponent.h | 139 ---- .../SliceTracker/GPUTPCTrackerDump.cxx | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h | 4 +- .../GPUTPCTrackletConstructor.cxx | 2 +- .../SliceTracker/GPUTPCTrackletConstructor.h | 4 +- .../SliceTracker/GPUTPCTrackletSelector.cxx | 2 +- .../SliceTracker/GPUTPCTrackletSelector.h | 4 +- .../Standalone/Benchmark/standalone.cxx | 4 +- GPU/GPUTracking/Standalone/tools/createGeo.C | 3 +- GPU/GPUTracking/Standalone/tools/createLUT.C | 2 +- GPU/GPUTracking/TPCClusterFinder/Array2D.h | 4 +- GPU/GPUTracking/TPCClusterFinder/CfConsts.h | 4 +- GPU/GPUTracking/TPCClusterFinder/CfFragment.h | 4 +- GPU/GPUTracking/TPCClusterFinder/CfUtils.h | 4 +- GPU/GPUTracking/TPCClusterFinder/ChargePos.h | 4 +- .../TPCClusterFinder/ClusterAccumulator.cxx | 4 +- .../TPCClusterFinder/ClusterAccumulator.h | 4 +- .../TPCClusterFinder/GPUTPCCFChainContext.h | 4 +- .../GPUTPCCFChargeMapFiller.cxx | 4 +- .../GPUTPCCFChargeMapFiller.h | 4 +- .../GPUTPCCFCheckPadBaseline.cxx | 4 +- .../GPUTPCCFCheckPadBaseline.h | 4 +- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 4 +- .../TPCClusterFinder/GPUTPCCFClusterizer.h | 4 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 4 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.h | 4 +- .../GPUTPCCFDeconvolution.cxx | 4 +- .../TPCClusterFinder/GPUTPCCFDeconvolution.h | 4 +- .../TPCClusterFinder/GPUTPCCFGather.cxx | 4 +- .../TPCClusterFinder/GPUTPCCFGather.h | 4 +- .../GPUTPCCFMCLabelFlattener.cxx | 4 +- .../GPUTPCCFMCLabelFlattener.h | 4 +- .../GPUTPCCFNoiseSuppression.cxx | 4 +- .../GPUTPCCFNoiseSuppression.h | 4 +- .../TPCClusterFinder/GPUTPCCFPeakFinder.cxx | 4 +- .../TPCClusterFinder/GPUTPCCFPeakFinder.h | 4 +- .../GPUTPCCFStreamCompaction.cxx | 4 +- .../GPUTPCCFStreamCompaction.h | 4 +- .../TPCClusterFinder/GPUTPCClusterFinder.cxx | 2 +- .../TPCClusterFinder/GPUTPCClusterFinder.h | 4 +- .../GPUTPCClusterFinderDump.cxx | 4 +- 
.../TPCClusterFinder/MCLabelAccumulator.cxx | 4 +- .../TPCClusterFinder/MCLabelAccumulator.h | 4 +- .../TPCClusterFinder/PackedCharge.h | 4 +- GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx | 2 +- GPU/GPUTracking/TPCConvert/GPUTPCConvert.h | 4 +- .../TPCConvert/GPUTPCConvertImpl.h | 4 +- .../TPCConvert/GPUTPCConvertKernel.cxx | 2 +- .../TPCConvert/GPUTPCConvertKernel.h | 4 +- GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h | 42 +- .../TRDTracking/GPUTRDInterfaces.h | 132 +--- .../TRDTracking/GPUTRDSpacePoint.h | 8 +- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 81 +- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 16 +- .../TRDTracking/GPUTRDTrackerComponent.cxx | 509 ------------- .../TRDTracking/GPUTRDTrackerComponent.h | 148 ---- .../TRDTracking/GPUTRDTrackerDebug.h | 300 +------- .../TRDTracking/GPUTRDTrackerKernels.cxx | 2 +- .../TRDTracking/GPUTRDTrackerKernels.h | 4 +- .../TRDTracking/GPUTRDTrackletLabels.h | 4 +- .../GPUTRDTrackletReaderComponent.cxx | 396 ---------- .../GPUTRDTrackletReaderComponent.h | 126 --- .../TRDTracking/GPUTRDTrackletWord.cxx | 20 +- .../TRDTracking/GPUTRDTrackletWord.h | 8 +- .../TRDTracking/macros/run_trd_tracker.C | 3 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 4 - GPU/GPUTracking/dEdx/GPUdEdx.cxx | 2 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 4 +- GPU/GPUTracking/display/GPUDisplay.cxx | 2 +- GPU/GPUTracking/display/GPUDisplay.h | 4 +- .../display/GPUDisplayInterface.cxx | 2 +- GPU/GPUTracking/display/GPUDisplayInterface.h | 4 +- .../display/backend/GPUDisplayBackend.cxx | 2 +- .../display/backend/GPUDisplayBackend.h | 4 +- .../backend/GPUDisplayBackendOpenGL.cxx | 2 +- .../display/backend/GPUDisplayBackendOpenGL.h | 4 +- .../backend/GPUDisplayBackendVulkan.cxx | 2 +- .../display/backend/GPUDisplayBackendVulkan.h | 4 +- .../display/frontend/GPUDisplayFrontend.cxx | 6 +- .../display/frontend/GPUDisplayFrontend.h | 4 +- .../frontend/GPUDisplayFrontendGlfw.cxx | 2 +- .../display/frontend/GPUDisplayFrontendGlfw.h | 4 +- 
.../frontend/GPUDisplayFrontendGlut.cxx | 2 +- .../display/frontend/GPUDisplayFrontendGlut.h | 4 +- .../frontend/GPUDisplayFrontendNone.cxx | 2 +- .../display/frontend/GPUDisplayFrontendNone.h | 4 +- .../frontend/GPUDisplayFrontendWayland.cxx | 2 +- .../frontend/GPUDisplayFrontendWayland.h | 4 +- .../frontend/GPUDisplayFrontendWindows.cxx | 2 +- .../frontend/GPUDisplayFrontendWindows.h | 4 +- .../frontend/GPUDisplayFrontendX11.cxx | 2 +- .../display/frontend/GPUDisplayFrontendX11.h | 4 +- .../display/frontend/GPUDisplayGUIWrapper.cxx | 6 +- .../display/frontend/GPUDisplayGUIWrapper.h | 4 +- .../display/frontend/GPUDisplayKeys.cxx | 2 +- .../display/helpers/GPUDisplayAnimation.cxx | 2 +- .../GPUDisplayBackendOpenGLMagneticField.cxx | 2 +- .../display/helpers/GPUDisplayColors.inc | 2 +- .../display/helpers/GPUDisplayHelpers.cxx | 2 +- .../helpers/GPUDisplayInterpolation.cxx | 2 +- .../display/helpers/GPUDisplayLoader.cxx | 2 +- .../helpers/GPUDisplayMagneticField.cxx | 2 +- .../display/helpers/GPUDisplayMagneticField.h | 4 +- .../display/helpers/GPUDisplayQuaternion.cxx | 2 +- .../display/helpers/GPUDisplayROOT.cxx | 2 +- .../helpers/field-uniform-exporter.cxx | 2 +- .../display/render/GPUDisplayDraw.cxx | 2 +- .../display/render/GPUDisplayImportEvent.cxx | 2 +- .../display/shaders/GPUDisplayShaders.h | 4 +- GPU/GPUTracking/kernels.cmake | 20 +- .../oldFiles/AliHLT3DTrackParam.cxx | 425 ----------- GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.h | 137 ---- .../oldFiles/GPUTPCGMOfflineFitter.cxx | 309 -------- .../oldFiles/GPUTPCGMOfflineFitter.h | 48 -- GPU/GPUTracking/oldFiles/GPUTPCMCPoint.cxx | 20 - GPU/GPUTracking/oldFiles/GPUTPCMCPoint.h | 76 -- GPU/GPUTracking/oldFiles/GPUTPCMCTrack.cxx | 90 --- GPU/GPUTracking/oldFiles/GPUTPCMCTrack.h | 72 -- GPU/GPUTracking/qa/GPUQA.cxx | 8 +- GPU/GPUTracking/qa/GPUQA.h | 8 +- GPU/GPUTracking/qa/genEvents.cxx | 4 +- GPU/GPUTracking/qa/genEvents.h | 4 +- GPU/GPUTracking/utils/qlibload.h | 4 +- .../BandMatrixSolver.cxx | 6 +- 
GPU/TPCFastTransformation/BandMatrixSolver.h | 6 +- GPU/TPCFastTransformation/CMakeLists.txt | 54 -- GPU/TPCFastTransformation/ChebyshevFit1D.cxx | 2 +- GPU/TPCFastTransformation/ChebyshevFit1D.h | 4 +- .../CorrectionMapsHelper.cxx | 2 +- .../CorrectionMapsHelper.h | 30 +- .../MultivariatePolynomial.h | 14 +- .../MultivariatePolynomialHelper.cxx | 4 +- .../MultivariatePolynomialHelper.h | 4 +- .../NDPiecewisePolynomials.h | 12 +- .../NDPiecewisePolynomials.inc | 18 +- GPU/TPCFastTransformation/Spline.cxx | 6 +- GPU/TPCFastTransformation/Spline.h | 8 +- GPU/TPCFastTransformation/Spline1D.cxx | 6 +- GPU/TPCFastTransformation/Spline1D.h | 8 +- GPU/TPCFastTransformation/Spline1DHelper.cxx | 10 +- GPU/TPCFastTransformation/Spline1DHelper.h | 8 +- .../Spline1DHelperOld.cxx | 10 +- GPU/TPCFastTransformation/Spline1DHelperOld.h | 8 +- GPU/TPCFastTransformation/Spline1DSpec.cxx | 18 +- GPU/TPCFastTransformation/Spline1DSpec.h | 12 +- GPU/TPCFastTransformation/Spline2D.cxx | 6 +- GPU/TPCFastTransformation/Spline2D.h | 8 +- GPU/TPCFastTransformation/Spline2DHelper.cxx | 8 +- GPU/TPCFastTransformation/Spline2DHelper.h | 8 +- GPU/TPCFastTransformation/Spline2DSpec.cxx | 12 +- GPU/TPCFastTransformation/Spline2DSpec.h | 8 +- GPU/TPCFastTransformation/SplineHelper.cxx | 6 +- GPU/TPCFastTransformation/SplineHelper.h | 4 +- GPU/TPCFastTransformation/SplineSpec.cxx | 12 +- GPU/TPCFastTransformation/SplineSpec.h | 8 +- GPU/TPCFastTransformation/SplineUtil.h | 5 +- GPU/TPCFastTransformation/SymMatrixSolver.cxx | 6 +- GPU/TPCFastTransformation/SymMatrixSolver.h | 8 +- .../TPCFastSpaceChargeCorrection.cxx | 4 +- .../TPCFastSpaceChargeCorrection.h | 14 +- .../TPCFastSpaceChargeCorrectionMap.cxx | 4 +- .../TPCFastSpaceChargeCorrectionMap.h | 6 +- .../TPCFastTransform.cxx | 8 +- GPU/TPCFastTransformation/TPCFastTransform.h | 12 +- .../TPCFastTransformGeo.cxx | 2 +- .../TPCFastTransformGeo.h | 10 +- .../TPCFastTransformManager.cxx | 2 +- .../TPCFastTransformManager.h | 4 +- 
.../TPCFastTransformQA.cxx | 2 +- .../TPCFastTransformQA.h | 4 +- .../TPCFastTransformationLinkDef_AliRoot.h | 21 - .../alirootMacro/createTPCFastTransform.C | 69 -- .../generateTPCDistortionNTupleAliRoot.C | 150 ---- .../alirootMacro/initTPCcalibration.C | 198 ----- .../alirootMacro/moveTPCFastTransform.C | 69 -- .../devtools/IrregularSpline1D.cxx | 2 +- .../devtools/IrregularSpline1D.h | 6 +- .../devtools/IrregularSpline1DTest.C | 2 +- .../devtools/IrregularSpline2D3D.cxx | 2 +- .../devtools/IrregularSpline2D3D.h | 6 +- .../IrregularSpline2D3DCalibrator.cxx | 4 +- .../devtools/IrregularSpline2D3DCalibrator.h | 4 +- .../devtools/RegularSpline1D.h | 4 +- .../devtools/SemiregularSpline2D3D.cxx | 4 +- .../devtools/SemiregularSpline2D3D.h | 6 +- GPU/Utils/CMakeLists.txt | 25 +- GPU/Utils/FlatObject.h | 6 +- cmake/O2RootMacroExclusionList.cmake | 4 - 407 files changed, 892 insertions(+), 8703 deletions(-) delete mode 100644 GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.cxx delete mode 100644 GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.h delete mode 100644 GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h delete mode 100644 GPU/GPUTracking/Global/AliHLTGPUDumpComponent.cxx delete mode 100644 GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.cxx delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.h delete mode 100644 GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.cxx delete mode 100644 GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.h delete mode 100644 GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.cxx delete mode 100644 GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.h delete mode 100644 GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.cxx delete mode 100644 GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.h delete mode 100644 GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.cxx delete mode 100644 
GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.h delete mode 100644 GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.cxx delete mode 100644 GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.h delete mode 100644 GPU/GPUTracking/oldFiles/GPUTPCMCPoint.cxx delete mode 100644 GPU/GPUTracking/oldFiles/GPUTPCMCPoint.h delete mode 100644 GPU/GPUTracking/oldFiles/GPUTPCMCTrack.cxx delete mode 100644 GPU/GPUTracking/oldFiles/GPUTPCMCTrack.h delete mode 100644 GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h delete mode 100644 GPU/TPCFastTransformation/alirootMacro/createTPCFastTransform.C delete mode 100644 GPU/TPCFastTransformation/alirootMacro/generateTPCDistortionNTupleAliRoot.C delete mode 100644 GPU/TPCFastTransformation/alirootMacro/initTPCcalibration.C delete mode 100644 GPU/TPCFastTransformation/alirootMacro/moveTPCFastTransform.C diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/PIDResponse.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/PIDResponse.h index 77b26f47d0c2f..277011a260631 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/PIDResponse.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/PIDResponse.h @@ -65,9 +65,7 @@ class PIDResponse float mMIP = 50.f; float mChargeFactor = 2.299999952316284f; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(PIDResponse, 1); -#endif }; GPUd() void PIDResponse::setBetheBlochParams(const float betheBlochParams[5]) diff --git a/DataFormats/Detectors/TRD/include/DataFormatsTRD/Tracklet64.h b/DataFormats/Detectors/TRD/include/DataFormatsTRD/Tracklet64.h index f0af50a3c5a64..e63d8fbb5f277 100644 --- a/DataFormats/Detectors/TRD/include/DataFormatsTRD/Tracklet64.h +++ b/DataFormats/Detectors/TRD/include/DataFormatsTRD/Tracklet64.h @@ -144,7 +144,7 @@ class Tracklet64 GPUd() float getPadColFloat(bool applyShift) const { return getPositionFloat() + getMCMCol() * constants::NCOLMCM + 8.f + (applyShift ? 
1.f : 0.f); } // pad column number inside pad row as int can be off by +-1 pad (same function name as for TRD digit) - GPUd() int getPadCol(bool applyShift = false) const { return GPUCA_NAMESPACE::gpu::CAMath::Float2IntRn(getPadColFloat(applyShift)); } + GPUd() int getPadCol(bool applyShift = false) const { return o2::gpu::CAMath::Float2IntRn(getPadColFloat(applyShift)); } // translate local position into global y (in cm) not taking into account calibrations (ExB, vDrift, t0) GPUd() float getUncalibratedY(bool applyShift = false) const { return (getPadColFloat(applyShift) - (constants::NCOLUMN / 2.f)) * getPadWidth(); } diff --git a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx index 9f0a64f96b876..c12b614960554 100644 --- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx +++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx @@ -1442,7 +1442,7 @@ void AODProducerWorkflowDPL::countTPCClusters(const o2::globaltracking::RecoCont o2::tpc::TrackTPC::getClusterReference(tpcClusRefs, i, sectorIndex, rowIndex, clusterIndex, track.getClusterRef()); unsigned int absoluteIndex = tpcClusAcc.clusterOffset[sectorIndex][rowIndex] + clusterIndex; clMap[rowIndex] = true; - if (tpcClusShMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) { + if (tpcClusShMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) { if (!shMap[rowIndex]) { counters.shared++; } diff --git a/Detectors/GlobalTracking/include/GlobalTracking/TrackMethods.h b/Detectors/GlobalTracking/include/GlobalTracking/TrackMethods.h index d87d68dd122b2..f65d9ffd260e6 100644 --- a/Detectors/GlobalTracking/include/GlobalTracking/TrackMethods.h +++ b/Detectors/GlobalTracking/include/GlobalTracking/TrackMethods.h @@ -47,7 +47,7 @@ class TrackMethods o2::tpc::TrackTPC::getClusterReference(tpcClusRefs, i, sectorIndex, rowIndex, clusterIndex, track.getClusterRef()); unsigned int absoluteIndex = tpcClusAcc.clusterOffset[sectorIndex][rowIndex] + 
clusterIndex; clMap[rowIndex] = true; - if (tpcClusShMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) { + if (tpcClusShMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) { if (!shMap[rowIndex]) { shared++; } diff --git a/Detectors/GlobalTracking/src/MatchTPCITS.cxx b/Detectors/GlobalTracking/src/MatchTPCITS.cxx index 436a12df51352..403b7dbbb0e09 100644 --- a/Detectors/GlobalTracking/src/MatchTPCITS.cxx +++ b/Detectors/GlobalTracking/src/MatchTPCITS.cxx @@ -2890,7 +2890,7 @@ void MatchTPCITS::dumpTPCOrig(bool acc, int tpcIndex) for (int i = 0; i < tpcOrig.getNClusterReferences(); i++) { tpcOrig.getClusterReference(mTPCTrackClusIdx, i, clSect, clRow, clIdx); unsigned int absoluteIndex = mTPCClusterIdxStruct->clusterOffset[clSect][clRow] + clIdx; - if (mTPCRefitterShMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) { + if (mTPCRefitterShMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) { if (!(prevRow == clRow && prevRawShared)) { nshared++; } diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index b3ef78bd2eabf..8df1e980ecb8a 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -293,7 +293,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) clRowP = clRow; } unsigned int absoluteIndex = tpcClusAcc.clusterOffset[clSect][clRow] + clIdx; - if (shMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) { + if (shMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) { trExt.nClTPCShared++; } } diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackCuts.h b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackCuts.h index ef1d9faf86b83..03f52aae380c5 100644 --- 
a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackCuts.h +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackCuts.h @@ -76,7 +76,7 @@ class TrackCuts o2::tpc::TrackTPC::getClusterReference(tpcClusRefs, i, sectorIndex, rowIndex, clusterIndex, track.getClusterRef()); unsigned int absoluteIndex = tpcClusAcc.clusterOffset[sectorIndex][rowIndex] + clusterIndex; clMap[rowIndex] = true; - if (tpcClusShMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) { + if (tpcClusShMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) { if (!shMap[rowIndex]) { counters.shared++; } diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackMethods.h b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackMethods.h index c3e6eadc979f4..55f92843cd14d 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackMethods.h +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/TrackMethods.h @@ -53,7 +53,7 @@ class TrackMethods o2::tpc::TrackTPC::getClusterReference(tpcClusRefs, i, sectorIndex, rowIndex, clusterIndex, track.getClusterRef()); unsigned int absoluteIndex = tpcClusAcc.clusterOffset[sectorIndex][rowIndex] + clusterIndex; clMap[rowIndex] = true; - if (tpcClusShMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) { + if (tpcClusShMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) { if (!shMap[rowIndex]) { shared++; } diff --git a/Detectors/TPC/calibration/include/TPCCalibration/CorrectionMapsLoader.h b/Detectors/TPC/calibration/include/TPCCalibration/CorrectionMapsLoader.h index 48af1be6b5b19..90dc84e618cec 100644 --- a/Detectors/TPC/calibration/include/TPCCalibration/CorrectionMapsLoader.h +++ b/Detectors/TPC/calibration/include/TPCCalibration/CorrectionMapsLoader.h @@ -75,7 +75,7 @@ class CorrectionMapsLoader : public o2::gpu::CorrectionMapsHelper float mInstLumiCTPFactor = 1.0; // 
multiplicative factor for inst. lumi int mLumiCTPSource = 0; // 0: main, 1: alternative CTP lumi source - std::unique_ptr mCorrMapMShape{nullptr}; + std::unique_ptr mCorrMapMShape{nullptr}; #endif }; diff --git a/Detectors/TPC/calibration/src/CalculatedEdx.cxx b/Detectors/TPC/calibration/src/CalculatedEdx.cxx index 2ac3b44938bce..60e9ada7794d3 100644 --- a/Detectors/TPC/calibration/src/CalculatedEdx.cxx +++ b/Detectors/TPC/calibration/src/CalculatedEdx.cxx @@ -159,7 +159,7 @@ void CalculatedEdx::calculatedEdx(o2::tpc::TrackTPC& track, dEdxInfo& output, fl // check if the cluster is shared const unsigned int absoluteIndex = mClusterIndex->clusterOffset[sectorIndex][rowIndex] + clusterIndexNumb; - const bool isShared = mRefit ? (mTPCRefitterShMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) : 0; + const bool isShared = mRefit ? (mTPCRefitterShMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) : 0; // get region, pad, stack and stack ID const int region = Mapper::REGION[rowIndex]; diff --git a/Detectors/TPC/calibration/src/DigitAdd.cxx b/Detectors/TPC/calibration/src/DigitAdd.cxx index bb5e4c5f31c51..4c8eb8ae9df6f 100644 --- a/Detectors/TPC/calibration/src/DigitAdd.cxx +++ b/Detectors/TPC/calibration/src/DigitAdd.cxx @@ -23,13 +23,13 @@ int DigitAdd::sector() const float DigitAdd::lx() const { - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; return gpuGeom.Row2X(mRow); } float DigitAdd::ly() const { - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; return gpuGeom.LinearPad2Y(sector(), mRow, getPad()); } @@ -49,6 +49,6 @@ float DigitAdd::gy() const float DigitAdd::cpad() const { - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; return getPad() - gpuGeom.NPads(mRow) / 2.f; } diff --git a/Detectors/TPC/calibration/src/TrackDump.cxx b/Detectors/TPC/calibration/src/TrackDump.cxx index 
200abf766ac54..421750a5cb22b 100644 --- a/Detectors/TPC/calibration/src/TrackDump.cxx +++ b/Detectors/TPC/calibration/src/TrackDump.cxx @@ -47,7 +47,7 @@ void TrackDump::filter(const gsl::span tracks, ClusterNativeAcce ClExcludes excludes; - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; for (const auto& track : tracks) { const int nCl = track.getNClusterReferences(); @@ -141,7 +141,7 @@ void TrackDump::finalize() void TrackDump::fillClNativeAdd(ClusterNativeAccess const& clusterIndex, std::vector& clInfos, ClExcludes* excludes) { - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; for (int sector = 0; sector < MAXSECTOR; ++sector) { for (int padrow = 0; padrow < MAXGLOBALPADROW; ++padrow) { @@ -164,19 +164,19 @@ void TrackDump::fillClNativeAdd(ClusterNativeAccess const& clusterIndex, std::ve float TrackDump::ClusterNativeAdd::cpad() const { - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; return getPad() - gpuGeom.NPads(padrow) / 2.f; } float TrackDump::ClusterNativeAdd::lx() const { - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; return gpuGeom.Row2X(padrow); } float TrackDump::ClusterNativeAdd::ly() const { - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const o2::gpu::GPUTPCGeometry gpuGeom; return gpuGeom.LinearPad2Y(sector, padrow, getPad()); } diff --git a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx index 20da28eefe364..78f6b4d3e5397 100644 --- a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx +++ b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx @@ -1195,7 +1195,7 @@ void SimpleEventDisplayGUI::showClusters(int roc, int row) selFlags += mCheckClFlags[iFlag]->IsDown() << (iFlag - 1); } const bool fillSingleTB = mCheckSingleTB->IsDown(); - const GPUCA_NAMESPACE::gpu::GPUTPCGeometry gpuGeom; + const 
o2::gpu::GPUTPCGeometry gpuGeom; const int rowMin = fillSingleTB ? 0 : row; const int rowMax = fillSingleTB ? constants::MAXGLOBALPADROW : row + 1; diff --git a/Detectors/TPC/workflow/src/TPCRefitter.cxx b/Detectors/TPC/workflow/src/TPCRefitter.cxx index 7c14c8cd7d973..211f14cfae184 100644 --- a/Detectors/TPC/workflow/src/TPCRefitter.cxx +++ b/Detectors/TPC/workflow/src/TPCRefitter.cxx @@ -509,7 +509,7 @@ bool TPCRefitterSpec::processTPCTrack(o2::tpc::TrackTPC tr, o2::MCCompLabel lbl, unsigned int absoluteIndex = mTPCClusterIdxStruct->clusterOffset[sector][row] + clusterIndex; cl = &mTPCClusterIdxStruct->clusters[sector][row][clusterIndex]; uint8_t clflags = cl->getFlags(); - if (mTPCRefitterShMap[absoluteIndex] & GPUCA_NAMESPACE::gpu::GPUTPCGMMergedTrackHit::flagShared) { + if (mTPCRefitterShMap[absoluteIndex] & o2::gpu::GPUTPCGMMergedTrackHit::flagShared) { clflags |= 0x10; } clData.clSector.emplace_back(sector); diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 95167d0a8b59a..3c83c583eebfc 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -9,14 +9,9 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -# Subdirectories will be compiled with O2 / AliRoot / Standalone To simplify the +# Subdirectories will be compiled with O2 / Standalone To simplify the # CMake, variables are defined for Sources / Headers first. Then, the actual # CMake build scripts use these variables. 
-# -# SRCS: Common Sources for all builds HDRS_CINT: Headers for ROOT dictionary -# (always) HDRS_CINT_ALIROOT: Headers for ROOT dictionary (only in AliRoot) -# HDRS_CINT_O2: Headers for ROOT dictionary (only for O2) HDRS_INSTALL: Headers -# for installation only if(NOT DEFINED GPUCA_NO_FAST_MATH) set(GPUCA_NO_FAST_MATH 0) diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 2a31747b3673e..21253f7f9b815 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -66,26 +66,3 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") endif() install(FILES ${HDRS_INSTALL} DESTINATION include/GPU) endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - add_definitions(-DGPUCA_ALIROOT_LIB) - - set(SRCS ${SRCS} ../GPUTracking/utils/EmptyFile.cxx) - - # Add a library to the project using the specified source files - add_library_tested(Ali${MODULE} SHARED ${SRCS}) - - # Additional compilation flags - set_target_properties(Ali${MODULE} PROPERTIES COMPILE_FLAGS "") - - # System dependent: Modify the way the library is build - if(${CMAKE_SYSTEM} MATCHES Darwin) - set_target_properties(Ali${MODULE} - PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") - endif(${CMAKE_SYSTEM} MATCHES Darwin) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS_INSTALL} DESTINATION include) -endif() \ No newline at end of file diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index dd3cc1925b04a..a733f0ff99f26 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -24,7 +24,7 @@ // ----------------------------- SORTING ----------------------------- -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -72,9 +72,9 @@ class GPUCommonAlgorithm GPUd() static void IterSwap(I a, I b) noexcept; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -218,7 +218,7 @@ GPUdi() 
void GPUCommonAlgorithm::QuickSort(I f, I l) noexcept typedef GPUCommonAlgorithm CAAlgo; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #if (((defined(__CUDACC__) && !defined(__clang__)) || defined(__HIPCC__))) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_HOSTONLY) @@ -226,7 +226,7 @@ typedef GPUCommonAlgorithm CAAlgo; #else -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -248,12 +248,12 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // THRUST // sort and sortInBlock below are not taken from Thrust, since our implementations are faster -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -329,7 +329,7 @@ GPUdi() void GPUCommonAlgorithm::swap(T& a, T& b) #endif } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 // ----------------------------- WORK GROUP FUNCTIONS ----------------------------- diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 8c99200e00183..0208c12f1cd08 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -30,7 +30,7 @@ #define GPUCA_THRUST_NAMESPACE thrust::hip #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -88,6 +88,6 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/Common/GPUCommonConstants.h b/GPU/Common/GPUCommonConstants.h index d209b6a42a794..01d67eab1f9d3 100644 --- a/GPU/Common/GPUCommonConstants.h +++ b/GPU/Common/GPUCommonConstants.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE::gpu::gpu_common_constants +namespace o2::gpu::gpu_common_constants { static constexpr const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this now that we use only OpenCL CPP } 
diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index 059e011a715d8..62d665d36c285 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -40,22 +40,16 @@ #endif #endif -// Set AliRoot / O2 namespace -#if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined(GPUCA_ALIROOT_LIB) || defined (GPUCA_GPUCODE) +#if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined (GPUCA_GPUCODE) #define GPUCA_ALIGPUCODE #endif -#ifdef GPUCA_ALIROOT_LIB - #define GPUCA_NAMESPACE AliGPU -#else - #define GPUCA_NAMESPACE o2 -#endif #if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) #define GPUCA_NO_CONSTANT_MEMORY #elif defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM #endif -#if !defined(GPUCA_HAVE_O2HEADERS) && (defined(GPUCA_O2_LIB) || (!defined(GPUCA_ALIROOT_LIB) && !defined(GPUCA_STANDALONE))) +#if !defined(GPUCA_HAVE_O2HEADERS) && (defined(GPUCA_O2_LIB) || !defined(GPUCA_STANDALONE)) #define GPUCA_HAVE_O2HEADERS #endif diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index 23e16c75f098f..0cd3c4ebddb7f 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -104,12 +104,12 @@ #define GPUbarrier() barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE) #define GPUbarrierWarp() #if defined(__OPENCL__) && defined(GPUCA_OPENCL_CLANG_C11_ATOMICS) - namespace GPUCA_NAMESPACE { namespace gpu { + namespace o2 { namespace gpu { template struct oclAtomic; template <> struct oclAtomic {typedef atomic_uint t;}; static_assert(sizeof(oclAtomic::t) == sizeof(uint32_t), "Invalid size of atomic type"); }} - #define GPUAtomic(type) GPUCA_NAMESPACE::gpu::oclAtomic::t + #define GPUAtomic(type) o2::gpu::oclAtomic::t #else #define GPUAtomic(type) volatile 
type #endif diff --git a/GPU/Common/GPUCommonLogger.h b/GPU/Common/GPUCommonLogger.h index ca5a8ff3c3410..0b6b5ae401244 100644 --- a/GPU/Common/GPUCommonLogger.h +++ b/GPU/Common/GPUCommonLogger.h @@ -45,7 +45,7 @@ struct DummyLogger { #define LOGP(...) // #define LOGP(...) static_assert(false, "LOGP(...) unsupported in GPU code"); -#elif defined(GPUCA_STANDALONE) || defined(GPUCA_ALIROOT_LIB) +#elif defined(GPUCA_STANDALONE) #include #include #define LOG(type) std::cout diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 27b3be8869b94..9aa260f59842f 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -31,7 +31,7 @@ #include #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -552,6 +552,6 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt #undef CHOICE } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUCOMMONMATH_H diff --git a/GPU/Common/GPUCommonTransform3D.h b/GPU/Common/GPUCommonTransform3D.h index 5cd09254d30d2..4c5cca1f00ddc 100644 --- a/GPU/Common/GPUCommonTransform3D.h +++ b/GPU/Common/GPUCommonTransform3D.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -80,6 +80,6 @@ class Transform3D kDZ = 11 }; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 26c7726d13ca6..2a9a15e10f491 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -21,25 +21,10 @@ #include "GPUErrors.h" #include "GPUTPCGMMerger.h" -#if (!defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB)) #include "GPUTRDTracker.h" -#else -#include "GPUTRDDef.h" -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -template -class GPUTRDTracker_t -{ - void SetMaxData(const GPUTrackingInOutPointers& io) {} -}; -} // namespace gpu -} // namespace 
GPUCA_NAMESPACE -#endif // Dummies for stuff not suppored in legacy code, or for what requires O2 headers while not available -#if (!defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB)) && defined(GPUCA_HAVE_O2HEADERS) +#if defined(GPUCA_HAVE_O2HEADERS) #include "GPUTPCConvert.h" #include "GPUTPCCompression.h" #include "GPUTPCDecompression.h" @@ -54,7 +39,7 @@ class GPUTRDTracker_t #include "GPUKernelDebugOutput.h" #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -123,11 +108,11 @@ union GPUConstantMemCopyable { static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1); #endif } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) -GPUconstant() GPUCA_NAMESPACE::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; +GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -153,6 +138,6 @@ GPUdi() void GPUProcessor::raiseError(uint32_t code, uint32_t param1, uint32_t p } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.cxx b/GPU/GPUTracking/Base/GPUGeneralKernels.cxx index 44faf09112e5e..e1a3ce69dd8df 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.cxx +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.cxx @@ -14,7 +14,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUMemClean16::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUglobalref() void* ptr, uint64_t size) diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 333a53576663b..e11f818297770 
100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -32,7 +32,7 @@ #define GPUCA_CUB cub #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -111,7 +111,7 @@ class GPUitoa : public GPUKernelTemplate }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #undef GPUCA_CUB diff --git a/GPU/GPUTracking/Base/GPUKernelDebugOutput.cxx b/GPU/GPUTracking/Base/GPUKernelDebugOutput.cxx index be4207abc75d3..a417c41f2db60 100644 --- a/GPU/GPUTracking/Base/GPUKernelDebugOutput.cxx +++ b/GPU/GPUTracking/Base/GPUKernelDebugOutput.cxx @@ -17,7 +17,7 @@ #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUKernelDebugOutput::InitializeProcessor() {} diff --git a/GPU/GPUTracking/Base/GPUKernelDebugOutput.h b/GPU/GPUTracking/Base/GPUKernelDebugOutput.h index 2a032125ced79..dc60014718677 100644 --- a/GPU/GPUTracking/Base/GPUKernelDebugOutput.h +++ b/GPU/GPUTracking/Base/GPUKernelDebugOutput.h @@ -19,7 +19,7 @@ #include "GPUProcessor.h" #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -76,7 +76,7 @@ class GPUKernelDebugOutput : public GPUProcessor }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif #endif diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.cxx b/GPU/GPUTracking/Base/GPUMemoryResource.cxx index ccc912fe6c036..b22267859345a 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.cxx +++ b/GPU/GPUTracking/Base/GPUMemoryResource.cxx @@ -13,4 +13,4 @@ /// \author David Rohr #include "GPUMemoryResource.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 143a0ead26235..5037e7800d4d9 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include 
"GPUProcessor.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -104,6 +104,6 @@ class GPUMemoryResource MemoryType mType; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index 39b5a18c51eff..6726c022e5ddb 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -21,13 +21,8 @@ #include "GPUDataTypes.h" #include "GPUConstantMem.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -#ifdef GPUCA_ALIROOT_LIB -#include "AliTPCClusterParam.h" -#include "AliTPCcalibDB.h" -#include -#endif #include #include #ifdef GPUCA_HAVE_O2HEADERS @@ -155,7 +150,7 @@ void GPUParam::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessi void GPUParam::UpdateBzOnly(float newSolenoidBz) { bzkG = newSolenoidBz; - bzCLight = bzkG * GPUCA_NAMESPACE::gpu::gpu_common_constants::kCLight; + bzCLight = bzkG * o2::gpu::gpu_common_constants::kCLight; polynomialField.Reset(); if (par.assumeConstantBz) { GPUTPCGMPolynomialFieldManager::GetPolynomialField(GPUTPCGMPolynomialFieldManager::kUniform, bzkG, polynomialField); @@ -193,81 +188,6 @@ void GPUParam::UpdateRun3ClusterErrors(const float* yErrorParam, const float* zE #endif } -#ifndef GPUCA_ALIROOT_LIB -void GPUParam::LoadClusterErrors(bool Print) -{ -} -#else - -#include -#include -void GPUParam::LoadClusterErrors(bool Print) -{ - // update of calculated values - const AliTPCClusterParam* clparam = AliTPCcalibDB::Instance()->GetClusterParam(); - if (!clparam) { - std::cout << "Error: GPUParam::LoadClusterErrors():: No AliTPCClusterParam instance found !!!! 
" << std::endl; - return; - } - - for (int32_t i = 0; i < 2; i++) { - for (int32_t j = 0; j < 3; j++) { - for (int32_t k = 0; k < 6; k++) { - ParamS0Par[i][j][k] = clparam->GetParamS0Par(i, j, k); - } - } - } - - for (int32_t i = 0; i < 2; i++) { - for (int32_t j = 0; j < 3; j++) { - for (int32_t k = 0; k < 4; k++) { - ParamErrorsSeeding0[i][j][k] = clparam->GetParamRMS0(i, j, k); - } - } - } - - if (Print) { - typedef std::numeric_limits flt; - std::cout << std::scientific; - std::cout << std::setprecision(flt::max_digits10 + 2); - std::cout << "ParamS0Par[2][3][7]=" << std::endl; - std::cout << " { " << std::endl; - for (int32_t i = 0; i < 2; i++) { - std::cout << " { " << std::endl; - for (int32_t j = 0; j < 3; j++) { - std::cout << " { "; - for (int32_t k = 0; k < 6; k++) { - std::cout << ParamS0Par[i][j][k] << ", "; - } - std::cout << " }, " << std::endl; - } - std::cout << " }, " << std::endl; - } - std::cout << " }; " << std::endl; - - std::cout << "ParamErrorsSeeding0[2][3][4]=" << std::endl; - std::cout << " { " << std::endl; - for (int32_t i = 0; i < 2; i++) { - std::cout << " { " << std::endl; - for (int32_t j = 0; j < 3; j++) { - std::cout << " { "; - for (int32_t k = 0; k < 4; k++) { - std::cout << ParamErrorsSeeding0[i][j][k] << ", "; - } - std::cout << " }, " << std::endl; - } - std::cout << " }, " << std::endl; - } - std::cout << " }; " << std::endl; - - const THnBase* waveMap = clparam->GetWaveCorrectionMap(); - const THnBase* resYMap = clparam->GetResolutionYMap(); - std::cout << "waveMap = " << (void*)waveMap << std::endl; - std::cout << "resYMap = " << (void*)resYMap << std::endl; - } -} -#endif - void GPUParamRTC::setFrom(const GPUParam& param) { memcpy((void*)this, (void*)¶m, sizeof(param)); @@ -285,4 +205,4 @@ std::string GPUParamRTC::generateRTCCode(const GPUParam& param, bool useConstexp qConfigPrintRtc(std::make_tuple(¶m.rec.tpc, ¶m.rec.trd, ¶m.rec, ¶m.par), useConstexpr); } -static_assert(sizeof(GPUCA_NAMESPACE::gpu::GPUParam) == 
sizeof(GPUCA_NAMESPACE::gpu::GPUParamRTC), "RTC param size mismatch"); +static_assert(sizeof(o2::gpu::GPUParam) == sizeof(o2::gpu::GPUParamRTC), "RTC param size mismatch"); diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 1a3ff9065dc94..78c13d19be81d 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -31,7 +31,7 @@ using Propagator = PropagatorImpl; } // namespace o2::base #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -85,7 +85,6 @@ struct GPUParam : public internal::GPUParam_t void SetDefaults(const GPUSettingsGRP* g, const GPUSettingsRec* r = nullptr, const GPUSettingsProcessing* p = nullptr, const GPURecoStepConfiguration* w = nullptr); void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPURecoStepConfiguration* w = nullptr, const GPUSettingsRecDynamic* d = nullptr); void UpdateBzOnly(float newSolenoidBz); - void LoadClusterErrors(bool Print = 0); void UpdateRun3ClusterErrors(const float* yErrorParam, const float* zErrorParam); #endif @@ -116,6 +115,6 @@ struct GPUParam : public internal::GPUParam_t }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 357af42276575..87d7b2dc4c7cd 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -19,7 +19,7 @@ #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCClusterOccupancyMap.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -224,6 +224,6 @@ GPUdi() bool GPUParam::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, fl } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUParamRTC.h b/GPU/GPUTracking/Base/GPUParamRTC.h index d7959a294854c..dd9cf73e38a1e 100644 --- a/GPU/GPUTracking/Base/GPUParamRTC.h +++ b/GPU/GPUTracking/Base/GPUParamRTC.h @@ -18,7 +18,7 @@ 
#include "GPUParam.h" #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -39,6 +39,6 @@ struct GPUParamRTC : public internal::GPUParam_t #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -158,6 +158,6 @@ class GPUProcessor friend class GPUTPCNeighboursFinder; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 9abe225c7848e..1fa8af0adffe8 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -46,7 +46,7 @@ #include "GPUReconstructionIncludesITS.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -66,9 +66,9 @@ struct GPUReconstructionPipelineContext { bool terminate = false; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; constexpr const char* const GPUReconstruction::GEOMETRY_TYPE_NAMES[]; constexpr const char* const GPUReconstruction::IOTYPENAMES[]; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 6951646dff840..6fd00e1fda207 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -44,7 +44,7 @@ class TimeFrame; } // namespace its } // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -645,6 +645,6 @@ inline int32_t GPUReconstruction::ReadStructFromFile(const char* file, T* obj) return 0; } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 271bee59db31b..944fcb32e4eda 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -47,8 +47,8 @@ static inline int32_t omp_get_thread_num() { return 0; } static inline int32_t 
omp_get_max_threads() { return 1; } #endif -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::gpu_reconstruction_kernels; +using namespace o2::gpu; +using namespace o2::gpu::gpu_reconstruction_kernels; constexpr GPUReconstructionCPU::krnlRunRange GPUReconstructionCPU::krnlRunRangeNone; constexpr GPUReconstructionCPU::krnlEvent GPUReconstructionCPU::krnlEventNone; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 7eaf3e4a5e40d..8cc753731d074 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -26,7 +26,7 @@ #include "GPUReconstructionKernelIncludes.h" #include "GPUReconstructionKernels.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -307,6 +307,6 @@ HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num) } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index 03898d16d5105..a21bdcf28dd35 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -42,7 +42,7 @@ #include "DetectorsRaw/RDHUtils.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; using namespace std::string_literals; diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.h b/GPU/GPUTracking/Base/GPUReconstructionConvert.h index 6e0f80d6678b9..b8aedbcde582b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.h +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.h @@ -37,7 +37,7 @@ class RawFileWriter; struct AliHLTTPCRawCluster; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -64,6 +64,6 @@ class GPUReconstructionConvert }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git 
a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index c9155c1cb8f60..3522095622ad4 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -18,7 +18,7 @@ #include "GPUTPCTracker.h" #include "GPUTPCSliceOutput.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #if defined(_WIN32) #include "../utils/pthread_mutex_win32_wrapper.h" @@ -39,12 +39,7 @@ GPUReconstructionDeviceBase::GPUReconstructionDeviceBase(const GPUSettingsDevice } } -GPUReconstructionDeviceBase::~GPUReconstructionDeviceBase() -{ - // make d'tor such that vtable is created for this class - // needed for build with AliRoot, otherwise dynamic loading of GPU libraries will fail - (void)0; // Avoid compiler warnings -} +GPUReconstructionDeviceBase::~GPUReconstructionDeviceBase() = default; void* GPUReconstructionDeviceBase::helperWrapper_static(void* arg) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 5a3f02efe7e3b..215615f558442 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -21,7 +21,7 @@ #include "GPUChain.h" #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -103,6 +103,6 @@ inline size_t GPUReconstructionDeviceBase::GPUMemCpyAlways(bool onGpu, void* dst } } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionHelpers.h b/GPU/GPUTracking/Base/GPUReconstructionHelpers.h index 75bce35e02fa1..c55e81905f32f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionHelpers.h +++ b/GPU/GPUTracking/Base/GPUReconstructionHelpers.h @@ -17,7 +17,7 @@ #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -45,6 +45,6 @@ class GPUReconstructionHelpers }; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} 
// namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index 4575c77b6ff54..d4502b978ef5b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -29,11 +29,6 @@ #include #include -#if defined(GPUCA_ALIROOT_LIB) && !defined(GPUCA_GPUCODE) -#include "AliHLTDefinitions.h" -#include "AliHLTSystem.h" -#endif - #define GPUCA_GPUReconstructionUpdateDefaults() \ if (mProcessingSettings.trackletConstructorInPipeline < 0) { \ mProcessingSettings.trackletConstructorInPipeline = GPUCA_CONSTRUCTOR_IN_PIPELINE; \ diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h index 4822332a1839c..02008ed0ff78f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h @@ -17,13 +17,13 @@ #include "GPUDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { } -} // namespace GPUCA_NAMESPACE -using namespace GPUCA_NAMESPACE::gpu; +} // namespace o2 +using namespace o2::gpu; // clang-format off $>,APPEND,">,PREPEND,#include ">, diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h b/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h index 1def09c61e606..92fa2546d3ca0 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelList.template.h @@ -15,14 +15,6 @@ // No header protection, this may be used multiple times #include "GPUReconstructionKernelMacros.h" -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) -#define GPUCA_KRNL_NOALIROOT -#endif - // clang-format off $,> // clang-format on - -#ifdef GPUCA_KRNL_NOALIROOT -#undef GPUCA_KRNL_NOALIROOT -#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h 
b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index e5c22dd7f3cd5..d5d329d55ad4a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -17,7 +17,7 @@ #include "GPUReconstruction.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -127,6 +127,6 @@ class GPUReconstructionKernels : public T }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index f3749c160c3ff..94b16ae5a6936 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -33,7 +33,7 @@ #include "GPULogging.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUReconstruction* GPUReconstruction::CreateInstance(DeviceType type, bool forceType, GPUReconstruction* master) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index 840a24a695706..fbca43e03781a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -31,9 +31,9 @@ #include "utils/qconfig.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { extern GPUSettingsStandalone configStandalone; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h index 1ffb730b54d55..a4f2e055da2c3 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h @@ -27,7 +27,7 @@ namespace o2::tpc struct ClusterNative; } // namespace o2::tpc -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -74,6 +74,6 @@ class GPUReconstructionTimeframe std::vector> mShiftedEvents; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif 
diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 995b9224a4ad0..7c382e9b9374d 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -20,89 +20,87 @@ message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludes.h CUDAThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- -if(NOT ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - enable_language(ASM) - if(ALIGPU_BUILD_TYPE STREQUAL "O2") - set(defineIncludeSrc "O2::${MODULE}") - else() - set(defineIncludeSrc "${MODULE}") - endif() - set(GPU_RTC_DEFINES "-D$,$-D>") - set(GPU_RTC_INCLUDES "-I$,EXCLUDE,^/usr/include/?>,$-I>" - -I${CMAKE_SOURCE_DIR}/Detectors/Base/src - -I${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src - ) - if(ALIGPU_BUILD_TYPE STREQUAL "O2") - set(GPU_RTC_INCLUDES ${GPU_RTC_INCLUDES} "-I$,EXCLUDE,^/usr/include/?>,$-I>") - endif() - #set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -keep") - - # build flags to use for RTC - set(GPU_RTC_FLAGS "${CMAKE_CUDA_FLAGS} ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -std=c++${CMAKE_CUDA_STANDARD}") - set(GPU_RTC_FLAGS_ARCH "") - if(CUDA_COMPUTETARGET) - foreach(CUDA_ARCH ${CUDA_COMPUTETARGET}) - set(GPU_RTC_FLAGS_ARCH "${GPU_RTC_FLAGS_ARCH} -gencode arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}") - endforeach() - list (GET CUDA_COMPUTETARGET 0 RTC_CUDA_ARCH) - set(RTC_CUDA_ARCH "${RTC_CUDA_ARCH}0") - else() - set(RTC_CUDA_ARCH "750") - endif() - if(GPUCA_CUDA_GCCBIN) - set(GPU_RTC_FLAGS "${GPU_RTC_FLAGS} --compiler-bindir ${GPUCA_CUDA_GCCBIN}") - endif() - set(GPU_RTC_FLAGS_SEPARATED "${GPU_RTC_FLAGS}") - separate_arguments(GPU_RTC_FLAGS_SEPARATED) - - # convenience variables - 
if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - get_filename_component(GPUDIR ${CMAKE_SOURCE_DIR}/../ ABSOLUTE) - else() - set(GPUDIR ${CMAKE_SOURCE_DIR}/GPU/GPUTracking) - endif() - - set(GPU_RTC_SRC ${GPUDIR}/Base/cuda/GPUReconstructionCUDArtc.cu) - set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) - - # cmake-format: off - add_custom_command( - OUTPUT ${GPU_RTC_BIN}.src - COMMAND cat ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludes.h > ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src - MAIN_DEPENDENCY ${GPU_RTC_SRC} - IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} - COMMAND_EXPAND_LISTS - COMMENT "Preparing CUDA RTC source file ${GPU_RTC_BIN}.src" - ) - create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) +enable_language(ASM) +if(ALIGPU_BUILD_TYPE STREQUAL "O2") + set(defineIncludeSrc "O2::${MODULE}") +else() + set(defineIncludeSrc "${MODULE}") +endif() +set(GPU_RTC_DEFINES "-D$,$-D>") +set(GPU_RTC_INCLUDES "-I$,EXCLUDE,^/usr/include/?>,$-I>" + -I${CMAKE_SOURCE_DIR}/Detectors/Base/src + -I${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src +) +if(ALIGPU_BUILD_TYPE STREQUAL "O2") + set(GPU_RTC_INCLUDES ${GPU_RTC_INCLUDES} "-I$,EXCLUDE,^/usr/include/?>,$-I>") +endif() +#set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -keep") + +# build flags to use for RTC +set(GPU_RTC_FLAGS "${CMAKE_CUDA_FLAGS} ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -std=c++${CMAKE_CUDA_STANDARD}") +set(GPU_RTC_FLAGS_ARCH "") +if(CUDA_COMPUTETARGET) + foreach(CUDA_ARCH ${CUDA_COMPUTETARGET}) + set(GPU_RTC_FLAGS_ARCH "${GPU_RTC_FLAGS_ARCH} -gencode arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}") + endforeach() + list (GET CUDA_COMPUTETARGET 0 RTC_CUDA_ARCH) + set(RTC_CUDA_ARCH "${RTC_CUDA_ARCH}0") +else() + set(RTC_CUDA_ARCH "750") +endif() +if(GPUCA_CUDA_GCCBIN) + set(GPU_RTC_FLAGS "${GPU_RTC_FLAGS} 
--compiler-bindir ${GPUCA_CUDA_GCCBIN}") +endif() +set(GPU_RTC_FLAGS_SEPARATED "${GPU_RTC_FLAGS}") +separate_arguments(GPU_RTC_FLAGS_SEPARATED) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done - COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain standard headers 1>&2 && exit 1" - COMMENT Checking CUDA RTC File ${GPU_RTC_BIN}.src - DEPENDS ${GPU_RTC_BIN}.src VERBATIM) - add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done) +# convenience variables +if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") + get_filename_component(GPUDIR ${CMAKE_SOURCE_DIR}/../ ABSOLUTE) +else() + set(GPUDIR ${CMAKE_SOURCE_DIR}/GPU/GPUTracking) +endif() - add_custom_command( - OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${CMAKE_CUDA_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -fatbin" > ${GPU_RTC_BIN}.command - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing CUDA RTC command file ${GPU_RTC_BIN}.command" - ) - create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) +set(GPU_RTC_SRC ${GPUDIR}/Base/cuda/GPUReconstructionCUDArtc.cu) +set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) - add_custom_command( - OUTPUT ${GPU_RTC_BIN}.command.arch - COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing CUDA RTC ARCH file ${GPU_RTC_BIN}.command.arch" - ) - create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) +# cmake-format: off +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludes.h > ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E 
${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + MAIN_DEPENDENCY ${GPU_RTC_SRC} + IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} + COMMAND_EXPAND_LISTS + COMMENT "Preparing CUDA RTC source file ${GPU_RTC_BIN}.src" +) +create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) + +add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done + COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain standard headers 1>&2 && exit 1" + COMMENT Checking CUDA RTC File ${GPU_RTC_BIN}.src + DEPENDS ${GPU_RTC_BIN}.src VERBATIM) +add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done) + +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command + COMMAND echo -n "${CMAKE_CUDA_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -fatbin" > ${GPU_RTC_BIN}.command + COMMAND_EXPAND_LISTS + VERBATIM + COMMENT "Preparing CUDA RTC command file ${GPU_RTC_BIN}.command" +) +create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) - set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) -endif() +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command.arch + COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch + COMMAND_EXPAND_LISTS + VERBATIM + COMMENT "Preparing CUDA RTC ARCH file ${GPU_RTC_BIN}.command.arch" +) +create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) + +set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") @@ -123,27 +121,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") install(FILES ${HDRS} DESTINATION include/GPU) endif() -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - set(targetName "Ali${MODULE}") - set(TMP_BASELIB AliGPUTracking) - 
# Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary("${targetName}" "" "GPUReconstructionCUDA.h" "${incdirs} .") - - # Generate the ROOT map Dependecies - generate_rootmap("${targetName}" "" "") - - # Add a library to the project using the specified source files - add_library_tested(${targetName} SHARED ${SRCS} G__${targetName}.cxx) - target_link_libraries(${targetName} PUBLIC ${TMP_BASELIB}) - - # Installation - install(TARGETS ${targetName} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS} DESTINATION include) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}) -endif() - if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") set(TMP_BASELIB GPUTracking) @@ -168,11 +145,7 @@ endif() # Special handling of GPU kernels in case of per-kernel compilation / RDC if(NOT DEFINED GPUCA_CUDA_COMPILE_MODE) - if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - set(GPUCA_CUDA_COMPILE_MODE "onefile") - else() - set(GPUCA_CUDA_COMPILE_MODE "perkernel") - endif() + set(GPUCA_CUDA_COMPILE_MODE "perkernel") endif() if(GPUCA_CUDA_COMPILE_MODE STREQUAL "onefile") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=0) diff --git a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h index 7f5f596d96abf..99399f505d552 100644 --- a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h +++ b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h @@ -19,7 +19,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -39,7 +39,7 @@ class ThrustVolatileAsyncAllocator }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #ifndef __HIPCC__ // Override synchronize call at end of thrust algorithm running on stream, just don't run cudaStreamSynchronize @@ -48,7 +48,7 @@ namespace thrust namespace cuda_cub { -typedef thrust::cuda_cub::execution_policy thrustStreamPolicy; +typedef thrust::cuda_cub::execution_policy 
thrustStreamPolicy; template <> __host__ __device__ inline cudaError_t synchronize(thrustStreamPolicy& policy) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index dd35a23d67c21..26cbc282b6fc2 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -42,7 +42,7 @@ static constexpr size_t REQUIRE_FREE_MEMORY_RESERVED_PER_SM = 40L * 1024 * 1024; static constexpr size_t RESERVE_EXTRA_MEM_THRESHOLD = 10L * 1024 * 1024 * 1024; static constexpr size_t RESERVE_EXTRA_MEM_OFFSET = 1L * 512 * 1024 * 1024; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; __global__ void dummyInitKernel(void*) {} @@ -371,7 +371,6 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mBlockCount), dim3(256), 0, 0, mDeviceMemoryBase); #endif -#ifndef GPUCA_ALIROOT_LIB if (mProcessingSettings.rtc.enable) { genAndLoadRTC(); } @@ -384,7 +383,6 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() #undef GPUCA_KRNL loadKernelModules(true, false); } -#endif #endif void* devPtrConstantMem = nullptr; #ifndef GPUCA_NO_CONSTANT_MEMORY @@ -698,7 +696,7 @@ void* GPUReconstructionHIP::getGPUPointer(void* ptr) } #endif // __HIPCC__ -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { template class GPUReconstructionKernels; } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 070177fb344f1..49142d409c5ae 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -20,12 +20,12 @@ #include #ifdef _WIN32 -extern "C" __declspec(dllexport) GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); +extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const 
o2::gpu::GPUSettingsDeviceBackend& cfg); #else -extern "C" GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); +extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend& cfg); #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -105,6 +105,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #include "GPUConstantMem.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 93340c82a39ba..7114d37380afc 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -27,18 +27,15 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -#ifndef GPUCA_ALIROOT_LIB #include "utils/qGetLdBinarySymbols.h" QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_src); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch); -#endif int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { -#ifndef GPUCA_ALIROOT_LIB std::string rtcparam = std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) 
__VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; @@ -253,6 +250,5 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) close(fd); } -#endif return 0; } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 246677b4b9647..d6c2742ef0581 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -23,7 +23,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -86,6 +86,6 @@ class GPUDebugTiming static_assert(std::is_convertible::value, "CUDA event type incompatible to deviceEvent"); } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 72e5d16006a2f..2e695b49ebb6c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -19,7 +19,7 @@ #include "GPUReconstructionCUDAInternals.h" #include "CUDAThrustHelpers.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifdef GPUCA_USE_TEXTURES texture gAliTexRefu2; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index acd87c528e8e4..729ac3cf483e9 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -66,81 +66,79 @@ set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludes.h 
${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- -if(NOT ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - enable_language(ASM) - if(ALIGPU_BUILD_TYPE STREQUAL "O2") - set(defineIncludeSrc "O2::${MODULE}") - else() - set(defineIncludeSrc "${MODULE}") - endif() - set(GPU_RTC_DEFINES "-D$,$-D>") - set(GPU_RTC_INCLUDES "-I$,EXCLUDE,^/usr/include/?>,$-I>" - -I${CMAKE_SOURCE_DIR}/Detectors/Base/src - -I${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src - ) - if(ALIGPU_BUILD_TYPE STREQUAL "O2") - set(GPU_RTC_INCLUDES ${GPU_RTC_INCLUDES} "-I$,EXCLUDE,^/usr/include/?>,$-I>") - endif() - - # build flags to use for RTC - set(GPU_RTC_FLAGS "${CMAKE_HIP_FLAGS} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -std=c++${CMAKE_HIP_STANDARD}") - set(GPU_RTC_FLAGS_ARCH "") - foreach(HIP_ARCH ${CMAKE_HIP_ARCHITECTURES}) - set(GPU_RTC_FLAGS_ARCH "${GPU_RTC_FLAGS_ARCH} --offload-arch=${HIP_ARCH}") - endforeach() - - set(GPU_RTC_FLAGS_SEPARATED "${GPU_RTC_FLAGS}") - separate_arguments(GPU_RTC_FLAGS_SEPARATED) - - # convenience variables - if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - get_filename_component(GPUDIR ${CMAKE_SOURCE_DIR}/../ ABSOLUTE) - else() - set(GPUDIR ${CMAKE_SOURCE_DIR}/GPU/GPUTracking) - endif() +enable_language(ASM) +if(ALIGPU_BUILD_TYPE STREQUAL "O2") + set(defineIncludeSrc "O2::${MODULE}") +else() + set(defineIncludeSrc "${MODULE}") +endif() +set(GPU_RTC_DEFINES "-D$,$-D>") +set(GPU_RTC_INCLUDES "-I$,EXCLUDE,^/usr/include/?>,$-I>" + -I${CMAKE_SOURCE_DIR}/Detectors/Base/src + -I${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src +) +if(ALIGPU_BUILD_TYPE STREQUAL "O2") + set(GPU_RTC_INCLUDES ${GPU_RTC_INCLUDES} "-I$,EXCLUDE,^/usr/include/?>,$-I>") +endif() - set(GPU_RTC_SRC ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPrtc.hip) - set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) +# build flags to use for RTC +set(GPU_RTC_FLAGS "${CMAKE_HIP_FLAGS} 
${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -std=c++${CMAKE_HIP_STANDARD}") +set(GPU_RTC_FLAGS_ARCH "") +foreach(HIP_ARCH ${CMAKE_HIP_ARCHITECTURES}) + set(GPU_RTC_FLAGS_ARCH "${GPU_RTC_FLAGS_ARCH} --offload-arch=${HIP_ARCH}") +endforeach() - # cmake-format: off - add_custom_command( - OUTPUT ${GPU_RTC_BIN}.src - COMMAND cat ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludes.h > ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src - MAIN_DEPENDENCY ${GPU_RTC_SRC} - IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} - DEPENDS ${MODULE}_HIPIFIED - COMMAND_EXPAND_LISTS - COMMENT "Preparing HIP RTC source file ${GPU_RTC_BIN}.src" - ) - create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) +set(GPU_RTC_FLAGS_SEPARATED "${GPU_RTC_FLAGS}") +separate_arguments(GPU_RTC_FLAGS_SEPARATED) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done - COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain standard headers 1>&2 && exit 1" - COMMENT Checking HIP RTC File ${GPU_RTC_BIN}.src - DEPENDS ${GPU_RTC_BIN}.src VERBATIM) - add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done) +# convenience variables +if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") + get_filename_component(GPUDIR ${CMAKE_SOURCE_DIR}/../ ABSOLUTE) +else() + set(GPUDIR ${CMAKE_SOURCE_DIR}/GPU/GPUTracking) +endif() - add_custom_command( - OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${hip_HIPCC_EXECUTABLE} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} --genco" > ${GPU_RTC_BIN}.command - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" - ) - create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) +set(GPU_RTC_SRC ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPrtc.hip) +set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) + +# cmake-format: off +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludes.h > ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + MAIN_DEPENDENCY ${GPU_RTC_SRC} + IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} + DEPENDS ${MODULE}_HIPIFIED + COMMAND_EXPAND_LISTS + COMMENT "Preparing HIP RTC source file ${GPU_RTC_BIN}.src" +) +create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) + +add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done + COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain standard headers 1>&2 && exit 1" + COMMENT Checking HIP RTC File ${GPU_RTC_BIN}.src + DEPENDS ${GPU_RTC_BIN}.src VERBATIM) +add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done) + +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command + COMMAND echo -n "${hip_HIPCC_EXECUTABLE} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} --genco" > ${GPU_RTC_BIN}.command + COMMAND_EXPAND_LISTS + VERBATIM + COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" +) +create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) - add_custom_command( - OUTPUT ${GPU_RTC_BIN}.command.arch - COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing HIP RTC ARCH file ${GPU_RTC_BIN}.command.arch" - ) - create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command.arch + COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch + COMMAND_EXPAND_LISTS + VERBATIM + COMMENT "Preparing HIP RTC ARCH file ${GPU_RTC_BIN}.command.arch" +) +create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) - set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) -endif() +set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") @@ -167,36 +165,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") # LABELS gpu) endif() -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - set(targetName "Ali${MODULE}") - set(TMP_BASELIB AliGPUTracking) - if(NOT CMAKE_CXX_STANDARD OR CMAKE_CXX_STANDARD LESS 14) - 
message(FATAL_ERROR "HIP support needs at least C++14") - endif() - - # Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary("${targetName}" "" "${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h" "${incdirs} .") - - # Generate the ROOT map Dependecies - generate_rootmap("${targetName}" "" "") - - # Add a library to the project using the specified source files - add_library_tested(${targetName} SHARED ${SRCS}) - set(SRCS_CXX ${SRCS_CXX} G__${targetName}.cxx) # CMake HIP language feature will parse hip flags to everything that is linked to hip libraries, so add the ROOT dictionary file here - target_link_libraries(${targetName} PUBLIC ${TMP_BASELIB}) - - # ROOT Cannot load this library, so we have to remove it from the list - list(REMOVE_ITEM ALILIBSTESTED ${targetName}) - set(ALILIBSTESTED ${ALILIBSTESTED} CACHE INTERNAL "ALILIBSTESTED") - - # Installation - install(TARGETS ${targetName} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS} DESTINATION include) - include_directories(${GPUCA_HIP_SOURCE_DIR}) -endif() - if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") set(TMP_BASELIB GPUTracking) @@ -229,11 +197,7 @@ endif() # Special handling of GPU kernels in case of per-kernel compilation / RDC if(NOT DEFINED GPUCA_HIP_COMPILE_MODE) - if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - set(GPUCA_HIP_COMPILE_MODE "onefile") - else() - set(GPUCA_HIP_COMPILE_MODE "perkernel") - endif() + set(GPUCA_HIP_COMPILE_MODE "perkernel") endif() if(GPUCA_HIP_COMPILE_MODE STREQUAL "onefile") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=0) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 58d0e5f40b593..1aa3739b0b44a 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -96,25 +96,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") install(FILES ${HDRS} DESTINATION 
include/GPU) endif() -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - # Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary("Ali${MODULE}" "" "GPUReconstructionOCL.h" "${incdirs} .") - - # Generate the ROOT map - generate_rootmap("Ali${MODULE}" "" "") - - # Add a library to the project using the specified source files - add_library_tested(Ali${MODULE} SHARED ${SRCS} G__Ali${MODULE}.cxx) - target_link_libraries(Ali${MODULE} PUBLIC OpenCL AliGPUTracking) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS} DESTINATION include) - set(targetName Ali${MODULE}) -endif() - if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS}) target_link_libraries(${MODULE} GPUTracking OpenCL) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index f05780e86fe62..ed985e31ab1b0 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -19,7 +19,7 @@ #include "GPUReconstructionOCLInternals.h" #include "GPUReconstructionIncludes.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #include #include diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 4d0c51e65a517..245e9674801f6 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -18,12 +18,12 @@ #include "GPUReconstructionDeviceBase.h" #ifdef _WIN32 -extern "C" __declspec(dllexport) GPUCA_NAMESPACE::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); +extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const o2::gpu::GPUSettingsDeviceBackend& cfg); #else -extern "C" GPUCA_NAMESPACE::gpu::GPUReconstruction* 
GPUReconstruction_Create_OCL(const GPUCA_NAMESPACE::gpu::GPUSettingsDeviceBackend& cfg); +extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_OCL(const o2::gpu::GPUSettingsDeviceBackend& cfg); #endif -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUReconstructionOCLInternals; @@ -78,6 +78,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase }; using GPUReconstructionOCL = GPUReconstructionKernels; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h index fdcd7ff7f12c9..1020ce85563c2 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h @@ -25,7 +25,7 @@ #include #include "GPULogging.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { static const char* opencl_error_string(int32_t errorcode) @@ -244,6 +244,6 @@ inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) } static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index b04a8c12dd598..aac689a2e01c8 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -21,20 +21,11 @@ endif() include(cmake/helpers.cmake) -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - if(ENABLE_CUDA OR ENABLE_OPENCL OR ENABLE_HIP) - include(FeatureSummary) - find_package(O2GPU) - else() - include("cmake/kernel_helpers.cmake") - endif() -else() - if((ALIGPU_BUILD_TYPE STREQUAL "Standalone" AND GPUCA_BUILD_EVENT_DISPLAY) OR (ALIGPU_BUILD_TYPE STREQUAL "O2" AND NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND TARGET AliceO2::DebugGUI AND OPENGL_FOUND AND GLFW_FOUND)) - set(GPUCA_EVENT_DISPLAY ON) - endif() - if(ROOT_FOUND) - set(GPUCA_QA ON) - 
endif() +if((ALIGPU_BUILD_TYPE STREQUAL "Standalone" AND GPUCA_BUILD_EVENT_DISPLAY) OR (ALIGPU_BUILD_TYPE STREQUAL "O2" AND NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND TARGET AliceO2::DebugGUI AND OPENGL_FOUND AND GLFW_FOUND)) + set(GPUCA_EVENT_DISPLAY ON) +endif() +if(ROOT_FOUND) + set(GPUCA_QA ON) endif() # General sources @@ -236,27 +227,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") Interface/GPUO2InterfaceConfigurableParam.cxx) endif() -# Sources only for AliRoot -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - set(SRCS - ${SRCS} - Merger/GPUTPCGlobalMergerComponent.cxx - SliceTracker/GPUTPCTrackerComponent.cxx - Merger/GPUTPCGMTracksToTPCSeeds.cxx - DataCompression/AliHLTTPCClusterStatComponent.cxx - TRDTracking/GPUTRDTrackerComponent.cxx - TRDTracking/GPUTRDTrackletReaderComponent.cxx - Global/AliHLTGPUDumpComponent.cxx) - - set(SRCS_NO_CINT ${SRCS_NO_CINT} - ${CMAKE_SOURCE_DIR}/HLT/TPCLib/AliHLTTPCGeometry.cxx - ${CMAKE_SOURCE_DIR}/HLT/TPCLib/AliHLTTPCLog.cxx - ${CMAKE_SOURCE_DIR}/HLT/TPCLib/AliHLTTPCDefinitions.cxx - ${CMAKE_SOURCE_DIR}/HLT/TRD/AliHLTTRDDefinitions.cxx) - - set(HDRS_INSTALL ${HDRS_INSTALL} SliceTracker/GPUTPCDefinitions.h) -endif() - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) file(GENERATE OUTPUT include_gpu_onthefly/GPUReconstructionKernelList.h @@ -376,100 +346,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") add_subdirectory(Interface) endif() -# Main CMake part for AliRoot -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - set(HDRS_SRCS ${HDRS_SRCS} - DataTypes/GPUO2DataTypes.h) - add_definitions(-DGPUCA_ALIROOT_LIB) - include_directories(qa) - include_directories(SYSTEM ${ROOT_INCLUDE_DIR}) - include_directories(${CMAKE_SOURCE_DIR}/HLT/BASE - ${CMAKE_SOURCE_DIR}/HLT/BASE/util - ${CMAKE_SOURCE_DIR}/HLT/TPCLib - ${CMAKE_SOURCE_DIR}/HLT/TPCLib/transform - ${CMAKE_SOURCE_DIR}/HLT/TPCLib/comp - ${CMAKE_SOURCE_DIR}/HLT/TRD - ${CMAKE_SOURCE_DIR}/TRD/TRDbase - ${CMAKE_SOURCE_DIR}/STEER/STEERBase - ${CMAKE_SOURCE_DIR}/STEER/STEER - 
${CMAKE_SOURCE_DIR}/STEER/ESD - ${CMAKE_SOURCE_DIR}/STEER/CDB - ${CMAKE_SOURCE_DIR}/TPC/TPCbase - ${CMAKE_SOURCE_DIR}/TPC/TPCcalib - ${CMAKE_SOURCE_DIR}/TPC/TPCrec - ${CMAKE_SOURCE_DIR}/GPU/Common - ${CMAKE_SOURCE_DIR}/GPU/Utils - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Debug - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Definitions - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/DataTypes - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/SliceTracker - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Merger - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Global - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/TPCConvert - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/dEdx - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/TRDTracking - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/utils - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/cuda - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/hip - ${CMAKE_SOURCE_DIR}/GPU/GPUTracking/Base/opencl - ${CMAKE_SOURCE_DIR}/GPU/TPCFastTransformation) - alice_usevc() - - # Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - generate_dictionary_flat("Ali${MODULE}" "GPUTrackingLinkDef_AliRoot.h" - "${HDRS_SRCS}" "${incdirs}") - - # Generate the ROOT map Dependecies - set(LIBDEPS - STEERBase - STEER - ESD - CDB - HLTbase - TPCbase - TPCrec - TPCcalib - TRDbase - AliTPCFastTransformation) - generate_rootmap("Ali${MODULE}" "${LIBDEPS}" - "${CMAKE_CURRENT_SOURCE_DIR}/GPUTrackingLinkDef_AliRoot.h") - # Don't pass Vc to root - set(LIBDEPS ${LIBDEPS} Vc) - - # Add a library to the project using the specified source files - add_library_tested(Ali${MODULE} - SHARED - ${SRCS} - ${SRCS_NO_CINT} - ${SRCS_NO_H} - ${SRCS_DATATYPES} - G__Ali${MODULE}.cxx) - target_link_libraries(Ali${MODULE} ${LIBDEPS}) - - # Additional compilation flags - set_target_properties(Ali${MODULE} PROPERTIES COMPILE_FLAGS "") - - # System dependent: Modify the way the library is build - if(${CMAKE_SYSTEM} MATCHES Darwin) - set_target_properties(Ali${MODULE} - PROPERTIES 
LINK_FLAGS "-undefined dynamic_lookup") - endif(${CMAKE_SYSTEM} MATCHES Darwin) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS_SRCS} ${HDRS_INSTALL} DESTINATION include) - install(DIRECTORY utils - DESTINATION include - FILES_MATCHING PATTERN *.h) - - set(targetName Ali${MODULE}) - add_library(O2::${MODULE} ALIAS Ali${MODULE}) -endif() - # Main CMake part for Standalone if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H} ${SRCS_DATATYPES}) diff --git a/GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.cxx b/GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.cxx deleted file mode 100644 index d6f9ff692c15f..0000000000000 --- a/GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.cxx +++ /dev/null @@ -1,718 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file AliHLTTPCClusterStatComponent.cxx -/// \author David Rohr - -#include "AliCDBEntry.h" -#include "AliCDBManager.h" -#include "AliEventInfo.h" -#include "AliGRPObject.h" -#include "AliGeomManager.h" -#include "AliHLTExternalTrackParam.h" -#include "AliHLTGlobalBarrelTrack.h" -#include "GPUParam.h" -#include "AliHLTTPCClusterStatComponent.h" -#include "AliHLTTPCClusterTransformation.h" -#include "AliHLTTPCClusterXYZ.h" -#include "AliHLTTPCDataCompressionComponent.h" -#include "AliHLTTPCDefinitions.h" -#include "GPUTPCGMPropagator.h" -#include "GPUTPCGMPolynomialField.h" -#include "GPUTPCGMPolynomialFieldManager.h" -#include "GPUTPCGMTrackParam.h" -#include "AliHLTTPCGeometry.h" -#include "AliHLTTPCRawCluster.h" -#include "AliRawEventHeaderBase.h" -#include "AliRecoParam.h" -#include "AliRunInfo.h" -#include "AliTPCParam.h" -#include "AliTPCRecoParam.h" -#include "AliTPCTransform.h" -#include "AliTPCcalibDB.h" -#include - -using namespace GPUCA_NAMESPACE::gpu; - -ClassImp(AliHLTTPCClusterStatComponent); - -AliHLTTPCClusterStatComponent::AliHLTTPCClusterStatComponent() - : AliHLTProcessor(), mSliceParam(nullptr), fTotal(0), fEdge(0), fSplitPad(0), fSplitTime(0), fSplitPadTime(0), fSplitPadOrTime(0), fAssigned(0), fCompressionStudy(0), fPrintClusters(0), fPrintClustersScaled(0), fDumpClusters(0), fAggregate(0), fSort(0), fEvent(0) -{ -} - -AliHLTTPCClusterStatComponent::~AliHLTTPCClusterStatComponent() {} - -void AliHLTTPCClusterStatComponent::GetInputDataTypes(AliHLTComponentDataTypeList& list) -{ - list.push_back(AliHLTTPCDefinitions::fgkRawClustersDataType | kAliHLTDataOriginTPC); - list.push_back(AliHLTTPCDefinitions::fgkTPCReverseTransformInfoDataType); - list.push_back(AliHLTTPCDefinitions::ClustersXYZDataType()); - list.push_back((kAliHLTDataTypeTrack | kAliHLTDataOriginTPC)); -} - -AliHLTComponentDataType AliHLTTPCClusterStatComponent::GetOutputDataType() { return kAliHLTDataTypeHistogram | kAliHLTDataOriginOut; } - -void 
AliHLTTPCClusterStatComponent::GetOutputDataSize(uint64_t& constBase, double& inputMultiplier) -{ - constBase = 2000000; - inputMultiplier = 0.0; -} - -int32_t AliHLTTPCClusterStatComponent::ProcessOption(TString option, TString value) -{ - int32_t iResult = 0; - - if (option.EqualTo("print-clusters")) { - fPrintClusters = 1; - } else if (option.EqualTo("aggregate")) { - fAggregate = 1; - } else if (option.EqualTo("sort")) { - fSort = 1; - } else if (option.EqualTo("print-clusters-scaled")) { - fPrintClustersScaled = 1; - } else if (option.EqualTo("dump-clusters")) { - fDumpClusters = 1; - } else if (option.EqualTo("compression-study")) { - fCompressionStudy = 1; - } else { - HLTError("invalid option: %s", value.Data()); - return -EINVAL; - } - return iResult; -} - -int32_t AliHLTTPCClusterStatComponent::DoInit(int argc, const char** argv) -{ - int32_t iResult = 0; - - if (ProcessOptionString(GetComponentArgs()) < 0) { - HLTFatal("wrong config string! %s", GetComponentArgs().c_str()); - return -EINVAL; - } - - if (fDumpClusters) { - if ((fp = fopen("clusters.dump", "w+b")) == nullptr) { - return -1; - } - } - - AliTPCcalibDB* pCalib = AliTPCcalibDB::Instance(); - const AliMagF* field = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - pCalib->SetExBField(field); - AliCDBEntry* entry = AliCDBManager::Instance()->Get("GRP/GRP/Data"); - AliGRPObject tmpGRP, *pGRP = 0; - pGRP = dynamic_cast(entry->GetObject()); - AliRunInfo runInfo(pGRP->GetLHCState(), pGRP->GetBeamType(), pGRP->GetBeamEnergy(), pGRP->GetRunType(), pGRP->GetDetectorMask()); - AliEventInfo evInfo; - evInfo.SetEventType(AliRawEventHeaderBase::kPhysicsEvent); - entry = AliCDBManager::Instance()->Get("TPC/Calib/RecoParam"); - TObject* recoParamObj = entry->GetObject(); - - static AliRecoParam fOfflineRecoParam; - if (dynamic_cast(recoParamObj)) { - TObjArray* copy = (TObjArray*)(static_cast(recoParamObj)->Clone()); - fOfflineRecoParam.AddDetRecoParamArray(1, copy); - } else if 
(dynamic_cast(recoParamObj)) { - AliDetectorRecoParam* copy = (AliDetectorRecoParam*)static_cast(recoParamObj)->Clone(); - fOfflineRecoParam.AddDetRecoParam(1, copy); - } - fOfflineRecoParam.SetEventSpecie(&runInfo, evInfo, 0); - AliTPCRecoParam* recParam = (AliTPCRecoParam*)fOfflineRecoParam.GetDetRecoParam(1); - pCalib->GetTransform()->SetCurrentRecoParam(recParam); - - mSliceParam = new GPUParam(); - mSliceParam->SetDefaults(GetBz()); - - return iResult; -} - -int32_t AliHLTTPCClusterStatComponent::DoDeinit() -{ - if (fDumpClusters) { - fclose(fp); - } - delete mSliceParam; - mSliceParam = nullptr; - return 0; -} - -void AliHLTTPCClusterStatComponent::TransformReverse(int32_t slice, int32_t row, float y, float z, float padtime[]) -{ - AliTPCcalibDB* calib = AliTPCcalibDB::Instance(); - AliTPCParam* param = calib->GetParameters(); - - float padWidth = 0; - // float padLength = 0; - float maxPad = 0; - float sign = slice < NSLICES / 2 ? 1 : -1; - float zwidth; - - int32_t sector; - int32_t sectorrow; - if (row < AliHLTTPCGeometry::GetNRowLow()) { - sector = slice; - sectorrow = row; - maxPad = param->GetNPadsLow(sectorrow); - // padLength = param->GetPadPitchLength(sector, sectorrow); - padWidth = param->GetPadPitchWidth(sector); - } else { - sector = slice + NSLICES; - sectorrow = row - AliHLTTPCGeometry::GetNRowLow(); - maxPad = param->GetNPadsUp(sectorrow); - // padLength = param->GetPadPitchLength(sector, sectorrow); - padWidth = param->GetPadPitchWidth(sector); - } - - padtime[0] = y * sign / padWidth + 0.5f * maxPad; - - float xyzGlobal[2] = {param->GetPadRowRadii(sector, sectorrow), y}; - AliHLTTPCGeometry::Local2Global(xyzGlobal, slice); - - float time = z * sign * 1024.f / GPUTPCGeometry::TPCLength(); - padtime[1] = (1024.f - time); -} - -void AliHLTTPCClusterStatComponent::TransformForward(int32_t slice, int32_t row, float pad, float time, float xyz[]) -{ - AliTPCcalibDB* calib = AliTPCcalibDB::Instance(); - AliTPCParam* param = calib->GetParameters(); - 
- float padWidth = 0; - // float padLength = 0; - float maxPad = 0; - float sign = slice < NSLICES / 2 ? 1 : -1; - float zwidth; - - int32_t sector; - int32_t sectorrow; - if (row < AliHLTTPCGeometry::GetNRowLow()) { - sector = slice; - sectorrow = row; - maxPad = param->GetNPadsLow(sectorrow); - // padLength = param->GetPadPitchLength(sector, sectorrow); - padWidth = param->GetPadPitchWidth(sector); - } else { - sector = slice + NSLICES; - sectorrow = row - AliHLTTPCGeometry::GetNRowLow(); - maxPad = param->GetNPadsUp(sectorrow); - // padLength = param->GetPadPitchLength(sector, sectorrow); - padWidth = param->GetPadPitchWidth(sector); - } - - xyz[0] = param->GetPadRowRadii(sector, sectorrow); - xyz[1] = (pad - 0.5f * maxPad) * padWidth * sign; - - float xyzGlobal[2] = {xyz[0], xyz[1]}; - AliHLTTPCGeometry::Local2Global(xyzGlobal, slice); - - xyz[2] = sign * (1024 - time) * GPUTPCGeometry::TPCLength() / 1024.f; -} - -static bool AliHLTTPCClusterStat_sorthelper(const AliHLTTPCRawCluster& a, const AliHLTTPCRawCluster& b) -{ - if (a.GetPadRow() < b.GetPadRow()) { - return (true); - } - if (a.GetPadRow() > b.GetPadRow()) { - return (false); - } - if (a.GetPad() < b.GetPad()) { - return (true); - } - if (a.GetPad() > b.GetPad()) { - return (false); - } - if (a.GetTime() < b.GetTime()) { - return (true); - } - if (a.GetTime() > b.GetTime()) { - return (false); - } - return (false); -} - -int32_t AliHLTTPCClusterStatComponent::DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& /*trigData*/, AliHLTUInt8_t* /*outputPtr*/, AliHLTUInt32_t& /*size*/, AliHLTComponentBlockDataList& /*outputBlocks*/) -{ - int32_t iResult = 0; - - if (!IsDataEvent()) { - return iResult; - } - - if (!fAggregate) { - fTotal = fEdge = fSplitPad = fSplitTime = fSplitPadTime = fSplitPadOrTime = 0; - } - int32_t nBlocks = evtData.fBlockCnt; - - AliHLTTPCRawClusterData* clustersArray[NSLICES][NPATCHES]; - AliHLTTPCClusterXYZData* 
clustersTransformedArray[NSLICES][NPATCHES]; - AliHLTTPCTrackHelperStruct* clustersTrackIDArray[NSLICES][NPATCHES]; - memset(clustersArray, 0, NSLICES * NPATCHES * sizeof(void*)); - memset(clustersTransformedArray, 0, NSLICES * NPATCHES * sizeof(void*)); - memset(clustersTrackIDArray, 0, NSLICES * NPATCHES * sizeof(void*)); - - AliHLTTracksData* tracks = nullptr; - - float bz = GetBz(); - - AliTPCcalibDB* pCalib = AliTPCcalibDB::Instance(); - AliTPCParam* tpcParam = pCalib->GetParameters(); - tpcParam->Update(); - tpcParam->ReadGeoMatrices(); - AliTPCTransform* transform = pCalib->GetTransform(); - const AliTPCRecoParam* rec = transform->GetCurrentRecoParam(); - transform->SetCurrentTimeStamp(GetTimeStamp()); - - for (int32_t ndx = 0; ndx < nBlocks; ndx++) { - const AliHLTComponentBlockData* iter = blocks + ndx; - - if (iter->fDataType == (AliHLTTPCDefinitions::fgkRawClustersDataType | kAliHLTDataOriginTPC)) { - int32_t slice = AliHLTTPCDefinitions::GetMinSliceNr(iter->fSpecification); - int32_t patch = AliHLTTPCDefinitions::GetMinPatchNr(iter->fSpecification); - - clustersArray[slice][patch] = (AliHLTTPCRawClusterData*)(iter->fPtr); - } - - if (iter->fDataType == AliHLTTPCDefinitions::ClustersXYZDataType()) { - int32_t slice = AliHLTTPCDefinitions::GetMinSliceNr(iter->fSpecification); - int32_t patch = AliHLTTPCDefinitions::GetMinPatchNr(iter->fSpecification); - - clustersTransformedArray[slice][patch] = (AliHLTTPCClusterXYZData*)(iter->fPtr); - if (clustersTransformedArray[slice][patch]->fCount) { - clustersTrackIDArray[slice][patch] = new AliHLTTPCTrackHelperStruct[clustersTransformedArray[slice][patch]->fCount]; - memset(clustersTrackIDArray[slice][patch], 0, clustersTransformedArray[slice][patch]->fCount * sizeof(AliHLTTPCTrackHelperStruct)); - for (int32_t i = 0; i < clustersTransformedArray[slice][patch]->fCount; i++) { - clustersTrackIDArray[slice][patch][i].fID = -1; - } - } - } - - if (iter->fDataType == (kAliHLTDataTypeTrack | kAliHLTDataOriginTPC)) { - 
tracks = (AliHLTTracksData*)iter->fPtr; - } - } - - if (fCompressionStudy) { - if (tracks == nullptr) { - HLTError("Tracks missing"); - return (0); - } - } - - double residualBarrelTrackY = 0, residualBarrelTrackZ = 0, residualExternalTrackY = 0, residualExternalTrackZ = 0, residualBacktransformPad = 0, residualBacktransformTime = 0; - double residualBarrelTrackYabs = 0, residualBarrelTrackZabs = 0, residualExternalTrackYabs = 0, residualExternalTrackZabs = 0, residualBacktransformPadabs = 0, residualBacktransformTimeabs = 0; - double residualFitTrackY = 0, residualFitTrackZ = 0, residualFitTrackYabs = 0, residualFitTrackZabs = 0, residualTrackRawPad = 0, residualTrackRawTime = 0, residualTrackRawPadabs = 0, residualTrackRawTimeabs = 0; - int32_t nClusterTracks = 0, nClusters = 0, nClusterTracksRaw = 0; - - const AliHLTUInt8_t* pCurrent = reinterpret_cast(tracks->fTracklets); - if (fCompressionStudy) { - GPUTPCGMPropagator prop; - prop.SetMaxSinPhi(.999); - prop.SetMaterialTPC(); - GPUTPCGMPolynomialField field; - int32_t err = GPUTPCGMPolynomialFieldManager::GetPolynomialField(field); - if (err != 0) { - HLTError("Can not initialize polynomial magnetic field"); - return -1; - } - prop.SetPolynomialField(&field); - for (unsigned i = 0; i < tracks->fCount; i++) { - const AliHLTExternalTrackParam* track = reinterpret_cast(pCurrent); - if (track->fNPoints == 0) { - continue; - } - - AliHLTGlobalBarrelTrack btrack(*track); - btrack.CalculateHelixParams(bz); - - AliExternalTrackParam etrack(btrack); - - GPUTPCGMTrackParam ftrack; - float falpha; - - int32_t hitsUsed = 0; - float averageCharge = 0; - float averageQMax = 0; - AliHLTTPCTrackHelperStruct* hitIndexCache[1024]; - for (int32_t ip = 0; ip < track->fNPoints; ip++) { - int32_t clusterID = track->fPointIDs[ip]; - int32_t slice = AliHLTTPCGeometry::CluID2Slice(clusterID); - int32_t patch = AliHLTTPCGeometry::CluID2Partition(clusterID); - int32_t index = AliHLTTPCGeometry::CluID2Index(clusterID); - - if 
(clustersTrackIDArray[slice][patch][index].fID != -1) { - HLTDebug("Already assigned hit %d of track %d, skipping", ip, i); - continue; - } - - if (index > clustersArray[slice][patch]->fCount) { - HLTError("Cluster index out of range"); - continue; - } - - AliHLTTPCRawCluster& cluster = clustersArray[slice][patch]->fClusters[index]; - AliHLTTPCClusterXYZ& clusterTransformed = clustersTransformedArray[slice][patch]->fClusters[index]; - - int32_t padrow = AliHLTTPCGeometry::GetFirstRow(patch) + cluster.GetPadRow(); - float x = AliHLTTPCGeometry::Row2X(padrow); - float y = 0.0f; - float z = 0.0f; - - float xyz[3]; - if (1) // Use forward (exact reverse-reverse) transformation of raw cluster (track fit in distorted coordinates) - { - TransformForward(slice, padrow, cluster.GetPad(), cluster.GetTime(), xyz); - } else { // Correct cluster coordinates using correct transformation - xyz[0] = x; - xyz[1] = clusterTransformed.fY; - xyz[2] = clusterTransformed.fZ; - } - - float alpha = slice; - if (alpha > NSLICES / 2) { - alpha -= NSLICES / 2; - } - if (alpha > NSLICES / 4) { - alpha -= NSLICES / 2; - } - alpha = (alpha + 0.5f) * M_PI / 9.f; - btrack.CalculateCrossingPoint(x, alpha /* Better use btrack.GetAlpha() ?? 
*/, y, z); - - etrack.Propagate(alpha, x, bz); - - if (ip == 0) { - ftrack.Par()[0] = xyz[1]; - ftrack.Par()[1] = xyz[2]; - for (int32_t k = 2; k < 5; k++) { - ftrack.Par()[k] = etrack.GetParameter()[k]; - } - ftrack.SetX(xyz[0]); - falpha = alpha; - - prop.SetTrack(&ftrack, falpha); - ftrack.ResetCovariance(); - bool inFlyDirection = 1; - prop.PropagateToXAlpha(xyz[0], falpha, inFlyDirection); - } else { - bool inFlyDirection = 0; - prop.PropagateToXAlpha(xyz[0], alpha, inFlyDirection); - } - - nClusterTracks++; - residualBarrelTrackYabs += fabsf(clusterTransformed.fY - y); - residualBarrelTrackZabs += fabsf(clusterTransformed.fZ - z); - residualExternalTrackYabs += fabsf(clusterTransformed.fY - (float)etrack.GetY()); - residualExternalTrackZabs += fabsf(clusterTransformed.fZ - (float)etrack.GetZ()); - residualBarrelTrackY += clusterTransformed.fY - y; - residualBarrelTrackZ += clusterTransformed.fZ - z; - residualExternalTrackY += clusterTransformed.fY - etrack.GetY(); - residualExternalTrackZ += clusterTransformed.fZ - etrack.GetZ(); - residualFitTrackY += clusterTransformed.fY - ftrack.GetY(); - residualFitTrackZ += clusterTransformed.fZ - ftrack.GetZ(); - residualFitTrackYabs += fabsf(clusterTransformed.fY - ftrack.GetY()); - residualFitTrackZabs += fabsf(clusterTransformed.fZ - ftrack.GetZ()); - - // Show residuals wrt track position - // HLTImportant("Residual %d btrack %f %f etrack %f %f ftrack %f %f", padrow, clusterTransformed.fY - y, clusterTransformed.fZ - z, - // clusterTransformed.fY - etrack.GetY(), clusterTransformed.fZ - etrack.GetZ(), - // clusterTransformed.fY - ftrack.GetY(), clusterTransformed.fZ - ftrack.GetZ()); - - float padtime[2]; - TransformReverse(slice, padrow, ftrack.GetY(), ftrack.GetZ(), padtime); - - // Check forward / backward transformation - /*float xyzChk[3]; - TransformForward(slice, padrow, padtime[0], padtime[1], xyzChk); - HLTImportant("BackwardForward Residual %f %f %f: %f %f", ftrack.GetX(), ftrack.GetY(), ftrack.GetZ(), 
ftrack.GetY() - xyzChk[1], ftrack.GetZ() - xyzChk[2]);*/ - - // Show residual wrt to raw cluster position - // HLTImportant("Raw Cluster Residual %d (%d/%d) %d: %f %f (%f %f)", i, ip, track->fNPoints, padrow, cluster.GetPad() - padtime[0], cluster.GetTime() - padtime[1], clusterTransformed.fY - ftrack.GetY(), clusterTransformed.fZ - ftrack.GetZ()); - if (fabsf(cluster.GetPad() - padtime[0]) > 5 || fabsf(cluster.GetTime() - padtime[1]) > 5) { - break; - } - - if (ip != 0) { - clustersTrackIDArray[slice][patch][index].fResidualPad = cluster.GetPad() - padtime[0]; - clustersTrackIDArray[slice][patch][index].fResidualTime = cluster.GetTime() - padtime[1]; - clustersTrackIDArray[slice][patch][index].fFirstHit = 0; - - residualTrackRawPad += cluster.GetPad() - padtime[0]; - residualTrackRawTime += cluster.GetTime() - padtime[1]; - residualTrackRawPadabs += fabsf(cluster.GetPad() - padtime[0]); - residualTrackRawTimeabs += fabsf(cluster.GetTime() - padtime[1]); - nClusterTracksRaw++; - } else { - clustersTrackIDArray[slice][patch][index].fResidualPad = cluster.GetPad(); - clustersTrackIDArray[slice][patch][index].fResidualTime = cluster.GetTime(); - clustersTrackIDArray[slice][patch][index].fFirstHit = 1; - } - clustersTrackIDArray[slice][patch][index].fID = i; - clustersTrackIDArray[slice][patch][index].fTrack = track; - if (hitsUsed >= 1024) { - HLTFatal("hitIndex cache exceeded"); - } - hitIndexCache[hitsUsed] = &clustersTrackIDArray[slice][patch][index]; - hitsUsed++; - averageCharge += cluster.GetCharge(); - averageQMax += cluster.GetQMax(); - - if (ip != 0) { - int32_t rowType = padrow < 64 ? 0 : (padrow < 128 ? 
2 : 1); - prop.Update(xyz[1], xyz[2], rowType, *mSliceParam, 0, 0, nullptr, false, slice > 18, -1.f, 0.f, 0.f); - } - } - if (hitsUsed) { - averageCharge /= hitsUsed; - averageQMax /= hitsUsed; - } - for (int32_t ip = 0; ip < hitsUsed; ip++) { - hitIndexCache[ip]->fAverageQMax = averageQMax; - hitIndexCache[ip]->fAverageQTot = averageCharge; - } - pCurrent += sizeof(AliHLTExternalTrackParam) + track->fNPoints * sizeof(uint32_t); - } - } - - for (uint32_t is = 0; is < NSLICES; is++) { - for (uint32_t ip = 0; ip < NPATCHES; ip++) { - AliHLTTPCRawClusterData* clusters = clustersArray[is][ip]; - AliHLTTPCClusterXYZData* clustersTransformed = clustersTransformedArray[is][ip]; - int32_t firstRow = AliHLTTPCGeometry::GetFirstRow(ip); - - if (clusters == nullptr) { - HLTDebug("Clusters missing for slice %d patch %d\n", is, ip); - continue; - } - if (fCompressionStudy && (clustersTransformed == nullptr || clusters->fCount != clustersTransformed->fCount)) { - HLTError("Cluster cound not equal"); - continue; - } - - AliHLTTPCRawCluster* sortedClusters; - if (fSort) { - if (fCompressionStudy) { - HLTFatal("Cannot sort when compressionstudy is enabled"); - } - sortedClusters = new AliHLTTPCRawCluster[clusters->fCount]; - memcpy(sortedClusters, clusters->fClusters, sizeof(AliHLTTPCRawCluster) * clusters->fCount); - std::sort(sortedClusters, sortedClusters + clusters->fCount, AliHLTTPCClusterStat_sorthelper); - } - - for (uint32_t iCluster = 0; iCluster < clusters->fCount; iCluster++) { - AliHLTTPCRawCluster& cluster = clusters->fClusters[iCluster]; - AliHLTTPCClusterXYZ& clusterTransformed = clustersTransformed->fClusters[iCluster]; - static AliHLTTPCTrackHelperStruct tmp; - AliHLTTPCTrackHelperStruct& clusterTrack = fCompressionStudy ? 
clustersTrackIDArray[is][ip][iCluster] : tmp; - - if (fCompressionStudy) { - int32_t row = cluster.GetPadRow() + firstRow; - - float xyz[3]; - TransformForward(is, row, cluster.GetPad(), cluster.GetTime(), xyz); - - /*float xyzOrig[3], xyzLocGlob[3]; - { - int32_t sector = AliHLTTPCGeometry::GetNRowLow() ? is : is + NSLICES; - int32_t sectorrow = AliHLTTPCGeometry::GetNRowLow() ? row : row - AliHLTTPCGeometry::GetNRowLow(); - - Double_t xx[] = {(double) sectorrow, cluster.GetPad(), cluster.GetTime()}; - transform->Transform(xx, §or, 0, 1); - - Double_t yy[] = {(double) sectorrow, cluster.GetPad(), cluster.GetTime()}; - transform->Local2RotatedGlobal(sector, yy); - for (int32_t k = 0; k < 3; k++) - { - xyzOrig[k] = xx[k]; - xyzLocGlob[k] = yy[k]; - } - }*/ - - float padtime[2]; - TransformReverse(is, row, clusterTransformed.fY, clusterTransformed.fZ, padtime); - - nClusters++; - residualBacktransformPadabs += fabsf(cluster.GetPad() - padtime[0]); - residualBacktransformTimeabs += fabsf(cluster.GetTime() - padtime[1]); - residualBacktransformPad += cluster.GetPad() - padtime[0]; - residualBacktransformTime += cluster.GetTime() - padtime[1]; - } - - fTotal++; - if (cluster.GetFlagEdge()) { - fEdge++; - } - if (cluster.GetFlagSplitPad()) { - fSplitPad++; - } - if (cluster.GetFlagSplitTime()) { - fSplitTime++; - } - if (cluster.GetFlagSplitAny()) { - fSplitPadOrTime++; - } - if (cluster.GetFlagSplitPad() && cluster.GetFlagSplitTime()) { - fSplitPadTime++; - } - - AliHLTTPCRawCluster& cluster2 = fSort ? 
sortedClusters[iCluster] : cluster; - - if (fPrintClusters) { - HLTImportant("Event %d Slice %d, Patch %d, Row %d, Pad %.2f, Time %.2f, SPad %.2f, STime %.2f, QMax %d, QTot %d, SplitPad %d, SplitTime %d, Edge %d, TrackId %d, ResPad %.2f ResTime %.2f AvgQTot %d AvgQMax %d", fEvent, is, ip, (int32_t)cluster2.GetPadRow(), cluster2.GetPad(), - cluster2.GetTime(), cluster2.GetSigmaPad2(), cluster2.GetSigmaTime2(), (int32_t)cluster2.GetQMax(), (int32_t)cluster2.GetCharge(), (int32_t)cluster2.GetFlagSplitPad(), (int32_t)cluster2.GetFlagSplitTime(), (int32_t)cluster2.GetFlagEdge(), (int32_t)clusterTrack.fID, - clusterTrack.fResidualPad, clusterTrack.fResidualTime, (int32_t)clusterTrack.fAverageQTot, (int32_t)clusterTrack.fAverageQMax); - } - - if (fCompressionStudy && clusterTrack.fID == -1) { - PrintDumpClustersScaled(is, ip, cluster, clusterTransformed, clusterTrack); - } - } - if (fSort) { - delete[] sortedClusters; - } - } - } - - if (fDumpClusters || fPrintClustersScaled) { - const AliHLTUInt8_t* pCurrent = reinterpret_cast(tracks->fTracklets); - for (unsigned i = 0; i < tracks->fCount; i++) { - const AliHLTExternalTrackParam* track = reinterpret_cast(pCurrent); - for (int32_t ip = 0; ip < track->fNPoints; ip++) { - int32_t clusterID = track->fPointIDs[ip]; - int32_t slice = AliHLTTPCGeometry::CluID2Slice(clusterID); - int32_t patch = AliHLTTPCGeometry::CluID2Partition(clusterID); - int32_t index = AliHLTTPCGeometry::CluID2Index(clusterID); - - AliHLTTPCRawCluster& cluster = clustersArray[slice][patch]->fClusters[index]; - AliHLTTPCClusterXYZ& clusterTransformed = clustersTransformedArray[slice][patch]->fClusters[index]; - AliHLTTPCTrackHelperStruct& clusterTrack = clustersTrackIDArray[slice][patch][index]; - - if (clusterTrack.fID == i) { - PrintDumpClustersScaled(slice, patch, cluster, clusterTransformed, clusterTrack); - } - } - pCurrent += sizeof(AliHLTExternalTrackParam) + track->fNPoints * sizeof(uint32_t); - } - } - - for (uint32_t is = 0; is < NSLICES; is++) { 
- for (uint32_t ip = 0; ip < NPATCHES; ip++) { - if (clustersTrackIDArray[is][ip]) { - delete[] clustersTrackIDArray[is][ip]; - } - } - } - - int32_t total = fTotal == 0 ? 1 : fTotal; - fAssigned += nClusterTracks; - HLTImportant("Total %d Assigned %d (%2.0f\%) SplitPad %d (%2.0f\%) SplitTime %d (%2.0f\%) SplitPadTime %d (%2.0f\%) SplitPadOrTime %d (%2.0f\%) Edge %d (%2.0f\%)", fTotal, fAssigned, (float)fAssigned / (float)total * 100.f, fSplitPad, (float)fSplitPad / (float)total * 100.f, fSplitTime, - (float)fSplitTime / (float)total * 100.f, fSplitPadTime, (float)fSplitPadTime / (float)total * 100.f, fSplitPadOrTime, (float)fSplitPadOrTime / (float)total * 100.f, fEdge, (float)fEdge / (float)total * 100.f); - - if (nClusterTracks) { - residualBarrelTrackY /= nClusterTracks; - residualBarrelTrackZ /= nClusterTracks; - residualExternalTrackY /= nClusterTracks; - residualExternalTrackZ /= nClusterTracks; - residualBarrelTrackYabs /= nClusterTracks; - residualBarrelTrackZabs /= nClusterTracks; - residualExternalTrackYabs /= nClusterTracks; - residualExternalTrackZabs /= nClusterTracks; - residualFitTrackYabs /= nClusterTracks; - residualFitTrackZabs /= nClusterTracks; - residualFitTrackY /= nClusterTracks; - residualFitTrackZ /= nClusterTracks; - } - if (nClusterTracksRaw) { - residualTrackRawPadabs /= nClusterTracksRaw; - residualTrackRawTimeabs /= nClusterTracksRaw; - residualTrackRawPad /= nClusterTracksRaw; - residualTrackRawTime /= nClusterTracksRaw; - } - if (nClusters) { - residualBacktransformPad /= nClusters; - residualBacktransformTime /= nClusters; - residualBacktransformPadabs /= nClusters; - residualBacktransformTimeabs /= nClusters; - } - - if (fCompressionStudy) { - HLTImportant("Average Res: BarrelTr %f %f, ExtlTr %f %f, FitTr %f %f BackTr %f %f TrkRaw %f %f", residualBarrelTrackY, residualBarrelTrackZ, residualExternalTrackY, residualExternalTrackZ, residualFitTrackY, residualFitTrackZ, residualBacktransformPad, residualBacktransformTime, - 
residualTrackRawPad, residualTrackRawTime); - HLTImportant("Average Abs Res: BarrelTr %f %f, ExtTr %f %f, FitTr %f %f BackTr %f %f TrkRaw %f %f", residualBarrelTrackYabs, residualBarrelTrackZabs, residualExternalTrackYabs, residualExternalTrackZabs, residualFitTrackYabs, residualFitTrackZabs, residualBacktransformPadabs, - residualBacktransformTimeabs, residualTrackRawPadabs, residualTrackRawTimeabs); - } - - fEvent++; - - return iResult; -} - -void AliHLTTPCClusterStatComponent::PrintDumpClustersScaled(int32_t is, int32_t ip, AliHLTTPCRawCluster& cluster, AliHLTTPCClusterXYZ& clusterTransformed, AliHLTTPCClusterStatComponent::AliHLTTPCTrackHelperStruct& clusterTrack) -{ - AliHLTUInt64_t pad64 = 0; - if (!isnan(cluster.GetPad())) { - pad64 = (AliHLTUInt64_t)round(cluster.GetPad() * AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kPad].fScale); - } - - AliHLTUInt64_t time64 = 0; - if (!isnan(cluster.GetTime())) { - time64 = (AliHLTUInt64_t)round(cluster.GetTime() * AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kTime].fScale); - } - - AliHLTUInt64_t sigmaPad64 = 0; - if (!isnan(cluster.GetSigmaPad2())) { - sigmaPad64 = (AliHLTUInt64_t)round(cluster.GetSigmaPad2() * AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kSigmaY2].fScale); - } - - AliHLTUInt64_t sigmaTime64 = 0; - if (!isnan(cluster.GetSigmaTime2())) { - sigmaTime64 = (AliHLTUInt64_t)round(cluster.GetSigmaTime2() * AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kSigmaZ2].fScale); - } - - if (sigmaPad64 >= (unsigned)1 << AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kSigmaY2].fBitLength) { - sigmaPad64 = (1 << AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kSigmaY2].fBitLength) - 1; - } - if (sigmaTime64 >= (unsigned)1 << AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kSigmaZ2].fBitLength) { - sigmaTime64 = (1 << 
AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kSigmaZ2].fBitLength) - 1; - } - - AliHLTUInt64_t pad64res = 0; - pad64res = (AliHLTUInt64_t)round(clusterTrack.fResidualPad * AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kPad].fScale); - - AliHLTUInt64_t time64res = 0; - time64res = (AliHLTUInt64_t)round(clusterTrack.fResidualTime * AliHLTTPCDefinitions::fgkClusterParameterDefinitions[AliHLTTPCDefinitions::kTime].fScale); - - if (fDumpClusters) { - int32_t dumpVals[16] = {fEvent, - (int32_t)is, - (int32_t)ip, - (int32_t)cluster.GetPadRow(), - (int32_t)pad64, - (int32_t)time64, - (int32_t)sigmaPad64, - (int32_t)sigmaTime64, - (int32_t)cluster.GetQMax(), - (int32_t)cluster.GetCharge(), - (int32_t)(cluster.GetFlagEdge() * 4 + cluster.GetFlagSplitPad() * 2 + cluster.GetFlagSplitTime()), - (int32_t)clusterTrack.fID, - (int32_t)pad64res, - (int32_t)time64res, - (int32_t)clusterTrack.fAverageQTot, - (int32_t)clusterTrack.fAverageQMax}; - fwrite(dumpVals, sizeof(int32_t), 16, fp); - } - - if (fPrintClustersScaled) { - HLTImportant("Event %d Slice %d, Patch %d, Row %d, Pad %d, Time %d, SPad %d, STime %d, QMax %d, QTot %d, SplitPad %d, SplitTime %d, Edge %d, TrackID %d, PadRes %d, TimeRes %d AvgTot %d AvgMax %d", fEvent, is, ip, (int32_t)cluster.GetPadRow(), (int32_t)pad64, (int32_t)time64, (int32_t)sigmaPad64, - (int32_t)sigmaTime64, (int32_t)cluster.GetQMax(), (int32_t)cluster.GetCharge(), (int32_t)cluster.GetFlagSplitPad(), (int32_t)cluster.GetFlagSplitTime(), (int32_t)cluster.GetFlagEdge(), (int32_t)clusterTrack.fID, (int32_t)pad64res, (int32_t)time64res, (int32_t)clusterTrack.fAverageQTot, - (int32_t)clusterTrack.fAverageQMax); - } -} diff --git a/GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.h b/GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.h deleted file mode 100644 index 2b58755217a61..0000000000000 --- a/GPU/GPUTracking/DataCompression/AliHLTTPCClusterStatComponent.h +++ 
/dev/null @@ -1,98 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file AliHLTTPCClusterStatComponent.h -/// \author David Rohr - -#ifndef GPUTPCCLUSTERSTAT_H -#define GPUTPCCLUSTERSTAT_H - -#include "AliHLTProcessor.h" -#include "AliHLTComponentBenchmark.h" -#include "AliOptionParser.h" - -class AliHLTExternalTrackParam; -class AliHLTTPCRawCluster; -class AliHLTTPCClusterXYZ; -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -struct GPUParam; -} -} // namespace GPUCA_NAMESPACE - -class AliHLTTPCClusterStatComponent : public AliHLTProcessor, public AliOptionParser -{ - public: - /** standard constructor */ - AliHLTTPCClusterStatComponent(); - /** destructor */ - virtual ~AliHLTTPCClusterStatComponent(); - - static const uint32_t NSLICES = 36; - static const uint32_t NPATCHES = 6; - - struct AliHLTTPCTrackHelperStruct { - int32_t fID; - const AliHLTExternalTrackParam* fTrack; - float fResidualPad; - float fResidualTime; - bool fFirstHit; - int64_t fAverageQMax; - int64_t fAverageQTot; - }; - - // interface methods of base class - const char* GetComponentID() { return "TPCClusterStat"; }; - void GetInputDataTypes(AliHLTComponentDataTypeList& list); - AliHLTComponentDataType GetOutputDataType(); - void GetOutputDataSize(uint64_t& constBase, double& inputMultiplier); - AliHLTComponent* Spawn() { return new AliHLTTPCClusterStatComponent; } - - static void TransformReverse(int32_t slice, int32_t row, float y, float z, float padtime[]); - static void 
TransformForward(int32_t slice, int32_t row, float pad, float time, float xyz[]); - - void PrintDumpClustersScaled(int32_t is, int32_t ip, AliHLTTPCRawCluster& cluster, AliHLTTPCClusterXYZ& clusterTransformed, AliHLTTPCTrackHelperStruct& clusterTrack); - - protected: - // interface methods of base class - int32_t DoInit(int argc, const char** argv); - int32_t DoDeinit(); - int32_t DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& trigData, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, AliHLTComponentBlockDataList& outputBlocks); - - using AliHLTProcessor::DoEvent; - int32_t ProcessOption(TString option, TString value); - - private: - /** copy constructor prohibited */ - AliHLTTPCClusterStatComponent(const AliHLTTPCClusterStatComponent&); - /** assignment operator prohibited */ - AliHLTTPCClusterStatComponent& operator=(const AliHLTTPCClusterStatComponent&); - - GPUCA_NAMESPACE::gpu::GPUParam* mSliceParam; - - int32_t fTotal, fEdge, fSplitPad, fSplitTime, fSplitPadTime, fSplitPadOrTime, fAssigned; //! - - int32_t fCompressionStudy; //! - int32_t fPrintClusters; //! - int32_t fPrintClustersScaled; //! - int32_t fDumpClusters; //! - int32_t fAggregate; //! - int32_t fSort; //! 
- int32_t fEvent; - - FILE* fp; - - protected: - ClassDef(AliHLTTPCClusterStatComponent, 0); -}; -#endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h index 8e80f3223d04c..fcdfcfc9cc49a 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h @@ -17,7 +17,7 @@ #include "GPUTPCGMMergerTypes.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -68,6 +68,6 @@ struct GPUTPCClusterRejection { } }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx index 33886f721c5db..e8f8de7658b28 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx @@ -20,7 +20,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; // Small helper to compute Huffman probabilities namespace diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h index a07857bbcd0e3..4728f97ef704a 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h @@ -24,7 +24,7 @@ namespace o2::tpc struct ClusterNativeAccess; } // namespace o2::tpc -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCClusterStatistics { @@ -89,6 +89,6 @@ class GPUTPCClusterStatistics size_t mNTotalClusters = 0; #endif }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index f37f817a51822..8f8137a6307b0 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ 
b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -17,7 +17,7 @@ #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUTPCCompression::InitializeProcessor() {} diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index ce525f175e616..3244a0ab2640a 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -34,7 +34,7 @@ struct CompressedClustersFlat { } // namespace o2::tpc #endif -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCGMMerger; @@ -125,6 +125,6 @@ GPUdi() void GPUTPCCompression::truncateSignificantBits(T& v, uint32_t nBits, ui v = val; } } -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 27d7058bd8fc2..0f5936095fdc3 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -22,7 +22,7 @@ #include "GPUTPCClusterRejection.h" #include "GPUTPCCompressionKernels.inc" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; template <> diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h index 2fc114324830e..b0bb8a6c12ecc 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h @@ -22,7 +22,7 @@ namespace o2::tpc struct ClusterNative; } // namespace o2::tpc -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCCompressionKernels : public GPUKernelTemplate { @@ -124,6 +124,6 @@ class GPUTPCCompressionGatherKernels : public GPUKernelTemplate GPUdii() static void gatherMulti(int32_t 
nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx index f8fe18e915f28..2d8b69a4be516 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx @@ -16,7 +16,7 @@ #include "GPUConstantMem.h" #include "GPUParam.inc" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; // ATTENTION! This track model is used for the data compression. // Changes to the propagation and fit will prevent the decompression of data @@ -889,7 +889,6 @@ GPUd() float GPUTPCCompressionTrackModel::approximateBetheBloch(float beta2) GPUd() void GPUTPCCompressionTrackModel::getClusterErrors2(int32_t iRow, float z, float sinPhi, float DzDs, float& ErrY2, float& ErrZ2) const { - // Only O2 geometry considered at the moment. Is AliRoot geometry support needed? int32_t rowType = iRow < 97 ? (iRow < 63 ? 0 : 1) : (iRow < 127 ? 2 : 3); if (rowType > 2) { rowType = 2; // TODO: Add type 3 diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h index 662975692dbc8..b48bce50ff5a6 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h @@ -31,7 +31,7 @@ #else // Default internal track model for compression #endif -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { // ATTENTION! This track model is used for the data compression. 
// Changes to the propagation and fit will prevent the decompression of data @@ -164,6 +164,6 @@ class GPUTPCCompressionTrackModel PhysicalTrackModel mTrk; #endif }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx index 7c10f0eeef74f..16c6cb4b8d61c 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx @@ -19,7 +19,7 @@ #include "GPUMemorySizeScalers.h" #include "GPULogging.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUTPCDecompression::InitializeProcessor() {} diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h index 47c64008b176e..c15564426f464 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h @@ -35,7 +35,7 @@ struct CompressedClustersFlat { } // namespace o2::tpc #endif -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCDecompression : public GPUProcessor @@ -85,5 +85,5 @@ class GPUTPCDecompression : public GPUProcessor int16_t mResourceClusterNativeAccess = -1; int16_t mResourceNClusterPerSectorRow = -1; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif // GPUTPCDECOMPRESSION_H diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx index d7f1e2ac88368..2ba80bf4d3b21 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx @@ -19,7 +19,7 @@ #include "GPUCommonAlgorithm.h" #include "TPCClusterDecompressionCore.inc" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; template <> diff --git 
a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h index b45af622ebac8..52ec4c0eef403 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h @@ -30,7 +30,7 @@ struct CompressedClusters { } // namespace o2::tpc #endif -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCDecompressionKernels : public GPUKernelTemplate @@ -70,5 +70,5 @@ class GPUTPCDecompressionUtilKernels : public GPUKernelTemplate GPUdi() static bool isClusterKept(const o2::tpc::ClusterNative& cl, const GPUParam& GPUrestrict() param); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif // GPUTPCDECOMPRESSIONKERNELS_H diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc index 6ec5b6bfdfe6d..43ed260f461a4 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc @@ -27,7 +27,7 @@ using namespace o2::tpc; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class TPCClusterDecompressionCore @@ -185,6 +185,6 @@ class TPCClusterDecompressionCore } } }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx index 4a38c2e0d57e5..22641774cd9ee 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx @@ -22,7 +22,7 @@ #include #include "TPCClusterDecompressionCore.inc" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; int32_t TPCClusterDecompressor::decompress(const CompressedClustersFlat* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, 
const GPUParam& param, bool deterministicRec) diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h index d8e404b8a2ab7..4a40b20e8d4f5 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h @@ -25,7 +25,7 @@ struct ClusterNativeAccess; struct ClusterNative; } // namespace o2::tpc -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUParam; @@ -36,6 +36,6 @@ class TPCClusterDecompressor static int32_t decompress(const o2::tpc::CompressedClustersFlat* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); static int32_t decompress(const o2::tpc::CompressedClusters* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx index 002bb1ed9e9d7..0b3ee65ef7578 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx +++ b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.cxx @@ -20,7 +20,7 @@ #endif #include "CalibdEdxContainer.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h index 5781984b33222..33089ff301d06 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h +++ b/GPU/GPUTracking/DataTypes/CalibdEdxContainer.h @@ -280,9 +280,7 @@ class CalibdEdxContainer : public o2::gpu::FlatObject CalDet processThresholdMap(const CalDet& thresholdMap, const float maxThreshold, const int32_t nPadsInRowCl = 2, const int32_t nPadsInPadCl = 2) const; #endif -#ifndef 
GPUCA_ALIROOT_LIB ClassDefNV(CalibdEdxContainer, 2); -#endif }; } // namespace o2::tpc diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx index 3b0e718026536..1714e61015121 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.cxx @@ -19,7 +19,7 @@ #endif #include "CalibdEdxTrackTopologySpline.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; #if !defined(GPUCA_STANDALONE) diff --git a/GPU/GPUTracking/DataTypes/GPUConfigDump.cxx b/GPU/GPUTracking/DataTypes/GPUConfigDump.cxx index 4a9bb3dc195cc..7ec2df3a2f186 100644 --- a/GPU/GPUTracking/DataTypes/GPUConfigDump.cxx +++ b/GPU/GPUTracking/DataTypes/GPUConfigDump.cxx @@ -22,7 +22,7 @@ #include "utils/qconfig_helpers.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; namespace { diff --git a/GPU/GPUTracking/DataTypes/GPUConfigDump.h b/GPU/GPUTracking/DataTypes/GPUConfigDump.h index 300499f6180b7..b0c0a6d73a109 100644 --- a/GPU/GPUTracking/DataTypes/GPUConfigDump.h +++ b/GPU/GPUTracking/DataTypes/GPUConfigDump.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUSettingsRec; struct GPUSettingsProcessing; @@ -32,6 +32,6 @@ class GPUConfigDump static void dumpConfig(const GPUSettingsRec* rec, const GPUSettingsProcessing* proc, const GPUSettingsQA* qa, const GPUSettingsDisplay* display, const GPUSettingsDeviceBackend* device, const GPURecoStepConfiguration* workflow); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.cxx b/GPU/GPUTracking/DataTypes/GPUDataTypes.cxx index 11680c3de118f..c544ac610cdfa 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.cxx +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.cxx @@ -15,7 +15,7 @@ #include "GPUDataTypes.h" #include -using namespace 
GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; constexpr const char* const GPUDataTypes::DEVICE_TYPE_NAMES[]; constexpr const char* const GPUDataTypes::RECO_STEP_NAMES[]; diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index d252bb39857c2..4c275d6de1bf1 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -91,7 +91,7 @@ class CalibdEdxContainer; } // namespace tpc } // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -100,9 +100,9 @@ class TPCFastTransform; struct TPCPadGainCalib; struct TPCZSLinkMapping; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -326,6 +326,6 @@ struct GPUTrackingInOutPointers { #undef ENUM_CLASS #undef ENUM_UINT } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h b/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h index 01909f5b2ab80..0788b445416b4 100644 --- a/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h @@ -32,7 +32,7 @@ #include "SimulationDataFormat/ConstMCTruthContainer.h" #include "SimulationDataFormat/MCCompLabel.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -56,6 +56,6 @@ struct GPUTPCLinearLabels { }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx index 0d9df9366d863..8b8fbc3ecae20 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx @@ -15,7 +15,7 @@ #include "GPUMemorySizeScalers.h" #include "GPULogging.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory) { diff --git 
a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h index 8b3d19295e989..2cec1775dd239 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h @@ -17,7 +17,7 @@ #include "GPUDef.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUMemorySizeScalers { @@ -84,6 +84,6 @@ struct GPUMemorySizeScalers { inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * factor * temporaryFactor); } }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.cxx b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.cxx index f4061fa12873c..3fa2f909911f0 100644 --- a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.cxx +++ b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.cxx @@ -14,7 +14,7 @@ #include "GPUNewCalibValues.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUNewCalibValues::updateFrom(const GPUNewCalibValues* from) { diff --git a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h index 5d5a31785928c..e16fde9614911 100644 --- a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h +++ b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -34,6 +34,6 @@ struct GPUNewCalibValues { }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h b/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h index 8e6fc4854d900..40222021126fd 100644 --- a/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h +++ b/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h @@ -94,7 +94,7 @@ class ConstMCTruthContainer } // namespace dataformats } // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { 
namespace gpu { @@ -143,6 +143,6 @@ class GPUTPCClusterStatistics }; #endif } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index 58eaf161f865f..cad554f355c8e 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -20,7 +20,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -84,6 +84,6 @@ struct GPUTrackingOutputs { }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index d4cee937b727f..eff95e07fe09d 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -22,7 +22,7 @@ #include #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -80,7 +80,7 @@ struct GPUSettingsDeviceBackend { }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #ifdef GPUCA_GPUCODE_DEVICE #define QCONFIG_GPU diff --git a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.cxx b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.cxx index 475b7888a150d..58a52589404d8 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.cxx @@ -15,7 +15,7 @@ #include "GPUTPCClusterOccupancyMap.h" #include "GPUParam.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUd() uint32_t GPUTPCClusterOccupancyMapBin::getNBins(const GPUParam& param) { diff --git a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h index ac76fd6e32a41..746fb1cf7d19f 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include 
"GPUDefConstantsAndSettings.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUParam; struct GPUTPCClusterOccupancyMapBin { @@ -28,6 +28,6 @@ struct GPUTPCClusterOccupancyMapBin { GPUd() static uint32_t getTotalSize(const GPUParam& param); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index 771385d24e60a..2039638c81b9a 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -17,17 +17,13 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { struct GPUTPCGMMergedTrackHit { uint32_t num; uint8_t slice, row, leg, state; -#ifdef GPUCA_ALIROOT_LIB - float x, y, z; - uint16_t amp; -#endif // NOTE: the lower states must match those from ClusterNative! enum hitState { flagSplitPad = 0x1, @@ -53,6 +49,6 @@ struct GPUTPCGMMergedTrackHitXYZ { }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx index 37d32ed4c1bc5..f26d7c5e0c45d 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.cxx @@ -13,9 +13,9 @@ /// \author Sergey Gorbunov, David Rohr #include "GPUTPCGMPolynomialField.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -#if defined(GPUCA_ALIROOT_LIB) & !defined(GPUCA_GPUCODE) +#if !defined(GPUCA_GPUCODE) #include "GPUCommonConstants.h" #include diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h index 19b72c8a7be90..13940cecc86de 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -namespace 
GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -290,6 +290,6 @@ GPUdi() float GPUTPCGMPolynomialField::GetFieldItsBz(float x, float y, float z) } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index 5d43667f6e92f..68a85e36c97bc 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -26,7 +26,7 @@ #endif #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -145,5 +145,5 @@ class GPUTPCGeometry // TODO: Make values constexpr } }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDDef.h b/GPU/GPUTracking/DataTypes/GPUTRDDef.h index 75eb1f813780e..4340e854cd67d 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDDef.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDDef.h @@ -17,16 +17,6 @@ #include "GPUCommonDef.h" -#ifdef GPUCA_ALIROOT_LIB -#define TRD_TRACK_TYPE_ALIROOT -#else -#define TRD_TRACK_TYPE_O2 -#endif - -#ifdef GPUCA_ALIROOT_LIB -class AliExternalTrackParam; -class AliTrackerBase; -#else namespace o2 { namespace track @@ -40,38 +30,19 @@ template class PropagatorImpl; } // namespace base } // namespace o2 -#endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { -#ifdef GPUCA_ALIROOT_LIB -typedef double My_Float; -#else -typedef float My_Float; -#endif - -#if defined(TRD_TRACK_TYPE_ALIROOT) -typedef AliExternalTrackParam TRDBaseTrack; -class GPUTPCGMTrackParam; -typedef GPUTPCGMTrackParam TRDBaseTrackGPU; -#elif defined(TRD_TRACK_TYPE_O2) typedef o2::track::TrackParametrizationWithError TRDBaseTrack; class GPUTPCGMTrackParam; typedef GPUTPCGMTrackParam TRDBaseTrackGPU; -#endif -#ifdef GPUCA_ALIROOT_LIB -typedef AliTrackerBase TRDBasePropagator; -class GPUTPCGMPropagator; -typedef GPUTPCGMPropagator TRDBasePropagatorGPU; -#else typedef o2::base::PropagatorImpl TRDBasePropagator; class 
GPUTPCGMPropagator; typedef GPUTPCGMPropagator TRDBasePropagatorGPU; -#endif template class trackInterface; @@ -92,6 +63,6 @@ typedef GPUTRDTracker_t GPUTRDTracker; typedef GPUTRDTracker_t GPUTRDTrackerGPU; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTRDDEF_H diff --git a/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h b/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h index 86bf799e1fb17..6b37afbde12d7 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h @@ -17,7 +17,7 @@ // This is the interface for the GPUTRDTrack based on the O2 track type #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -29,7 +29,7 @@ namespace gputpcgmmergertypes struct GPUTPCOuterParam; } // namespace gputpcgmmergertypes } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #include "ReconstructionDataFormats/Track.h" #include "ReconstructionDataFormats/TrackTPCITS.h" @@ -38,7 +38,7 @@ struct GPUTPCOuterParam; #include "ReconstructionDataFormats/TrackLTIntegral.h" #include "CommonConstants/LHCConstants.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -95,6 +95,6 @@ class trackInterface : public o2::track::TrackParCov }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx index ab3bec3434c37..d9b7e2f3e829e 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx @@ -15,21 +15,18 @@ #include "GPUTRDTrack.h" #include "GPUTRDInterfaces.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #include "GPUTRDTrack.inc" #if !defined(GPUCA_GPUCODE) -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { -#ifdef GPUCA_ALIROOT_LIB // Instantiate AliRoot track version -template class GPUTRDTrack_t>; -#endif #if defined(GPUCA_HAVE_O2HEADERS) 
&& !defined(GPUCA_O2_LIB) // Instantiate O2 track version, for O2 this happens in GPUTRDTrackO2.cxx template class GPUTRDTrack_t>; #endif template class GPUTRDTrack_t>; // Always instatiate GM track version } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 21080499f6443..7327eca1bfa56 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -36,11 +36,11 @@ class GlobalTrackID; } // namespace o2 //_____________________________________________________________________________ -#if (!defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB)) || defined(GPUCA_HAVE_O2HEADERS) +#if (!defined(GPUCA_STANDALONE)) || defined(GPUCA_HAVE_O2HEADERS) #include "GPUTRDInterfaceO2Track.h" #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -98,10 +98,6 @@ class GPUTRDTrack_t : public T GPUd() bool getIsFindable(int32_t iLayer) const { return (mFlags >> iLayer) & 0x1; } GPUd() int32_t getNmissingConsecLayers(int32_t iLayer) const; GPUd() int32_t getIsPenaltyAdded(int32_t iLayer) const { return getIsFindable(iLayer) && getTrackletIndex(iLayer) < 0; } - // for AliRoot compatibility. 
To be removed once HLT/global/AliHLTGlobalEsdConverterComponent.cxx does not require them anymore - GPUd() int32_t GetTPCtrackId() const { return mRefGlobalTrackId; } - GPUd() bool GetIsStopped() const { return getIsStopped(); } - GPUd() int32_t GetNtracklets() const { return getNtracklets(); } // setters GPUd() void setRefGlobalTrackIdRaw(uint32_t id) { mRefGlobalTrackId = id; } @@ -117,10 +113,6 @@ class GPUTRDTrack_t : public T GPUd() void setHasNeighbor() { mIsCrossingNeighbor |= (1U << 6); } GPUd() void setHasPadrowCrossing() { mIsCrossingNeighbor |= (1U << 7); } - // conversion to / from HLT track structure (only for AliRoot) - GPUd() void ConvertTo(GPUTRDTrackDataRecord& t) const; - GPUd() void ConvertFrom(const GPUTRDTrackDataRecord& t); - protected: float mChi2; // total chi2. float mSignal{-1.f}; // electron Likelihood for track @@ -132,12 +124,12 @@ class GPUTRDTrack_t : public T private: GPUd() void initialize(); -#if !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_STANDALONE) ClassDefNV(GPUTRDTrack_t, 4); #endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTRDTRACK_H diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc b/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc index 44617680580f2..48fd3fc42d22f 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc @@ -27,7 +27,7 @@ GPUd() GPUTRDTrack_t::GPUTRDTrack_t() template GPUd() void GPUTRDTrack_t::initialize() { - // set all members to their default values (needed since in-class initialization not possible with AliRoot) + // TODO: Use in-class initialization instead of setting all members to their default values mChi2 = 0.f; mSignal = -1.f; mRefGlobalTrackId = 0; @@ -39,58 +39,6 @@ GPUd() void GPUTRDTrack_t::initialize() } } -#ifdef GPUCA_ALIROOT_LIB -#include "AliHLTExternalTrackParam.h" -#include "GPUTRDTrackData.h" - -template -GPUd() GPUTRDTrack_t::GPUTRDTrack_t(const 
AliHLTExternalTrackParam& t) : T(t) -{ - initialize(); -} - -template -GPUd() void GPUTRDTrack_t::ConvertTo(GPUTRDTrackDataRecord& t) const -{ - //------------------------------------------------------------------ - // convert to GPU structure - //------------------------------------------------------------------ - t.mAlpha = T::getAlpha(); - t.fX = T::getX(); - t.fY = T::getY(); - t.fZ = T::getZ(); - t.fq1Pt = T::getQ2Pt(); - t.mSinPhi = T::getSnp(); - t.fTgl = T::getTgl(); - for (int32_t i = 0; i < 15; i++) { - t.fC[i] = T::getCov()[i]; - } - t.fTPCTrackID = getRefGlobalTrackIdRaw(); - for (int32_t i = 0; i < kNLayers; i++) { - t.fAttachedTracklets[i] = getTrackletIndex(i); - } -} - -template -GPUd() void GPUTRDTrack_t::ConvertFrom(const GPUTRDTrackDataRecord& t) -{ - //------------------------------------------------------------------ - // convert from GPU structure - //------------------------------------------------------------------ - T::set(t.fX, t.mAlpha, &(t.fY), t.fC); - setRefGlobalTrackIdRaw(t.fTPCTrackID); - mChi2 = 0.f; - mSignal = -1.f; - mFlags = 0; - mIsCrossingNeighbor = 0; - mCollisionId = -1; - for (int32_t iLayer = 0; iLayer < kNLayers; iLayer++) { - mAttachedTracklets[iLayer] = t.fAttachedTracklets[iLayer]; - } -} - -#endif - #if defined(GPUCA_HAVE_O2HEADERS) #include "ReconstructionDataFormats/TrackTPCITS.h" #include "DataFormatsTPC/TrackTPC.h" diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrackO2.cxx b/GPU/GPUTracking/DataTypes/GPUTRDTrackO2.cxx index d2404f9d3b74b..8364093aa5c4a 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrackO2.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrackO2.cxx @@ -13,7 +13,7 @@ /// \author David Rohr #include "GPUTRDTrackO2.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #include "GPUTRDTrack.inc" #include "ReconstructionDataFormats/GlobalTrackID.h" diff --git a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h index 76de1116999ff..7c610403ad62e 100644 
--- a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h +++ b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h @@ -22,7 +22,7 @@ #include "DataFormatsTPC/ZeroSuppression.h" #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -56,6 +56,6 @@ struct GPUTriggerOutputs { }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h index c5635b7c5d488..b4af969de46b3 100644 --- a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h +++ b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h @@ -19,7 +19,7 @@ #include "DataFormatsTPC/dEdxInfo.h" #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -30,6 +30,6 @@ struct GPUdEdxInfo { }; #endif } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx b/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx index cb32cad984664..40ce8c0ccda81 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx +++ b/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx @@ -17,7 +17,7 @@ #include "GPUTPCGeometry.h" #include "DataFormatsTPC/Constants.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; TPCPadBitMap::TPCPadBitMap() { diff --git a/GPU/GPUTracking/DataTypes/TPCPadBitMap.h b/GPU/GPUTracking/DataTypes/TPCPadBitMap.h index 591ca927e57c5..5a4beda82889e 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadBitMap.h +++ b/GPU/GPUTracking/DataTypes/TPCPadBitMap.h @@ -24,7 +24,7 @@ template class CalDet; } // namespace o2::tpc -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct TPCPadBitMap { @@ -105,6 +105,6 @@ struct TPCPadBitMap { SectorBitMap mBitMap[GPUCA_NSLICES]; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx index 198f6713a44a1..41f0ad819d1b6 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx +++ 
b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx @@ -17,7 +17,7 @@ #include "GPUTPCGeometry.h" #include "DataFormatsTPC/Constants.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; TPCPadGainCalib::TPCPadGainCalib() { diff --git a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h index e7908a4b87efc..35aa353a384e6 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h +++ b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h @@ -24,7 +24,7 @@ template class CalDet; } // namespace o2::tpc -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { template @@ -157,6 +157,6 @@ struct TPCPadGainCalib { SectorPadGainCorrection mGainCorrection[GPUCA_NSLICES]; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.cxx b/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.cxx index 73d8a06a43ff9..60f960d1b25f0 100644 --- a/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.cxx +++ b/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.cxx @@ -18,7 +18,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; TPCZSLinkMapping::TPCZSLinkMapping(o2::tpc::Mapper& mapper) { diff --git a/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.h b/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.h index 8d2e9847c48a2..9f7862cef726d 100644 --- a/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.h +++ b/GPU/GPUTracking/DataTypes/TPCZSLinkMapping.h @@ -25,7 +25,7 @@ namespace o2::tpc class Mapper; } -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct TPCZSLinkMapping { @@ -38,6 +38,6 @@ struct TPCZSLinkMapping { #endif }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Debug/GPUROOTDump.h b/GPU/GPUTracking/Debug/GPUROOTDump.h index 5734d8b5b3338..407076c98b4be 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDump.h +++ b/GPU/GPUTracking/Debug/GPUROOTDump.h @@ -26,7 +26,7 @@ class TNtuple; #endif -namespace GPUCA_NAMESPACE +namespace o2 { 
namespace gpu { @@ -171,6 +171,6 @@ class GPUROOTDump }; #endif } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx b/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx index a0b560f119373..846c85e8e1cb3 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx +++ b/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx @@ -19,7 +19,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; std::weak_ptr GPUROOTDumpCore::sInstance; diff --git a/GPU/GPUTracking/Debug/GPUROOTDumpCore.h b/GPU/GPUTracking/Debug/GPUROOTDumpCore.h index 73a76ddba1706..f3f7890ebfa5b 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDumpCore.h +++ b/GPU/GPUTracking/Debug/GPUROOTDumpCore.h @@ -21,7 +21,7 @@ class TFile; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -62,6 +62,6 @@ class GPUROOTDumpCore #endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 1c8134f11efda..a5fc85eb80713 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -22,12 +22,12 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) && !defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE) - #error You are using the CA GPU tracking without defining the build type (O2/AliRoot/Standalone). If you are running an O2 ROOT macro, please include GPUO2Interface.h first! +#if !defined(GPUCA_STANDALONE) && !defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE) + #error You are using the CA GPU tracking without defining the build type (O2/Standalone). If you are running an O2 ROOT macro, please include GPUO2Interface.h first! 
#endif -#if (defined(GPUCA_ALIROOT_LIB) && defined(GPUCA_O2_LIB)) || (defined(GPUCA_ALIROOT_LIB) && defined(GPUCA_STANDALONE)) || (defined(GPUCA_O2_LIB) && defined(GPUCA_STANDALONE)) - #error Invalid Compile Definitions, need to build for either AliRoot or O2 or Standalone! +#if (defined(GPUCA_O2_LIB) && defined(GPUCA_STANDALONE)) + #error Invalid Compile Definitions, need to build for either O2 or Standalone! #endif #define GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(QPTB5) (CAMath::Abs(QPTB5) > 10 ? 10 : (CAMath::Abs(QPTB5) > 5 ? 15 : 29)) // Minimum hits should depend on Pt, low Pt tracks can have few hits. 29 Hits default, 15 for < 200 mev, 10 for < 100 mev diff --git a/GPU/GPUTracking/Definitions/GPULogging.h b/GPU/GPUTracking/Definitions/GPULogging.h index e33c9463a2f48..693512b15c3c2 100644 --- a/GPU/GPUTracking/Definitions/GPULogging.h +++ b/GPU/GPUTracking/Definitions/GPULogging.h @@ -44,9 +44,8 @@ fmt::fprintf(stderr, string "\n", ##__VA_ARGS__); \ throw std::exception(); \ } -#elif defined(GPUCA_STANDALONE) || defined(GPUCA_GPUCODE_DEVICE) || (defined(GPUCA_ALIROOT_LIB) && defined(GPUCA_GPUCODE)) +#elif defined(GPUCA_STANDALONE) || defined(GPUCA_GPUCODE_DEVICE) // For standalone / CUDA / HIP, we just use printf, which should be available - // Temporarily, we also have to handle CUDA on AliRoot with O2 defaults due to ROOT / CUDA incompatibilities #include #define GPUInfo(string, ...) \ { \ @@ -71,38 +70,6 @@ exit(1); \ } #endif -#elif defined(GPUCA_ALIROOT_LIB) - // Forward to HLT Logging functions for AliRoot - #include "AliHLTLogging.h" - #define GPUInfo(...) HLTInfo(__VA_ARGS__) - #define GPUImportant(...) HLTImportant(__VA_ARGS__) - #define GPUWarning(...) HLTWarning(__VA_ARGS__) - #define GPUAlarm(...) HLTWarning(__VA_ARGS__) - #define GPUError(...) HLTError(__VA_ARGS__) - #define GPUFatal(...) 
HLTFatal(__VA_ARGS__) - // Workaround for static functions / classes not deriving from AliHLTLogging - namespace AliGPU - { - namespace gpu - { - // We pollute the AliGPU::gpu namespace with some anonymous functions that catch the HLT...() magic - namespace - { - AliHLTLogging gAliGPULog; // This creates a couple of bogus instances, but there are plenty anyway - template - void LoggingVarargs(Args... args) - { - gAliGPULog.LoggingVarargs(args...); - } - template - bool CheckFilter(Args... args) - { - return gAliGPULog.CheckFilter(args...); - } - const char* Class_Name() { return "GPU"; }; - } // namespace - } // namespace gpu - } // namespace AliGPU #elif defined(GPUCA_O2_LIB) || defined(GPUCA_O2_INTERFACE) // Forward to O2 LOGF logginf for O2 #include "GPUCommonLogger.h" diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b7881bd61978c..76370c17f9f53 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -27,10 +27,10 @@ // clang-format off #ifdef QCONFIG_INSTANCE -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #endif #ifdef BeginNamespace // File should not be included without defining the macros, but rootcling will do for dictionary generation -BeginNamespace(GPUCA_NAMESPACE) +BeginNamespace(o2) BeginNamespace(gpu) // Settings concerning the reconstruction, stored as parameters in GPU constant memory @@ -136,10 +136,10 @@ AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fractio AddOptionRTC(dEdxTruncHigh, uint8_t, 77, "", 0, "High truncation threshold, fraction of 128") AddOptionRTC(globalTracking, int8_t, 1, "", 0, "Enable Global Tracking (prolong tracks to adjacent sectors to find short segments)") AddOptionRTC(disableRefitAttachment, uint8_t, 0, "", 0, "Bitmask to disable certain attachment steps during refit (1: attachment, 2: propagation, 4: loop following, 8: mirroring)") 
-AddOptionRTC(rejectionStrategy, uint8_t, GPUCA_NAMESPACE::gpu::GPUSettings::RejectionStrategyA, "", 0, "Enable rejection of TPC clusters for compression (0 = no, 1 = strategy A, 2 = strategy B)") +AddOptionRTC(rejectionStrategy, uint8_t, o2::gpu::GPUSettings::RejectionStrategyA, "", 0, "Enable rejection of TPC clusters for compression (0 = no, 1 = strategy A, 2 = strategy B)") AddOptionRTC(mergeLoopersAfterburner, uint8_t, 1, "", 0, "Run afterburner for additional looper merging") -AddOptionRTC(compressionTypeMask, uint8_t, GPUCA_NAMESPACE::gpu::GPUSettings::CompressionFull, "", 0, "TPC Compression mode bits (1=truncate charge/width LSB, 2=differences, 4=track-model)") -AddOptionRTC(compressionSortOrder, uint8_t, GPUCA_NAMESPACE::gpu::GPUSettings::SortTime, "", 0, "Sort order of TPC compression (0 = time, 1 = pad, 2 = Z-time-pad, 3 = Z-pad-time, 4 = no sorting (use incoming order))") +AddOptionRTC(compressionTypeMask, uint8_t, o2::gpu::GPUSettings::CompressionFull, "", 0, "TPC Compression mode bits (1=truncate charge/width LSB, 2=differences, 4=track-model)") +AddOptionRTC(compressionSortOrder, uint8_t, o2::gpu::GPUSettings::SortTime, "", 0, "Sort order of TPC compression (0 = time, 1 = pad, 2 = Z-time-pad, 3 = Z-pad-time, 4 = no sorting (use incoming order))") AddOptionRTC(sigBitsCharge, uint8_t, 4, "", 0, "Number of significant bits for TPC cluster charge in compression mode 1") AddOptionRTC(sigBitsWidth, uint8_t, 3, "", 0, "Number of significant bits for TPC cluster width in compression mode 1") AddOptionRTC(forceEarlyTransform, int8_t, -1, "", 0, "Force early TPC transformation also for continuous data (-1 = auto)") @@ -299,7 +299,7 @@ AddOption(RTCprependCommand, std::string, "", "", 0, "Prepend RTC compilation co AddOption(RTCoverrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included 
spirv code") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") -AddVariable(eventDisplay, GPUCA_NAMESPACE::gpu::GPUDisplayFrontendInterface*, nullptr) +AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingParam, param) AddHelp("help", 'h') @@ -588,7 +588,7 @@ AddVariableRTC(debugLevel, int8_t, 0) // Debug level EndConfig() EndNamespace() // gpu -EndNamespace() // GPUCA_NAMESPACE +EndNamespace() // o2 #endif // #ifdef BeginNamespace // clang-format on diff --git a/GPU/GPUTracking/Definitions/clusterFinderDefs.h b/GPU/GPUTracking/Definitions/clusterFinderDefs.h index a681a176f9b5c..b36a94fc2bd54 100644 --- a/GPU/GPUTracking/Definitions/clusterFinderDefs.h +++ b/GPU/GPUTracking/Definitions/clusterFinderDefs.h @@ -54,7 +54,7 @@ #define CPU_PTR(x) x #endif -namespace GPUCA_NAMESPACE::gpu::tpccf +namespace o2::gpu::tpccf { using SizeT = size_t; @@ -72,6 +72,6 @@ using Delta2 = short2; using local_id = short2; -} // namespace GPUCA_NAMESPACE::gpu::tpccf +} // namespace o2::gpu::tpccf #endif diff --git a/GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h b/GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h deleted file mode 100644 index 8974b4bd89460..0000000000000 --- a/GPU/GPUTracking/GPUTrackingLinkDef_AliRoot.h +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTrackingLinkDef_AliRoot.h -/// \author David Rohr - -#if defined(__CLING__) - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#pragma link C++ class AliGPU::gpu::GPUTPCTrack + ; -#pragma link C++ class AliGPU::gpu::GPUTPCTracklet + ; -#pragma link C++ class AliGPU::gpu::GPUTPCBaseTrackParam + ; -#pragma link C++ class AliGPU::gpu::GPUTPCTrackParam + ; -#pragma link C++ class AliGPU::gpu::GPUTPCRow + ; -#pragma link C++ class AliGPU::gpu::GPUTPCGrid + ; -#pragma link C++ class GPUTPCTrackerComponent + ; -#pragma link C++ class AliGPU::gpu::GPUTPCNeighboursFinder + ; -#pragma link C++ class AliGPU::gpu::GPUTPCNeighboursCleaner + ; -#pragma link C++ class AliGPU::gpu::GPUTPCStartHitsFinder + ; -#pragma link C++ class AliGPU::gpu::GPUTPCTrackletConstructor + ; -#pragma link C++ class AliGPU::gpu::GPUTPCTrackletSelector + ; -#pragma link C++ class GPUTPCGlobalMergerComponent + ; -#pragma link C++ class AliGPU::gpu::GPUTPCSliceOutput + ; -#pragma link C++ class AliGPU::gpu::GPUTPCGMTrackParam + ; -#pragma link C++ class AliGPU::gpu::GPUTPCGMSliceTrack + ; -#pragma link C++ class AliGPU::gpu::GPUTPCGMPolynomialField + ; -#pragma link C++ class AliGPU::gpu::GPUTPCGMPropagator + ; -#pragma link C++ class AliGPU::gpu::GPUTPCGMPhysicalTrackModel + ; -#pragma link C++ class GPUTPCGMPolynomialFieldManager + ; -#pragma link C++ class AliHLTTPCClusterStatComponent + ; - -//#pragma link C++ class AliGPU::gpu::GPUTRDTrack+; //Templated, should add linkdef for specialization, but with an ifdef for ROOT >= 6 only -//#pragma link C++ class AliGPU::gpu::GPUTRDTracker+; -#pragma link C++ class GPUTRDTrackerComponent + ; -//#pragma link C++ class AliGPU::gpu::GPUTRDTrackletWord+; -#pragma link C++ class GPUTRDTrackletReaderComponent + ; - -#endif diff --git a/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.cxx b/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.cxx deleted file mode 100644 index e9b4bb758532b..0000000000000 --- 
a/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.cxx +++ /dev/null @@ -1,492 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file AliHLTGPUDumpComponent.cxx -/// \author David Rohr - -#include "AliHLTGPUDumpComponent.h" - -#include "AliGeomManager.h" -#include "GPUReconstruction.h" -#include "GPUChainTracking.h" -#include "AliHLTTPCDefinitions.h" -#include "GPUTPCMCInfo.h" -#include "GPUTPCGMMergedTrackHit.h" -#include "AliHLTTPCClusterXYZ.h" -#include "AliHLTTPCClusterMCData.h" -#include "GPUTPCClusterData.h" -#include "AliHLTTPCRawCluster.h" -#include "AliHLTTPCGeometry.h" -#include "AliRunLoader.h" -#include "AliHeader.h" -#include "AliStack.h" -#include "AliExternalTrackParam.h" -#include "AliTrackReference.h" -#include "AliHLTTRDDefinitions.h" -#include "GPUTRDTrackletWord.h" -#include "GPUTRDTrackletLabels.h" -#include "TPCFastTransform.h" -#include "CorrectionMapsHelper.h" -#include "TPCFastTransformManager.h" -#include "AliRecoParam.h" -#include "AliTPCTransform.h" -#include "AliTPCcalibDB.h" -#include "AliCDBManager.h" -#include "AliGRPObject.h" -#include "AliCDBEntry.h" -#include "AliRunInfo.h" -#include "AliEventInfo.h" -#include "AliRawEventHeaderBase.h" -#include "AliTPCRecoParam.h" -#include -#include - -#include "TTree.h" -#include "TParticle.h" -#include "TParticlePDG.h" -#include "TPDGCode.h" - -using namespace GPUCA_NAMESPACE::gpu; - -AliHLTGPUDumpComponent::AliHLTGPUDumpComponent() : fSolenoidBz(0.f), fRec(nullptr), fChain(nullptr), 
fFastTransformManager(new TPCFastTransformManager), fCalib(nullptr), fRecParam(nullptr), fOfflineRecoParam(), fOrigTransform(nullptr), fIsMC(false), fInitTimestamp(0.) -{ - fRec = GPUReconstruction::CreateInstance(); - fChain = fRec->AddChain(); -} - -AliHLTGPUDumpComponent::~AliHLTGPUDumpComponent() -{ - delete fRec; - delete fFastTransformManager; -} - -const char* AliHLTGPUDumpComponent::GetComponentID() { return "GPUDump"; } - -void AliHLTGPUDumpComponent::GetInputDataTypes(vector& list) -{ - list.clear(); - list.push_back(AliHLTTPCDefinitions::RawClustersDataType()); - list.push_back(AliHLTTPCDefinitions::ClustersXYZDataType()); - list.push_back(AliHLTTPCDefinitions::AliHLTDataTypeClusterMCInfo()); - list.push_back(AliHLTTRDDefinitions::fgkTRDTrackletDataType); - list.push_back(AliHLTTRDDefinitions::fgkTRDMCTrackletDataType); -} - -AliHLTComponentDataType AliHLTGPUDumpComponent::GetOutputDataType() { return AliHLTTPCDefinitions::RawClustersDataType(); } - -void AliHLTGPUDumpComponent::GetOutputDataSize(uint64_t& constBase, double& inputMultiplier) -{ - constBase = 10000; // minimum size - inputMultiplier = 0.6; // size relative to input -} - -AliHLTComponent* AliHLTGPUDumpComponent::Spawn() { return new AliHLTGPUDumpComponent; } - -int32_t AliHLTGPUDumpComponent::DoInit(int argc, const char** argv) -{ - fSolenoidBz = GetBz(); - fIsMC = TVirtualMC::GetMC(); - - if (!AliGeomManager::GetGeometry()) { - AliGeomManager::LoadGeometry(); - } - if (!AliGeomManager::GetGeometry()) { - HLTFatal("Can not initialise geometry"); - } - - fCalib = AliTPCcalibDB::Instance(); - if (!fCalib) { - HLTFatal("Calibration not found"); - } - fCalib->SetRun(GetRunNo()); - fCalib->UpdateRunInformations(GetRunNo()); - - const AliMagF* field = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - fCalib->SetExBField(field); - - if (!fCalib->GetTransform()) { - HLTFatal("No TPC transformation found"); - } - - AliGRPObject* pGRP = 0; - AliCDBEntry* entry = 
AliCDBManager::Instance()->Get("GRP/GRP/Data"); - if (!entry) { - HLTFatal("No GRP object found in data base"); - } - pGRP = dynamic_cast(entry->GetObject()); - - if (!pGRP) { - HLTFatal("Unknown format of the GRP object in data base"); - } - - AliRunInfo runInfo(pGRP->GetLHCState(), pGRP->GetBeamType(), pGRP->GetBeamEnergy(), pGRP->GetRunType(), pGRP->GetDetectorMask()); - AliEventInfo evInfo; - evInfo.SetEventType(AliRawEventHeaderBase::kPhysicsEvent); - - entry = AliCDBManager::Instance()->Get("TPC/Calib/RecoParam"); - if (!entry) { - HLTFatal("No TPC reco param entry found in data base"); - } - TObject* recoParamObj = entry->GetObject(); - if (dynamic_cast(recoParamObj)) { - TObjArray* copy = (TObjArray*)(static_cast(recoParamObj)->Clone()); - fOfflineRecoParam.AddDetRecoParamArray(1, copy); - } else if (dynamic_cast(recoParamObj)) { - AliDetectorRecoParam* copy = (AliDetectorRecoParam*)static_cast(recoParamObj)->Clone(); - fOfflineRecoParam.AddDetRecoParam(1, copy); - } else { - HLTFatal("Unknown format of the TPC Reco Param entry in the data base"); - } - - fOfflineRecoParam.SetEventSpecie(&runInfo, evInfo, 0); - fRecParam = const_cast(reinterpret_cast(fOfflineRecoParam.GetDetRecoParam(1))); - if (!fRecParam) { - HLTFatal("No TPC Reco Param entry found for the given event specification"); - } - fCalib->GetTransform()->SetCurrentRecoParam(fRecParam); - fInitTimestamp = GetTimeStamp(); - - return 0; -} - -int32_t AliHLTGPUDumpComponent::DoDeinit() { return 0; } - -int32_t AliHLTGPUDumpComponent::Reconfigure(const char* cdbEntry, const char* chainId) { return 0; } - -int32_t AliHLTGPUDumpComponent::DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& /*trigData*/, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, vector& outputBlocks) -{ - if (GetFirstInputBlock(kAliHLTDataTypeSOR) || GetFirstInputBlock(kAliHLTDataTypeEOR)) { - return 0; - } - - if (evtData.fBlockCnt <= 0) { - HLTWarning("no blocks in 
event"); - return 0; - } - - // Prepare everything for all slices - const AliHLTTPCClusterMCData* clusterLabels[NSLICES][NPATCHES] = {nullptr}; - const AliHLTTPCClusterXYZData* clustersXYZ[NSLICES][NPATCHES] = {nullptr}; - const AliHLTTPCRawClusterData* clustersRaw[NSLICES][NPATCHES] = {nullptr}; - bool labelsPresent = false; - const GPUTRDTrackletWord* TRDtracklets = nullptr; - int32_t nTRDTrackletsTotal = 0; - - for (uint64_t ndx = 0; ndx < evtData.fBlockCnt; ndx++) { - const AliHLTComponentBlockData& pBlock = blocks[ndx]; - int32_t slice = AliHLTTPCDefinitions::GetMinSliceNr(pBlock); - int32_t patch = AliHLTTPCDefinitions::GetMinPatchNr(pBlock); - if (pBlock.fDataType == AliHLTTPCDefinitions::RawClustersDataType()) { - clustersRaw[slice][patch] = (const AliHLTTPCRawClusterData*)pBlock.fPtr; - } else if (pBlock.fDataType == AliHLTTPCDefinitions::ClustersXYZDataType()) { - clustersXYZ[slice][patch] = (const AliHLTTPCClusterXYZData*)pBlock.fPtr; - } else if (pBlock.fDataType == AliHLTTPCDefinitions::AliHLTDataTypeClusterMCInfo()) { - clusterLabels[slice][patch] = (const AliHLTTPCClusterMCData*)pBlock.fPtr; - labelsPresent = true; - } else if (pBlock.fDataType == AliHLTTRDDefinitions::fgkTRDTrackletDataType) { - TRDtracklets = reinterpret_cast(pBlock.fPtr); - nTRDTrackletsTotal = pBlock.fSize / sizeof(GPUTRDTrackletWord); - } - } - - std::vector rawClusters[NSLICES]; - std::vector clusterData[NSLICES]; - - int32_t nClustersTotal = 0; - for (int32_t slice = 0; slice < NSLICES; slice++) { - int32_t nClustersSliceTotal = 0; - clusterData[slice].clear(); - rawClusters[slice].clear(); - for (int32_t patch = 0; patch < 6; patch++) { - if (clustersXYZ[slice][patch]) { - nClustersSliceTotal += clustersXYZ[slice][patch]->fCount; - } - } - GPUTPCClusterData cluster; - for (int32_t patch = 0; patch < 6; patch++) { - if (clustersXYZ[slice][patch] != nullptr && clustersRaw[slice][patch] != nullptr) { - const AliHLTTPCClusterXYZData& clXYZ = *clustersXYZ[slice][patch]; - const 
AliHLTTPCRawClusterData& clRaw = *clustersRaw[slice][patch]; - - if (clXYZ.fCount != clRaw.fCount) { - HLTError("Number of entries in raw and xyz clusters are not mached %d vs %d", clXYZ.fCount, clRaw.fCount); - continue; - } - - const int32_t firstRow = AliHLTTPCGeometry::GetFirstRow(patch); - for (int32_t ic = 0; ic < clXYZ.fCount; ic++) { - const AliHLTTPCClusterXYZ& c = clXYZ.fClusters[ic]; - const AliHLTTPCRawCluster& cRaw = clRaw.fClusters[ic]; - if (fabsf(c.GetZ()) > 300) { - continue; - } - if (c.GetX() < 1.f) { - continue; // cluster xyz position was not calculated for whatever reason - } - cluster.id = AliHLTTPCGeometry::CreateClusterID(slice, patch, ic); - cluster.x = c.GetX(); - cluster.y = c.GetY(); - cluster.z = c.GetZ(); - cluster.row = firstRow + cRaw.GetPadRow(); - cluster.flags = cRaw.GetFlags(); - if (cRaw.GetSigmaPad2() < kAlmost0 || cRaw.GetSigmaTime2() < kAlmost0) { - cluster.flags |= GPUTPCGMMergedTrackHit::flagSingle; - } - cluster.amp = cRaw.GetCharge(); -#ifdef GPUCA_FULL_CLUSTERDATA - cluster.pad = cRaw.GetPad(); - cluster.time = cRaw.GetTime(); - cluster.ampMax = cRaw.GetQMax(); - cluster.sigmaPad2 = cRaw.GetSigmaPad2(); - cluster.sigmaTime2 = cRaw.GetSigmaTime2(); -#endif - AliHLTTPCRawCluster tmp = cRaw; - tmp.fPadRow += firstRow; - if ((uint32_t)cluster.amp >= 25 * 1024) { - GPUError("Invalid cluster charge, truncating (%d >= %d)", (int32_t)cluster.amp, 25 * 1024); - cluster.amp = 25 * 1024 - 1; - } - if ((uint32_t)tmp.GetCharge() >= 25 * 1024) { - GPUError("Invalid raw cluster charge, truncating (%d >= %d)", (int32_t)tmp.GetCharge(), 25 * 1024); - tmp.SetCharge(25 * 1024 - 1); - } - if ((uint32_t)tmp.GetQMax() >= 1024) { - GPUError("Invalid raw cluster charge max, truncating (%d >= %d)", (int32_t)tmp.GetQMax(), 1024); - tmp.SetQMax(1024 - 1); - } - clusterData[slice].emplace_back(cluster); - rawClusters[slice].emplace_back(tmp); - - nClustersTotal++; - } - } - } - HLTDebug("Read %d->%d hits for slice %d", nClustersSliceTotal, 
(int32_t)clusterData[slice].size(), slice); - } - - if (nClustersTotal < 100) { - return (0); - } - fChain->ClearIOPointers(); - - for (int32_t i = 0; i < NSLICES; i++) { - fChain->mIOPtrs.nClusterData[i] = clusterData[i].size(); - fChain->mIOPtrs.clusterData[i] = clusterData[i].data(); - fChain->mIOPtrs.nRawClusters[i] = rawClusters[i].size(); - fChain->mIOPtrs.rawClusters[i] = rawClusters[i].data(); - HLTDebug("Slice %d - Clusters %d", i, (int32_t)clusterData[i].size()); - } - - std::vector labels; - std::vector mcInfo; - - if (labelsPresent) { - // Write cluster labels - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - GPUTPCClusterData* pCluster = clusterData[iSlice].data(); - for (uint32_t iPatch = 0; iPatch < NPATCHES; iPatch++) { - if (clusterLabels[iSlice][iPatch] == nullptr || clustersXYZ[iSlice][iPatch] == nullptr || clusterLabels[iSlice][iPatch]->fCount != clustersXYZ[iSlice][iPatch]->fCount) { - continue; - } - const AliHLTTPCClusterXYZData& clXYZ = *clustersXYZ[iSlice][iPatch]; - for (int32_t ic = 0; ic < clXYZ.fCount; ic++) { - if (pCluster->id != AliHLTTPCGeometry::CreateClusterID(iSlice, iPatch, ic)) { - continue; - } - pCluster->id = labels.size(); - labels.push_back(clusterLabels[iSlice][iPatch]->fLabels[ic]); - pCluster++; - } - } - } - - if (labels.size() != nClustersTotal) { - HLTFatal("Error getting cluster MC labels (%d labels, %d clusters)", (int32_t)labels.size(), nClustersTotal); - return (-EINVAL); - } - - fChain->mIOPtrs.nMCLabelsTPC = labels.size(); - fChain->mIOPtrs.mcLabelsTPC = labels.data(); - HLTDebug("Number of mc labels %d", (int32_t)labels.size()); - - // Write MC tracks - bool OK = false; - do { - AliRunLoader* rl = AliRunLoader::Instance(); - if (rl == nullptr) { - HLTFatal("error: RL"); - break; - } - - rl->LoadKinematics(); - rl->LoadTrackRefs(); - - int32_t nTracks = rl->GetHeader()->GetNtrack(); - mcInfo.resize(nTracks); - - AliStack* stack = rl->Stack(); - if (stack == nullptr) { - HLTFatal("error: stack"); - 
break; - } - TTree* TR = rl->TreeTR(); - if (TR == nullptr) { - HLTFatal("error: TR"); - break; - } - TBranch* branch = TR->GetBranch("TrackReferences"); - if (branch == nullptr) { - HLTFatal("error: branch"); - break; - } - - int32_t nPrimaries = stack->GetNprimary(); - - std::vector trackRefs(nTracks, nullptr); - TClonesArray* tpcRefs = nullptr; - branch->SetAddress(&tpcRefs); - int32_t nr = TR->GetEntries(); - for (int32_t r = 0; r < nr; r++) { - TR->GetEvent(r); - for (int32_t i = 0; i < tpcRefs->GetEntriesFast(); i++) { - AliTrackReference* tpcRef = (AliTrackReference*)tpcRefs->UncheckedAt(i); - if (tpcRef->DetectorId() != AliTrackReference::kTPC) { - continue; - } - if (tpcRef->Label() < 0 || tpcRef->Label() >= nTracks) { - HLTFatal("Invalid reference %d / %d", tpcRef->Label(), nTracks); - continue; - } - if (trackRefs[tpcRef->Label()] != nullptr) { - continue; - } - trackRefs[tpcRef->Label()] = new AliTrackReference(*tpcRef); - } - } - - memset(mcInfo.data(), 0, nTracks * sizeof(mcInfo[0])); - - for (int32_t i = 0; i < nTracks; i++) { - mcInfo[i].pid = -100; - TParticle* particle = (TParticle*)stack->Particle(i); - if (particle == nullptr) { - continue; - } - if (particle->GetPDG() == nullptr) { - continue; - } - - int32_t charge = (int32_t)particle->GetPDG()->Charge(); - int32_t prim = stack->IsPhysicalPrimary(i); - int32_t hasPrimDaughter = particle->GetFirstDaughter() != -1 && particle->GetFirstDaughter() < nPrimaries; - - mcInfo[i].charge = charge; - mcInfo[i].prim = prim; - mcInfo[i].primDaughters = hasPrimDaughter; - mcInfo[i].genRadius = sqrt(particle->Vx() * particle->Vx() + particle->Vy() * particle->Vy() + particle->Vz() * particle->Vz()); - - Int_t pid = -1; - if (TMath::Abs(particle->GetPdgCode()) == kElectron) { - pid = 0; - } - if (TMath::Abs(particle->GetPdgCode()) == kMuonMinus) { - pid = 1; - } - if (TMath::Abs(particle->GetPdgCode()) == kPiPlus) { - pid = 2; - } - if (TMath::Abs(particle->GetPdgCode()) == kKPlus) { - pid = 3; - } - if 
(TMath::Abs(particle->GetPdgCode()) == kProton) { - pid = 4; - } - mcInfo[i].pid = pid; - - AliTrackReference* ref = trackRefs[i]; - if (ref) { - mcInfo[i].x = ref->X(); - mcInfo[i].y = ref->Y(); - mcInfo[i].z = ref->Z(); - mcInfo[i].pX = ref->Px(); - mcInfo[i].pY = ref->Py(); - mcInfo[i].pZ = ref->Pz(); - } - - // if (ref) HLTImportant("Particle %d: Charge %d, Prim %d, PrimDaughter %d, Pt %f %f ref %p\n", i, charge, prim, hasPrimDaughter, ref->Pt(), particle->Pt(), ref); - } - for (int32_t i = 0; i < nTracks; i++) { - delete trackRefs[i]; - } - - OK = true; - } while (false); - - if (!OK) { - HLTFatal("Error accessing MC data"); - return (-EINVAL); - } - - fChain->mIOPtrs.nMCInfosTPC = mcInfo.size(); - fChain->mIOPtrs.mcInfosTPC = mcInfo.data(); - static const GPUTPCMCInfoCol mcColInfo = {0, (uint32_t)mcInfo.size()}; - fChain->mIOPtrs.mcInfosTPCCol = &mcColInfo; - fChain->mIOPtrs.nMCInfosTPCCol = 1; - HLTDebug("Number of MC infos: %d", (int32_t)mcInfo.size()); - } - uint32_t clusterNum = 0; - for (uint32_t slice = 0; slice < NSLICES; slice++) { - for (int32_t k = 0; k < fChain->mIOPtrs.nClusterData[slice]; k++) { - clusterData[slice][k].id = clusterNum++; - } - } - - fChain->mIOPtrs.nTRDTracklets = nTRDTrackletsTotal; - std::vector tracklets(nTRDTrackletsTotal); - for (int32_t i = 0; i < nTRDTrackletsTotal; i++) { - tracklets[i] = TRDtracklets[i]; - } - std::sort(tracklets.data(), tracklets.data() + nTRDTrackletsTotal); - fChain->mIOPtrs.trdTracklets = tracklets.data(); - - fChain->mIOPtrs.nTRDTriggerRecords = 1; - static float t = 0.f; - static int32_t o = 0; - fChain->mIOPtrs.trdTriggerTimes = &t; - fChain->mIOPtrs.trdTrackletIdxFirst = &o; - - HLTDebug("Number of TRD tracklets: %d", (int32_t)nTRDTrackletsTotal); - - static int32_t nEvent = 0; - char filename[256]; - std::ofstream out; - - if (nEvent == 0) { - std::unique_ptr fFastTransformIRS(new TPCFastTransform); - int64_t TimeStamp = (getenv("DUMP_TIMESTAMP_SOR") && atoi(getenv("DUMP_TIMESTAMP_SOR"))) ? 
fInitTimestamp : GetTimeStamp(); - if (fIsMC && !fRecParam->GetUseCorrectionMap()) { - TimeStamp = 0; - } - if (fFastTransformManager->create(*fFastTransformIRS, fCalib->GetTransform(), TimeStamp)) { - HLTFatal("Initialisation of Fast Transformation failed with error %s", fFastTransformManager->getLastError()); - } - std::unique_ptr tmpHelper; - tmpHelper->setCorrMap(fFastTransformIRS.get()); - fChain->SetTPCFastTransform(std::move(fFastTransformIRS), std::move(tmpHelper)); - - fRec->SetSettings(fSolenoidBz); - fRec->DumpSettings(); - } - - snprintf(filename, 256, GPUCA_EVDUMP_FILE ".%d.dump", nEvent++); - fChain->DumpData(filename); - return (0); -} diff --git a/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h b/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h deleted file mode 100644 index a4977e6859968..0000000000000 --- a/GPU/GPUTracking/Global/AliHLTGPUDumpComponent.h +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file AliHLTGPUDumpComponent.h -/// \author David Rohr - -#ifndef ALIHLTGPUDUMPCOMPONENT_H -#define ALIHLTGPUDUMPCOMPONENT_H - -#include "GPUCommonDef.h" -#include "AliHLTProcessor.h" - -class AliTPCcalibDB; -class AliTPCRecoParam; -#include "AliRecoParam.h" -class AliTPCTransform; -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -class TPCFastTransform; -class TPCFastTransformManager; -class GPUReconstruction; -class GPUChainTracking; -class GPUTPCClusterData; -} // namespace gpu -} // namespace GPUCA_NAMESPACE - -class AliHLTGPUDumpComponent : public AliHLTProcessor -{ - public: - static const uint32_t NSLICES = 36; - static const uint32_t NPATCHES = 6; - - AliHLTGPUDumpComponent(); - - AliHLTGPUDumpComponent(const AliHLTGPUDumpComponent&) = delete; - AliHLTGPUDumpComponent& operator=(const AliHLTGPUDumpComponent&) = delete; - - virtual ~AliHLTGPUDumpComponent(); - - const char* GetComponentID(); - void GetInputDataTypes(vector& list); - AliHLTComponentDataType GetOutputDataType(); - virtual void GetOutputDataSize(uint64_t& constBase, double& inputMultiplier); - AliHLTComponent* Spawn(); - - protected: - int32_t DoInit(int argc, const char** argv); - int32_t DoDeinit(); - int32_t Reconfigure(const char* cdbEntry, const char* chainId); - int32_t DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& trigData, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, vector& outputBlocks); - - private: - float fSolenoidBz; - GPUCA_NAMESPACE::gpu::GPUReconstruction* fRec; - GPUCA_NAMESPACE::gpu::GPUChainTracking* fChain; - GPUCA_NAMESPACE::gpu::TPCFastTransformManager* fFastTransformManager; - AliTPCcalibDB* fCalib; - AliTPCRecoParam* fRecParam; - AliRecoParam fOfflineRecoParam; - AliTPCTransform* fOrigTransform; - bool fIsMC; - int64_t fInitTimestamp; -}; - -#endif diff --git a/GPU/GPUTracking/Global/GPUChain.cxx b/GPU/GPUTracking/Global/GPUChain.cxx index 6f6bf33f3c001..6990d5e08b638 100644 --- 
a/GPU/GPUTracking/Global/GPUChain.cxx +++ b/GPU/GPUTracking/Global/GPUChain.cxx @@ -13,7 +13,7 @@ /// \author David Rohr #include "GPUChain.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; constexpr GPUChain::krnlRunRange GPUChain::krnlRunRangeNone; constexpr GPUChain::krnlEvent GPUChain::krnlEventNone; diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 9c67a05eec443..06650f9d9c733 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -18,7 +18,7 @@ #include "GPUReconstructionCPU.h" #include "GPUReconstructionHelpers.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -308,6 +308,6 @@ int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Global/GPUChainITS.cxx b/GPU/GPUTracking/Global/GPUChainITS.cxx index 89e56bf2d4c22..640b92a0eb0f4 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.cxx +++ b/GPU/GPUTracking/Global/GPUChainITS.cxx @@ -18,7 +18,7 @@ #include "GPUReconstructionIncludesITS.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; namespace o2::its { diff --git a/GPU/GPUTracking/Global/GPUChainITS.h b/GPU/GPUTracking/Global/GPUChainITS.h index 7ef77da54285e..37cd5acc5264c 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.h +++ b/GPU/GPUTracking/Global/GPUChainITS.h @@ -27,7 +27,7 @@ class TrackITSExt; class GPUFrameworkExternalAllocator; } // namespace o2::its -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUChainITS : public GPUChain { @@ -56,6 +56,6 @@ class GPUChainITS : public GPUChain uint32_t mMaxTracks; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index ff476716febe8..161dbcd8689d9 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -59,7 +59,7 @@ #include "utils/linux_helpers.h" #include "utils/strtag.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #include "GPUO2DataTypes.h" @@ -204,12 +204,10 @@ bool GPUChainTracking::ValidateSteps() return false; } bool tpcClustersAvail = (GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCClusters) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCClusterFinding) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression); -#ifndef GPUCA_ALIROOT_LIB if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !tpcClustersAvail) { GPUError("Invalid Inputs for track merging, TPC Clusters required"); return false; } -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 if (GetRecoSteps() & GPUDataTypes::RecoStep::TPCClusterFinding) { GPUError("Can not run TPC GPU Cluster Finding with Run 2 Data"); @@ -383,9 +381,7 @@ int32_t GPUChainTracking::Init() } } if (GetProcessingSettings().eventDisplay) { -#ifndef GPUCA_ALIROOT_LIB mEventDisplay.reset(GPUDisplayInterface::getDisplay(GetProcessingSettings().eventDisplay, this, GetQA())); -#endif if (mEventDisplay == nullptr) { throw std::runtime_error("Error loading event display"); } @@ -604,8 +600,6 @@ void GPUChainTracking::AllocateIOMemory() AllocateIOMemoryHelper(mIOPtrs.nTRDTriggerRecords, mIOPtrs.trdTrackletIdxFirst, mIOMem.trdTrackletIdxFirst); } -void GPUChainTracking::LoadClusterErrors() { param().LoadClusterErrors(); } - void GPUChainTracking::SetTPCFastTransform(std::unique_ptr&& tpcFastTransform, std::unique_ptr&& tpcTransformHelper) { mTPCFastTransformU = std::move(tpcFastTransform); diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 9ca370d6cc308..6d6d82b518097 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -51,7 +51,7 @@ class MatLayerCylSet; } } // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -193,7 +193,6 
@@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega void SetCalibObjects(const GPUCalibObjectsConst& obj) { processors()->calibObjects = obj; } void SetCalibObjects(const GPUCalibObjects& obj) { memcpy((void*)&processors()->calibObjects, (const void*)&obj, sizeof(obj)); } void SetUpdateCalibObjects(const GPUCalibObjectsConst& obj, const GPUNewCalibValues& vals); - void LoadClusterErrors(); void SetSubOutputControl(int32_t i, GPUOutputControl* v) { mSubOutputControls[i] = v; } void SetFinalInputCallback(std::function v) { mWaitForFinalInputs = v; } @@ -327,6 +326,6 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega int32_t OutputStream() const { return mRec->NStreams() - 2; } }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 8eddab63df35c..f17c2f7de4720 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -43,7 +43,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; using namespace o2::dataformats; @@ -716,7 +716,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) using ChargeMapType = decltype(*clustererShadow.mPchargeMap); using PeakMapType = decltype(*clustererShadow.mPpeakMap); - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); if (fragment.index == 0) { runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 01e4d011d08b9..9878ad032cd3b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -24,7 +24,7 @@ #endif #include "utils/strtag.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; int32_t GPUChainTracking::RunTPCCompression() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 1b959cac94fd0..06ba08527bfdc 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -29,7 +29,7 @@ #define PROFILE_MAX_SIZE (100 * 1024 * 1024) -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; static inline uint32_t RGB(uint8_t r, uint8_t g, uint8_t b) { return (uint32_t)r | ((uint32_t)g << 8) | ((uint32_t)b << 16); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h index 52e8fda8666b6..31ef86bcd6f70 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h @@ -18,7 +18,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -29,6 +29,6 @@ struct GPUChainTrackingFinalContext { bool ready = false; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx 
b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 16af17863bcb1..106f71cd745d7 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -52,7 +52,7 @@ #include "utils/linux_helpers.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #include "GPUO2DataTypes.h" diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index aba8617ee244d..8dd5140db6952 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -19,7 +19,7 @@ #include "utils/strtag.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 50ed3f115c02b..1d53177942b54 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -16,7 +16,7 @@ #include "GPULogging.h" #include "GPUO2DataTypes.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; int32_t GPUChainTracking::RunRefit() { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index 00b71aed7cb8b..b21745f64af0d 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -22,7 +22,7 @@ #include "utils/strtag.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; int32_t GPUChainTracking::GlobalTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx index 760f64833514e..d2e1ae295de05 100644 --- 
a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx @@ -23,13 +23,12 @@ #include "GPUTRDTrackerKernels.h" #include "utils/strtag.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::trd; template int32_t GPUChainTracking::RunTRDTracking() { -#ifndef GPUCA_ALIROOT_LIB auto& Tracker = processors()->getTRDTracker(); if (!Tracker.IsInitialized()) { return 1; @@ -129,7 +128,6 @@ int32_t GPUChainTracking::RunTRDTracking() } mRec->PopNonPersistentMemory(RecoStep::TRDTracking, qStr2Tag("TRDTRACK")); -#endif // GPUCA_ALIROOT_LIB return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx index 5b7cf945a15c9..326bfbb4d2313 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx @@ -29,7 +29,7 @@ #endif #include "utils/strtag.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; bool GPUChainTracking::NeedTPCClustersOnGPU() diff --git a/GPU/GPUTracking/Global/GPUErrors.cxx b/GPU/GPUTracking/Global/GPUErrors.cxx index d9835c25abbfe..7f3ed1d8206d9 100644 --- a/GPU/GPUTracking/Global/GPUErrors.cxx +++ b/GPU/GPUTracking/Global/GPUErrors.cxx @@ -18,7 +18,7 @@ #include "GPUDefMacros.h" #include "GPULogging.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #define GPUCA_MAX_ERRORS 255u diff --git a/GPU/GPUTracking/Global/GPUErrors.h b/GPU/GPUTracking/Global/GPUErrors.h index 8da9b7de5b299..377736a5dacb9 100644 --- a/GPU/GPUTracking/Global/GPUErrors.h +++ b/GPU/GPUTracking/Global/GPUErrors.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -45,6 +45,6 @@ class GPUErrors }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx 
b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx index bf6cd029a981b..445bb1a9c56fd 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx @@ -19,7 +19,7 @@ #include "GPUTPCClusterOccupancyMap.h" #include "GPUErrors.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; void GPUTrackingInputProvider::InitializeProcessor() {} diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.h b/GPU/GPUTracking/Global/GPUTrackingInputProvider.h index 4d9236de079fc..751c9f0229f3d 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.h +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.h @@ -27,7 +27,7 @@ struct ClusterNativeAccess; } // namespace tpc } // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -91,6 +91,6 @@ class GPUTrackingInputProvider : public GPUProcessor }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/ITS/GPUITSFitter.cxx b/GPU/GPUTracking/ITS/GPUITSFitter.cxx index f5a360145a9e9..a954d430f7ac3 100644 --- a/GPU/GPUTracking/ITS/GPUITSFitter.cxx +++ b/GPU/GPUTracking/ITS/GPUITSFitter.cxx @@ -19,7 +19,7 @@ #include "GPUITSTrack.h" #include "GPUReconstruction.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifndef GPUCA_GPUCODE void GPUITSFitter::InitializeProcessor() diff --git a/GPU/GPUTracking/ITS/GPUITSFitter.h b/GPU/GPUTracking/ITS/GPUITSFitter.h index 8a84a0df9da7f..9c0995a80749f 100644 --- a/GPU/GPUTracking/ITS/GPUITSFitter.h +++ b/GPU/GPUTracking/ITS/GPUITSFitter.h @@ -27,7 +27,7 @@ struct Cluster; class Cell; } // namespace o2::its -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUITSTrack; @@ -97,6 +97,6 @@ class GPUITSFitter : public GPUProcessor int16_t mMemoryResTracks = -1; int16_t mMemoryResMemory = -1; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git 
a/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx b/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx index 5e6a70e1b8876..b81e816d6fc1d 100644 --- a/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx +++ b/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx @@ -26,7 +26,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2; using namespace o2::its; diff --git a/GPU/GPUTracking/ITS/GPUITSFitterKernels.h b/GPU/GPUTracking/ITS/GPUITSFitterKernels.h index 9b3d300fc2bec..5a2a30de28823 100644 --- a/GPU/GPUTracking/ITS/GPUITSFitterKernels.h +++ b/GPU/GPUTracking/ITS/GPUITSFitterKernels.h @@ -21,7 +21,7 @@ namespace o2::its struct TrackingFrameInfo; } // namespace o2::its -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCGMPropagator; class GPUITSFitter; @@ -37,6 +37,6 @@ class GPUITSFitterKernels : public GPUKernelTemplate protected: GPUd() static bool fitTrack(GPUITSFitter& Fitter, GPUTPCGMPropagator& prop, GPUITSTrack& track, int32_t start, int32_t end, int32_t step); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/ITS/GPUITSTrack.h b/GPU/GPUTracking/ITS/GPUITSTrack.h index 3ae48eeca97c5..5063985692a43 100644 --- a/GPU/GPUTracking/ITS/GPUITSTrack.h +++ b/GPU/GPUTracking/ITS/GPUITSTrack.h @@ -18,7 +18,7 @@ #include "GPUTPCGMMergerTypes.h" #include "GPUTPCGMTrackParam.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUITSTrack : public GPUTPCGMTrackParam { @@ -27,6 +27,6 @@ class GPUITSTrack : public GPUTPCGMTrackParam float mAlpha; int32_t mClusters[7]; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h index 5c1d1d11861bd..70d9676c4fe26 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include "GPUCommonMath.h" -namespace GPUCA_NAMESPACE 
+namespace o2 { namespace gpu { @@ -128,6 +128,6 @@ class GPUTPCGMBorderTrack ClassDefNV(GPUTPCGMBorderTrack, 1); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index c61dcd220e0c6..00d4b1822bdc1 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -18,7 +18,7 @@ #include "GPUTPCGMTrackParam.h" #include "GPUTPCGMMergedTrackHit.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -126,6 +126,6 @@ class GPUTPCGMMergedTrack #endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 6c90330efecc3..2278afd112384 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -62,20 +62,14 @@ #include "GPUO2FakeClasses.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; using namespace gputpcgmmergertypes; static constexpr int32_t kMaxParts = 400; static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; -//#define OFFLINE_FITTER - -#if !defined(GPUCA_ALIROOT_LIB) || defined(GPUCA_GPUCODE) -#undef OFFLINE_FITTER -#endif - -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct MergeLooperParam { float refz; @@ -83,7 +77,7 @@ struct MergeLooperParam { float y; uint32_t id; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #ifndef GPUCA_GPUCODE @@ -1808,12 +1802,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread const ClusterNative& c = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[trackClusters[i].id]; state = c.getFlags(); } -#ifdef GPUCA_ALIROOT_LIB - cl[i].x = clXYZ[i].x; - cl[i].y = clXYZ[i].y; - cl[i].z = clXYZ[i].z; - cl[i].amp = clXYZ[i].amp; -#endif cl[i].state = 
state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags cl[i].row = trackClusters[i].row; if (!Param().rec.nonConsecutiveIDs) // We already have global consecutive numbers from the slice tracker, and we need to keep them for late cluster attachment diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 931b58d41e21b..a9b510e1714ba 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -42,7 +42,7 @@ struct ClusterNative; } } // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -298,6 +298,6 @@ class GPUTPCGMMerger : public GPUProcessor GPUTPCGMLoopData* mLoopData; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCGMMERGER_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index e1de4030d672c..b0a150bbd6a92 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -40,7 +40,7 @@ #include "CorrectionMapsHelper.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace gputpcgmmergertypes; void GPUTPCGMMerger::DumpSliceTracks(std::ostream& out) const diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 103fc0a4a08cc..c96fab2343d82 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -12,14 +12,13 @@ /// \file GPUTPCGMMergerGPU.cxx /// \author David Rohr -#if !defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB) // GPU Merger was not available for Run 2 #include "GPUTPCGMMergerGPU.h" #include "GPUCommonAlgorithm.h" #if defined(WITH_OPENMP) && !defined(GPUCA_GPUCODE) #include "GPUReconstruction.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void 
GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode) @@ -220,4 +219,3 @@ GPUdii() void GPUTPCGMMergerMergeLoopers::Thread<2>(int32_t nBlocks, int32_t nTh { merger.MergeLoopersMain(nBlocks, nThreads, iBlock, iThread); } -#endif // !defined(GPUCA_GPUCODE) || !defined(GPUCA_ALIROOT_LIB) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h index 958d5081baf81..e1432830117c1 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h @@ -19,7 +19,7 @@ #include "GPUConstantMem.h" #include "GPUTPCGMMergerTypes.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -27,67 +27,53 @@ class GPUTPCGMMergerGeneral : public GPUKernelTemplate { public: GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCMerging; } -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) typedef GPUTPCGMMerger processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors.tpcMerger; } -#endif }; class GPUTPCGMMergerTrackFit : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t mode); -#endif }; class GPUTPCGMMergerFollowLoopers : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerSliceRefit : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void 
Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSlice); -#endif }; class GPUTPCGMMergerUnpackGlobal : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSlice); -#endif }; class GPUTPCGMMergerUnpackSaveNumber : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t id); -#endif }; class GPUTPCGMMergerUnpackResetIds : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t id); -#endif }; class GPUTPCGMMergerResolve : public GPUTPCGMMergerGeneral @@ -96,37 +82,29 @@ class GPUTPCGMMergerResolve : public GPUTPCGMMergerGeneral struct GPUSharedMemory : public gputpcgmmergertypes::GPUResolveSharedMemory { }; -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); -#endif }; class GPUTPCGMMergerClearLinks : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int8_t nOutput); -#endif }; class GPUTPCGMMergerMergeWithinPrepare : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerMergeSlicesPrepare : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t border0, int32_t border1, int8_t useOrigTrackParam); -#endif }; class GPUTPCGMMergerMergeBorders : public GPUTPCGMMergerGeneral @@ -137,94 +115,74 @@ class GPUTPCGMMergerMergeBorders : public GPUTPCGMMergerGeneral step1 = 1, step2 = 2, variant = 3 }; -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, Args... 
args); -#endif }; class GPUTPCGMMergerMergeCE : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerLinkGlobalTracks : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerCollect : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerPrepareClusters : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerSortTracks : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerSortTracksQPt : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerSortTracksPrepare : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || 
!defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerFinalize : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; class GPUTPCGMMergerMergeLoopers : public GPUTPCGMMergerGeneral { public: -#if !defined(GPUCA_ALIROOT_LIB) || !defined(GPUCA_GPUCODE) template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 3cd6870524060..3c8f21420a14f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -18,7 +18,7 @@ #include "GPUTPCDef.h" #include "GPUGeneralKernels.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -61,6 +61,6 @@ struct GPUTPCOuterParam { } // namespace gputpcgmmergertypes } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.cxx index b33392cd27db1..f237d1d18057e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.cxx @@ -15,7 +15,7 @@ #include "GPUTPCGMPhysicalTrackModel.h" #include "GPUCommonMath.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUd() int32_t GPUTPCGMPhysicalTrackModel::PropagateToXBzLight(float x, float Bz, float& GPUrestrict() dLp) { diff --git 
a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h index c65e6df6c320f..d77cb861affa3 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h @@ -27,7 +27,7 @@ * */ -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -273,6 +273,6 @@ GPUdi() void GPUTPCGMPhysicalTrackModel::Rotate(float alpha) UpdateValues(); } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx index c444f3a399aae..7793fac7e03ac 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx @@ -17,7 +17,7 @@ #include "GPUTPCGMPolynomialField.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; int32_t GPUTPCGMPolynomialFieldManager::GetPolynomialField(StoredField_t fieldType, float nominalFieldkG, GPUTPCGMPolynomialField& field) { @@ -206,605 +206,3 @@ int32_t GPUTPCGMPolynomialFieldManager::GetPolynomialField(float nominalFieldkG, return GetPolynomialField(type, nominalFieldkG, field); } - -/****************************************************************************************** - * - * the following code only works inside AliRoot framework with initialised magnetic field - * - *******************************************************************************************/ - -#if defined(GPUCA_ALIROOT_LIB) & !defined(GPUCA_GPUCODE) - -#include "AliHLTTPCPolynomFit.h" - -#include - -#include "AliTracker.h" -#include "AliHLTTPCGeometry.h" -#include "AliTRDgeometry.h" -#include "TGeoGlobalMagField.h" -#include "AliMagF.h" - -#include "TFile.h" -#include "TMath.h" -#include "TNtuple.h" -#include "Riostream.h" - -#include "TMatrixD.h" -#include "TH1F.h" -#include "TStyle.h" - -int32_t 
GPUTPCGMPolynomialFieldManager::GetPolynomialField(GPUTPCGMPolynomialField& field) -{ - // - // get pre-calculated polynomial field approximation of the TPC region appropriate for the current AliTracker field map (if exists) - // returns !=0 error when the polynomial field is not exist - // - - // check which field map is in use - - field.Reset(); - - AliMagF* fld = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - - if (!fld) { - return -1; - } - - AliMagF::BMap_t mapType = fld->GetMapType(); - - StoredField_t type = kUnknown; - - if (fld->IsUniform()) { - type = kUniform; - } else if (mapType == AliMagF::k2kG) { - type = k2kG; - } else if (mapType == AliMagF::k5kG) { - type = k5kG; - } - - return GetPolynomialField(type, AliTracker::GetBz(), field); -} - -int32_t GPUTPCGMPolynomialFieldManager::FitFieldTpc(AliMagF* inputFld, GPUTPCGMPolynomialField& polyField, double step) -{ - // - // Fit magnetic field with polynoms - // - - const double kAlmost0Field = 1.e-13; - - AliMagF* fld = inputFld; - - if (!fld) { - // fld = new AliMagF("Fit", "Fit", 1., 1., AliMagF::k2kG); - fld = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - } - if (!fld) { - return -1; - } - - const double sectorAngleShift = 10. / 180. * TMath::Pi(); - const double sectorAngle = 20. / 180. * TMath::Pi(); - const int32_t nRows = AliHLTTPCGeometry::GetNRows(); - - double xMin = AliHLTTPCGeometry::Row2X(0); - double xMax = AliHLTTPCGeometry::Row2X(nRows - 1); - double rMin = xMin; - double rMax = xMax / TMath::Cos(sectorAngle / 2.); - - double dA = 1. / rMax; // angular step == 1 cm at outer radius - dA *= step; - int32_t nSectorParticles = (int32_t)(sectorAngle / dA); - if (nSectorParticles < 1) { - nSectorParticles = 1; - } - dA = sectorAngle / nSectorParticles; - - double dZ = 1. * step; // step in z == 1 cm - - double zMin = -AliHLTTPCGeometry::GetZLength(); - double zMax = AliHLTTPCGeometry::GetZLength(); - - double alMin = -sectorAngle / 2.; - double alMax = sectorAngle / 2. 
- 0.5 * dA; - - Double_t solenoidBzkG = fld->SolenoidField(); - Double_t solenoidBzkGInv = (TMath::Abs(solenoidBzkG) > kAlmost0Field) ? 1. / solenoidBzkG : 0.; - - std::cout << "solenoidBz = " << solenoidBzkG << " kG" << std::endl; - - const int32_t M = GPUTPCGMPolynomialField::NTPCM; - AliHLTTPCPolynomFit fitBx(M); - AliHLTTPCPolynomFit fitBy(M); - AliHLTTPCPolynomFit fitBz(M); - - for (int32_t sector = 0; sector < 18; sector++) { - std::cout << "sector = " << sector << std::endl; - double asec = sectorAngleShift + sector * sectorAngle; - double cs = TMath::Cos(asec); - double ss = TMath::Sin(asec); - for (double al = alMin; al < alMax; al += dA) { - std::cout << "angle " << al / TMath::Pi() * 180. << " grad " << std::endl; - double tg = TMath::Tan(al); - for (int32_t row = 0; row < AliHLTTPCGeometry::GetNRows(); row++) { - double xl = AliHLTTPCGeometry::Row2X(row); - double yl = xl * tg; - double x = xl * cs - yl * ss; - double y = xl * ss + yl * cs; - // std::cout<<"sector = "<IsUniform()) { - B[0] = B[1] = 0.; - B[2] = fld->SolenoidField(); - } else { - fld->Field(xyz, B); - } - B[0] *= solenoidBzkGInv; - B[1] *= solenoidBzkGInv; - B[2] *= solenoidBzkGInv; - - float f[M]; - GPUTPCGMPolynomialField::GetPolynomsTpc(x, y, z, f); - fitBx.AddMeasurement(f, B[0]); - fitBy.AddMeasurement(f, B[1]); - fitBz.AddMeasurement(f, B[2]); - } - } - } - } - - // field coefficients - float cX[M]; - float cY[M]; - float cZ[M]; - - int32_t errX = fitBx.Fit(cX); - int32_t errY = fitBy.Fit(cY); - int32_t errZ = fitBz.Fit(cZ); - - if (errX != 0 || errY != 0 || errZ != 0) { - std::cout << "Fit of polynamial field failed!!!: errX " << errX << " errY " << errY << " errZ " << errZ << std::endl; - if (fld != inputFld) { - delete fld; - } - return -1; - } - - GPUTPCGMPolynomialField fittedField; - fittedField.SetFieldTpc(cX, cY, cZ); - - // scale result - double nominalBz = solenoidBzkG * gpu_common_constants::kCLight; - - for (int32_t i = 0; i < M; i++) { - cX[i] = nominalBz * cX[i]; - 
cY[i] = nominalBz * cY[i]; - cZ[i] = nominalBz * cZ[i]; - } - polyField.SetFieldNominal(nominalBz); - polyField.SetFieldTpc(cX, cY, cZ); - - gStyle->SetOptStat(1111111); - - TH1F histBx("Performance B_x", "Error B_x", 1000, -0.005, 0.005); - TH1F histBy("Performance B_y", "Error B_y", 1000, -0.005, 0.005); - TH1F histBz("Performance B_z", "Error B_z", 1000, -0.005, 0.005); - - for (int32_t sector = 0; sector < 18; sector++) { - std::cout << "check quality: sector = " << sector << std::endl; - double asec = sectorAngleShift + sector * sectorAngle; - double cs = TMath::Cos(asec); - double ss = TMath::Sin(asec); - for (double al = alMin; al < alMax; al += dA) { - std::cout << "check quality: angle " << al / TMath::Pi() * 180. << " grad " << std::endl; - double tg = TMath::Tan(al); - for (int32_t row = 0; row < AliHLTTPCGeometry::GetNRows(); row++) { - double xl = AliHLTTPCGeometry::Row2X(row); - double yl = xl * tg; - double x = xl * cs - yl * ss; - double y = xl * ss + yl * cs; - for (double z = zMin; z <= zMax; z += dZ) { - Double_t xyz[3] = {x, y, z}; - Double_t B[3]; - if (fld->IsUniform()) { - B[0] = B[1] = 0.; - B[2] = fld->SolenoidField(); - } else { - fld->Field(xyz, B); - } - B[0] *= solenoidBzkGInv; - B[1] *= solenoidBzkGInv; - B[2] *= solenoidBzkGInv; - float approxB[3]; - fittedField.GetField(x, y, z, approxB); - - histBx.Fill(approxB[0] - B[0]); - histBy.Fill(approxB[1] - B[1]); - histBz.Fill(approxB[2] - B[2]); - } - } - } - } - - TFile file("FieldTpcFitQA.root", "RECREATE"); - file.cd(); - - histBx.Write(); - histBy.Write(); - histBz.Write(); - - file.Write(); - file.Close(); - - std::cout << "Fitted polynomial field: " << std::endl; - fittedField.Print(); - - return 0; -} - -int32_t GPUTPCGMPolynomialFieldManager::FitFieldTrd(AliMagF* inputFld, GPUTPCGMPolynomialField& polyField, double step) -{ - // - // Fit magnetic field with polynoms - // - - const double kAlmost0Field = 1.e-13; - - AliMagF* fld = inputFld; - - if (!fld) { - // fld = new 
AliMagF("Fit", "Fit", 1., 1., AliMagF::k2kG); - fld = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - } - if (!fld) { - return -1; - } - - const double sectorAngle = AliTRDgeometry::GetAlpha(); - const double sectorAngleShift = sectorAngle / 2; - - double zMax = 751.0 / 2.; - double zMin = -zMax; - double xMin = AliHLTTPCGeometry::Row2X(AliHLTTPCGeometry::GetNRows() - 1); - double xMax = AliTRDgeometry::GetXtrdEnd(); - double rMin = xMin; - double rMax = xMax / TMath::Cos(sectorAngle / 2.); - - double dA = 1. / rMax; // angular step == 1 cm at outer radius - dA *= step; - int32_t nSectorParticles = (int32_t)(sectorAngle / dA); - if (nSectorParticles < 1) { - nSectorParticles = 1; - } - dA = sectorAngle / nSectorParticles; - - double dZ = 1. * step; // step in z == 1 cm - - double alMin = -sectorAngle / 2.; - double alMax = sectorAngle / 2. - 0.5 * dA; - - Double_t solenoidBzkG = fld->SolenoidField(); - Double_t solenoidBzkGInv = (TMath::Abs(solenoidBzkG) > kAlmost0Field) ? 1. / solenoidBzkG : 0.; - - std::cout << "solenoidBz = " << solenoidBzkG << " kG" << std::endl; - - const int32_t M = GPUTPCGMPolynomialField::NTRDM; - AliHLTTPCPolynomFit fitBx(M); - AliHLTTPCPolynomFit fitBy(M); - AliHLTTPCPolynomFit fitBz(M); - - for (int32_t sector = 0; sector < AliTRDgeometry::Nsector(); sector++) { - std::cout << "sector = " << sector << std::endl; - double asec = sectorAngleShift + sector * sectorAngle; - double cs = TMath::Cos(asec); - double ss = TMath::Sin(asec); - for (double al = alMin; al < alMax; al += dA) { - std::cout << "angle " << al / TMath::Pi() * 180. 
<< " grad " << std::endl; - double tg = TMath::Tan(al); - for (double xl = xMin; xl <= xMax; xl += step) { - double yl = xl * tg; - double x = xl * cs - yl * ss; - double y = xl * ss + yl * cs; - // std::cout<<"sector = "<IsUniform()) { - B[0] = B[1] = 0.; - B[2] = fld->SolenoidField(); - } else { - fld->Field(xyz, B); - } - B[0] *= solenoidBzkGInv; - B[1] *= solenoidBzkGInv; - B[2] *= solenoidBzkGInv; - - float f[M]; - GPUTPCGMPolynomialField::GetPolynomsTrd(x, y, z, f); - fitBx.AddMeasurement(f, B[0]); - fitBy.AddMeasurement(f, B[1]); - fitBz.AddMeasurement(f, B[2]); - } - } - } - } - - // field coefficients - float cX[M]; - float cY[M]; - float cZ[M]; - - int32_t errX = fitBx.Fit(cX); - int32_t errY = fitBy.Fit(cY); - int32_t errZ = fitBz.Fit(cZ); - - if (errX != 0 || errY != 0 || errZ != 0) { - std::cout << "Fit of polynamial field failed!!!" << std::endl; - if (fld != inputFld) { - delete fld; - } - return -1; - } - - GPUTPCGMPolynomialField fittedField; - fittedField.SetFieldTrd(cX, cY, cZ); - - // scale result - double nominalBz = solenoidBzkG * gpu_common_constants::kCLight; - - for (int32_t i = 0; i < M; i++) { - cX[i] = nominalBz * cX[i]; - cY[i] = nominalBz * cY[i]; - cZ[i] = nominalBz * cZ[i]; - } - polyField.SetFieldNominal(nominalBz); - polyField.SetFieldTrd(cX, cY, cZ); - - gStyle->SetOptStat(1111111); - - TH1F histBx("Performance B_x", "Error B_x", 1000, -0.005, 0.005); - TH1F histBy("Performance B_y", "Error B_y", 1000, -0.005, 0.005); - TH1F histBz("Performance B_z", "Error B_z", 1000, -0.005, 0.005); - - for (int32_t sector = 0; sector < AliTRDgeometry::Nsector(); sector++) { - std::cout << "check quality: sector = " << sector << std::endl; - double asec = sectorAngleShift + sector * sectorAngle; - double cs = TMath::Cos(asec); - double ss = TMath::Sin(asec); - for (double al = alMin; al < alMax; al += dA) { - std::cout << "check quality: angle " << al / TMath::Pi() * 180. 
<< " grad " << std::endl; - double tg = TMath::Tan(al); - for (double xl = xMin; xl <= xMax; xl += step) { - double yl = xl * tg; - double x = xl * cs - yl * ss; - double y = xl * ss + yl * cs; - for (double z = zMin; z <= zMax; z += dZ) { - Double_t xyz[3] = {x, y, z}; - Double_t B[3]; - if (fld->IsUniform()) { - B[0] = B[1] = 0.; - B[2] = fld->SolenoidField(); - } else { - fld->Field(xyz, B); - } - B[0] *= solenoidBzkGInv; - B[1] *= solenoidBzkGInv; - B[2] *= solenoidBzkGInv; - float approxB[3]; - fittedField.GetFieldTrd(x, y, z, approxB); - - histBx.Fill(approxB[0] - B[0]); - histBy.Fill(approxB[1] - B[1]); - histBz.Fill(approxB[2] - B[2]); - } - } - } - } - - TFile file("FieldTrdFitQA.root", "RECREATE"); - file.cd(); - - histBx.Write(); - histBy.Write(); - histBz.Write(); - - file.Write(); - file.Close(); - - std::cout << "Fitted polynomial field: " << std::endl; - fittedField.Print(); - - return 0; -} - -int32_t GPUTPCGMPolynomialFieldManager::FitFieldIts(AliMagF* inputFld, GPUTPCGMPolynomialField& polyField, double step) -{ - // - // Fit magnetic field with polynoms - // - - const double kAlmost0Field = 1.e-13; - - AliMagF* fld = inputFld; - - if (!fld) { - // fld = new AliMagF("Fit", "Fit", 1., 1., AliMagF::k2kG); - fld = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - } - if (!fld) { - return -1; - } - - const double sectorAngleShift = 10. / 180. * TMath::Pi(); - const double sectorAngle = 20. / 180. * TMath::Pi(); - - const double xITS = 3.9; - const double zITS = 24.5; - - double xMin = 0.5; - double xMax = AliHLTTPCGeometry::Row2X(0); - double rMin = xMin; - double rMax = xMax / TMath::Cos(sectorAngle / 2.); - - double dA = .1 / rMax; // angular step == 0.1 cm at the outer radius - dA *= step; - int32_t nSectorParticles = (int32_t)(sectorAngle / dA); - if (nSectorParticles < 1) { - nSectorParticles = 1; - } - dA = sectorAngle / nSectorParticles; - - double dX = .5 * step; // step in local x == 0.1 cm - double dZ = 1. 
* step; // step in z == 1 cm - - double zMin = -AliHLTTPCGeometry::GetZLength(); - double zMax = AliHLTTPCGeometry::GetZLength(); - - double alMin = -sectorAngle / 2.; - double alMax = sectorAngle / 2. - 0.5 * dA; - - Double_t solenoidBzkG = fld->SolenoidField(); - Double_t solenoidBzkGInv = (TMath::Abs(solenoidBzkG) > kAlmost0Field) ? 1. / solenoidBzkG : 0.; - - std::cout << "solenoidBz = " << solenoidBzkG << " kG" << std::endl; - - const int32_t M = GPUTPCGMPolynomialField::NITSM; - AliHLTTPCPolynomFit fitBx(M); - AliHLTTPCPolynomFit fitBy(M); - AliHLTTPCPolynomFit fitBz(M); - - double coneSlope = (zMax - zITS) / (xMax - xITS); - - for (int32_t sector = 0; sector < 18; sector++) { - std::cout << "sector = " << sector << std::endl; - double asec = sectorAngleShift + sector * sectorAngle; - double cs = TMath::Cos(asec); - double ss = TMath::Sin(asec); - for (double al = alMin; al < alMax; al += dA) { - std::cout << "angle " << al / TMath::Pi() * 180. << " grad " << std::endl; - double tg = TMath::Tan(al); - for (double xl = xMin; xl <= xMax; xl += dX) { - double yl = xl * tg; - double x = xl * cs - yl * ss; - double y = xl * ss + yl * cs; - // std::cout<<"sector = "<IsUniform()) { - B[0] = B[1] = 0.; - B[2] = fld->SolenoidField(); - } else { - fld->Field(xyz, B); - } - B[0] *= solenoidBzkGInv; - B[1] *= solenoidBzkGInv; - B[2] *= solenoidBzkGInv; - - float f[M]; - GPUTPCGMPolynomialField::GetPolynomsIts(x, y, z, f); - fitBx.AddMeasurement(f, B[0]); - fitBy.AddMeasurement(f, B[1]); - fitBz.AddMeasurement(f, B[2]); - } - } - } - } - - // field coefficients - float cX[M]; - float cY[M]; - float cZ[M]; - - int32_t errX = fitBx.Fit(cX); - int32_t errY = fitBy.Fit(cY); - int32_t errZ = fitBz.Fit(cZ); - - if (errX != 0 || errY != 0 || errZ != 0) { - std::cout << "Fit of polynamial field failed!!!: errX " << errX << " errY " << errY << " errZ " << errZ << std::endl; - if (fld != inputFld) { - delete fld; - } - return -1; - } - - GPUTPCGMPolynomialField fittedField; - 
fittedField.SetFieldIts(cX, cY, cZ); - - // scale result - double nominalBz = solenoidBzkG * gpu_common_constants::kCLight; - - for (int32_t i = 0; i < M; i++) { - cX[i] = nominalBz * cX[i]; - cY[i] = nominalBz * cY[i]; - cZ[i] = nominalBz * cZ[i]; - } - polyField.SetFieldNominal(nominalBz); - polyField.SetFieldIts(cX, cY, cZ); - - gStyle->SetOptStat(1111111); - - TH1F histBx("Performance B_x", "Error B_x", 1000, -0.005, 0.005); - TH1F histBy("Performance B_y", "Error B_y", 1000, -0.005, 0.005); - TH1F histBz("Performance B_z", "Error B_z", 1000, -0.005, 0.005); - - for (int32_t sector = 0; sector < 18; sector++) { - std::cout << "check quality: sector = " << sector << std::endl; - double asec = sectorAngleShift + sector * sectorAngle; - double cs = TMath::Cos(asec); - double ss = TMath::Sin(asec); - for (double al = alMin; al < alMax; al += dA) { - std::cout << "check quality: angle " << al / TMath::Pi() * 180. << " grad " << std::endl; - double tg = TMath::Tan(al); - for (double xl = xMin; xl <= xMax; xl += dX) { - double yl = xl * tg; - double x = xl * cs - yl * ss; - double y = xl * ss + yl * cs; - double zCone = zITS + (xl - xITS) * coneSlope; - for (double z = -zCone; z <= zCone; z += dZ) { - Double_t xyz[3] = {x, y, z}; - Double_t B[3]; - if (fld->IsUniform()) { - B[0] = B[1] = 0.; - B[2] = fld->SolenoidField(); - } else { - fld->Field(xyz, B); - } - B[0] *= solenoidBzkGInv; - B[1] *= solenoidBzkGInv; - B[2] *= solenoidBzkGInv; - float approxB[3]; - fittedField.GetFieldIts(x, y, z, approxB); - - histBx.Fill(approxB[0] - B[0]); - histBy.Fill(approxB[1] - B[1]); - histBz.Fill(approxB[2] - B[2]); - } - } - } - } - - TFile file("FieldItsFitQA.root", "RECREATE"); - file.cd(); - - histBx.Write(); - histBy.Write(); - histBz.Write(); - - file.Write(); - file.Close(); - - std::cout << "Fitted polynomial field: " << std::endl; - fittedField.Print(); - - return 0; -} - -#endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h 
b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index a58c3485321fd..15f2bd880e351 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -18,13 +18,13 @@ #include "GPUCommonDef.h" class AliMagF; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { class GPUTPCGMPolynomialField; } -} // namespace GPUCA_NAMESPACE +} // namespace o2 /** * @class GPUTPCGMPolynomialFieldManager @@ -43,31 +43,11 @@ class GPUTPCGMPolynomialFieldManager /* Get appropriate pre-calculated polynomial field for the given field value nominalFieldkG */ - static int32_t GetPolynomialField(float nominalFieldkG, GPUCA_NAMESPACE::gpu::GPUTPCGMPolynomialField& field); - -#if defined(GPUCA_ALIROOT_LIB) & !defined(GPUCA_GPUCODE) - - /* Get pre-calculated polynomial field for the current ALICE field (if exists) - */ - static int32_t GetPolynomialField(GPUCA_NAMESPACE::gpu::GPUTPCGMPolynomialField& field); - - /* Fit given field for TPC - */ - static int32_t FitFieldTpc(AliMagF* fld, GPUCA_NAMESPACE::gpu::GPUTPCGMPolynomialField& field, double step = 1.); - - /* Fit given field for TRD - */ - static int32_t FitFieldTrd(AliMagF* fld, GPUCA_NAMESPACE::gpu::GPUTPCGMPolynomialField& field, double step = 1.); - - /* Fit given field for ITS - */ - static int32_t FitFieldIts(AliMagF* fld, GPUCA_NAMESPACE::gpu::GPUTPCGMPolynomialField& field, double step = 1.); - -#endif + static int32_t GetPolynomialField(float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); /* Get pre-calculated polynomial field of type "type", scaled with respect to nominalFieldkG */ - static int32_t GetPolynomialField(StoredField_t type, float nominalFieldkG, GPUCA_NAMESPACE::gpu::GPUTPCGMPolynomialField& field); + static int32_t GetPolynomialField(StoredField_t type, float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); }; #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx 
b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 32941e032019c..6355db9483b05 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -27,7 +27,7 @@ #include "AliMagF.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUd() void GPUTPCGMPropagator::GetBxByBzBase(float cosAlpha, float sinAlpha, float X, float Y, float Z, float B[3]) const { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index 0a35875764ae5..eaff9be4f5e46 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -29,7 +29,7 @@ class MatLayerCylSet; } // namespace base } // namespace o2 -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -276,6 +276,6 @@ GPUdi() float GPUTPCGMPropagator::getGlobalY(float X, float Y) const } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx index f7f753d8302d9..3c774b13ce5b1 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx @@ -20,7 +20,7 @@ #include "GPUTPCConvertImpl.h" #include "GPUParam.inc" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sliceTr, float alpha, int32_t slice) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h index 627fc5c73c21c..a2179b6c66b2a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h @@ -20,7 +20,7 @@ #include "GPUCommonMath.h" #include "GPUO2DataTypes.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -143,6 +143,6 @@ class GPUTPCGMSliceTrack ClassDefNV(GPUTPCGMSliceTrack, 1); }; } // namespace gpu -} 
// namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 74cc12e9bbd9a..bb450f1f4112f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -40,10 +40,6 @@ #include "GPUTPCGMMergerTypes.h" #include "GPUParam.inc" -#ifdef GPUCA_ALIROOT_LIB -#include "AliExternalTrackParam.h" -#endif - #ifdef GPUCA_CADEBUG_ENABLED #include "../utils/qconfig.h" #include "AliHLTTPCClusterMCData.h" @@ -54,7 +50,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_t iTrk, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* GPUrestrict() clustersXYZ, int32_t& GPUrestrict() N, int32_t& GPUrestrict() NTolerated, float& GPUrestrict() Alpha, int32_t attempt, float maxSinPhi, gputpcgmmergertypes::GPUTPCOuterParam* GPUrestrict() outerParam) @@ -1080,59 +1076,6 @@ GPUd() bool GPUTPCGMTrackParam::CheckNumericalQuality(float overrideCovYY) const return ok; } -#if defined(GPUCA_ALIROOT_LIB) & !defined(GPUCA_GPUCODE) -bool GPUTPCGMTrackParam::GetExtParam(AliExternalTrackParam& T, double alpha) const -{ - //* Convert from GPUTPCGMTrackParam to AliExternalTrackParam parameterisation, - //* the angle alpha is the global angle of the local X axis - - bool ok = CheckNumericalQuality(); - - double par[5], cov[15]; - for (int32_t i = 0; i < 5; i++) { - par[i] = mP[i]; - } - for (int32_t i = 0; i < 15; i++) { - cov[i] = mC[i]; - } - - if (par[2] > GPUCA_MAX_SIN_PHI) { - par[2] = GPUCA_MAX_SIN_PHI; - } - if (par[2] < -GPUCA_MAX_SIN_PHI) { - par[2] = -GPUCA_MAX_SIN_PHI; - } - - if (CAMath::Abs(par[4]) < 1.e-5) { - par[4] = 1.e-5; // some other software will crash if q/Pt==0 - } - if (CAMath::Abs(par[4]) > 1. 
/ 0.08) { - ok = 0; // some other software will crash if q/Pt is too big - } - T.Set((double)mX, alpha, par, cov); - return ok; -} - -void GPUTPCGMTrackParam::SetExtParam(const AliExternalTrackParam& T) -{ - //* Convert from AliExternalTrackParam parameterisation - - for (int32_t i = 0; i < 5; i++) { - mP[i] = T.GetParameter()[i]; - } - for (int32_t i = 0; i < 15; i++) { - mC[i] = T.GetCovariance()[i]; - } - mX = T.GetX(); - if (mP[2] > GPUCA_MAX_SIN_PHI) { - mP[2] = GPUCA_MAX_SIN_PHI; - } - if (mP[2] < -GPUCA_MAX_SIN_PHI) { - mP[2] = -GPUCA_MAX_SIN_PHI; - } -} -#endif - GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() track, int32_t iTrk, GPUTPCGMMerger* GPUrestrict() merger, int32_t attempt) // TODO: Inline me, once __forceinline__ is fixed by HIP { if (!track.OK()) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 7ce6167b653d2..a2d7dcf2b3e3d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -27,7 +27,7 @@ class AliExternalTrackParam; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -207,11 +207,6 @@ class GPUTPCGMTrackParam GPUd() static void RefitTrack(GPUTPCGMMergedTrack& track, int32_t iTrk, GPUTPCGMMerger* merger, int32_t attempt); -#if defined(GPUCA_ALIROOT_LIB) & !defined(GPUCA_GPUCODE) - bool GetExtParam(AliExternalTrackParam& T, double alpha) const; - void SetExtParam(const AliExternalTrackParam& T); -#endif - GPUdi() void ConstrainSinPhi(float limit = GPUCA_MAX_SIN_PHI) { if (mP[2] > limit) { @@ -291,6 +286,6 @@ GPUdi() float GPUTPCGMTrackParam::GetMirroredY(float Bz) const return GetY() - 2.f * CAMath::Sqrt(cosPhi2) / qptBz; } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx index 1a00f2cc1abff..78015b347a8c6 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx @@ -24,7 +24,7 @@ #include "TObjArray.h" #include "AliTPCclusterMI.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index 12253296b62fc..9f6467923f56a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -21,7 +21,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t) diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h index e54fb8885091d..4daee67643cfd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCGMMerger; @@ -40,6 +40,6 @@ class GPUTPCGlobalDebugSortKernels : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker, int8_t parameter); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif // GPUTPCGLOBALDEBUGSORTKERNELS_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.cxx deleted file mode 100644 index b905c72915670..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.cxx 
+++ /dev/null @@ -1,575 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCGlobalMergerComponent.cxx -/// \author David Rohr, Sergey Gorbunov, Matthias Kretz - -#include "GPUTPCGlobalMergerComponent.h" -#include "GPUReconstruction.h" -#include "GPUChainTracking.h" -#include "GPUTPCSliceOutput.h" - -#include "GPUTPCDef.h" - -#include "GPUTPCGMMerger.h" -#include "GPUTPCGMMergedTrack.h" - -#include "AliHLTTPCDefinitions.h" -#include "GPUTPCDefinitions.h" -#include "AliHLTTPCGeometry.h" - -#include "AliExternalTrackParam.h" -#include "AliCDBEntry.h" -#include "AliCDBManager.h" -#include "TObjString.h" -#include "TObjArray.h" -#include "AliHLTExternalTrackParam.h" - -#include -#include -#include - -using namespace GPUCA_NAMESPACE::gpu; -using namespace std; - -// ROOT macro for the implementation of ROOT specific class methods -ClassImp(GPUTPCGlobalMergerComponent); - -const GPUChainTracking* GPUTPCGlobalMergerComponent::fgCurrentMergerReconstruction = nullptr; - -GPUTPCGlobalMergerComponent::GPUTPCGlobalMergerComponent() : AliHLTProcessor(), fSolenoidBz(0), fClusterErrorCorrectionY(0), fClusterErrorCorrectionZ(0), fNWays(1), fNWaysOuter(0), fNoClear(false), fBenchmark("GlobalMerger"), fRec(nullptr), fChain(nullptr) -{ - // see header file for class documentation -} - -GPUTPCGlobalMergerComponent::GPUTPCGlobalMergerComponent(const GPUTPCGlobalMergerComponent&) : AliHLTProcessor(), fSolenoidBz(0), fClusterErrorCorrectionY(0), 
fClusterErrorCorrectionZ(0), fNWays(1), fNWaysOuter(0), fNoClear(false), fBenchmark("GlobalMerger"), fRec(nullptr), fChain(nullptr) -{ - // dummy -} - -GPUTPCGlobalMergerComponent& GPUTPCGlobalMergerComponent::operator=(const GPUTPCGlobalMergerComponent&) -{ - // dummy - return *this; -} - -GPUTPCGlobalMergerComponent::~GPUTPCGlobalMergerComponent() -{ - if (fRec) { - delete fRec; - } -}; - -// Public functions to implement AliHLTComponent's interface. -// These functions are required for the registration process - -const char* GPUTPCGlobalMergerComponent::GetComponentID() -{ - // see header file for class documentation - return "TPCCAGlobalMerger"; -} - -void GPUTPCGlobalMergerComponent::GetInputDataTypes(AliHLTComponentDataTypeList& list) -{ - // see header file for class documentation - list.clear(); - list.push_back(GPUTPCDefinitions::fgkTrackletsDataType); -} - -AliHLTComponentDataType GPUTPCGlobalMergerComponent::GetOutputDataType() -{ - // see header file for class documentation - return kAliHLTMultipleDataType; -} - -int32_t GPUTPCGlobalMergerComponent::GetOutputDataTypes(AliHLTComponentDataTypeList& tgtList) -{ - // see header file for class documentation - - tgtList.clear(); - tgtList.push_back(kAliHLTDataTypeTrack | kAliHLTDataOriginTPC); - tgtList.push_back(AliHLTTPCDefinitions::TracksOuterDataType() | kAliHLTDataOriginTPC); - return tgtList.size(); -} - -void GPUTPCGlobalMergerComponent::GetOutputDataSize(uint64_t& constBase, double& inputMultiplier) -{ - // see header file for class documentation - // XXX TODO: Find more realistic values. 
- constBase = 0; - inputMultiplier = 1.0; -} - -AliHLTComponent* GPUTPCGlobalMergerComponent::Spawn() -{ - // see header file for class documentation - return new GPUTPCGlobalMergerComponent; -} - -void GPUTPCGlobalMergerComponent::SetDefaultConfiguration() -{ - // Set default configuration for the CA merger component - // Some parameters can be later overwritten from the OCDB - - fSolenoidBz = -5.00668; - fClusterErrorCorrectionY = 0; - fClusterErrorCorrectionZ = 0; - fNWays = 1; - fNWaysOuter = 0; - fNoClear = false; - fBenchmark.Reset(); - fBenchmark.SetTimer(0, "total"); - fBenchmark.SetTimer(1, "reco"); -} - -int32_t GPUTPCGlobalMergerComponent::ReadConfigurationString(const char* arguments) -{ - // Set configuration parameters for the CA merger component from the string - - int32_t iResult = 0; - if (!arguments) { - return iResult; - } - - TString allArgs = arguments; - TString argument; - int32_t bMissingParam = 0; - - TObjArray* pTokens = allArgs.Tokenize(" "); - - int32_t nArgs = pTokens ? 
pTokens->GetEntries() : 0; - - for (int32_t i = 0; i < nArgs; i++) { - argument = ((TObjString*)pTokens->At(i))->GetString(); - if (argument.IsNull()) { - continue; - } - - if (argument.CompareTo("-solenoidBz") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - HLTWarning("argument -solenoidBz is deprecated, magnetic field set up globally (%f)", GetBz()); - continue; - } - - if (argument.CompareTo("-errorCorrectionY") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fClusterErrorCorrectionY = ((TObjString*)pTokens->At(i))->GetString().Atof(); - HLTInfo("Cluster Y error correction factor set to: %f", fClusterErrorCorrectionY); - continue; - } - - if (argument.CompareTo("-errorCorrectionZ") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fClusterErrorCorrectionZ = ((TObjString*)pTokens->At(i))->GetString().Atof(); - HLTInfo("Cluster Z error correction factor set to: %f", fClusterErrorCorrectionZ); - continue; - } - - if (argument.CompareTo("-nways") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fNWays = ((TObjString*)pTokens->At(i))->GetString().Atoi(); - HLTInfo("nways set to: %d", fNWays); - continue; - } - - if (argument.CompareTo("-nwaysouter") == 0) { - fNWaysOuter = 1; - HLTInfo("nwaysouter enabled"); - continue; - } - - if (argument.CompareTo("-noclear") == 0) { - fNoClear = true; - HLTInfo("noclear enabled"); - continue; - } - - HLTError("Unknown option \"%s\"", argument.Data()); - iResult = -EINVAL; - } - delete pTokens; - - if (bMissingParam) { - HLTError("Specifier missed for parameter \"%s\"", argument.Data()); - iResult = -EINVAL; - } - - return iResult; -} - -int32_t GPUTPCGlobalMergerComponent::ReadCDBEntry(const char* cdbEntry, const char* chainId) -{ - // see header file for class documentation - - const char* defaultNotify = ""; - - if (!cdbEntry) { - cdbEntry = "HLT/ConfigTPC/TPCCAGlobalMerger"; - defaultNotify = " 
(default)"; - chainId = 0; - } - - HLTInfo("configure from entry \"%s\"%s, chain id %s", cdbEntry, defaultNotify, (chainId != nullptr && chainId[0] != 0) ? chainId : ""); - AliCDBEntry* pEntry = AliCDBManager::Instance()->Get(cdbEntry); //,GetRunNo()); - - if (!pEntry) { - HLTError("cannot fetch object \"%s\" from CDB", cdbEntry); - return -EINVAL; - } - - TObjString* pString = dynamic_cast(pEntry->GetObject()); - - if (!pString) { - HLTError("configuration object \"%s\" has wrong type, required TObjString", cdbEntry); - return -EINVAL; - } - - HLTInfo("received configuration object string: \"%s\"", pString->GetString().Data()); - - return ReadConfigurationString(pString->GetString().Data()); -} - -int32_t GPUTPCGlobalMergerComponent::Configure(const char* cdbEntry, const char* chainId, const char* commandLine) -{ - // Configure the component - // There are few levels of configuration, - // parameters which are set on one step can be overwritten on the next step - - //* read hard-coded values - - SetDefaultConfiguration(); - - //* read the default CDB entry - - int32_t iResult = ReadCDBEntry(nullptr, chainId); - if (iResult) { - return iResult; - } - - //* read magnetic field - - fSolenoidBz = GetBz(); - - //* read the actual CDB entry if required - - iResult = (cdbEntry) ? 
ReadCDBEntry(cdbEntry, chainId) : 0; - if (iResult) { - return iResult; - } - - //* read extra parameters from input (if they are) - - if (commandLine && commandLine[0] != '\0') { - HLTInfo("received configuration string from HLT framework: \"%s\"", commandLine); - iResult = ReadConfigurationString(commandLine); - if (iResult) { - return iResult; - } - } - - fRec = GPUReconstruction::CreateInstance("CPU", true); - if (fRec == nullptr) { - return -EINVAL; - } - fChain = fRec->AddChain(); - - // Initialize the merger - - GPUSettingsGRP grp; - GPUSettingsRec rec; - GPUSettingsProcessing devProc; - grp.solenoidBzNominalGPU = fSolenoidBz; - if (fClusterErrorCorrectionY > 1.e-4) { - rec.tpc.clusterError2CorrectionY = fClusterErrorCorrectionY * fClusterErrorCorrectionY; - } - if (fClusterErrorCorrectionZ > 1.e-4) { - rec.tpc.clusterError2CorrectionZ = fClusterErrorCorrectionZ * fClusterErrorCorrectionZ; - } - rec.tpc.nWays = fNWays; - rec.tpc.nWaysOuter = fNWaysOuter; - rec.tpc.mergerInterpolateErrors = false; - rec.nonConsecutiveIDs = true; - rec.tpc.mergerReadFromTrackerDirectly = false; - devProc.ompThreads = 1; - devProc.ompKernels = false; - - GPURecoStepConfiguration steps; - steps.steps.set(GPUDataTypes::RecoStep::TPCMerging); - steps.inputs.set(GPUDataTypes::InOutType::TPCSectorTracks); - steps.outputs.set(GPUDataTypes::InOutType::TPCMergedTracks); - - fRec->SetSettings(&grp, &rec, &devProc, &steps); - fChain->LoadClusterErrors(); - if (fRec->Init()) { - return -EINVAL; - } - - return 0; -} - -int32_t GPUTPCGlobalMergerComponent::DoInit(int argc, const char** argv) -{ - // see header file for class documentation - - TString arguments = ""; - for (int32_t i = 0; i < argc; i++) { - if (!arguments.IsNull()) { - arguments += " "; - } - arguments += argv[i]; - } - - int32_t retVal = Configure(nullptr, nullptr, arguments.Data()); - - return retVal; -} - -int32_t GPUTPCGlobalMergerComponent::Reconfigure(const char* cdbEntry, const char* chainId) -{ - // Reconfigure the 
component from OCDB - - return Configure(cdbEntry, chainId, nullptr); -} - -int32_t GPUTPCGlobalMergerComponent::DoDeinit() -{ - // see header file for class documentation - if (fChain == fgCurrentMergerReconstruction) { - fgCurrentMergerReconstruction = nullptr; - } - delete fRec; - fRec = nullptr; - - return 0; -} - -int32_t GPUTPCGlobalMergerComponent::DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& /*trigData*/, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, AliHLTComponentBlockDataList& outputBlocks) -{ - // see header file for class documentation - int32_t iResult = 0; - uint32_t maxBufferSize = size; - - size = 0; - - if (!outputPtr) { - return -ENOSPC; - } - if (!IsDataEvent()) { - return 0; - } - fBenchmark.StartNewEvent(); - fBenchmark.Start(0); - - fChain->GetTPCMerger().Clear(); - - int32_t nSlicesSet = 0; - const AliHLTComponentBlockData* const blocksEnd = blocks + evtData.fBlockCnt; - for (const AliHLTComponentBlockData* block = blocks; block < blocksEnd; ++block) { - if (block->fDataType != GPUTPCDefinitions::fgkTrackletsDataType) { - continue; - } - - fBenchmark.AddInput(block->fSize); - - int32_t slice = AliHLTTPCDefinitions::GetMinSliceNr(*block); - if (slice < 0 || slice >= AliHLTTPCGeometry::GetNSlice()) { - HLTError("invalid slice number %d extracted from specification 0x%08lx, skipping block of type %s", slice, block->fSpecification, DataType2Text(block->fDataType).c_str()); - // just remember the error, if there are other valid blocks ignore the error, return code otherwise - iResult = -EBADF; - continue; - } - - if (slice != AliHLTTPCDefinitions::GetMaxSliceNr(*block)) { - // the code was not written for/ never used with multiple slices in one data block/ specification - HLTWarning("specification 0x%08lx indicates multiple slices in data block %s: never used before, please audit the code", block->fSpecification, DataType2Text(block->fDataType).c_str()); - } - 
GPUTPCSliceOutput* sliceOut = reinterpret_cast(block->fPtr); - fChain->GetTPCMerger().SetSliceData(slice, sliceOut); - nSlicesSet++; - } - if (nSlicesSet != 36) { - if (nSlicesSet != 0) { - HLTError("Incomplete input data"); - return (-EINVAL); - } - return 0; - } - fBenchmark.Start(1); - fChain->RunTPCTrackingMerger(); - if (fChain->CheckErrorCodes()) { - return (-EINVAL); - } - fBenchmark.Stop(1); - - // Fill output - uint32_t mySize = 0; - { - AliHLTTracksData* outPtr = (AliHLTTracksData*)(outputPtr); - AliHLTExternalTrackParam* currOutTrack = outPtr->fTracklets; - mySize = ((AliHLTUInt8_t*)currOutTrack) - ((AliHLTUInt8_t*)outputPtr); - outPtr->fCount = 0; - int32_t nTracks = fChain->GetTPCMerger().NOutputTracks(); - - for (int32_t itr = 0; itr < nTracks; itr++) { - // convert GPUTPCGMMergedTrack to AliHLTTrack - - const GPUTPCGMMergedTrack& track = fChain->GetTPCMerger().OutputTracks()[itr]; - if (!track.OK()) { - continue; - } - uint32_t dSize = sizeof(AliHLTExternalTrackParam) + track.NClusters() * sizeof(uint32_t); - - if (mySize + dSize > maxBufferSize) { - HLTWarning("Output buffer size exceed (buffer size %d, current size %d), %d tracks are not stored", maxBufferSize, mySize, nTracks - itr + 1); - iResult = -ENOSPC; - break; - } - - // first convert to AliExternalTrackParam - - AliExternalTrackParam tp; - track.GetParam().GetExtParam(tp, track.GetAlpha()); - - // normalize the angle to +-Pi - - currOutTrack->fAlpha = tp.GetAlpha() - CAMath::Round(tp.GetAlpha() / CAMath::TwoPi()) * CAMath::TwoPi(); - currOutTrack->fX = tp.GetX(); - currOutTrack->fY = tp.GetY(); - currOutTrack->fZ = tp.GetZ(); - currOutTrack->fLastX = track.LastX(); - currOutTrack->fLastY = track.LastY(); - currOutTrack->fLastZ = track.LastZ(); - - currOutTrack->fq1Pt = tp.GetSigned1Pt(); - currOutTrack->fSinPhi = tp.GetSnp(); - currOutTrack->fTgl = tp.GetTgl(); - for (int32_t i = 0; i < 15; i++) { - currOutTrack->fC[i] = tp.GetCovariance()[i]; - } - currOutTrack->fTrackID = itr; - 
currOutTrack->fFlags = 0; - currOutTrack->fNPoints = 0; - for (int32_t i = 0; i < track.NClusters(); i++) { - if (fChain->GetTPCMerger().Clusters()[track.FirstClusterRef() + i].state & GPUTPCGMMergedTrackHit::flagReject) { - continue; - } - currOutTrack->fPointIDs[currOutTrack->fNPoints++] = fChain->GetTPCMerger().Clusters()[track.FirstClusterRef() + i].num; - } - dSize = sizeof(AliHLTExternalTrackParam) + currOutTrack->fNPoints * sizeof(uint32_t); - - currOutTrack = (AliHLTExternalTrackParam*)(((Byte_t*)currOutTrack) + dSize); - mySize += dSize; - outPtr->fCount++; - } - - AliHLTComponentBlockData resultData; - FillBlockData(resultData); - resultData.fOffset = 0; - resultData.fSize = mySize; - resultData.fDataType = kAliHLTDataTypeTrack | kAliHLTDataOriginTPC; - resultData.fSpecification = AliHLTTPCDefinitions::EncodeDataSpecification(0, 35, 0, 5); - outputBlocks.push_back(resultData); - fBenchmark.AddOutput(resultData.fSize); - - size = resultData.fSize; - } - - if (fNWays > 1 && fNWaysOuter) { - uint32_t newSize = 0; - AliHLTTracksData* outPtr = (AliHLTTracksData*)(outputPtr + size); - AliHLTExternalTrackParam* currOutTrack = outPtr->fTracklets; - newSize = ((AliHLTUInt8_t*)currOutTrack) - (outputPtr + size); - outPtr->fCount = 0; - int32_t nTracks = fChain->GetTPCMerger().NOutputTracks(); - - for (int32_t itr = 0; itr < nTracks; itr++) { - const GPUTPCGMMergedTrack& track = fChain->GetTPCMerger().OutputTracks()[itr]; - if (!track.OK()) { - continue; - } - uint32_t dSize = sizeof(AliHLTExternalTrackParam); - - if (mySize + newSize + dSize > maxBufferSize) { - HLTWarning("Output buffer size exceed (buffer size %d, current size %d), %d tracks are not stored", maxBufferSize, mySize + newSize + dSize, nTracks - itr + 1); - iResult = -ENOSPC; - break; - } - - // first convert to AliExternalTrackParam - - AliExternalTrackParam tp; - track.GetParam().GetExtParam(tp, track.GetAlpha()); - - // normalize the angle to +-Pi - - currOutTrack->fAlpha = 
track.OuterParam().alpha - CAMath::Round(tp.GetAlpha() / CAMath::TwoPi()) * CAMath::TwoPi(); - currOutTrack->fX = track.OuterParam().X; - currOutTrack->fY = track.OuterParam().P[0]; - currOutTrack->fZ = track.OuterParam().P[1]; - currOutTrack->fLastX = track.LastX(); - currOutTrack->fLastY = track.LastY(); - currOutTrack->fLastZ = track.LastZ(); - - currOutTrack->fq1Pt = track.OuterParam().P[4]; - currOutTrack->fSinPhi = track.OuterParam().P[2]; - currOutTrack->fTgl = track.OuterParam().P[3]; - for (int32_t i = 0; i < 15; i++) { - currOutTrack->fC[i] = track.OuterParam().C[i]; - } - currOutTrack->fTrackID = itr; - currOutTrack->fFlags = 0; - currOutTrack->fNPoints = 0; - - currOutTrack = (AliHLTExternalTrackParam*)(((Byte_t*)currOutTrack) + dSize); - newSize += dSize; - outPtr->fCount++; - } - - AliHLTComponentBlockData resultData; - FillBlockData(resultData); - resultData.fOffset = mySize; - resultData.fSize = newSize; - resultData.fDataType = AliHLTTPCDefinitions::TracksOuterDataType() | kAliHLTDataOriginTPC; - resultData.fSpecification = AliHLTTPCDefinitions::EncodeDataSpecification(0, 35, 0, 5); - outputBlocks.push_back(resultData); - fBenchmark.AddOutput(resultData.fSize); - - size = resultData.fSize; - } - - HLTInfo("CAGlobalMerger:: output %d tracks / %d hits", fChain->GetTPCMerger().NOutputTracks(), fChain->GetTPCMerger().NOutputTrackClusters()); - - if (fNoClear) { - fgCurrentMergerReconstruction = fChain; - } else { - fChain->GetTPCMerger().Clear(); - } - - fBenchmark.Stop(0); - HLTInfo(fBenchmark.GetStatistics()); - return iResult; -} - -const GPUTPCGMMerger* GPUTPCGlobalMergerComponent::GetCurrentMerger() -{ - if (fgCurrentMergerReconstruction == nullptr) { - return nullptr; - } - return &fgCurrentMergerReconstruction->GetTPCMerger(); -} diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.h b/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.h deleted file mode 100644 index b34c5d3d799b6..0000000000000 --- 
a/GPU/GPUTracking/Merger/GPUTPCGlobalMergerComponent.h +++ /dev/null @@ -1,147 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCGlobalMergerComponent.h -/// \author David Rohr, Sergey Gorbunov, Matthias Kretz - -#ifndef GPUTPCGLOBALMERGERCOMPONENT_H -#define GPUTPCGLOBALMERGERCOMPONENT_H - -/// @file GPUTPCGlobalMergerComponent.h -/// @author Matthias Kretz -/// @date -/// @brief HLT TPC CA global merger component. -/// - -#ifndef GPUCA_ALIROOT_LIB -#define GPUCA_ALIROOT_LIB -#endif - -#include "AliHLTProcessor.h" -#include "AliHLTComponentBenchmark.h" -#include "GPUParam.h" - -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -class GPUTPCGMMerger; -class GPUReconstruction; -class GPUChainTracking; -} // namespace gpu -} // namespace GPUCA_NAMESPACE - -/** - * @class GPUTPCGlobalMergerComponent - * The TPC global merger component - * - * Interface to the global merger of the CA tracker for HLT. - */ -class GPUTPCGlobalMergerComponent : public AliHLTProcessor -{ - public: - /** - * Constructs a GPUTPCGlobalMergerComponent. - */ - GPUTPCGlobalMergerComponent(); - - /** - * Destructs the GPUTPCGlobalMergerComponent - */ - virtual ~GPUTPCGlobalMergerComponent(); - - // Public functions to implement AliHLTComponent's interface. 
- // These functions are required for the registration process - - /** - * @copydoc AliHLTComponent::GetComponentID - */ - const char* GetComponentID(); - - /** - * @copydoc AliHLTComponent::GetInputDataTypes - */ - void GetInputDataTypes(AliHLTComponentDataTypeList& list); - int32_t GetOutputDataTypes(AliHLTComponentDataTypeList& tgtList); - - /** - * @copydoc AliHLTComponent::GetOutputDataType - */ - AliHLTComponentDataType GetOutputDataType(); - - /** - * @copydoc AliHLTComponent::GetOutputDataSize - */ - virtual void GetOutputDataSize(uint64_t& constBase, double& inputMultiplier); - - /** - * @copydoc AliHLTComponent::Spawn - */ - AliHLTComponent* Spawn(); - - static const GPUCA_NAMESPACE::gpu::GPUTPCGMMerger* GetCurrentMerger(); - - protected: - // Protected functions to implement AliHLTComponent's interface. - // These functions provide initialization as well as the actual processing - // capabilities of the component. - - /** - * @copydoc AliHLTComponent::DoInit - */ - int32_t DoInit(int argc, const char** argv); - - /** - * @copydoc AliHLTComponent::DoDeinit - */ - int32_t DoDeinit(); - - /** reconfigure **/ - int32_t Reconfigure(const char* cdbEntry, const char* chainId); - - /** - * @copydoc @ref AliHLTProcessor::DoEvent - */ - int32_t DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& trigData, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, AliHLTComponentBlockDataList& outputBlocks); - - using AliHLTProcessor::DoEvent; - - private: - static GPUTPCGlobalMergerComponent fgGPUTPCGlobalMergerComponent; - - // disable copy - GPUTPCGlobalMergerComponent(const GPUTPCGlobalMergerComponent&); - GPUTPCGlobalMergerComponent& operator=(const GPUTPCGlobalMergerComponent&); - - /** set configuration parameters **/ - void SetDefaultConfiguration(); - int32_t ReadConfigurationString(const char* arguments); - int32_t ReadCDBEntry(const char* cdbEntry, const char* chainId); - int32_t Configure(const char* 
cdbEntry, const char* chainId, const char* commandLine); - - /** the global merger object */ - - double fSolenoidBz; // magnetic field - double fClusterErrorCorrectionY; // correction for the cluster error during pre-fit - double fClusterErrorCorrectionZ; // correction for the cluster error during pre-fit - int32_t fNWays; // Setting for merger - int8_t fNWaysOuter; // Store outer param after n-way fit - bool fNoClear; // Do not clear memory after processing an event - static const GPUCA_NAMESPACE::gpu::GPUChainTracking* fgCurrentMergerReconstruction; // Pointer to current merger in case memory is not cleared after processing the event - AliHLTComponentBenchmark fBenchmark; // benchmark - GPUCA_NAMESPACE::gpu::GPUParam mParam; // ca params - GPUCA_NAMESPACE::gpu::GPUReconstruction* fRec; // GPUReconstruction - GPUCA_NAMESPACE::gpu::GPUChainTracking* fChain; - - ClassDef(GPUTPCGlobalMergerComponent, 0); -}; - -#endif // GPUTPCGLOBALMERGERCOMPONENT_H diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 8cca91c0a0033..643ca7b7a99df 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -30,7 +30,7 @@ #include "GPUTrackParamConvert.h" #include "GPUCommonTypeTraits.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::track; using namespace o2::base; using namespace o2::tpc; diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx index f7e3bca47a0fc..f99544f239bb7 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefitKernel.cxx @@ -16,7 +16,7 @@ #include "GPUTrackingRefitKernel.h" #include "GPUTrackingRefit.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template GPUdii() void GPUTrackingRefitKernel::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& 
smem, processorType& GPUrestrict() processors) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h b/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h index c2fc7e58061da..0eabd82e59a02 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h @@ -17,7 +17,7 @@ #include "GPUTPCDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -81,6 +81,6 @@ struct GPUTPCBaseTrackParam { float mP[5]; // 'active' track parameters: Y, Z, SinPhi, DzDs, q/Pt }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h b/GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h index cf35efbd8af6c..1961ffabd791c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h @@ -17,7 +17,7 @@ #include "GPUTPCDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -38,6 +38,6 @@ struct GPUTPCClusterData { #endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // CLUSTERDATA_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx index 18813d53932d8..bada60b9cec80 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx @@ -15,7 +15,7 @@ #include "GPUTPCCreateOccupancyMap.h" #include "GPUTPCClusterOccupancyMap.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCCreateOccupancyMap::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUTPCClusterOccupancyMapBin* GPUrestrict() map) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h b/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h index 
8b96ad8e74183..91f5816f69df2 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUTPCClusterOccupancyMapBin; @@ -34,6 +34,6 @@ class GPUTPCCreateOccupancyMap : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... args); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx index 5c3e473aab0c9..bd33927408a26 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx @@ -16,7 +16,7 @@ #include "GPUTPCTracker.h" #include "GPUCommonMath.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCCreateSliceData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h index 2789621a7de5c..9065b220bb44d 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -43,6 +43,6 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 
#endif // GPUTPCCREATESLICEDATA_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCDef.h b/GPU/GPUTracking/SliceTracker/GPUTPCDef.h index a134954451e8c..4b4f130faed65 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCDef.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCDef.h @@ -21,7 +21,7 @@ #define CALINK_INVAL ((calink) -1) #define CALINK_DEAD_CHANNEL ((calink) -2) -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -34,7 +34,7 @@ typedef uint32_t cahit; #endif struct cahit2 { cahit x, y; }; } -} // GPUCA_NAMESPACE::GPU +} // o2::GPU #ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME // Needs full clusterdata #define GPUCA_FULL_CLUSTERDATA diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx index c86249fbb6f77..cdc72047ef0a4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx @@ -20,7 +20,7 @@ #include "GPUCommonMath.h" #include "GPUParam.inc" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h index 5dc469e2654f5..c3f765f42cec5 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h @@ -18,7 +18,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -62,6 +62,6 @@ class GPUTPCGlobalTrackingCopyNumbers : public GPUKernelTemplate }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCTRACKLETCONSTRUCTOR_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx 
b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx index 56d2e88db1c28..367a447b25a89 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx @@ -14,7 +14,7 @@ #include "GPUTPCGrid.h" #include "GPUCommonMath.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #if !defined(assert) && !defined(GPUCA_GPUCODE) #include diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h index a3cd7916f0e6d..1fbb1c5a23c45 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h @@ -17,7 +17,7 @@ #include "GPUTPCDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -67,6 +67,6 @@ class GPUTPCGrid float mStepZInv; //* inverse bin size in Z }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCGRID_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCHit.h b/GPU/GPUTracking/SliceTracker/GPUTPCHit.h index 0fe86f8ef21a3..34a59b2f08dd2 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCHit.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCHit.h @@ -17,7 +17,7 @@ #include "GPUTPCDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -44,6 +44,6 @@ class GPUTPCHit friend class GPUTPCNeighboursFinder; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCHIT_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCHitId.h b/GPU/GPUTracking/SliceTracker/GPUTPCHitId.h index 9fb27d7a1a892..19cfde1d76f4b 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCHitId.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCHitId.h @@ -15,7 +15,7 @@ #ifndef GPUTPCHITID_H #define GPUTPCHITID_H -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -30,6 +30,6 @@ class GPUTPCHitId int32_t mId; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCHITID_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h b/GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h 
index 25b423b965f07..13f2753db6c93 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h @@ -15,7 +15,7 @@ #ifndef GPUTPCMCINFO_H #define GPUTPCMCINFO_H -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -40,6 +40,6 @@ struct GPUTPCMCInfoCol { uint32_t num; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx index 9293801f5f5f9..04b303949e9f4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx @@ -15,7 +15,7 @@ #include "GPUTPCNeighboursCleaner.h" #include "GPUTPCTracker.h" #include "GPUCommonMath.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCNeighboursCleaner::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h index 311fef5204c3d..1682e18244732 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -49,6 +49,6 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate GPUd() static void Thread(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCNEIGHBOURSCLEANER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx index 
69d05fc3176b4..36254243e81b8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx @@ -17,7 +17,7 @@ #include "GPUTPCTracker.h" //#include "GPUCommonMath.h" #include "GPUDefMacros.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h index 882428821ae84..2d71d948ad9e1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -60,6 +60,6 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCNEIGHBOURSFINDER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx index 8ee5e2cbddd62..3d0102f2938e6 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx @@ -13,7 +13,7 @@ /// \author Sergey Gorbunov, Ivan Kisel, David Rohr #include "GPUTPCRow.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #if !defined(GPUCA_GPUCODE) GPUTPCRow::GPUTPCRow() : mNHits(0), mX(0), mMaxY(0), mGrid(), mHy0(0), mHz0(0), mHstepY(0), mHstepZ(0), mHstepYi(0), mHstepZi(0), mHitNumberOffset(0), mFirstHitInBinOffset(0) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h b/GPU/GPUTracking/SliceTracker/GPUTPCRow.h index 
7c8e96c8352a8..d401311683f28 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCRow.h @@ -18,7 +18,7 @@ #include "GPUTPCDef.h" #include "GPUTPCGrid.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -79,6 +79,6 @@ class GPUTPCRow uint32_t mFirstHitInBinOffset; // offset in Tracker::mRowData to find the FirstHitInBin }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCROW_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx index ba5da49ff6ff9..9f06b00f30c3f 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx @@ -22,7 +22,7 @@ #include "GPUCommonAlgorithm.h" #include "GPUTPCSectorDebugSortKernels.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCSectorDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h index 04001603a2a28..5617f9745311e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCTracker; @@ -38,6 +38,6 @@ class GPUTPCSectorDebugSortKernels : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif // GPUTPCSECTORDEBUGSORTKERNELS_H diff --git 
a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx index 48c490a6f5559..e02cba2144920 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx @@ -28,7 +28,7 @@ #include "GPUReconstruction.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifndef GPUCA_GPUCODE diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h index c45c35c667f25..72e9f9d2c19d5 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h @@ -21,7 +21,7 @@ #include "GPUParam.h" #include "GPUProcessor.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -192,6 +192,6 @@ GPUdi() void GPUTPCSliceData::SetHitWeight(const GPUTPCRow& row, uint32_t hitInd GPUdi() int32_t GPUTPCSliceData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCSLICEDATA_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h index 59d079d7e6328..1d958de1ff7a4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h @@ -17,7 +17,7 @@ #include "GPUTPCDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -64,6 +64,6 @@ class GPUTPCSliceOutCluster #endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx index b7f876dc87e2e..06b87c7a682d3 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx @@ -17,7 +17,7 @@ #include "GPUCommonMath.h" #include -using namespace 
GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; uint32_t GPUTPCSliceOutput::EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h index 6d322601789b6..3b5712ccbb8f4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h @@ -18,7 +18,7 @@ #include "GPUTPCDef.h" #include "GPUTPCTrack.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -78,5 +78,5 @@ class GPUTPCSliceOutput size_t mMemorySize; // Amount of memory really used }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx index 2b097ab8f1835..7b60e0621e78f 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx @@ -16,7 +16,7 @@ #include "GPUTPCTracker.h" #include "GPUCommonMath.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h index ed49fad8fc6dc..f818e6986dbc6 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -49,6 +49,6 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace 
gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCSTARTHITSFINDER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx index 84ad70b58b964..034fa8f053f42 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx @@ -19,7 +19,7 @@ #include "GPUCommonMath.h" #include "GPUDefMacros.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCStartHitsSorter::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h index 4c937b0414e30..0877b6c15a511 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -49,6 +49,6 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCSTARTHITSSORTER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx index 573c1f6f9c8ba..72cabd7c35ad2 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx @@ -13,4 +13,4 @@ /// \author Sergey Gorbunov, David Rohr #include "GPUTPCTrack.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h index 
9553435fc49ab..18418bc031d7e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h @@ -19,7 +19,7 @@ #include "GPUTPCDef.h" #include "GPUTPCSliceOutCluster.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -68,6 +68,6 @@ class GPUTPCTrack private: }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCTRACK_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h index 972c62ffe7e20..c9ab6158179bd 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h @@ -17,7 +17,7 @@ #include "GPUTPCTrackParam.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -88,6 +88,6 @@ GPUdi() void GPUTPCTrackLinearisation::Set(float SinPhi1, float CosPhi1, float D SetQPt(QPt1); } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCTRACKLINEARISATION_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx index 5c1c99c4d75b2..68ced574a18a9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx @@ -16,7 +16,7 @@ #include "GPUTPCTrackParam.h" #include "GPUTPCGeometry.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; // // Circle in XY: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h index 792cba4f519e1..72f9d5fbaa23d 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h @@ -19,7 +19,7 @@ #include "GPUTPCDef.h" #include "GPUCommonMath.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -182,6 +182,6 @@ GPUdi() void GPUTPCTrackParam::InitParam() SetZOffset(0); } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace 
o2 #endif // GPUTPCTRACKPARAM_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx index 4970ff90a934c..c038146cf8497 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx @@ -34,7 +34,7 @@ #include "GPUMemorySizeScalers.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h index bd1ca018186e4..10259c80ac80c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h @@ -27,7 +27,7 @@ #include "GPUTPCTracklet.h" #include "GPUProcessor.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -281,6 +281,6 @@ class GPUTPCTracker : public GPUProcessor static int32_t StarthitSortComparison(const void* a, const void* b); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCTRACKER_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.cxx deleted file mode 100644 index 4539cf334bddd..0000000000000 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.cxx +++ /dev/null @@ -1,709 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCTrackerComponent.cxx -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#include "GPUTPCTrackerComponent.h" -#include "GPUReconstruction.h" -#include "GPUChainTracking.h" -#include "GPUParam.h" - -#include "AliHLTTPCRawCluster.h" -#include "AliHLTTPCClusterXYZ.h" -#include "AliHLTTPCClusterMCData.h" -#include "AliHLTTPCGeometry.h" -#include "AliHLTTPCDefinitions.h" -#include "GPUTPCDefinitions.h" -#include "AliExternalTrackParam.h" -#include "TMath.h" -#include "AliCDBEntry.h" -#include "AliCDBManager.h" -#include "TObjString.h" -#include "TObjArray.h" -#include "GPUTPCSliceOutput.h" -#include "GPUTPCClusterData.h" -#include "GPUTPCGMMergedTrackHit.h" -#if __GNUC__ >= 3 -using namespace std; -#endif - -using namespace GPUCA_NAMESPACE::gpu; - -const AliHLTComponentDataType GPUTPCDefinitions::fgkTrackletsDataType = AliHLTComponentDataTypeInitializer("CATRACKL", kAliHLTDataOriginTPC); - -/** ROOT macro for the implementation of ROOT specific class methods */ -ClassImp(GPUTPCTrackerComponent); - -GPUTPCTrackerComponent::GPUTPCTrackerComponent() - : fSolenoidBz(0), fMinNTrackClusters(-1), fMinTrackPt(GPUCA_MIN_TRACK_PTB5_DEFAULT), fClusterZCut(500.), mNeighboursSearchArea(0), fClusterErrorCorrectionY(0), fClusterErrorCorrectionZ(0), fBenchmark("CATracker"), fAllowGPU(0), fGPUHelperThreads(-1), fCPUTrackers(0), fGlobalTracking(0), fGPUDeviceNum(-1), fGPUType("CPU"), fGPUStuckProtection(0), fAsync(0), fSearchWindowDZDR(0.), fRec(0), fChain(0), fAsyncProcessor() -{ - // see header file for class documentation - // or - // refer to README to build package - // or - // visit http://web.ift.uib.no/~kjeks/doc/alice-hlt -} - -GPUTPCTrackerComponent::GPUTPCTrackerComponent(const GPUTPCTrackerComponent&) - : AliHLTProcessor(), fSolenoidBz(0), fMinNTrackClusters(-1), fMinTrackPt(GPUCA_MIN_TRACK_PTB5_DEFAULT), fClusterZCut(500.), mNeighboursSearchArea(0), fClusterErrorCorrectionY(0), fClusterErrorCorrectionZ(0), fBenchmark("CATracker"), fAllowGPU(0), 
fGPUHelperThreads(-1), fCPUTrackers(0), fGlobalTracking(0), fGPUDeviceNum(-1), fGPUType("CPU"), fGPUStuckProtection(0), fAsync(0), fSearchWindowDZDR(0.), fRec(0), fChain(0), fAsyncProcessor() -{ - // see header file for class documentation - HLTFatal("copy constructor untested"); -} - -GPUTPCTrackerComponent& GPUTPCTrackerComponent::operator=(const GPUTPCTrackerComponent&) -{ - // see header file for class documentation - HLTFatal("assignment operator untested"); - return *this; -} - -GPUTPCTrackerComponent::~GPUTPCTrackerComponent() -{ - // see header file for class documentation - if (fRec) { - delete fRec; - } -} - -// -// Public functions to implement AliHLTComponent's interface. -// These functions are required for the registration process -// - -const char* GPUTPCTrackerComponent::GetComponentID() -{ - // see header file for class documentation - return "TPCCATracker"; -} - -void GPUTPCTrackerComponent::GetInputDataTypes(vector& list) -{ - // see header file for class documentation - list.clear(); - list.push_back(AliHLTTPCDefinitions::RawClustersDataType()); - list.push_back(AliHLTTPCDefinitions::ClustersXYZDataType()); - list.push_back(AliHLTTPCDefinitions::AliHLTDataTypeClusterMCInfo()); -} - -AliHLTComponentDataType GPUTPCTrackerComponent::GetOutputDataType() -{ - // see header file for class documentation - return GPUTPCDefinitions::fgkTrackletsDataType; -} - -void GPUTPCTrackerComponent::GetOutputDataSize(uint64_t& constBase, double& inputMultiplier) -{ - // define guess for the output data size - constBase = 10000; // minimum size - inputMultiplier = 0.6; // size relative to input -} - -AliHLTComponent* GPUTPCTrackerComponent::Spawn() -{ - // see header file for class documentation - return new GPUTPCTrackerComponent; -} - -void GPUTPCTrackerComponent::SetDefaultConfiguration() -{ - // Set default configuration for the CA tracker component - // Some parameters can be later overwritten from the OCDB - - fSolenoidBz = -5.00668; - fMinNTrackClusters = -1; 
- fMinTrackPt = GPUCA_MIN_TRACK_PTB5_DEFAULT; - fClusterZCut = 500.; - mNeighboursSearchArea = 0; - fClusterErrorCorrectionY = 0; - fClusterErrorCorrectionZ = 0; - fBenchmark.Reset(); - fBenchmark.SetTimer(0, "total"); - fBenchmark.SetTimer(1, "reco"); -} - -int32_t GPUTPCTrackerComponent::ReadConfigurationString(const char* arguments) -{ - // Set configuration parameters for the CA tracker component from the string - - int32_t iResult = 0; - if (!arguments) { - return iResult; - } - - TString allArgs = arguments; - TString argument; - int32_t bMissingParam = 0; - - TObjArray* pTokens = allArgs.Tokenize(" "); - - int32_t nArgs = pTokens ? pTokens->GetEntries() : 0; - - for (int32_t i = 0; i < nArgs; i++) { - argument = ((TObjString*)pTokens->At(i))->GetString(); - if (argument.IsNull()) { - continue; - } - - if (argument.CompareTo("-solenoidBz") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - HLTWarning("argument -solenoidBz is deprecated, magnetic field set up globally (%f)", GetBz()); - continue; - } - - if (argument.CompareTo("-minNClustersOnTrack") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fMinNTrackClusters = ((TObjString*)pTokens->At(i))->GetString().Atoi(); - HLTInfo("minNClustersOnTrack set to: %d", fMinNTrackClusters); - continue; - } - - if (argument.CompareTo("-minTrackPt") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fMinTrackPt = ((TObjString*)pTokens->At(i))->GetString().Atof(); - HLTInfo("minTrackPt set to: %f", fMinTrackPt); - continue; - } - - if (argument.CompareTo("-clusterZCut") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fClusterZCut = TMath::Abs(((TObjString*)pTokens->At(i))->GetString().Atof()); - HLTInfo("ClusterZCut set to: %f", fClusterZCut); - continue; - } - - if (argument.CompareTo("-neighboursSearchArea") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - 
mNeighboursSearchArea = TMath::Abs(((TObjString*)pTokens->At(i))->GetString().Atof()); - HLTInfo("NeighboursSearchArea set to: %f", mNeighboursSearchArea); - continue; - } - - if (argument.CompareTo("-errorCorrectionY") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fClusterErrorCorrectionY = ((TObjString*)pTokens->At(i))->GetString().Atof(); - HLTInfo("Cluster Y error correction factor set to: %f", fClusterErrorCorrectionY); - continue; - } - - if (argument.CompareTo("-errorCorrectionZ") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fClusterErrorCorrectionZ = ((TObjString*)pTokens->At(i))->GetString().Atof(); - HLTInfo("Cluster Z error correction factor set to: %f", fClusterErrorCorrectionZ); - continue; - } - - if (argument.CompareTo("-allowGPU") == 0) { - fAllowGPU = 1; - HLTInfo("Will try to run tracker on GPU"); - continue; - } - - if (argument.CompareTo("-GlobalTracking") == 0) { - fGlobalTracking = 1; - HLTInfo("Global Tracking Activated"); - continue; - } - - if (argument.CompareTo("-GPUHelperThreads") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fGPUHelperThreads = ((TObjString*)pTokens->At(i))->GetString().Atoi(); - HLTInfo("Number of GPU Helper Threads set to: %d", fGPUHelperThreads); - continue; - } - - if (argument.CompareTo("-CPUTrackers") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fCPUTrackers = ((TObjString*)pTokens->At(i))->GetString().Atoi(); - HLTInfo("Number of CPU Trackers set to: %d", fCPUTrackers); - continue; - } - - if (argument.CompareTo("-SearchWindowDZDR") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fSearchWindowDZDR = ((TObjString*)pTokens->At(i))->GetString().Atof(); - HLTInfo("Search Window DZDR set to: %f", fSearchWindowDZDR); - continue; - } - - if (argument.CompareTo("-GPUDeviceNum") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - 
break; - } - fGPUDeviceNum = ((TObjString*)pTokens->At(i))->GetString().Atoi(); - HLTInfo("Using GPU Device Number %d", fGPUDeviceNum); - continue; - } - - if (argument.CompareTo("-GPUType") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fGPUType = ((TObjString*)pTokens->At(i))->GetString(); - continue; - } - - if (argument.CompareTo("-GPUStuckProtection") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fGPUStuckProtection = ((TObjString*)pTokens->At(i))->GetString().Atoi(); - continue; - } - - if (argument.CompareTo("-AsyncGPUStuckProtection") == 0) { - if ((bMissingParam = (++i >= pTokens->GetEntries()))) { - break; - } - fAsync = ((TObjString*)pTokens->At(i))->GetString().Atoi(); - continue; - } - - HLTError("Unknown option \"%s\"", argument.Data()); - iResult = -EINVAL; - } - delete pTokens; - - if (bMissingParam) { - HLTError("Specifier missed for parameter \"%s\"", argument.Data()); - iResult = -EINVAL; - } - - return iResult; -} - -int32_t GPUTPCTrackerComponent::ReadCDBEntry(const char* cdbEntry, const char* chainId) -{ - // see header file for class documentation - - const char* defaultNotify = ""; - - if (!cdbEntry) { - cdbEntry = "HLT/ConfigTPC/TPCCATracker"; - defaultNotify = " (default)"; - chainId = 0; - } - - HLTInfo("configure from entry \"%s\"%s, chain id %s", cdbEntry, defaultNotify, (chainId != nullptr && chainId[0] != 0) ? 
chainId : ""); - AliCDBEntry* pEntry = AliCDBManager::Instance()->Get(cdbEntry); //,GetRunNo()); - - if (!pEntry) { - HLTError("cannot fetch object \"%s\" from CDB", cdbEntry); - return -EINVAL; - } - - TObjString* pString = dynamic_cast(pEntry->GetObject()); - - if (!pString) { - HLTError("configuration object \"%s\" has wrong type, required TObjString", cdbEntry); - return -EINVAL; - } - - HLTInfo("received configuration object string: \"%s\"", pString->GetString().Data()); - - return ReadConfigurationString(pString->GetString().Data()); -} - -int32_t GPUTPCTrackerComponent::Configure(const char* cdbEntry, const char* chainId, const char* commandLine) -{ - // Configure the component - // There are few levels of configuration, - // parameters which are set on one step can be overwritten on the next step - - //* read hard-coded values - SetDefaultConfiguration(); - - //* read the default CDB entry - int32_t iResult1 = ReadCDBEntry(nullptr, chainId); - - //* read magnetic field - fSolenoidBz = GetBz(); - - //* read the actual CDB entry if required - int32_t iResult2 = (cdbEntry) ? ReadCDBEntry(cdbEntry, chainId) : 0; - - //* read extra parameters from input (if they are) - int32_t iResult3 = 0; - - if (commandLine && commandLine[0] != '\0') { - HLTInfo("received configuration string from HLT framework: \"%s\"", commandLine); - iResult3 = ReadConfigurationString(commandLine); - } - - if (fRec) { - ConfigureSlices(); - } - - return iResult1 ? iResult1 : (iResult2 ? 
iResult2 : iResult3); -} - -int32_t GPUTPCTrackerComponent::ConfigureSlices() -{ - // Initialize the tracker slices - GPUSettingsRec rec; - GPUSettingsGRP grp; - GPUSettingsProcessing devProc; - - grp.solenoidBzNominalGPU = fSolenoidBz; - grp.grpContinuousMaxTimeBin = 0; // triggered events - if (mNeighboursSearchArea > 0) { - rec.tpc.neighboursSearchArea = mNeighboursSearchArea; - } - if (fClusterErrorCorrectionY > 1.e-4) { - rec.tpc.clusterError2CorrectionY = fClusterErrorCorrectionY * fClusterErrorCorrectionY; - } - if (fClusterErrorCorrectionZ > 1.e-4) { - rec.tpc.clusterError2CorrectionZ = fClusterErrorCorrectionZ * fClusterErrorCorrectionZ; - } - rec.tpc.minNClustersTrackSeed = fMinNTrackClusters; - rec.SetMinTrackPtB5(fMinTrackPt * fabsf(fSolenoidBz / 5)); - rec.tpc.searchWindowDZDR = fSearchWindowDZDR; - devProc.nDeviceHelperThreads = fGPUHelperThreads; - rec.tpc.globalTracking = fGlobalTracking; - devProc.stuckProtection = fGPUStuckProtection; - rec.nonConsecutiveIDs = true; - rec.tpc.mergerReadFromTrackerDirectly = false; - devProc.ompThreads = 1; - devProc.ompKernels = false; - - GPURecoStepConfiguration steps; - steps.steps.set(GPUDataTypes::RecoStep::TPCSliceTracking); - steps.inputs.set(GPUDataTypes::InOutType::TPCClusters); - steps.outputs.set(GPUDataTypes::InOutType::TPCSectorTracks); - - fRec->SetSettings(&grp, &rec, &devProc, &steps); - fChain->LoadClusterErrors(); - return fRec->Init(); -} - -void* GPUTPCTrackerComponent::TrackerInit(void* par) -{ - // Create tracker instance and set parameters - fRec = GPUReconstruction::CreateInstance(fAllowGPU ? 
fGPUType.Data() : "CPU", true); - if (fRec == nullptr) { - return ((void*)-1); - } - fChain = fRec->AddChain(); - - if (ConfigureSlices()) { - return ((void*)-1); - } - return (nullptr); -} - -int32_t GPUTPCTrackerComponent::DoInit(int argc, const char** argv) -{ - if (fRec) { - return EINPROGRESS; - } - - // Configure the CA tracker component - TString arguments = ""; - for (int32_t i = 0; i < argc; i++) { - if (!arguments.IsNull()) { - arguments += " "; - } - arguments += argv[i]; - } - - int32_t retVal = Configure(nullptr, nullptr, arguments.Data()); - if (retVal == 0) { - if (fAsync) { - if (fAsyncProcessor.Initialize(1)) { - return (-ENODEV); - } - void* initRetVal; - if (fAsyncProcessor.InitializeAsyncMemberTask(this, &GPUTPCTrackerComponent::TrackerInit, nullptr, &initRetVal) != 0) { - return (-ENODEV); - } - if (initRetVal) { - return (-ENODEV); - } - } else { - if (TrackerInit(nullptr) != nullptr) { - return (-ENODEV); - } - } - } - - return (retVal); -} - -void* GPUTPCTrackerComponent::TrackerExit(void* par) -{ - if (fRec) { - delete fRec; - } - fRec = nullptr; - return (nullptr); -} - -int32_t GPUTPCTrackerComponent::DoDeinit() -{ - // see header file for class documentation - if (fAsync) { - void* initRetVal = nullptr; - fAsyncProcessor.InitializeAsyncMemberTask(this, &GPUTPCTrackerComponent::TrackerExit, nullptr, &initRetVal); - fAsyncProcessor.Deinitialize(); - } else { - TrackerExit(nullptr); - } - return 0; -} - -int32_t GPUTPCTrackerComponent::Reconfigure(const char* cdbEntry, const char* chainId) -{ - // Reconfigure the component from OCDB . 
- return Configure(cdbEntry, chainId, nullptr); -} - -int32_t GPUTPCTrackerComponent::DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& /*trigData*/, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, vector& outputBlocks) -{ - //* process event - if (!fRec) { - HLTError("CATracker not initialized properly"); - return -ENOENT; - } - - AliHLTTPCTrackerWrapperData tmpPar; - tmpPar.fEvtData = &evtData; - tmpPar.fBlocks = blocks; - tmpPar.mOutputPtr = outputPtr; - tmpPar.fSize = &size; - tmpPar.mOutputBlocks = &outputBlocks; - - static int32_t trackerTimeout = 0; - if (trackerTimeout) { - size = 0; - return (0); - } - - int32_t retVal; - if (fAsync) { - void* asyncRetVal = nullptr; - if (fAsyncProcessor.InitializeAsyncMemberTask(this, &GPUTPCTrackerComponent::TrackerDoEvent, &tmpPar, &asyncRetVal, fAsync) != 0) { - HLTError("Tracking timed out, disabling this tracker instance"); - trackerTimeout = 1; - size = 0; - return (-ENODEV); - } else { - retVal = (int32_t)(size_t)asyncRetVal; - } - } else { - retVal = (int32_t)(size_t)TrackerDoEvent(&tmpPar); - } - return (retVal); -} - -void* GPUTPCTrackerComponent::TrackerDoEvent(void* par) -{ - AliHLTTPCTrackerWrapperData* tmpPar = reinterpret_cast(par); - - const AliHLTComponentEventData& evtData = *(tmpPar->fEvtData); - const AliHLTComponentBlockData* blocks = tmpPar->fBlocks; - AliHLTUInt8_t* outputPtr = tmpPar->mOutputPtr; - AliHLTUInt32_t& size = *(tmpPar->fSize); - vector& outputBlocks = *(tmpPar->mOutputBlocks); - - AliHLTUInt32_t maxBufferSize = size; - size = 0; // output size - - if (GetFirstInputBlock(kAliHLTDataTypeSOR) || GetFirstInputBlock(kAliHLTDataTypeEOR)) { - return 0; - } - - fBenchmark.StartNewEvent(); - fBenchmark.Start(0); - - // Logging( kHLTLogWarning, "HLT::TPCCATracker::DoEvent", "DoEvent", "CA::DoEvent()" ); - if (evtData.fBlockCnt <= 0) { - HLTWarning("no blocks in event"); - return 0; - } - - // Prepare everything for all slices - 
const AliHLTTPCClusterXYZData* clustersXYZ[NSLICES][fgkNPatches] = {nullptr}; - const AliHLTTPCRawClusterData* clustersRaw[NSLICES][fgkNPatches] = {nullptr}; - - for (uint64_t ndx = 0; ndx < evtData.fBlockCnt; ndx++) { - const AliHLTComponentBlockData& pBlock = blocks[ndx]; - int32_t slice = AliHLTTPCDefinitions::GetMinSliceNr(pBlock); - int32_t patch = AliHLTTPCDefinitions::GetMinPatchNr(pBlock); - if (pBlock.fDataType == AliHLTTPCDefinitions::RawClustersDataType()) { - clustersRaw[slice][patch] = (const AliHLTTPCRawClusterData*)pBlock.fPtr; - } else if (pBlock.fDataType == AliHLTTPCDefinitions::ClustersXYZDataType()) { - clustersXYZ[slice][patch] = (const AliHLTTPCClusterXYZData*)pBlock.fPtr; - } - } - - GPUTPCClusterData* clusterData[NSLICES] = {nullptr}; - int32_t nClusters[NSLICES] = {0}; - - int32_t nClustersTotal = 0; - for (int32_t slice = 0; slice < NSLICES; slice++) { - int32_t nClustersSliceTotal = 0; - for (int32_t patch = 0; patch < 6; patch++) { - if (clustersXYZ[slice][patch]) { - nClustersSliceTotal += clustersXYZ[slice][patch]->fCount; - } - } - if (nClustersSliceTotal > 500000) { - HLTWarning("Too many clusters in tracker input: Slice %d, Number of Clusters %d, slice not included in tracking", slice, nClustersSliceTotal); - nClusters[slice] = nClustersSliceTotal; - } else if (nClustersSliceTotal == 0) { - nClusters[slice] = nClustersSliceTotal; - } else { - clusterData[slice] = new GPUTPCClusterData[nClustersSliceTotal]; - nClusters[slice] = nClustersSliceTotal; - GPUTPCClusterData* pCluster = clusterData[slice]; - for (int32_t patch = 0; patch < 6; patch++) { - if (clustersXYZ[slice][patch] != nullptr && clustersRaw[slice][patch] != nullptr) { - const AliHLTTPCClusterXYZData& clXYZ = *clustersXYZ[slice][patch]; - const AliHLTTPCRawClusterData& clRaw = *clustersRaw[slice][patch]; - - if (clXYZ.fCount != clRaw.fCount) { - HLTError("Number of entries in raw and xyz clusters are not mached %d vs %d", clXYZ.fCount, clRaw.fCount); - continue; - } - - 
const int32_t firstRow = AliHLTTPCGeometry::GetFirstRow(patch); - for (int32_t ic = 0; ic < clXYZ.fCount; ic++) { - const AliHLTTPCClusterXYZ& c = clXYZ.fClusters[ic]; - const AliHLTTPCRawCluster& cRaw = clRaw.fClusters[ic]; - if (c.GetZ() > fClusterZCut || c.GetZ() < -fClusterZCut) { - continue; - } - if (c.GetX() < 1.f) { - continue; // cluster xyz position was not calculated for whatever reason - } - pCluster->id = AliHLTTPCGeometry::CreateClusterID(slice, patch, ic); - pCluster->x = c.GetX(); - pCluster->y = c.GetY(); - pCluster->z = c.GetZ(); - pCluster->row = firstRow + cRaw.GetPadRow(); - pCluster->flags = cRaw.GetFlags(); - if (cRaw.GetSigmaPad2() < kAlmost0 || cRaw.GetSigmaTime2() < kAlmost0) { - pCluster->flags |= GPUTPCGMMergedTrackHit::flagSingle; - } - pCluster->amp = cRaw.GetCharge(); -#ifdef GPUCA_FULL_CLUSTERDATA - pCluster->pad = cRaw.GetPad(); - pCluster->time = cRaw.GetTime(); - pCluster->ampMax = cRaw.GetQMax(); - pCluster->sigmaPad2 = cRaw.GetSigmaPad2(); - pCluster->sigmaTime2 = cRaw.GetSigmaTime2(); -#endif - pCluster++; - } - } - } - nClusters[slice] = pCluster - clusterData[slice]; - nClustersTotal += nClusters[slice]; - HLTDebug("Read %d->%d hits for slice %d", nClustersSliceTotal, nClusters[slice], slice); - } - } - - if (nClustersTotal == 0) { - // No input, skip processing - fBenchmark.Stop(0); - return (0); - } - - fChain->ClearIOPointers(); - for (int32_t i = 0; i < NSLICES; i++) { - fChain->mIOPtrs.clusterData[i] = clusterData[i]; - fChain->mIOPtrs.nClusterData[i] = nClusters[i]; - } - - // Prepare Output - fRec->SetOutputControl(outputPtr, maxBufferSize); - - // reconstruct the event - fBenchmark.Start(1); - try { - fRec->PrepareEvent(); - } catch (const std::bad_alloc& e) { - printf("Memory Allocation Error\n"); - return ((void*)(size_t)-EINVAL); - } - if (fChain->RunTPCTrackingSlices()) { - HLTError("Error running tracking!"); - return ((void*)(size_t)-EINVAL); - } - if (fChain->CheckErrorCodes()) { - return 
((void*)(size_t)-EINVAL); - } - fBenchmark.Stop(1); - HLTInfo("Processed %d clusters", nClustersTotal); - for (int32_t i = 0; i < NSLICES; i++) { - fChain->GetTPCSliceTrackers()[i].Clear(); - } - - int32_t ret = 0; - size = 0; - - if (fRec->OutputControl().size == 1) { - HLTWarning("Output buffer size exceeded buffer size %d, tracks are not stored", maxBufferSize); - ret = -ENOSPC; - } else { - for (int32_t slice = 0; slice < NSLICES; slice++) { - GPUTPCSliceOutput* pOut = fChain->GetTPCSliceTrackers()[slice].Output(); - if (!pOut) { - continue; - } - HLTDebug("%d tracks found for slice %d", pOut->NTracks(), slice); - uint32_t blockSize = pOut->Size(); - if (blockSize > 0) { - AliHLTComponentBlockData bd; - FillBlockData(bd); - bd.fOffset = ((char*)pOut - (char*)outputPtr); - bd.fSize = blockSize; - bd.fSpecification = AliHLTTPCDefinitions::EncodeDataSpecification(slice, slice, 0, fgkNPatches); - bd.fDataType = GPUTPCDefinitions::fgkTrackletsDataType; - outputBlocks.push_back(bd); - size += bd.fSize; - fBenchmark.AddOutput(bd.fSize); - } - } - } - - for (int32_t i = 0; i < NSLICES; i++) { - if (clusterData[i]) { - delete[] clusterData[i]; - } - } - - fBenchmark.Stop(0); - HLTInfo(fBenchmark.GetStatistics()); - - return ((void*)(size_t)ret); -} diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.h deleted file mode 100644 index 5b09f50bf62bc..0000000000000 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerComponent.h +++ /dev/null @@ -1,139 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCTrackerComponent.h -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#ifndef GPUTPCTRACKERCOMPONENT_H -#define GPUTPCTRACKERCOMPONENT_H - -#ifndef GPUCA_ALIROOT_LIB -#define GPUCA_ALIROOT_LIB -#endif - -#include "GPUCommonDef.h" -#include "AliHLTProcessor.h" -#include "AliHLTComponentBenchmark.h" -#include "AliHLTAsyncMemberProcessor.h" - -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -class GPUTPCClusterData; -class GPUReconstruction; -class GPUChainTracking; -} // namespace gpu -} // namespace GPUCA_NAMESPACE - -/** - * @class GPUTPCTrackerComponent - * The Cellular Automaton tracker component. - */ -class GPUTPCTrackerComponent : public AliHLTProcessor -{ - public: - /** standard constructor */ - GPUTPCTrackerComponent(); - - /** dummy copy constructor, defined according to effective C++ style */ - GPUTPCTrackerComponent(const GPUTPCTrackerComponent&); - - /** dummy assignment op, but defined according to effective C++ style */ - GPUTPCTrackerComponent& operator=(const GPUTPCTrackerComponent&); - - /** standard destructor */ - virtual ~GPUTPCTrackerComponent(); - - // Public functions to implement AliHLTComponent's interface. 
- // These functions are required for the registration process - - /** @see component interface @ref AliHLTComponent::GetComponentID */ - const char* GetComponentID(); - - /** @see component interface @ref AliHLTComponent::GetInputDataTypes */ - void GetInputDataTypes(vector& list); - - /** @see component interface @ref AliHLTComponent::GetOutputDataType */ - AliHLTComponentDataType GetOutputDataType(); - - /** @see component interface @ref AliHLTComponent::GetOutputDataSize */ - virtual void GetOutputDataSize(uint64_t& constBase, double& inputMultiplier); - - /** @see component interface @ref AliHLTComponent::Spawn */ - AliHLTComponent* Spawn(); - - protected: - // Protected functions to implement AliHLTComponent's interface. - // These functions provide initialization as well as the actual processing - // capabilities of the component. - - /** @see component interface @ref AliHLTComponent::DoInit */ - int32_t DoInit(int argc, const char** argv); - - /** @see component interface @ref AliHLTComponent::DoDeinit */ - int32_t DoDeinit(); - - /** reconfigure **/ - int32_t Reconfigure(const char* cdbEntry, const char* chainId); - - /** @see component interface @ref AliHLTProcessor::DoEvent */ - int32_t DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& trigData, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, vector& outputBlocks); - - private: - struct AliHLTTPCTrackerWrapperData { - const AliHLTComponentEventData* fEvtData; - const AliHLTComponentBlockData* fBlocks; - AliHLTUInt8_t* mOutputPtr; - AliHLTUInt32_t* fSize; - vector* mOutputBlocks; - }; - - static const int32_t NSLICES = 36; //* N slices - static const int32_t fgkNPatches = 6; //* N slices - - /** magnetic field */ - double fSolenoidBz; // see above - int32_t fMinNTrackClusters; //* required min number of clusters on the track - double fMinTrackPt; //* required min Pt of tracks - double fClusterZCut; //* cut on cluster Z position (for noise 
rejection at the age of TPC) - double mNeighboursSearchArea; //* area in cm for the neighbour search algorithm - double fClusterErrorCorrectionY; // correction for the cluster errors - double fClusterErrorCorrectionZ; // correction for the cluster errors - - AliHLTComponentBenchmark fBenchmark; // benchmarks - int8_t fAllowGPU; //* Allow this tracker to run on GPU - int32_t fGPUHelperThreads; // Number of helper threads for GPU tracker, set to -1 to use default number - int32_t fCPUTrackers; // Number of CPU trackers to run in addition to GPU tracker - int8_t fGlobalTracking; // Activate global tracking feature - int32_t fGPUDeviceNum; // GPU Device to use, default -1 for auto detection - TString fGPUType; // GPU type to use "CUDA", "HIP", "OCL" - int32_t fGPUStuckProtection; // Protect from stuck GPUs - int32_t fAsync; // Run tracking in async thread to catch GPU hangs.... - float fSearchWindowDZDR; // See TPCCAParam - GPUCA_NAMESPACE::gpu::GPUReconstruction* fRec; // GPUReconstruction - GPUCA_NAMESPACE::gpu::GPUChainTracking* fChain; - - /** set configuration parameters **/ - void SetDefaultConfiguration(); - int32_t ReadConfigurationString(const char* arguments); - int32_t ReadCDBEntry(const char* cdbEntry, const char* chainId); - int32_t Configure(const char* cdbEntry, const char* chainId, const char* commandLine); - int32_t ConfigureSlices(); - - AliHLTAsyncMemberProcessor fAsyncProcessor; - void* TrackerInit(void*); - void* TrackerExit(void*); - void* TrackerDoEvent(void*); - - ClassDef(GPUTPCTrackerComponent, 0); -}; -#endif // GPUTPCTRACKERCOMPONENT_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx index c1f731105dc5a..5c2ed83d47966 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx @@ -24,7 +24,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void 
GPUTPCTracker::DumpOutput(std::ostream& out) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h index 9190cdb94aa5f..873368f1635a0 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h @@ -18,7 +18,7 @@ #include "GPUTPCBaseTrackParam.h" #include "GPUTPCDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -55,6 +55,6 @@ class GPUTPCTracklet uint32_t mFirstHit; // first hit in row hit array }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCTRACKLET_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index e7735b4b2580c..27d531543bf6d 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -29,7 +29,7 @@ #include "GPUParam.inc" #include "GPUCommonMath.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUdii() void GPUTPCTrackletConstructor::InitTracklet(GPUTPCTrackParam& GPUrestrict() tParam) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h index a961501207911..88a2d9c94d305 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -109,6 +109,6 @@ class GPUTPCTrackletConstructor }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCTRACKLETCONSTRUCTOR_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx index b8cbbae06e8b0..d5492602a4283 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx +++ 
b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx @@ -19,7 +19,7 @@ #include "GPUTPCTracklet.h" #include "GPUCommonMath.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h index 115f0785fc212..80a29d21edac3 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -53,6 +53,6 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTPCTRACKLETSELECTOR_H diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 8bef787b85e8e..ae92f8a380bda 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -62,11 +62,11 @@ #include "GPUChainITS.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; // #define BROKEN_EVENTS -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { extern GPUSettingsStandalone configStandalone; } diff --git a/GPU/GPUTracking/Standalone/tools/createGeo.C b/GPU/GPUTracking/Standalone/tools/createGeo.C index b0837ff2604b4..5684f2cc92da6 100644 --- a/GPU/GPUTracking/Standalone/tools/createGeo.C +++ b/GPU/GPUTracking/Standalone/tools/createGeo.C @@ -7,7 +7,7 @@ #include "GPUReconstruction.h" #include 
"GPUChainTracking.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void createGeo() { @@ -16,7 +16,6 @@ void createGeo() gm->createPadPlaneArray(); gm->createClusterMatrixArray(); o2::trd::GeometryFlat gf(*gm); - //if (!gf.readMatricesFromFile()) return; // uncomment this line when the matrices dumped from AliRoot should be used gSystem->Load("libO2GPUTracking"); GPUReconstruction* rec = GPUReconstruction::CreateInstance(GPUReconstruction::DeviceType::CPU); GPUChainTracking* chain = rec->AddChain(); diff --git a/GPU/GPUTracking/Standalone/tools/createLUT.C b/GPU/GPUTracking/Standalone/tools/createLUT.C index 4cda80a8b5a9c..83e46da29b9a3 100644 --- a/GPU/GPUTracking/Standalone/tools/createLUT.C +++ b/GPU/GPUTracking/Standalone/tools/createLUT.C @@ -5,7 +5,7 @@ #include "GPUReconstruction.h" #include "GPUChainTracking.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void createLUT() { diff --git a/GPU/GPUTracking/TPCClusterFinder/Array2D.h b/GPU/GPUTracking/TPCClusterFinder/Array2D.h index b8f01f199cc98..b62176fdc4365 100644 --- a/GPU/GPUTracking/TPCClusterFinder/Array2D.h +++ b/GPU/GPUTracking/TPCClusterFinder/Array2D.h @@ -18,7 +18,7 @@ #include "clusterFinderDefs.h" #include "ChargePos.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { template @@ -121,6 +121,6 @@ using TPCMapMemoryLayout = LinearLayout; template using Array2D = AbstractArray2D>; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/CfConsts.h b/GPU/GPUTracking/TPCClusterFinder/CfConsts.h index 235fc6444e8af..a53f73ed69e26 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfConsts.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfConsts.h @@ -17,7 +17,7 @@ #include "clusterFinderDefs.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -192,6 +192,6 @@ GPUconstexpr() uint32_t NoiseSuppressionMinima[NOISE_SUPPRESSION_NEIGHBOR_NUM] = } // namespace cfconsts } // namespace gpu -} // namespace 
GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h index 48e3cbf6200fe..de5dfe9e1d683 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h @@ -18,7 +18,7 @@ #include "clusterFinderDefs.h" #include "GPUCommonMath.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct CfFragment { @@ -117,6 +117,6 @@ struct CfFragment { } }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h index 2e929ecdcf9be..4504b8288aee0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h @@ -20,7 +20,7 @@ #include "Array2D.h" #include "CfConsts.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class CfUtils @@ -274,6 +274,6 @@ class CfUtils } }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h index f852212d180f8..10e375ee6f4bd 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h @@ -17,7 +17,7 @@ #include "clusterFinderDefs.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -60,6 +60,6 @@ struct ChargePos { inline constexpr ChargePos INVALID_CHARGE_POS{255, 255, INVALID_TIME_BIN}; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index e8176ecb60d78..d145aaed705d9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -18,8 +18,8 @@ #include "GPUParam.h" #include "DataFormatsTPC/ClusterNative.h" -using namespace GPUCA_NAMESPACE::gpu; -using 
namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param) const { diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index c6a05c46a7642..26decbf0a5b14 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -18,7 +18,7 @@ #include "clusterFinderDefs.h" #include "PackedCharge.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace tpc @@ -56,6 +56,6 @@ class ClusterAccumulator }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h index 99e2e998c04fc..d6107a6503e86 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h @@ -22,7 +22,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -84,6 +84,6 @@ struct GPUTPCCFChainContext { }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx index 287dad6f5367f..8f184836de6df 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx @@ -17,8 +17,8 @@ #include "DataFormatsTPC/Digit.h" #include "TPCPadGainCalib.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFChargeMapFiller::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) diff 
--git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h index 8a0d8089ad1f5..44df676c0d73e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h @@ -27,7 +27,7 @@ namespace o2::tpc class Digit; } // namespace o2::tpc -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct ChargePos; @@ -65,6 +65,6 @@ class GPUTPCCFChargeMapFiller : public GPUKernelTemplate static GPUd() size_t findTransition(int32_t, const tpc::Digit*, size_t, size_t); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 9b249f9ef77a6..4a167b7d53890 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -25,8 +25,8 @@ #endif #endif -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h index 1e1b94cd43b74..670eb3a8700c6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h @@ -20,7 +20,7 @@ #include "clusterFinderDefs.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate @@ -58,6 +58,6 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t 
consecCharges, tpccf::Charge maxCharge); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index c051f01a71bf1..ad07f2b93f3e0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -22,8 +22,8 @@ #include "MCLabelAccumulator.h" #endif -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h index ff8820efc1d69..c04a9e167529f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h @@ -27,7 +27,7 @@ namespace o2::tpc struct ClusterNative; } // namespace o2::tpc -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class ClusterAccumulator; @@ -71,6 +71,6 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index daacbc0994295..3727e23bcf16c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -23,8 +23,8 @@ #include "TPCPadGainCalib.h" #include "TPCZSLinkMapping.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; 
+using namespace o2::gpu::tpccf; using namespace o2::tpc; using namespace o2::tpc::constants; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h index bf34b78227f19..abd6deefd4c28 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h @@ -23,7 +23,7 @@ #include "DataFormatsTPC/ZeroSuppressionLinkBased.h" #include "DetectorsRaw/RDHUtils.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCClusterFinder; @@ -181,6 +181,6 @@ class GPUTPCCFDecodeZSDenseLink : public GPUTPCCFDecodeZSLinkBase GPUd() static uint16_t DecodeTBMultiThread(processorType& clusterer, GPUSharedMemory& smem, const int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, const uint8_t* payloadEnd, const uint8_t* nextPage); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx index 5c609a9775bd9..dab8123698abf 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx @@ -18,8 +18,8 @@ #include "ChargePos.h" #include "GPUDefMacros.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFDeconvolution::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h index 0fb27c07e2825..e907728e089b9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h @@ 
-23,7 +23,7 @@ #include "Array2D.h" #include "PackedCharge.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCCFDeconvolution : public GPUKernelTemplate @@ -59,6 +59,6 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate static GPUdi() uint8_t countPeaksOuter(uint16_t, uint8_t, const uint8_t*); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.cxx index baba573b67aa8..b64f4f3f6b689 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.cxx @@ -13,8 +13,8 @@ /// \author David Rohr #include "GPUTPCCFGather.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFGather::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, o2::tpc::ClusterNative* ptr) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h index da486741ea62c..a7b3b4938b1dd 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h @@ -18,7 +18,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCClusterFinder; @@ -43,6 +43,6 @@ class GPUTPCCFGather : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx index a44bf2f327054..f7488821d545f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx @@ -18,8 +18,8 @@ #include "GPUHostDataTypes.h" #endif -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; #if !defined(GPUCA_GPUCODE) void GPUTPCCFMCLabelFlattener::setGlobalOffsetsAndAllocate( diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h index b1c266cf5fed1..6bde9bf468eaa 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h @@ -21,7 +21,7 @@ #include "GPUTPCClusterFinder.h" #include "GPUConstantMem.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUTPCLinearLabels; @@ -57,6 +57,6 @@ class GPUTPCCFMCLabelFlattener : public GPUKernelTemplate static void setGlobalOffsetsAndAllocate(GPUTPCClusterFinder&, GPUTPCLinearLabels&); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx index ec590bd5d1ab3..05fddda5bec68 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx @@ -18,8 +18,8 @@ #include "CfUtils.h" #include "ChargePos.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFNoiseSuppression::Thread(int32_t nBlocks, int32_t nThreads, 
int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h index 41f463fd4fe89..a11fbeb7d852f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h @@ -22,7 +22,7 @@ #include "Array2D.h" #include "PackedCharge.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct ChargePos; @@ -72,6 +72,6 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate static GPUd() void findMinimaAndPeaks(const Array2D&, const Array2D&, const GPUSettingsRec&, float, const ChargePos&, ChargePos*, PackedCharge*, uint64_t*, uint64_t*, uint64_t*); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx index be403c98c4acc..30fdac92e8607 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx @@ -19,8 +19,8 @@ #include "PackedCharge.h" #include "TPCPadGainCalib.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFPeakFinder::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h index 4aa3574abd3ff..5cb5b208c2fde 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h @@ -22,7 +22,7 @@ #include "Array2D.h" #include "PackedCharge.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct ChargePos; @@ -58,6 +58,6 @@ class GPUTPCCFPeakFinder 
: public GPUKernelTemplate static GPUd() bool isPeak(GPUSharedMemory&, tpccf::Charge, const ChargePos&, uint16_t, const Array2D&, const GPUSettingsRec&, ChargePos*, PackedCharge*); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx index 909d7eb6b7192..edc4fd6bab56c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx @@ -18,8 +18,8 @@ #include "ChargePos.h" #include "CfUtils.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h index 8b2206da3088c..9de0aab11e6f1 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h @@ -20,7 +20,7 @@ #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUTPCCFStreamCompaction : public GPUKernelTemplate @@ -58,6 +58,6 @@ class GPUTPCCFStreamCompaction : public GPUKernelTemplate static GPUd() int32_t CompactionElems(processorType& clusterer, int32_t stage); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index f8ff6b15465c8..e009ac12389b4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ 
b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -23,7 +23,7 @@ #include "ChargePos.h" #include "Array2D.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace o2::tpc; void GPUTPCClusterFinder::InitializeProcessor() diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index d4838dda26fdd..a02d32f250604 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -41,7 +41,7 @@ class Digit; } // namespace o2 -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUTPCClusterMCInterimArray; struct TPCPadGainCalib; @@ -153,6 +153,6 @@ class GPUTPCClusterFinder : public GPUProcessor #endif }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index 9b52a0ec94170..eb5d7505eea22 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -17,8 +17,8 @@ #include "Array2D.h" #include "DataFormatsTPC/Digit.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; void GPUTPCClusterFinder::DumpDigits(std::ostream& out) { diff --git a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx index c68f10d388d3c..19ef7aa9ecd0d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx @@ -18,8 +18,8 @@ #include "GPUTPCClusterFinder.h" #include "SimulationDataFormat/MCTruthContainer.h" -using namespace GPUCA_NAMESPACE::gpu; -using namespace GPUCA_NAMESPACE::gpu::tpccf; +using namespace o2::gpu; +using namespace o2::gpu::tpccf; 
MCLabelAccumulator::MCLabelAccumulator(GPUTPCClusterFinder& clusterer) : mIndexMap(clusterer.mPindexMap), mLabels(clusterer.mPinputLabels), mOutput(clusterer.mPlabelsByRow) diff --git a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h index 53446dd4391ac..176fbea02befe 100644 --- a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h @@ -32,7 +32,7 @@ using ConstMCLabelContainerView = o2::dataformats::ConstMCTruthContainerView mClusterLabels; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h b/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h index 9d13f431adc2b..03c1ca9b79ae7 100644 --- a/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h +++ b/GPU/GPUTracking/TPCClusterFinder/PackedCharge.h @@ -18,7 +18,7 @@ #include "clusterFinderDefs.h" #include "GPUCommonMath.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class PackedCharge @@ -57,6 +57,6 @@ class PackedCharge BasicType mVal; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx index 0f3f50bb17a58..3d6b45c372ea0 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx @@ -18,7 +18,7 @@ #include "GPUReconstruction.h" #include "GPUO2DataTypes.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUTPCConvert::InitializeProcessor() {} diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h index 3e46b31d1a597..222c2ffa65648 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h @@ -18,7 +18,7 @@ #include "GPUDef.h" #include "GPUProcessor.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -54,6 +54,6 @@ class 
GPUTPCConvert : public GPUProcessor int16_t mMemoryResMemory = -1; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h index 8df31462d4995..8dfe4ac6c28bc 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h @@ -20,7 +20,7 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -47,6 +47,6 @@ class GPUTPCConvertImpl }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx index 4d59a3ca4e428..dc01b3782daf9 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx @@ -19,7 +19,7 @@ #include "GPUO2DataTypes.h" #include "GPUTPCConvertImpl.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template <> GPUdii() void GPUTPCConvertKernel::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors) diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h index 5bfe05de0c631..085260dc48067 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h @@ -17,7 +17,7 @@ #include "GPUGeneralKernels.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -29,6 +29,6 @@ class GPUTPCConvertKernel : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git 
a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h index 49a6178dbbff1..8564b0ea7371c 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h @@ -17,35 +17,7 @@ #include "GPUCommonDef.h" -#ifdef GPUCA_ALIROOT_LIB -#include "AliTRDgeometry.h" -#include "AliTRDpadPlane.h" -#include "AliGeomManager.h" -#include "TGeoMatrix.h" - -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ - -typedef AliTRDpadPlane GPUTRDpadPlane; - -class GPUTRDGeometry : public AliTRDgeometry -{ - public: - static bool CheckGeometryAvailable() { return AliGeomManager::GetGeometry(); } - - // Make sub-functionality available directly in GPUTRDGeometry - double GetPadPlaneWidthIPad(int32_t det) const { return GetPadPlane(det)->GetWidthIPad(); } - double GetPadPlaneRowPos(int32_t layer, int32_t stack, int32_t row) const { return GetPadPlane(layer, stack)->GetRowPos(row); } - double GetPadPlaneRowSize(int32_t layer, int32_t stack, int32_t row) const { return GetPadPlane(layer, stack)->GetRowSize(row); } - int32_t GetGeomManagerVolUID(int32_t det, int32_t modId) const { return AliGeomManager::LayerToVolUID(AliGeomManager::ELayerID(AliGeomManager::kTRD1 + GetLayer(det)), modId); } - float GetCdrHght() const { return CdrHght(); } -}; -} // namespace gpu -} // namespace GPUCA_NAMESPACE - -#elif defined(GPUCA_HAVE_O2HEADERS) //&& defined(GPUCA_GPUCODE) +#if defined(GPUCA_HAVE_O2HEADERS) //&& defined(GPUCA_GPUCODE) class TObjArray; #include "GPUDef.h" @@ -54,7 +26,7 @@ class TObjArray; #include "DataFormatsTRD/Constants.h" #include "GPUCommonTransform3D.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -109,13 +81,13 @@ class GPUTRDGeometry : private o2::trd::GeometryFlat static constexpr int32_t kNstack = o2::trd::constants::NSTACK; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 -#else // below are dummy definitions to enable building the standalone version with AliRoot 
+#else // below are dummy definitions to enable building the standalone version without O2 Headers #include "GPUDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -178,8 +150,8 @@ class GPUTRDGeometry static constexpr const int32_t kNstack = 0; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 -#endif // !GPUCA_ALIROOT_LIB && !defined(GPUCA_HAVE_O2HEADERS) +#endif // !defined(GPUCA_HAVE_O2HEADERS) #endif // GPUTRDGEOMETRY_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index f0ae1bde58334..16347aae5c535 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -15,7 +15,7 @@ #ifndef GPUTRDINTERFACES_H #define GPUTRDINTERFACES_H -// This is an interface header for making the TRD tracking portable between O2, AliRoot, and HLT standalone framework +// This is an interface header for making the TRD tracking portable between O2, and Ru2 format #include "GPUCommonDef.h" #include "GPUCommonMath.h" @@ -23,7 +23,7 @@ #include "GPUTPCGMTrackParam.h" #include "GPUTRDDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -32,110 +32,14 @@ class trackInterface; template class propagatorInterface; } // namespace gpu -} // namespace GPUCA_NAMESPACE - -#ifdef GPUCA_ALIROOT_LIB // Interface for AliRoot, build only with AliRoot -#include "AliExternalTrackParam.h" -#include "AliHLTExternalTrackParam.h" -#include "AliTrackerBase.h" - -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ - -template <> -class trackInterface : public AliExternalTrackParam -{ - - public: - trackInterface() : AliExternalTrackParam(){}; - trackInterface(const trackInterface& param) : AliExternalTrackParam(param){}; - trackInterface(const AliExternalTrackParam& param) = delete; - trackInterface(const AliHLTExternalTrackParam& param) : AliExternalTrackParam() - { - float paramTmp[5] = {param.fY, param.fZ, param.fSinPhi, param.fTgl, 
param.fq1Pt}; - Set(param.fX, param.fAlpha, paramTmp, param.fC); - } - trackInterface(const GPUTPCGMMergedTrack& trk) : AliExternalTrackParam() - { - Set(trk.GetParam().GetX(), trk.GetAlpha(), trk.GetParam().GetPar(), trk.GetParam().GetCov()); - } - trackInterface(const gputpcgmmergertypes::GPUTPCOuterParam& param) : AliExternalTrackParam() - { - Set(param.X, param.alpha, param.P, param.C); - } - - // parameter + covariance - float getX() const { return GetX(); } - float getAlpha() const { return GetAlpha(); } - float getY() const { return GetY(); } - float getZ() const { return GetZ(); } - float getSnp() const { return GetSnp(); } - float getTgl() const { return GetTgl(); } - float getQ2Pt() const { return GetSigned1Pt(); } - float getEta() const { return Eta(); } - float getPt() const { return Pt(); } - float getSigmaY2() const { return GetSigmaY2(); } - float getSigmaZ2() const { return GetSigmaZ2(); } - - const My_Float* getPar() const { return GetParameter(); } - const My_Float* getCov() const { return GetCovariance(); } - void resetCovariance(float s) { ResetCovariance(10.f); } - void updateCovZ2(float) {} - bool CheckNumericalQuality() const { return true; } - - // parameter manipulation - bool update(const My_Float p[2], const My_Float cov[3]) { return Update(p, cov); } - float getPredictedChi2(const My_Float p[2], const My_Float cov[3]) const { return GetPredictedChi2(p, cov); } - bool rotate(float alpha) { return Rotate(alpha); } - - void set(float x, float alpha, const float param[5], const float cov[15]) { Set(x, alpha, param, cov); } - - typedef AliExternalTrackParam baseClass; -}; - -template <> -class propagatorInterface : public AliTrackerBase -{ - - public: - typedef void propagatorParam; - propagatorInterface(const propagatorParam* = nullptr) : AliTrackerBase(), mParam(nullptr){}; - propagatorInterface(const propagatorInterface&) = delete; - propagatorInterface& operator=(const propagatorInterface&) = delete; - - bool propagateToX(float x, float 
maxSnp, float maxStep) { return PropagateTrackToBxByBz(mParam, x, 0.13957f, maxStep, false, maxSnp); } - int32_t getPropagatedYZ(float x, float& projY, float& projZ) - { - Double_t yz[2] = {0.}; - mParam->GetYZAt(x, GetBz(), yz); - projY = yz[0]; - projZ = yz[1]; - return 0; - } - - void setTrack(trackInterface* trk) { mParam = trk; } - void setFitInProjections(bool flag) {} - - float getAlpha() { return (mParam) ? mParam->GetAlpha() : 99999.f; } - bool update(const My_Float p[2], const My_Float cov[3]) { return (mParam) ? mParam->update(p, cov) : false; } - float getPredictedChi2(const My_Float p[2], const My_Float cov[3]) { return (mParam) ? mParam->getPredictedChi2(p, cov) : 99999.f; } - bool rotate(float alpha) { return (mParam) ? mParam->rotate(alpha) : false; } - - trackInterface* mParam; -}; -} // namespace gpu -} // namespace GPUCA_NAMESPACE - -#endif // GPUCA_ALIROOT_LIB +} // namespace o2 #if defined(GPUCA_HAVE_O2HEADERS) // Interface for O2, build only with O2 #include "DetectorsBase/Propagator.h" #include "GPUTRDInterfaceO2Track.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -159,7 +63,7 @@ class propagatorInterface GPUdi() void setFitInProjections(bool flag) {} GPUdi() float getAlpha() { return (mParam) ? 
mParam->getAlpha() : 99999.f; } - GPUdi() bool update(const My_Float p[2], const My_Float cov[3]) + GPUdi() bool update(const float p[2], const float cov[3]) { if (mParam) { gpustd::array pTmp = {p[0], p[1]}; @@ -169,7 +73,7 @@ class propagatorInterface return false; } } - GPUdi() float getPredictedChi2(const My_Float p[2], const My_Float cov[3]) + GPUdi() float getPredictedChi2(const float p[2], const float cov[3]) { if (mParam) { gpustd::array pTmp = {p[0], p[1]}; @@ -186,7 +90,7 @@ class propagatorInterface }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUCA_HAVE_O2HEADERS @@ -198,7 +102,7 @@ class propagatorInterface #include "ReconstructionDataFormats/TrackTPCITS.h" #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -222,20 +126,6 @@ class trackInterface : public GPUTPCGMTrackParam }; GPUdDefault() trackInterface(const trackInterface& param) = default; GPUdDefault() trackInterface& operator=(const trackInterface& param) = default; -#ifdef GPUCA_ALIROOT_LIB - trackInterface(const AliHLTExternalTrackParam& param) : GPUTPCGMTrackParam(), mAlpha(param.fAlpha) - { - SetX(param.fX); - SetPar(0, param.fY); - SetPar(1, param.fZ); - SetPar(2, param.fSinPhi); - SetPar(3, param.fTgl); - SetPar(4, param.fq1Pt); - for (int32_t i = 0; i < 15; i++) { - SetCov(i, param.fC[i]); - } - }; -#endif #if defined(GPUCA_HAVE_O2HEADERS) GPUd() trackInterface(const o2::dataformats::TrackTPCITS& param) : GPUTPCGMTrackParam(), mAlpha(param.getParamOut().getAlpha()) { @@ -342,18 +232,18 @@ class propagatorInterface : public GPUTPCGMPropagator } return false; } - GPUd() bool update(const My_Float p[2], const My_Float cov[3]) + GPUd() bool update(const float p[2], const float cov[3]) { // TODO sigma_yz not taken into account yet, is not zero due to pad tilting! return Update(p[0], p[1], 0, false, cov[0], cov[2]) == 0 ? 
true : false; } GPUd() float getAlpha() { return GetAlpha(); } // TODO sigma_yz not taken into account yet, is not zero due to pad tilting! - GPUd() float getPredictedChi2(const My_Float p[2], const My_Float cov[3]) const { return PredictChi2(p[0], p[1], cov[0], cov[2]); } + GPUd() float getPredictedChi2(const float p[2], const float cov[3]) const { return PredictChi2(p[0], p[1], cov[0], cov[2]); } trackInterface* mTrack; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTRDINTERFACES_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h index 23e26d8354343..f7e89169cde24 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h @@ -18,7 +18,7 @@ #ifndef GPUCA_TPC_GEOMETRY_O2 // compatibility to Run 2 data types -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -45,13 +45,13 @@ class GPUTRDSpacePoint }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #else // compatibility with Run 3 data types #include "DataFormatsTRD/CalibratedTracklet.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -63,7 +63,7 @@ class GPUTRDSpacePoint : public o2::trd::CalibratedTracklet static_assert(sizeof(GPUTRDSpacePoint) == sizeof(o2::trd::CalibratedTracklet), "Incorrect memory layout"); } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUCA_TPC_GEOMETRY_O2 diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index 0f184036e73ea..c14e61071e2d8 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -15,9 +15,6 @@ //#define ENABLE_GPUTRDDEBUG #define ENABLE_WARNING 0 #define ENABLE_INFO 0 -#ifdef GPUCA_ALIROOT_LIB -#define ENABLE_GPUMC -#endif #include "GPUTRDTracker.h" #include "GPUTRDTrackletWord.h" @@ -26,7 +23,7 @@ #include "GPUCommonMath.h" #include 
"GPUCommonAlgorithm.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; class GPUTPCGMPolynomialField; @@ -38,11 +35,6 @@ class GPUTPCGMPolynomialField; #endif // WITH_OPENMP #include #include -#ifdef GPUCA_ALIROOT_LIB -#include "TDatabasePDG.h" -#include "AliMCParticle.h" -#include "AliMCEvent.h" -#endif // GPUCA_ALIROOT_LIB #include "GPUChainTracking.h" @@ -125,12 +117,6 @@ void GPUTRDTracker_t::InitializeProcessor() // Initialise tracker //-------------------------------------------------------------------- -#ifdef GPUCA_ALIROOT_LIB - for (int32_t iCandidate = 0; iCandidate < mNCandidates * 2 * mMaxThreads; ++iCandidate) { - new (&mCandidates[iCandidate]) TRDTRK; - } -#endif - UpdateGeometry(); mDebug->ExpandVectors(); @@ -143,7 +129,7 @@ void GPUTRDTracker_t::UpdateGeometry() //-------------------------------------------------------------------- // Update Geometry of TRDTracker //-------------------------------------------------------------------- - mGeo = (TRD_GEOMETRY_CONST GPUTRDGeometry*)GetConstantMem()->calibObjects.trdGeometry; + mGeo = (const GPUTRDGeometry*)GetConstantMem()->calibObjects.trdGeometry; if (!mGeo) { GPUFatal("TRD geometry must be provided externally"); } @@ -186,8 +172,8 @@ void GPUTRDTracker_t::UpdateGeometry() // obtain average radius of TRD chambers float x0[kNLayers] = {300.2f, 312.8f, 325.4f, 338.0f, 350.6f, 363.2f}; // used as default value in case no transformation matrix can be obtained auto* matrix = mGeo->GetClusterMatrix(0); - My_Float loc[3] = {mGeo->AnodePos(), 0.f, 0.f}; - My_Float glb[3] = {0.f, 0.f, 0.f}; + float loc[3] = {mGeo->AnodePos(), 0.f, 0.f}; + float glb[3] = {0.f, 0.f, 0.f}; for (int32_t iDet = 0; iDet < kNChambers; ++iDet) { matrix = mGeo->GetClusterMatrix(iDet); if (!matrix) { @@ -304,11 +290,7 @@ GPUdi() const GPUTRDPropagatorGPU::propagatorParam* GPUTRDTracker_t GPUdi() const typename PROP::propagatorParam* GPUTRDTracker_t::getPropagatorParam() { -#if defined GPUCA_ALIROOT_LIB - return 
nullptr; -#else return GetConstantMem()->calibObjects.o2Propagator; -#endif } template @@ -338,11 +320,7 @@ GPUd() int32_t GPUTRDTracker_t::LoadTrack(const TRDTRK& trk, uint3 if (checkTrack && !CheckTrackTRDCandidate(trk)) { return 2; } -#ifdef GPUCA_ALIROOT_LIB - new (&mTracks[mNTracks]) TRDTRK(trk); // We need placement new, since the class is virtual -#else mTracks[mNTracks] = trk; -#endif mTracks[mNTracks].setRefGlobalTrackIdRaw(tpcTrackId); if (attribs) { mTrackAttribs[mNTracks] = *attribs; @@ -428,37 +406,6 @@ GPUd() void GPUTRDTracker_t::DoTrackingThread(int32_t iTrk, int32_ } } -#ifndef GPUCA_ALIROOT_LIB // AliRoot TRD geometry functions are non-const, and cannot work with a const geometry -template -GPUd() bool GPUTRDTracker_t::ConvertTrkltToSpacePoint(const GPUTRDGeometry& geo, GPUTRDTrackletWord& trklt, GPUTRDSpacePoint& sp) -{ - // converts a single GPUTRDTrackletWord into GPUTRDSpacePoint - // returns true if successfull - int32_t det = trklt.GetDetector(); - if (!geo.ChamberInGeometry(det)) { - return false; - } - auto* matrix = geo.GetClusterMatrix(det); - if (!matrix) { - return false; - } - const GPUTRDpadPlane* pp = geo.GetPadPlane(det); - int32_t trkltZbin = trklt.GetZbin(); - My_Float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates - My_Float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates - xTrkltDet[0] = geo.AnodePos() - sRadialOffset; - xTrkltDet[1] = trklt.GetY(); - xTrkltDet[2] = pp->GetRowPos(trkltZbin) - pp->GetRowSize(trkltZbin) / 2.f - pp->GetRowPos(pp->GetNrows() / 2); - matrix->LocalToMaster(xTrkltDet, xTrkltSec); - sp.setX(xTrkltSec[0]); - sp.setY(xTrkltSec[1]); - sp.setZ(xTrkltSec[2]); - sp.setDy(trklt.GetdY()); - - return true; -} -#endif - template GPUd() bool GPUTRDTracker_t::CalculateSpacePoints(int32_t iCollision) { @@ -495,8 +442,8 @@ GPUd() bool GPUTRDTracker_t::CalculateSpacePoints(int32_t iCollisi int32_t trkltIdxStart = trkltIdxOffset + iFirstTrackletInDet; for (int32_t trkltIdx = 
trkltIdxStart; trkltIdx < trkltIdxStart + nTrackletsInDet; ++trkltIdx) { int32_t trkltZbin = tracklets[trkltIdx].GetZbin(); - My_Float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates - My_Float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates + float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates + float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates xTrkltDet[0] = mGeo->AnodePos() + sRadialOffset; xTrkltDet[1] = tracklets[trkltIdx].GetY(); xTrkltDet[2] = pp->GetRowPos(trkltZbin) - pp->GetRowSize(trkltZbin) / 2.f - pp->GetRowPos(pp->GetNrows() / 2); @@ -681,8 +628,8 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK zPosCorr -= zShiftTrk; // shift tracklet instead of track in order to avoid having to do a re-fit for each collision float deltaY = yPosCorr - projY; float deltaZ = zPosCorr - projZ; - My_Float trkltPosTmpYZ[2] = {yPosCorr, zPosCorr}; - My_Float trkltCovTmp[3] = {0.f}; + float trkltPosTmpYZ[2] = {yPosCorr, zPosCorr}; + float trkltCovTmp[3] = {0.f}; if ((CAMath::Abs(deltaY) < roadY) && (CAMath::Abs(deltaZ) < roadZ)) { // TODO: check if this is still necessary after the cut before propagation of track // tracklet is in windwow: get predicted chi2 for update and store tracklet index if best guess RecalcTrkltCov(tilt, trkWork->getSnp(), pad->GetRowSize(tracklets[trkltIdx].GetZbin()), trkltCovTmp); @@ -772,8 +719,8 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK if (!((trkWork->getSigmaZ2() < (padLength * padLength / 12.f)) && (CAMath::Abs(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getZ() - trkWork->getZ()) < padLength))) { tiltCorrUp = 0.f; } - My_Float trkltPosUp[2] = {spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getY() - tiltCorrUp, zPosCorrUp}; - My_Float trkltCovUp[3] = {0.f}; + float trkltPosUp[2] = {spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getY() - tiltCorrUp, zPosCorrUp}; + 
float trkltCovUp[3] = {0.f}; RecalcTrkltCov(tilt, trkWork->getSnp(), pad->GetRowSize(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetZbin()), trkltCovUp); #ifdef ENABLE_GPUTRDDEBUG @@ -1015,7 +962,7 @@ GPUd() float GPUTRDTracker_t::GetAlphaOfSector(const int32_t sec) } template -GPUd() void GPUTRDTracker_t::RecalcTrkltCov(const float tilt, const float snp, const float rowSize, My_Float (&cov)[3]) +GPUd() void GPUTRDTracker_t::RecalcTrkltCov(const float tilt, const float snp, const float rowSize, float (&cov)[3]) { //-------------------------------------------------------------------- // recalculate tracklet covariance taking track phi angle into account @@ -1161,16 +1108,16 @@ GPUd() bool GPUTRDTracker_t::IsGeoFindable(const TRDTRK* t, const #ifndef GPUCA_GPUCODE -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { // instantiate version for AliExternalTrackParam / o2::TrackParCov data types -#if defined(GPUCA_ALIROOT_LIB) || defined(GPUCA_HAVE_O2HEADERS) +#if defined(GPUCA_HAVE_O2HEADERS) template class GPUTRDTracker_t; #endif // always instantiate version for GPU Track Model template class GPUTRDTracker_t; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index d688e2957846d..59e753e239cf9 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -30,17 +30,11 @@ #include #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { -#ifdef GPUCA_ALIROOT_LIB -#define TRD_GEOMETRY_CONST -#else -#define TRD_GEOMETRY_CONST const -#endif - class GPUTRDTrackletWord; class GPUTRDGeometry; class GPUChainTracking; @@ -102,7 +96,7 @@ class GPUTRDTracker_t : public GPUProcessor int16_t MemoryPermanent() const { return mMemoryPermanent; } - GPUhd() void OverrideGPUGeometry(TRD_GEOMETRY_CONST GPUTRDGeometry* geo) { mGeo = geo; } + GPUhd() void 
OverrideGPUGeometry(const GPUTRDGeometry* geo) { mGeo = geo; } void Reset(); template GPUd() bool PreCheckTrackTRDCandidate(const T& trk) const @@ -126,7 +120,7 @@ class GPUTRDTracker_t : public GPUProcessor GPUd() float GetAngularResolution(float snp) const { return mDyA2 + mDyC2 * (snp - mDyB) * (snp - mDyB); } // a^2 + c^2 * (snp - b)^2 GPUd() float ConvertAngleToDy(float snp) const { return mAngleToDyA + mAngleToDyB * snp + mAngleToDyC * snp * snp; } // a + b*snp + c*snp^2 is more accurate than sin(phi) = (dy / xDrift) / sqrt(1+(dy/xDrift)^2) GPUd() float GetAngularPull(float dYtracklet, float snp) const; - GPUd() void RecalcTrkltCov(const float tilt, const float snp, const float rowSize, My_Float (&cov)[3]); + GPUd() void RecalcTrkltCov(const float tilt, const float snp, const float rowSize, float (&cov)[3]); GPUd() void FindChambersInRoad(const TRDTRK* t, const float roadY, const float roadZ, const int32_t iLayer, int32_t* det, const float zMax, const float alpha, const float zShiftTrk) const; GPUd() bool IsGeoFindable(const TRDTRK* t, const int32_t layer, const float alpha, const float zShiftTrk) const; GPUd() void InsertHypothesis(Hypothesis hypo, int32_t& nCurrHypothesis, int32_t idxOffset); @@ -181,7 +175,7 @@ class GPUTRDTracker_t : public GPUProcessor Hypothesis* mHypothesis; // array with multiple track hypothesis TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame - TRD_GEOMETRY_CONST GPUTRDGeometry* mGeo; // TRD geometry + const GPUTRDGeometry* mGeo; // TRD geometry /// ---- error parametrization depending on magnetic field ---- float mRPhiA2; // parameterization for tracklet position resolution float mRPhiB; // parameterization for tracklet position resolution @@ -203,6 +197,6 @@ class GPUTRDTracker_t : public GPUProcessor GPUTRDTrackerDebug* mDebug; // debug output }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 
#endif // GPUTRDTRACKER_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.cxx deleted file mode 100644 index c6a60afc9b4bd..0000000000000 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.cxx +++ /dev/null @@ -1,509 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTRDTrackerComponent.cxx -/// \brief A TRD tracker processing component for the GPU - -/// \author Ole Schmidt - -#include "TSystem.h" -#include "TTimeStamp.h" -#include "TObjString.h" -#include "TClonesArray.h" -#include "TObjArray.h" -#include "AliESDEvent.h" -#include "AliHLTErrorGuard.h" -#include "AliHLTDataTypes.h" -#include "GPUTRDGeometry.h" -#include "GPUTRDTracker.h" -#include "GPUTRDTrack.h" -#include "GPUTRDTrackerComponent.h" -#include "GPUTRDSpacePoint.h" -#include "GPUTRDTrackletWord.h" -#include "GPUTRDTrackletLabels.h" -#include "AliHLTTRDDefinitions.h" -#include "AliHLTTPCDefinitions.h" -#include "GPUTRDTrackPoint.h" -#include "AliHLTGlobalBarrelTrack.h" -#include "AliExternalTrackParam.h" -#include "AliHLTExternalTrackParam.h" -#include "AliHLTTrackMCLabel.h" -#include "GPUTRDTrackData.h" -#include "AliGeomManager.h" -#include "GPUReconstruction.h" -#include "GPUChainTracking.h" -#include "GPUSettings.h" -#include -#include -#include - -using namespace GPUCA_NAMESPACE::gpu; - -ClassImp(GPUTRDTrackerComponent); - -GPUTRDTrackerComponent::GPUTRDTrackerComponent() - : fTracker(0x0), fGeo(0x0), 
fTrackList(0x0), fDebugTrackOutput(false), fVerboseDebugOutput(false), fRequireITStrack(false), fBenchmark("TRDTracker") -{ -} - -GPUTRDTrackerComponent::GPUTRDTrackerComponent(const GPUTRDTrackerComponent&) : fTracker(0x0), fGeo(0x0), fRec(0x0), fChain(0x0), fTrackList(0x0), AliHLTProcessor(), fDebugTrackOutput(false), fVerboseDebugOutput(false), fRequireITStrack(false), fBenchmark("TRDTracker") -{ - // see header file for class documentation - HLTFatal("copy constructor untested"); -} - -GPUTRDTrackerComponent& GPUTRDTrackerComponent::operator=(const GPUTRDTrackerComponent&) -{ - // see header file for class documentation - HLTFatal("assignment operator untested"); - return *this; -} - -GPUTRDTrackerComponent::~GPUTRDTrackerComponent() { delete fTracker; } - -const char* GPUTRDTrackerComponent::GetComponentID() { return "TRDTracker"; } - -void GPUTRDTrackerComponent::GetInputDataTypes(std::vector& list) -{ - list.clear(); - list.push_back(kAliHLTDataTypeTrack | kAliHLTDataOriginITS); - // list.push_back( kAliHLTDataTypeTrack|kAliHLTDataOriginTPC ); - list.push_back(AliHLTTPCDefinitions::TracksOuterDataType() | kAliHLTDataOriginTPC); - list.push_back(kAliHLTDataTypeTrackMC | kAliHLTDataOriginTPC); - list.push_back(AliHLTTRDDefinitions::fgkTRDTrackletDataType); - list.push_back(AliHLTTRDDefinitions::fgkTRDMCTrackletDataType); -} - -AliHLTComponentDataType GPUTRDTrackerComponent::GetOutputDataType() { return kAliHLTMultipleDataType; } - -int32_t GPUTRDTrackerComponent::GetOutputDataTypes(AliHLTComponentDataTypeList& tgtList) -{ - // see header file for class documentation - tgtList.clear(); - tgtList.push_back(AliHLTTRDDefinitions::fgkTRDTrackDataType | kAliHLTDataOriginTRD); - tgtList.push_back(AliHLTTRDDefinitions::fgkTRDTrackPointDataType | kAliHLTDataOriginTRD); - tgtList.push_back(kAliHLTDataTypeTObject | kAliHLTDataOriginTRD); - return tgtList.size(); -} - -void GPUTRDTrackerComponent::GetOutputDataSize(uint64_t& constBase, double& inputMultiplier) -{ - // 
define guess for the output data size - constBase = 1000; // minimum size - inputMultiplier = 2.; // size relative to input -} - -AliHLTComponent* GPUTRDTrackerComponent::Spawn() -{ - // see header file for class documentation - return new GPUTRDTrackerComponent; -} - -int32_t GPUTRDTrackerComponent::ReadConfigurationString(const char* arguments) -{ - // Set configuration parameters for the TRD tracker component from the string - - int32_t iResult = 0; - if (!arguments) { - return iResult; - } - - TString allArgs = arguments; - TString argument; - - TObjArray* pTokens = allArgs.Tokenize(" "); - - int32_t nArgs = pTokens ? pTokens->GetEntries() : 0; - - for (int32_t i = 0; i < nArgs; i++) { - argument = ((TObjString*)pTokens->At(i))->GetString(); - if (argument.IsNull()) { - continue; - } - - if (argument.CompareTo("-debugOutput") == 0) { - fDebugTrackOutput = true; - fVerboseDebugOutput = true; - HLTInfo("Tracks are dumped in the GPUTRDTrackGPU format"); - continue; - } - - if (argument.CompareTo("-requireITStrack") == 0) { - fRequireITStrack = true; - HLTInfo("TRD tracker requires seeds (TPC tracks) to have an ITS match"); - continue; - } - - HLTError("Unknown option \"%s\"", argument.Data()); - iResult = -EINVAL; - } - delete pTokens; - - return iResult; -} - -// ################################################################################# -int32_t GPUTRDTrackerComponent::DoInit(int argc, const char** argv) -{ - // see header file for class documentation - - int32_t iResult = 0; - if (fTracker) { - return -EINPROGRESS; - } - - fBenchmark.Reset(); - fBenchmark.SetTimer(0, "total"); - fBenchmark.SetTimer(1, "reco"); - - if (AliGeomManager::GetGeometry() == nullptr) { - AliGeomManager::LoadGeometry(); - } - - fTrackList = new TList(); - if (!fTrackList) { - return -ENOMEM; - } - fTrackList->SetOwner(kFALSE); - - TString arguments = ""; - for (int32_t i = 0; i < argc; i++) { - if (!arguments.IsNull()) { - arguments += " "; - } - arguments += argv[i]; - } - - 
iResult = ReadConfigurationString(arguments.Data()); - - GPUSettingsGRP cfgGRP; - cfgGRP.solenoidBzNominalGPU = GetBz(); - GPUSettingsRec cfgRec; - GPUSettingsProcessing cfgDeviceProcessing; - GPURecoStepConfiguration cfgRecoStep; - cfgRecoStep.steps = GPUDataTypes::RecoStep::NoRecoStep; - cfgRecoStep.inputs.clear(); - cfgRecoStep.outputs.clear(); - fRec = GPUReconstruction::CreateInstance("CPU", true); - fRec->SetSettings(&cfgGRP, &cfgRec, &cfgDeviceProcessing, &cfgRecoStep); - fChain = fRec->AddChain(); - - fGeo = new GPUTRDGeometry(); - if (!fGeo) { - return -ENOMEM; - } - if (!GPUTRDGeometry::CheckGeometryAvailable()) { - HLTError("TRD geometry not available"); - return -EINVAL; - } - fTracker = new GPUTRDTrackerGPU(); - if (!fTracker) { - return -ENOMEM; - } - if (fVerboseDebugOutput) { - fTracker->EnableDebugOutput(); - } - fRec->RegisterGPUProcessor(fTracker, false); - fChain->SetTRDGeometry(reinterpret_cast(fGeo)); - if (fRec->Init()) { - return -EINVAL; - } - - return iResult; -} - -// ################################################################################# -int32_t GPUTRDTrackerComponent::DoDeinit() -{ - // see header file for class documentation - delete fTracker; - fTracker = 0x0; - delete fGeo; - fGeo = 0x0; - return 0; -} - -// ################################################################################# -int32_t GPUTRDTrackerComponent::DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& /*trigData*/, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, std::vector& outputBlocks) -{ - // process event - - if (!IsDataEvent()) { - return 0; - } - - if (evtData.fBlockCnt <= 0) { - HLTWarning("no blocks in event"); - return 0; - } - - fBenchmark.StartNewEvent(); - fBenchmark.Start(0); - - AliHLTUInt32_t maxBufferSize = size; - size = 0; // output size - - int32_t iResult = 0; - - if (fTrackList->GetEntries() != 0) { - fTrackList->Clear(); // tracks are owned by GPUTRDTrackerGPU - } - 
- int32_t nBlocks = evtData.fBlockCnt; - - const AliHLTTracksData* tpcData = nullptr; - const AliHLTTracksData* itsData = nullptr; - const AliHLTTrackMCData* tpcDataMC = nullptr; - - std::vector tracksTPC; - std::vector tracksTPCId; - - bool hasMCtracklets = false; - - int32_t nTrackletsTotal = 0; - int32_t nTrackletsTotalMC = 0; - const GPUTRDTrackletWord* tracklets = nullptr; - const GPUTRDTrackletLabels* trackletsMC = nullptr; - - for (int32_t iBlock = 0; iBlock < nBlocks; iBlock++) { - if (blocks[iBlock].fDataType == (kAliHLTDataTypeTrack | kAliHLTDataOriginITS) && fRequireITStrack) { - itsData = (const AliHLTTracksData*)blocks[iBlock].fPtr; - fBenchmark.AddInput(blocks[iBlock].fSize); - } else if (blocks[iBlock].fDataType == (AliHLTTPCDefinitions::TracksOuterDataType() | kAliHLTDataOriginTPC)) { - tpcData = (const AliHLTTracksData*)blocks[iBlock].fPtr; - fBenchmark.AddInput(blocks[iBlock].fSize); - } else if (blocks[iBlock].fDataType == (kAliHLTDataTypeTrackMC | kAliHLTDataOriginTPC)) { - tpcDataMC = (const AliHLTTrackMCData*)blocks[iBlock].fPtr; - fBenchmark.AddInput(blocks[iBlock].fSize); - } else if (blocks[iBlock].fDataType == (AliHLTTRDDefinitions::fgkTRDTrackletDataType)) { - tracklets = reinterpret_cast(blocks[iBlock].fPtr); - nTrackletsTotal = blocks[iBlock].fSize / sizeof(GPUTRDTrackletWord); - fBenchmark.AddInput(blocks[iBlock].fSize); - } else if (blocks[iBlock].fDataType == (AliHLTTRDDefinitions::fgkTRDMCTrackletDataType)) { - hasMCtracklets = true; - trackletsMC = reinterpret_cast(blocks[iBlock].fPtr); - nTrackletsTotalMC = blocks[iBlock].fSize / sizeof(GPUTRDTrackletLabels); - fBenchmark.AddInput(blocks[iBlock].fSize); - } - } - - if (tpcData == nullptr) { - HLTInfo("did not receive any TPC tracks. Skipping event"); - return 0; - } - - if (nTrackletsTotal == 0) { - HLTInfo("did not receive any TRD tracklets. 
Skipping event"); - return 0; - } - - if (hasMCtracklets && nTrackletsTotal != nTrackletsTotalMC) { - HLTError("the numbers of input tracklets does not match the number of input MC labels for them"); - return -EINVAL; - } - - // copy tracklets into temporary vector to allow for sorting them (the input array is const) - std::vector trackletsTmp(nTrackletsTotal); - for (int32_t iTrklt = 0; iTrklt < nTrackletsTotal; ++iTrklt) { - trackletsTmp[iTrklt] = tracklets[iTrklt]; - } - - int32_t nTPCtracks = tpcData->fCount; - std::vector itsAvail(nTPCtracks, false); - if (itsData) { - // look for ITS tracks with >= 2 hits - int32_t nITStracks = itsData->fCount; - const AliHLTExternalTrackParam* currITStrack = itsData->fTracklets; - for (int32_t iTrkITS = 0; iTrkITS < nITStracks; iTrkITS++) { - if (currITStrack->fNPoints >= 2) { - itsAvail.at(currITStrack->fTrackID) = true; - } - uint32_t dSize = sizeof(AliHLTExternalTrackParam) + currITStrack->fNPoints * sizeof(uint32_t); - currITStrack = (AliHLTExternalTrackParam*)(((Byte_t*)currITStrack) + dSize); - } - } - std::map mcLabels; - if (tpcDataMC) { - // look for TPC track MC labels - int32_t nMCtracks = tpcDataMC->fCount; - for (int32_t iMC = 0; iMC < nMCtracks; iMC++) { - const AliHLTTrackMCLabel& lab = tpcDataMC->fLabels[iMC]; - mcLabels[lab.fTrackID] = lab.fMCLabel; - } - } - const AliHLTExternalTrackParam* currOutTrackTPC = tpcData->fTracklets; - for (int32_t iTrk = 0; iTrk < nTPCtracks; iTrk++) { - // store TPC tracks (if required only the ones with >=2 ITS hits) - if (itsData != nullptr && !itsAvail.at(currOutTrackTPC->fTrackID)) { - continue; - } - GPUTRDTrackGPU t(*currOutTrackTPC); - int32_t mcLabel = -1; - if (tpcDataMC) { - if (mcLabels.find(currOutTrackTPC->fTrackID) != mcLabels.end()) { - mcLabel = mcLabels[currOutTrackTPC->fTrackID]; - } - } - tracksTPC.push_back(t); - tracksTPCId.push_back(currOutTrackTPC->fTrackID); - uint32_t dSize = sizeof(AliHLTExternalTrackParam) + currOutTrackTPC->fNPoints * 
sizeof(uint32_t); - currOutTrackTPC = (AliHLTExternalTrackParam*)+(((Byte_t*)currOutTrackTPC) + dSize); - } - - if (fVerboseDebugOutput) { - HLTInfo("TRDTrackerComponent received %i tracklets\n", nTrackletsTotal); - } - - fTracker->SetGenerateSpacePoints(true); - fTracker->Reset(); - fChain->mIOPtrs.nMergedTracks = tracksTPC.size(); - fChain->mIOPtrs.nTRDTracklets = nTrackletsTotal; - fChain->mIOPtrs.nTRDTriggerRecords = 1; - uint8_t trigRecMaskDummy[1] = {1}; - fChain->mIOPtrs.trdTrigRecMask = &(trigRecMaskDummy[0]); - fRec->PrepareEvent(); - fRec->SetupGPUProcessor(fTracker, true); - - std::sort(trackletsTmp.begin(), trackletsTmp.end()); - fChain->mIOPtrs.trdTracklets = &(trackletsTmp[0]); - - // loop over all tracks - for (uint32_t iTrack = 0; iTrack < tracksTPC.size(); ++iTrack) { - fTracker->LoadTrack(tracksTPC[iTrack], tracksTPCId[iTrack]); - } - - fBenchmark.Start(1); - fChain->DoTRDGPUTracking<1>(fTracker); - fBenchmark.Stop(1); - - GPUTRDTrackGPU* trackArray = fTracker->Tracks(); - int32_t nTracks = fTracker->NTracks(); - GPUTRDSpacePoint* spacePoints = fTracker->SpacePoints(); - - // TODO delete fTrackList since it only works for TObjects (or use compiler flag after tests with GPU track type) - // for (int32_t iTrack=0; iTrackAddLast(&trackArray[iTrack]); - //} - - // push back GPUTRDTracks for debugging purposes - if (fDebugTrackOutput) { - PushBack(fTrackList, (kAliHLTDataTypeTObject | kAliHLTDataOriginTRD), 0x3fffff); - } - // push back AliHLTExternalTrackParam (default) - else { - - AliHLTUInt32_t blockSize = GPUTRDTrackData::GetSize(nTracks); - if (size + blockSize > maxBufferSize) { - HLTWarning("Output buffer exceeded for tracks"); - return -ENOSPC; - } - - GPUTRDTrackData* outTracks = (GPUTRDTrackData*)(outputPtr); - outTracks->fCount = 0; - int32_t assignedTracklets = 0; - - for (int32_t iTrk = 0; iTrk < nTracks; ++iTrk) { - GPUTRDTrackGPU& t = trackArray[iTrk]; - if (t.getNtracklets() == 0) { - continue; - } - assignedTracklets += 
t.getNtracklets(); - GPUTRDTrackDataRecord& currOutTrack = outTracks->fTracks[outTracks->fCount]; - t.ConvertTo(currOutTrack); - outTracks->fCount++; - } - - AliHLTComponentBlockData resultData; - FillBlockData(resultData); - resultData.fOffset = size; - resultData.fSize = blockSize; - resultData.fDataType = AliHLTTRDDefinitions::fgkTRDTrackDataType; - outputBlocks.push_back(resultData); - fBenchmark.AddOutput(resultData.fSize); - - size += blockSize; - outputPtr += resultData.fSize; - - blockSize = 0; - - // space points calculated from tracklets - - blockSize = sizeof(GPUTRDTrackPointData) + sizeof(GPUTRDTrackPoint) * nTrackletsTotal; - - if (size + blockSize > maxBufferSize) { - HLTWarning("Output buffer exceeded for space points"); - return -ENOSPC; - } - - GPUTRDTrackPointData* outTrackPoints = (GPUTRDTrackPointData*)(outputPtr); - outTrackPoints->fCount = nTrackletsTotal; - - { // fill array with 0 for a case.. - GPUTRDTrackPoint empty; - empty.fX[0] = 0; - empty.fX[1] = 0; - empty.fX[2] = 0; - empty.fVolumeId = 0; - for (int32_t i = 0; i < nTrackletsTotal; ++i) { - outTrackPoints->fPoints[i] = empty; - } - } - - for (int32_t i = 0; i < nTrackletsTotal; ++i) { - const GPUTRDSpacePoint& sp = spacePoints[i]; - GPUTRDTrackPoint* currOutPoint = &outTrackPoints->fPoints[i]; - currOutPoint->fX[0] = sp.getX(); // x in sector coordinates - currOutPoint->fX[1] = sp.getY(); // y in sector coordinates - currOutPoint->fX[2] = sp.getZ(); // z in sector coordinates - int32_t detId = trackletsTmp[i].GetDetector(); - int32_t layer = detId % 6; // TRD layer number for given detector - int32_t modId = (detId / 18) * 5 + ((detId % 30) / 6); // global TRD stack number [0..89] - int32_t volId = (UShort_t(9 + layer) << 11) | UShort_t(modId); // taken from AliGeomManager::LayerToVolUID(). 
AliGeomManager::ELayerID(AliGeomManager::kTRD1) == 9 - currOutPoint->fVolumeId = volId; - } - AliHLTComponentBlockData resultDataSP; - FillBlockData(resultDataSP); - resultDataSP.fOffset = size; - resultDataSP.fSize = blockSize; - resultDataSP.fDataType = AliHLTTRDDefinitions::fgkTRDTrackPointDataType | kAliHLTDataOriginTRD; - outputBlocks.push_back(resultDataSP); - fBenchmark.AddOutput(resultData.fSize); - size += blockSize; - outputPtr += resultDataSP.fSize; - - HLTInfo("TRD tracker: output %d tracks (%d assigned tracklets) and %d track points", outTracks->fCount, assignedTracklets, outTrackPoints->fCount); - } - - fBenchmark.Stop(0); - HLTInfo(fBenchmark.GetStatistics()); - - return iResult; -} - -// ################################################################################# -int32_t GPUTRDTrackerComponent::Reconfigure(const char* cdbEntry, const char* chainId) -{ - // see header file for class documentation - - int32_t iResult = 0; - TString cdbPath; - if (cdbEntry) { - cdbPath = cdbEntry; - } else { - cdbPath = "HLT/ConfigGlobal/"; - cdbPath += GetComponentID(); - } - - AliInfoClass(Form("reconfigure '%s' from entry %s%s", chainId, cdbPath.Data(), cdbEntry ? "" : " (default)")); - iResult = ConfigureFromCDBTObjString(cdbPath); - - return iResult; -} diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.h deleted file mode 100644 index e6d4ef609e101..0000000000000 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerComponent.h +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTRDTrackerComponent.h -/// \brief A TRD tracker processing component for the GPU - -/// \author Ole Schmidt - -#ifndef GPUTRDTRACKERCOMPONENT_H -#define GPUTRDTRACKERCOMPONENT_H - -#ifndef GPUCA_ALIROOT_LIB -#define GPUCA_ALIROOT_LIB -#endif - -#include "AliHLTProcessor.h" -#include "AliHLTComponentBenchmark.h" -#include "AliHLTDataTypes.h" - -class TH1F; -class TList; - -#include "GPUTRDDef.h" -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ -class GPUTRDGeometry; -class GPUReconstruction; -class GPUChainTracking; -} // namespace gpu -} // namespace GPUCA_NAMESPACE - -class GPUTRDTrackerComponent : public AliHLTProcessor -{ - public: - /* - * --------------------------------------------------------------------------------- - * Constructor / Destructor - * --------------------------------------------------------------------------------- - */ - - /** constructor */ - GPUTRDTrackerComponent(); - - /** dummy copy constructor, defined according to effective C++ style */ - GPUTRDTrackerComponent(const GPUTRDTrackerComponent&); - - /** dummy assignment op, but defined according to effective C++ style */ - GPUTRDTrackerComponent& operator=(const GPUTRDTrackerComponent&); - - /** destructor */ - virtual ~GPUTRDTrackerComponent(); - - /* - * --------------------------------------------------------------------------------- - * Public functions to implement AliHLTComponent's interface. 
- * These functions are required for the registration process - * --------------------------------------------------------------------------------- - */ - - /** interface function, see @ref AliHLTComponent for description */ - const char* GetComponentID(); - - /** interface function, see @ref AliHLTComponent for description */ - void GetInputDataTypes(vector& list); - - /** interface function, see @ref AliHLTComponent for description */ - AliHLTComponentDataType GetOutputDataType(); - - /** @see component interface @ref AliHLTComponent::GetOutputDataType */ - int32_t GetOutputDataTypes(AliHLTComponentDataTypeList& tgtList); - - /** interface function, see @ref AliHLTComponent for description */ - void GetOutputDataSize(uint64_t& constBase, double& inputMultiplier); - - /** interface function, see @ref AliHLTComponent for description */ - AliHLTComponent* Spawn(); - - int32_t ReadConfigurationString(const char* arguments); - - protected: - /* - * --------------------------------------------------------------------------------- - * Protected functions to implement AliHLTComponent's interface. - * These functions provide initialization as well as the actual processing - * capabilities of the component. 
- * --------------------------------------------------------------------------------- - */ - - // AliHLTComponent interface functions - - /** interface function, see @ref AliHLTComponent for description */ - int32_t DoInit(int argc, const char** argv); - - /** interface function, see @ref AliHLTComponent for description */ - int32_t DoDeinit(); - - /** interface function, see @ref AliHLTComponent for description */ - int32_t DoEvent(const AliHLTComponentEventData& evtData, const AliHLTComponentBlockData* blocks, AliHLTComponentTriggerData& trigData, AliHLTUInt8_t* outputPtr, AliHLTUInt32_t& size, vector& outputBlocks); - - /** interface function, see @ref AliHLTComponent for description */ - int32_t Reconfigure(const char* cdbEntry, const char* chainId); - - /////////////////////////////////////////////////////////////////////////////////// - - private: - /* - * --------------------------------------------------------------------------------- - * Private functions to implement AliHLTComponent's interface. - * These functions provide initialization as well as the actual processing - * capabilities of the component. 
- * --------------------------------------------------------------------------------- - */ - - /* - * --------------------------------------------------------------------------------- - * Helper - * --------------------------------------------------------------------------------- - */ - - /* - * --------------------------------------------------------------------------------- - * Members - private - * --------------------------------------------------------------------------------- - */ - GPUCA_NAMESPACE::gpu::GPUTRDTrackerGPU* fTracker; // the tracker itself - GPUCA_NAMESPACE::gpu::GPUTRDGeometry* fGeo; // TRD geometry needed by the tracker - GPUCA_NAMESPACE::gpu::GPUReconstruction* fRec; // GPU Reconstruction object - GPUCA_NAMESPACE::gpu::GPUChainTracking* fChain; // Tracking Chain Object - - TList* fTrackList; - bool fDebugTrackOutput; // output GPUTRDTracks instead AliHLTExternalTrackParam - bool fVerboseDebugOutput; // more verbose information is printed - bool fRequireITStrack; // only TPC tracks with ITS match are used as seeds for tracking - AliHLTComponentBenchmark fBenchmark; // benchmark - - ClassDef(GPUTRDTrackerComponent, 0); -}; -#endif // GPUTRDTRACKERCOMPONENT_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h index ed590ff7b89ad..45b083a4cca66 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h @@ -17,299 +17,13 @@ #ifndef GPUTRDTRACKERDEBUG_H #define GPUTRDTRACKERDEBUG_H -#if defined(ENABLE_GPUTRDDEBUG) && defined(GPUCA_ALIROOT_LIB) +#if defined(ENABLE_GPUTRDDEBUG) && 0 -#include "TVectorF.h" -#include "TTreeStream.h" -#include "GPULogging.h" -#include "GPUTRDTrack.h" - -namespace GPUCA_NAMESPACE -{ -namespace gpu -{ - -template -class GPUTRDTrackerDebug -{ - public: - GPUTRDTrackerDebug() : fStreamer(0x0) {} - ~GPUTRDTrackerDebug() { delete fStreamer; } - - void CreateStreamer() - { - GPUInfo("Creating streamer for 
debugging"); - fStreamer = new TTreeSRedirector("TRDhlt.root", "recreate"); - } - - int32_t GetSector(float alpha) - { - if (alpha < 0) { - alpha += 2.f * M_PI; - } - return (int32_t)(alpha * 18 / (2.f * M_PI)); - } - - void ExpandVectors() - { - fTrackX.ResizeTo(6); - fTrackY.ResizeTo(6); - fTrackZ.ResizeTo(6); - fTrackPhi.ResizeTo(6); - fTrackLambda.ResizeTo(6); - fTrackPt.ResizeTo(6); - fTrackQPt.ResizeTo(6); - fTrackSector.ResizeTo(6); - fTrackYerr.ResizeTo(6); - fTrackZerr.ResizeTo(6); - fTrackNoUpX.ResizeTo(6); - fTrackNoUpY.ResizeTo(6); - fTrackNoUpZ.ResizeTo(6); - fTrackNoUpPhi.ResizeTo(6); - fTrackNoUpLambda.ResizeTo(6); - fTrackNoUpPt.ResizeTo(6); - fTrackNoUpSector.ResizeTo(6); - fTrackNoUpYerr.ResizeTo(6); - fTrackNoUpZerr.ResizeTo(6); - fTrackletX.ResizeTo(6); - fTrackletY.ResizeTo(6); - fTrackletZ.ResizeTo(6); - ; - fTrackletYcorr.ResizeTo(6); - fTrackletZcorr.ResizeTo(6); - fTrackletY2err.ResizeTo(6); - fTrackletYZerr.ResizeTo(6); - fTrackletZ2err.ResizeTo(6); - fTrackletDy.ResizeTo(6); - fTrackletDet.ResizeTo(6); - fRoadY.ResizeTo(6); - fRoadZ.ResizeTo(6); - fChi2Update.ResizeTo(6); - fFindable.ResizeTo(6); - } - - void Reset() - { - fTrackX.Zero(); - fTrackY.Zero(); - fTrackZ.Zero(); - fTrackPhi.Zero(); - fTrackLambda.Zero(); - fTrackPt.Zero(); - fTrackQPt.Zero(); - fTrackSector.Zero(); - fTrackYerr.Zero(); - fTrackZerr.Zero(); - fTrackNoUpX.Zero(); - fTrackNoUpY.Zero(); - fTrackNoUpZ.Zero(); - fTrackNoUpPhi.Zero(); - fTrackNoUpLambda.Zero(); - fTrackNoUpPt.Zero(); - fTrackNoUpSector.Zero(); - fTrackNoUpYerr.Zero(); - fTrackNoUpZerr.Zero(); - fTrackletX.Zero(); - fTrackletY.Zero(); - fTrackletZ.Zero(); - ; - fTrackletYcorr.Zero(); - fTrackletZcorr.Zero(); - fTrackletY2err.Zero(); - fTrackletYZerr.Zero(); - fTrackletZ2err.Zero(); - fTrackletDy.Zero(); - fTrackletDet.Zero(); - fRoadY.Zero(); - fRoadZ.Zero(); - fChi2Update.Zero(); - fFindable.Zero(); - fEv = 0; - fNTPCtracks = 0; - fTrk = 0; - fPtTPC = 0.f; - fNtrklts = 0; - fNlayers = 0; - fChi2 = 
0.f; - } - - // general information - void SetGeneralInfo(int32_t iEv, int32_t nTPCtracks, int32_t iTrk, float pt) - { - fEv = iEv; - fNTPCtracks = nTPCtracks; - fTrk = iTrk; - fPtTPC = pt; - } - - // track parameters - void SetTrackParameter(const T& trk, int32_t ly) - { - fTrackX(ly) = trk.getX(); - fTrackY(ly) = trk.getY(); - fTrackZ(ly) = trk.getZ(); - fTrackPhi(ly) = trk.getSnp(); - fTrackLambda(ly) = trk.getTgl(); - fTrackPt(ly) = trk.getPt(); - fTrackQPt(ly) = trk.getQ2Pt(); - fTrackSector(ly) = GetSector(trk.getAlpha()); - fTrackYerr(ly) = trk.getSigmaY2(); - fTrackZerr(ly) = trk.getSigmaZ2(); - } - void SetTrackParameterNoUp(const T& trk, int32_t ly) - { - fTrackNoUpX(ly) = trk.getX(); - fTrackNoUpY(ly) = trk.getY(); - fTrackNoUpZ(ly) = trk.getZ(); - fTrackNoUpPhi(ly) = trk.getSnp(); - fTrackNoUpLambda(ly) = trk.getTgl(); - fTrackNoUpPt(ly) = trk.getPt(); - fTrackNoUpSector(ly) = GetSector(trk.getAlpha()); - fTrackNoUpYerr(ly) = trk.getSigmaY2(); - fTrackNoUpZerr(ly) = trk.getSigmaZ2(); - } - void SetTrack(const T& trk) - { - fChi2 = trk.getChi2(); - fNlayers = trk.getNlayers(); - fNtrklts = trk.getNtracklets(); - for (int32_t iLy = 0; iLy < 6; iLy++) { - if (trk.getIsFindable(iLy)) { - fFindable(iLy) = 1; - } - } - } - - // tracklet parameters - void SetRawTrackletPosition(const float fX, const float fY, const float fZ, int32_t ly) - { - fTrackletX(ly) = fX; - fTrackletY(ly) = fY; - fTrackletZ(ly) = fZ; - } - void SetCorrectedTrackletPosition(const My_Float* fYZ, int32_t ly) - { - fTrackletYcorr(ly) = fYZ[0]; - fTrackletZcorr(ly) = fYZ[1]; - } - void SetTrackletCovariance(const My_Float* fCov, int32_t ly) - { - fTrackletY2err(ly) = fCov[0]; - fTrackletYZerr(ly) = fCov[1]; - fTrackletZ2err(ly) = fCov[2]; - } - void SetTrackletProperties(const float dy, const int32_t det, int32_t ly) - { - fTrackletDy(ly) = dy; - fTrackletDet(ly) = det; - } - - // update information - void SetChi2Update(float chi2, int32_t ly) { fChi2Update(ly) = chi2; } - - // other infos 
- void SetRoad(float roadY, float roadZ, int32_t ly) - { - fRoadY(ly) = roadY; - fRoadZ(ly) = roadZ; - } - - void Output() - { - (*fStreamer) << "tracksFinal" - << "event=" << fEv << // event number - "nTPCtracks=" << fNTPCtracks << // total number of TPC tracks for this event - "iTrack=" << fTrk << // track index in event - "trackPtTPC=" << fPtTPC << // track pT before any propagation - "trackX.=" << &fTrackX << // x-pos of track (layerwise) - "trackY.=" << &fTrackY << // y-pos of track (layerwise) - "trackZ.=" << &fTrackZ << // z-pos of track (layerwise) - "trackPhi.=" << &fTrackPhi << // phi angle of track (track.fP[2]) - "trackLambda.=" << &fTrackLambda << // lambda angle of track (track.fP[3]) - "trackQPt.=" << &fTrackQPt << // track q/pT (track.fP[4]) - "trackPt.=" << &fTrackPt << // track pT (layerwise) - "trackYerr.=" << &fTrackYerr << // sigma_y^2 for track - "trackZerr.=" << &fTrackZerr << // sigma_z^2 for track - "trackSec.=" << &fTrackSector << // TRD sector of track - "trackNoUpX.=" << &fTrackNoUpX << // x-pos of track w/o updates (layerwise) - "trackNoUpY.=" << &fTrackNoUpY << // y-pos of track w/o updates (layerwise) - "trackNoUpZ.=" << &fTrackNoUpZ << // z-pos of track w/o updates (layerwise) - "trackNoUpPhi.=" << &fTrackNoUpPhi << // phi angle of track w/o updates (track.fP[2]) - "trackNoUpLambda.=" << &fTrackNoUpLambda << // lambda angle of track w/o updates (track.fP[3]) - "trackNoUpPt.=" << &fTrackNoUpPt << // track pT w/o updates (layerwise) - "trackNoUpYerr.=" << &fTrackNoUpYerr << // sigma_y^2 for track w/o updates - "trackNoUpZerr.=" << &fTrackNoUpZerr << // sigma_z^2 for track w/o updates - "trackNoUpSec.=" << &fTrackNoUpSector << // TRD sector of track w/o updates - "trackletX.=" << &fTrackletX << // x position of tracklet used for update (sector coords) - "trackletY.=" << &fTrackletYcorr << // y position of tracklet used for update (sector coords, tilt corrected position) - "trackletZ.=" << &fTrackletZcorr << // z position of tracklet 
used for update (sector coords, tilt corrected position) - "trackletYRaw.=" << &fTrackletY << // y position of tracklet used for update (sector coords) - "trackletZRaw.=" << &fTrackletZ << // z position of tracklet used for update (sector coords) - "trackletYerr.=" << &fTrackletY2err << // sigma_y^2 for tracklet - "trackletYZerr.=" << &fTrackletYZerr << // sigma_yz for tracklet - "trackletZerr.=" << &fTrackletZ2err << // sigma_z^2 for tracklet - "trackletDy.=" << &fTrackletDy << // deflection for tracklet - "trackletDet.=" << &fTrackletDet << // TRD chamber of tracklet - "chi2Update.=" << &fChi2Update << // chi2 for update - "chi2Total=" << fChi2 << // total chi2 for track - "nLayers=" << fNlayers << // number of layers in which track was findable - "nTracklets=" << fNtrklts << // number of attached tracklets - "roadY.=" << &fRoadY << // search road width in Y - "roadZ.=" << &fRoadZ << // search road width in Z - "findable.=" << &fFindable << // whether or not track was in active TRD volume (layerwise) - "\n"; - } - - private: - int32_t fEv; - int32_t fNTPCtracks; - int32_t fTrk; - float fPtTPC; - int32_t fNlayers; - float fChi2; - TVectorF fTrackX; - TVectorF fTrackY; - TVectorF fTrackZ; - TVectorF fTrackPhi; - TVectorF fTrackLambda; - TVectorF fTrackPt; - TVectorF fTrackQPt; - TVectorF fTrackSector; - TVectorF fTrackYerr; - TVectorF fTrackZerr; - TVectorF fTrackNoUpX; - TVectorF fTrackNoUpY; - TVectorF fTrackNoUpZ; - TVectorF fTrackNoUpPhi; - TVectorF fTrackNoUpLambda; - TVectorF fTrackNoUpPt; - TVectorF fTrackNoUpSector; - TVectorF fTrackNoUpYerr; - TVectorF fTrackNoUpZerr; - TVectorF fTrackletX; - TVectorF fTrackletY; - TVectorF fTrackletZ; - TVectorF fTrackletYcorr; - TVectorF fTrackletZcorr; - TVectorF fTrackletY2err; - TVectorF fTrackletYZerr; - TVectorF fTrackletZ2err; - TVectorF fTrackletDy; - TVectorF fTrackletDet; - TVectorF fChi2Update; - TVectorF fRoadY; - TVectorF fRoadZ; - TVectorF fFindable; - - TTreeSRedirector* fStreamer; -}; -template class 
GPUTRDTrackerDebug; -} // namespace gpu -} // namespace GPUCA_NAMESPACE +// could implement debug code, as we had for AliRoot #else -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -332,8 +46,8 @@ class GPUTRDTrackerDebug // tracklet parameters GPUd() void SetRawTrackletPosition(const float fX, const float fY, const float fZ, int32_t ly) {} - GPUd() void SetCorrectedTrackletPosition(const My_Float* fYZ, int32_t ly) {} - GPUd() void SetTrackletCovariance(const My_Float* fCov, int32_t ly) {} + GPUd() void SetCorrectedTrackletPosition(const float* fYZ, int32_t ly) {} + GPUd() void SetTrackletCovariance(const float* fCov, int32_t ly) {} GPUd() void SetTrackletProperties(const float dy, const int32_t det, int32_t ly) {} // update information @@ -346,15 +60,13 @@ class GPUTRDTrackerDebug GPUd() void Output() {} }; #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code. -#ifndef GPUCA_ALIROOT_LIB template class GPUTRDTrackerDebug; -#endif #if !defined(GPUCA_STANDALONE) && !defined(GPUCA_GPUCODE) template class GPUTRDTrackerDebug; #endif #endif } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif #endif // GPUTRDTRACKERDEBUG_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx index 336e54e9efc67..376194e4a586c 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx @@ -20,7 +20,7 @@ #include "GPUReconstruction.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template GPUdii() void GPUTRDTrackerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance) diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 
4430ce850fb14..8745eabb02473 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -17,7 +17,7 @@ #include "GPUGeneralKernels.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -33,6 +33,6 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTRDTRACKERKERNELSCA_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h index dea3f5ffe4d27..d56ee1cbbba5e 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h @@ -17,7 +17,7 @@ #ifndef GPUTRDTRACKLETLABELS_H #define GPUTRDTRACKLETLABELS_H -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -26,6 +26,6 @@ struct GPUTRDTrackletLabels { int32_t mLabel[3]; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUTRDTRACKLETLABELS_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.cxx deleted file mode 100644 index 02c6891fc45b4..0000000000000 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.cxx +++ /dev/null @@ -1,396 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTRDTrackletReaderComponent.cxx -/// \brief A pre-processing component for TRD tracking/trigger data on FEP-level - -/// \author Felix Rettig, Stefan Kirsch, Ole Schmidt - -#include -#include "AliLog.h" -#include "AliHLTDataTypes.h" -#include "AliHLTTRDDefinitions.h" -#include "GPUTRDTrackletReaderComponent.h" -#include "AliRawReaderMemory.h" -#include "AliTRDrawStream.h" -#include "GPUTRDTrackletWord.h" -#include "GPUTRDTrackletLabels.h" -#include "AliTRDtrackletWord.h" -#include "AliTRDtrackletMCM.h" -#include "TTree.h" -#include "TEventList.h" -#include "AliRunLoader.h" -#include "AliLoader.h" -#include "AliDataLoader.h" - -using namespace GPUCA_NAMESPACE::gpu; - -ClassImp(GPUTRDTrackletReaderComponent); - -#define LogError(...) \ - { \ - HLTError(__VA_ARGS__); \ - if (fDebugLevel >= 1) { \ - DbgLog("ERROR", __VA_ARGS__); \ - } \ - } -#define LogInfo(...) \ - { \ - HLTInfo(__VA_ARGS__); \ - if (fDebugLevel >= 1) { \ - DbgLog("INFO", __VA_ARGS__); \ - } \ - } -#define LogInspect(...) \ - { \ - HLTDebug(__VA_ARGS__); \ - if (fDebugLevel >= 1) { \ - DbgLog("INSPECT", __VA_ARGS__); \ - } \ - } -#define LogDebug(...) 
\ - { \ - if (fDebugLevel >= 1) { \ - HLTInfo(__VA_ARGS__); \ - DbgLog("DEBUG", __VA_ARGS__); \ - } \ - } - -GPUTRDTrackletReaderComponent::GPUTRDTrackletReaderComponent() - : AliHLTProcessor(), fDebugLevel(0), fEventId(fgkInvalidEventId), fTrackletArray(nullptr), fRawReaderMem(nullptr), fRawReaderTrd(nullptr) -{ - // constructor -} - -GPUTRDTrackletReaderComponent::~GPUTRDTrackletReaderComponent() -{ - // destructor -} - -const char* GPUTRDTrackletReaderComponent::GetComponentID() { return "TRDTrackletReader"; } - -void GPUTRDTrackletReaderComponent::GetInputDataTypes(vector& list) -{ - list.push_back(kAliHLTDataTypeDDLRaw | kAliHLTDataOriginTRD); - list.push_back(kAliHLTDataTypeAliTreeD | kAliHLTDataOriginTRD); -} - -AliHLTComponentDataType GPUTRDTrackletReaderComponent::GetOutputDataType() { return kAliHLTMultipleDataType; } - -int32_t GPUTRDTrackletReaderComponent::GetOutputDataTypes(AliHLTComponentDataTypeList& tgtList) -{ - tgtList.clear(); - tgtList.push_back(AliHLTTRDDefinitions::fgkTRDTrackletDataType); - tgtList.push_back(AliHLTTRDDefinitions::fgkTRDMCTrackletDataType); - return tgtList.size(); -} - -void GPUTRDTrackletReaderComponent::GetOutputDataSize(uint64_t& constBase, double& inputMultiplier) -{ - constBase = 5000000; - inputMultiplier = 0; -} - -void GPUTRDTrackletReaderComponent::GetOCDBObjectDescription(TMap* const /*targetMap*/) {} - -AliHLTComponent* GPUTRDTrackletReaderComponent::Spawn() { return new GPUTRDTrackletReaderComponent; } - -int32_t GPUTRDTrackletReaderComponent::Reconfigure(const char* /*cdbEntry*/, const char* /*chainId*/) { return 0; } - -int32_t GPUTRDTrackletReaderComponent::ReadPreprocessorValues(const char* /*modules*/) { return 0; } - -int32_t GPUTRDTrackletReaderComponent::ScanConfigurationArgument(int argc, const char** argv) -{ - - if (argc <= 0) { - return 0; - } - - uint16_t iArg = 0; - TString argument(argv[iArg]); - - if (!argument.CompareTo("-debug")) { - if (++iArg >= argc) { - return -EPROTO; - } - argument = 
argv[iArg]; - fDebugLevel = argument.Atoi(); - LogInfo("debug level set to %d.", fDebugLevel); - return 2; - } - - return 0; -} - -int32_t GPUTRDTrackletReaderComponent::DoInit(int argc, const char** argv) -{ - - int32_t iResult = 0; - - do { - - fRawReaderMem = new AliRawReaderMemory; - if (!fRawReaderMem) { - iResult = -ENOMEM; - break; - } - - fTrackletArray = new TClonesArray("AliTRDtrackletWord", 1000); - if (!fTrackletArray) { - iResult = -ENOMEM; - break; - } - - fRawReaderTrd = new AliTRDrawStream(fRawReaderMem); - if (!fRawReaderTrd) { - iResult = -ENOMEM; - break; - } - - fRawReaderTrd->SetTrackletArray(fTrackletArray); - - // Disable raw reader error messages that could flood HLT logbook - AliLog::SetClassDebugLevel("AliTRDrawStream", 0); - fRawReaderTrd->SetErrorDebugLevel(AliTRDrawStream::kLinkMonitor, 1); - - } while (0); - - if (iResult < 0) { - - if (fRawReaderTrd) { - delete fRawReaderTrd; - } - fRawReaderTrd = nullptr; - - if (fRawReaderMem) { - delete fRawReaderMem; - } - fRawReaderMem = nullptr; - - if (fTrackletArray) { - delete fTrackletArray; - } - fTrackletArray = nullptr; - } - - vector remainingArgs; - for (int32_t i = 0; i < argc; ++i) { - remainingArgs.push_back(argv[i]); - } - - if (argc > 0) { - ConfigureFromArgumentString(remainingArgs.size(), &(remainingArgs[0])); - } - - return iResult; -} - -int32_t GPUTRDTrackletReaderComponent::DoDeinit() -{ - - if (fRawReaderTrd) { - delete fRawReaderTrd; - } - fRawReaderTrd = nullptr; - - if (fRawReaderMem) { - delete fRawReaderMem; - } - fRawReaderMem = nullptr; - - if (fTrackletArray) { - delete fTrackletArray; - } - fTrackletArray = nullptr; - - return 0; -} - -// void GPUTRDTrackletReaderComponent::DbgLog(const char* prefix, const char* msg){ -// AliHLTEventID_t eventNumber = fEventId; -// int32_t runNumber = -1; -// HLTInfo("TRDGM %s-%s: [PRE] %s%s", -// (runNumber >= 0) ? Form("%06d", runNumber) : "XXXXXX", -// (eventNumber != fgkInvalidEventId) ? 
Form("%05llu", eventNumber) : "XXXXX", -// (strlen(prefix) > 0) ? Form("<%s> ", prefix) : "", msg); -//} - -void GPUTRDTrackletReaderComponent::DbgLog(const char* prefix, ...) -{ -#ifdef __TRDHLTDEBUG - AliHLTEventID_t eventNumber = fEventId; - int32_t runNumber = -1; - printf("TRDHLTGM %s-X-%s: [PRE] %s", (runNumber >= 0) ? Form("%06d", runNumber) : "XXXXXX", (eventNumber != fgkInvalidEventId) ? Form("%05llu", eventNumber) : "XXXXX", (strlen(prefix) > 0) ? Form("<%s> ", prefix) : ""); -#endif - va_list args; - va_start(args, prefix); - char* fmt = va_arg(args, char*); - vprintf(fmt, args); - printf("\n"); - va_end(args); -} - -int32_t GPUTRDTrackletReaderComponent::DoEvent(const AliHLTComponentEventData& hltEventData, AliHLTComponentTriggerData& /*trigData*/) -{ - - fEventId = hltEventData.fEventID; - - HLTInfo("### START DoEvent [event id: %lu, %d blocks, size: %d]", hltEventData.fEventID, hltEventData.fBlockCnt, hltEventData.fStructSize); - - // event processing function - int32_t iResult = 0; - - fTrackletArray->Clear(); - fRawReaderMem->ClearBuffers(); - - if (!IsDataEvent()) { // process data events only - HLTInfo("### END DoEvent [event id: %lu, %d blocks, size: %d] (skipped: no data event)", hltEventData.fEventID, hltEventData.fBlockCnt, hltEventData.fStructSize); - return iResult; - } - - std::vector outputTrkls; - std::vector outputTrklsMC; - - { // read raw data - - TString infoStr(""); - uint32_t sourceSectors = 0; - - // loop over all incoming TRD raw data blocks - for (const AliHLTComponentBlockData* pBlock = GetFirstInputBlock(kAliHLTDataTypeDDLRaw | kAliHLTDataOriginTRD); pBlock != nullptr && iResult >= 0; pBlock = GetNextInputBlock()) { - - int32_t trdSector = -1; - - // determine sector from block specification - for (unsigned pos = 0; pos < 8 * sizeof(AliHLTUInt32_t); pos++) { - if (pBlock->fSpecification & (0x1 << pos)) { - if (trdSector >= 0) { - HLTWarning("Cannot uniquely identify DDL number from specification, skipping data block %s 0x%08x", 
DataType2Text(pBlock->fDataType).c_str(), pBlock->fSpecification); - trdSector = -1; - break; - } - trdSector = pos; - } - } - if (trdSector < 0) { - continue; - } - - // add data block to rawreader - infoStr += Form("%02d, ", trdSector); - sourceSectors |= pBlock->fSpecification; - if (!fRawReaderMem->AddBuffer((uint8_t*)pBlock->fPtr, pBlock->fSize, trdSector + 1024)) { - LogError("Could not add buffer of data block %s, 0x%08x to rawreader", DataType2Text(pBlock->fDataType).c_str(), pBlock->fSpecification); - continue; - } - } // loop over all incoming TRD raw data blocks - - if (sourceSectors) { - infoStr.Remove(infoStr.Length() - 2, 2); - LogDebug("preprocessing raw data from sectors: %s...", infoStr.Data()); - - // extract header info and TRD tracklets from raw data - fRawReaderTrd->ReadEvent(); - - // read and process TRD tracklets - int32_t nTracklets = fTrackletArray->GetEntriesFast(); - - HLTInfo("There are %i tracklets in this event\n", nTracklets); - for (int32_t iTracklet = 0; iTracklet < nTracklets; ++iTracklet) { - GPUTRDTrackletWord trkl = *((AliTRDtrackletWord*)fTrackletArray->At(iTracklet)); - outputTrkls.push_back(trkl); - } - LogDebug("pushing data for sectors: 0x%05x", sourceSectors); - } - fRawReaderMem->ClearBuffers(); - } - - { // loop over all incoming TRD MC tracklets data blocks - - for (const TObject* iter = GetFirstInputObject(kAliHLTDataTypeAliTreeD | kAliHLTDataOriginTRD); iter != nullptr; iter = GetNextInputObject()) { - TTree* trackletTree = dynamic_cast(const_cast(iter)); - if (!trackletTree) { - HLTFatal("No Tracklet Tree found"); - return -EINVAL; - } - - TBranch* trklbranch = trackletTree->GetBranch("mcmtrklbranch"); - if (!trklbranch) { - HLTFatal("No tracklet branch found in tracklet tree"); - return -EINVAL; - } - int32_t nTracklets = trklbranch->GetEntries(); - HLTInfo("Input tree with %d TRD MCM tracklets", nTracklets); - - //----------------------------------- - // Deploy same hack as in ITS Clusterizer - AliRunLoader* 
pRunLoader = AliRunLoader::Instance(); - if (!pRunLoader) { - HLTError("failed to get global runloader instance"); - return -ENOSYS; - } - pRunLoader->GetEvent(GetEventCount()); - const char* loaderType = "TRDLoader"; - AliLoader* pLoader = pRunLoader->GetLoader(loaderType); - if (!pLoader) { - HLTError("can not get loader \"%s\" from runloader", loaderType); - return -ENOSYS; - } - pLoader->LoadDigits("read"); - AliDataLoader* dataLoader = pLoader->GetDataLoader("tracklets"); - if (dataLoader) { - trackletTree = dataLoader->Tree(); - dataLoader->Load("read"); - } else { - HLTWarning("TRD tracklet loader not found"); - } - trklbranch = trackletTree->GetBranch("mcmtrklbranch"); - if (!trklbranch) { - HLTFatal("No tracklet branch found in tracklet tree"); - return -EINVAL; - } - if (trklbranch->GetEntries() != nTracklets) { - HLTFatal("Incorrect number of tracklets in tree"); - return -EINVAL; - } - //----------------------------------- - - AliTRDtrackletMCM* trkl = 0x0; - trklbranch->SetAddress(&trkl); - - for (int32_t iTracklet = 0; iTracklet < nTracklets; iTracklet++) { - int32_t nbytes = trklbranch->GetEntry(iTracklet, 1); - if (!trkl || nbytes <= 0) { - HLTWarning("Can not read entry from tracklet branch"); - continue; - } - GPUTRDTrackletWord hltTrkl = *trkl; - outputTrkls.push_back(hltTrkl); - GPUTRDTrackletLabels trklMC; - trklMC.mLabel[0] = trkl->GetLabel(0); - trklMC.mLabel[1] = trkl->GetLabel(1); - trklMC.mLabel[2] = trkl->GetLabel(2); - outputTrklsMC.push_back(trklMC); - } - } - } - - if (outputTrkls.size() > 0) { - iResult = PushBack(&outputTrkls[0], outputTrkls.size() * sizeof(outputTrkls[0]), AliHLTTRDDefinitions::fgkTRDTrackletDataType, 0); - } - if (outputTrklsMC.size() > 0) { - iResult = PushBack(&outputTrklsMC[0], outputTrklsMC.size() * sizeof(outputTrklsMC[0]), AliHLTTRDDefinitions::fgkTRDMCTrackletDataType, 0); - } - - HLTInfo("### END DoEvent [event id: %lu, %d blocks, size: %d, output tracklets: %d]", hltEventData.fEventID, 
hltEventData.fBlockCnt, hltEventData.fStructSize, outputTrkls.size()); - - return iResult; -} diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.h deleted file mode 100644 index 5aa60cc68291f..0000000000000 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletReaderComponent.h +++ /dev/null @@ -1,126 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTRDTrackletReaderComponent.h -/// \brief A pre-processing component for TRD tracking/trigger data on FEP-level - -/// \author Felix Rettig, Stefan Kirsch, Ole Schmidt - -#ifndef GPUTRDTRACKLETREADERCOMPONENT_H -#define GPUTRDTRACKLETREADERCOMPONENT_H - -#ifndef GPUCA_ALIROOT_LIB -#define GPUCA_ALIROOT_LIB -#endif - -#include "AliHLTProcessor.h" - -class AliRawReaderMemory; -class TTree; -class AliTRDrawStream; -class AliTRDonlineTrackingDataContainer; -class TClonesArray; - -/** - * @class GPUTRDTrackletReaderComponent - * Component fetches raw data input objects in DDL format and extracts tracklets. - * It also instantiates a RawReader in order to be used with some reconstruction. - * - * More information and examples can be found here (relative to $ALICE_ROOT): - * - * -- HLT/BASE/AliHLTComponent.h/.cxx, HLT/BASE/AliHLTProcessor.h/.cxx - * Interface definition and description - * -- HLT/SampleLib: example implementations of components - * - * - *

General properties:

- * - * Component ID: \b TRDReaderComponent
- * Library: \b libAliHLTTRD.so
- * Input Data Types: @ref kAliHLTDataTypeDDLRaw|kAliHLTDataOriginTRD
- * Output Data Types: @ref kAliHLTTrackDataTypeID|kAliHLTDataOriginTRD
- * - *

Mandatory arguments:

- * none - * - *

Optional arguments:

- * none - * - *

Configuration:

- * none - * - *

Default CDB entries:

- * none - * - *

Performance:

- * minmal - * - *

Memory consumption:

- * don't know yet - * - *

Output size:

- * not very much - * - * @ingroup The component has no output data. - */ -class GPUTRDTrackletReaderComponent : public AliHLTProcessor -{ - public: - GPUTRDTrackletReaderComponent(); - virtual ~GPUTRDTrackletReaderComponent(); - - // AliHLTComponent interface functions - const char* GetComponentID(); - void GetInputDataTypes(vector& list); - AliHLTComponentDataType GetOutputDataType(); - int32_t GetOutputDataTypes(AliHLTComponentDataTypeList& tgtList); - void GetOutputDataSize(uint64_t& constBase, double& inputMultiplier); - void GetOCDBObjectDescription(TMap* const targetMap); - - // Spawn function, return new class instance - AliHLTComponent* Spawn(); - - protected: - // AliHLTComponent interface functions - int32_t DoInit(int argc, const char** argv); - int32_t DoDeinit(); - int32_t DoEvent(const AliHLTComponentEventData& evtData, AliHLTComponentTriggerData& trigData); - int32_t ScanConfigurationArgument(int argc, const char** argv); - int32_t Reconfigure(const char* cdbEntry, const char* chainId); - int32_t ReadPreprocessorValues(const char* modules); - - using AliHLTProcessor::DoEvent; - - private: - /** copy constructor prohibited */ - GPUTRDTrackletReaderComponent(const GPUTRDTrackletReaderComponent&); - /** assignment operator prohibited */ - GPUTRDTrackletReaderComponent& operator=(const GPUTRDTrackletReaderComponent&); - - void DbgLog(const char* prefix, ...); - - // general - static const AliHLTEventID_t fgkInvalidEventId = 18446744073709551615llu; - - UShort_t fDebugLevel; //! set debug checks/output level, 0: debug off - AliHLTEventID_t fEventId; //! event ID - - // trd specific data - TClonesArray* fTrackletArray; //! internal tracklet array - - // rawreader instance - AliRawReaderMemory* fRawReaderMem; //! TRD raw reader memory instance - AliTRDrawStream* fRawReaderTrd; //! 
TRD raw stream instance - - ClassDef(GPUTRDTrackletReaderComponent, 0); -}; - -#endif // GPUTRDTRACKLETREADERCOMPONENT_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.cxx index cc7b6b77fd4a0..c14f80ed2d8e0 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.cxx @@ -13,7 +13,7 @@ /// \author Ole Schmidt #include "GPUTRDTrackletWord.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifndef GPUCA_TPC_GEOMETRY_O2 @@ -22,24 +22,6 @@ GPUd() GPUTRDTrackletWord::GPUTRDTrackletWord(uint32_t trackletWord) : mHCId(-1) } GPUd() GPUTRDTrackletWord::GPUTRDTrackletWord(uint32_t trackletWord, int32_t hcid) : mHCId(hcid), mTrackletWord(trackletWord) {} -#ifdef GPUCA_ALIROOT_LIB -#include "AliTRDtrackletWord.h" -#include "AliTRDtrackletMCM.h" - -GPUTRDTrackletWord::GPUTRDTrackletWord(const AliTRDtrackletWord& rhs) : mHCId(rhs.GetHCId()), mTrackletWord(rhs.GetTrackletWord()) -{ -} -GPUTRDTrackletWord::GPUTRDTrackletWord(const AliTRDtrackletMCM& rhs) : mHCId(rhs.GetHCId()), mTrackletWord(rhs.GetTrackletWord()) {} - -GPUTRDTrackletWord& GPUTRDTrackletWord::operator=(const AliTRDtrackletMCM& rhs) -{ - this->~GPUTRDTrackletWord(); - new (this) GPUTRDTrackletWord(rhs); - return *this; -} - -#endif // GPUCA_ALIROOT_LIB - GPUd() int32_t GPUTRDTrackletWord::GetYbin() const { // returns (signed) value of Y diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index 542700b7fe355..83acbcda8e3a1 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -24,7 +24,7 @@ class AliTRDtrackletWord; class AliTRDtrackletMCM; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -73,13 +73,13 @@ class GPUTRDTrackletWord // bits: 8 4 7 13 }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #else // 
compatibility with Run 3 data types #include "DataFormatsTRD/Tracklet64.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -109,7 +109,7 @@ class GPUTRDTrackletWord : private o2::trd::Tracklet64 static_assert(sizeof(GPUTRDTrackletWord) == sizeof(o2::trd::Tracklet64), "Incorrect memory layout"); } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUCA_TPC_GEOMETRY_O2 diff --git a/GPU/GPUTracking/TRDTracking/macros/run_trd_tracker.C b/GPU/GPUTracking/TRDTracking/macros/run_trd_tracker.C index b098d3e16b5ea..3d86a77b6d9cc 100644 --- a/GPU/GPUTracking/TRDTracking/macros/run_trd_tracker.C +++ b/GPU/GPUTracking/TRDTracking/macros/run_trd_tracker.C @@ -40,8 +40,7 @@ #endif -using namespace GPUCA_NAMESPACE::gpu; - +using namespace o2::gpu; void run_trd_tracker(std::string path = "./", std::string inputTracks = "o2match_itstpc.root", diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 1667ad867a9e7..ff003eca78948 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -52,10 +52,6 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) endif() set(TMP_PRE "") set(TMP_POST "") - if(NOT kernel_bounds MATCHES "_ALIR") - set(TMP_PRE "#ifdef GPUCA_KRNL_NOALIROOT\n") - set(TMP_POST "#endif\n") - endif() set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_type}), (${OPT1}), (${OPT2}), (${OPT3}))\n") separate_arguments(kernel_files NATIVE_COMMAND ${kernel_files}) list(GET kernel_files 0 TMP_KERNEL_CLASS_FILE) diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index 6ea59c4c2c9fe..2e67ddda7c99c 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -18,7 +18,7 @@ #include "GPUCommonAlgorithm.h" #include "GPUParam.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifndef GPUCA_GPUCODE_DEVICE GPUd() void 
GPUdEdx::clear() diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 8c042d51514c4..8cff279076348 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -26,7 +26,7 @@ #include "GPUDebugStreamer.h" #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -214,6 +214,6 @@ GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestri #endif // !GPUCA_HAVE_O2HEADERS } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 56e59d664491a..4c770b32ee66a 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -44,7 +44,7 @@ constexpr hmm_mat4 MY_HMM_IDENTITY = {{{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, {0, 0, 0, 1}}}; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; static const GPUSettingsDisplay& GPUDisplay_GetConfig(GPUChainTracking* chain) { diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index ab6fe540d01bf..e7836461e4fd9 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -29,7 +29,7 @@ #include "utils/timer.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -322,6 +322,6 @@ class GPUDisplay : public GPUDisplayInterface float mYFactor = 1.0f; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.cxx b/GPU/GPUTracking/display/GPUDisplayInterface.cxx index 12b7b96540f79..2f5cc9cbb5dd5 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.cxx +++ b/GPU/GPUTracking/display/GPUDisplayInterface.cxx @@ -21,7 +21,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; static constexpr const char* libName = "lib" LIBRARY_PREFIX "GPUTrackingDisplay" LIBRARY_EXTENSION; static constexpr const char* 
funcName = "GPUTrackingDisplayLoader"; diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h b/GPU/GPUTracking/display/GPUDisplayInterface.h index 49cdb7e9e2038..44ae35068cac3 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -17,7 +17,7 @@ #include "GPUSettings.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -62,6 +62,6 @@ class GPUDisplayFrontendInterface }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif // GPUDISPLAYINTERFACE_H diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx index 15b759e658354..508c9d0b2e4ff 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx @@ -36,7 +36,7 @@ #include "GPUDisplay.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUDisplayBackend::GPUDisplayBackend() = default; GPUDisplayBackend::~GPUDisplayBackend() = default; diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h index 8f00f39b97ce3..c2c23f659e418 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h @@ -28,7 +28,7 @@ union hmm_mat4; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplay; class GPUDisplayFrontend; @@ -131,6 +131,6 @@ class GPUDisplayBackend std::unique_ptr mMagneticFieldVisualization; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx index b92872a79c6de..10acbea3a2586 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx @@ -30,7 +30,7 @@ #define OPENGL_EMULATE_MULTI_DRAW 0 -using 
namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifdef GPUCA_BUILD_EVENT_DISPLAY_VULKAN #include "utils/qGetLdBinarySymbols.h" diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.h b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.h index b1bcb25740ed4..d9bb50d7bead8 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.h +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.h @@ -19,7 +19,7 @@ #include -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GLfb { uint32_t fb_id = 0, fbCol_id = 0, fbDepth_id = 0; @@ -120,6 +120,6 @@ class GPUDisplayBackendOpenGL : public GPUDisplayBackend GLfb mMixBuffer; GLfb mOffscreenBufferMSAA, mOffscreenBuffer; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx index 5491555ace8d2..64167afaa536d 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx @@ -21,7 +21,7 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE #include "GPUDisplayBackendVulkan.h" #include "GPUDisplay.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #include "utils/qGetLdBinarySymbols.h" QGET_LD_BINARY_SYMBOLS(shaders_shaders_vertex_vert_spv); diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h index 00310e58dd5a8..97c24084915f6 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h @@ -22,7 +22,7 @@ #include #include -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayBackendVulkan : public GPUDisplayBackend @@ -217,6 +217,6 @@ class GPUDisplayBackendVulkan : public GPUDisplayBackend vk::Fence mSingleCommitFence; }; -} // namespace GPUCA_NAMESPACE::gpu +} // 
namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx index b48d5b994fcf8..590d8648eb5bb 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx @@ -33,15 +33,15 @@ #ifdef GPUCA_BUILD_EVENT_DISPLAY_QT #include "GPUDisplayGUIWrapper.h" #else -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayGUIWrapper { }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUDisplayFrontend::~GPUDisplayFrontend() = default; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h index 1c459d77beee9..ceb63e788564a 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h @@ -19,7 +19,7 @@ #include "GPUDisplayInterface.h" #include -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUReconstruction; class GPUDisplay; @@ -146,6 +146,6 @@ class GPUDisplayFrontend : public GPUDisplayFrontendInterface void ExitDisplay(); // Callback to clean up the GL Display int32_t& drawTextFontSize(); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx index d9cfbedf3ba53..4d80917a26215 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.cxx @@ -46,7 +46,7 @@ extern "C" int32_t gl3wInit(); #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUDisplayFrontendGlfw::GPUDisplayFrontendGlfw() { diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h index 
792abcf64b839..5276652a370a1 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlfw.h @@ -20,7 +20,7 @@ struct GLFWwindow; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayFrontendGlfw : public GPUDisplayFrontend { @@ -65,6 +65,6 @@ class GPUDisplayFrontendGlfw : public GPUDisplayFrontend uint8_t mLastKeyDown = 0; bool mUseIMGui = false; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx index ab435586820e8..334a60446a4f3 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.cxx @@ -23,7 +23,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; static GPUDisplayFrontendGlut* me = nullptr; GPUDisplayFrontendGlut::GPUDisplayFrontendGlut() diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h index 71d7d17c935c8..96f8f4af6cba5 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendGlut.h @@ -18,7 +18,7 @@ #include "GPUDisplayFrontend.h" #include -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayFrontendGlut : public GPUDisplayFrontend { @@ -56,6 +56,6 @@ class GPUDisplayFrontendGlut : public GPUDisplayFrontend int32_t mHeight = INIT_HEIGHT; bool mFullScreen = false; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx index 080d48b41cd93..c48000bd80685 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.cxx @@ 
-13,4 +13,4 @@ /// \author David Rohr #include "GPUDisplayFrontendNone.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h index c95927c6655b9..defd759ac4df6 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendNone.h @@ -17,7 +17,7 @@ #include "GPUDisplayFrontend.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayFrontendNone : public GPUDisplayFrontend { @@ -31,6 +31,6 @@ class GPUDisplayFrontendNone : public GPUDisplayFrontend void SetVSync(bool enable) override {} void OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton = true) override {} }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx index b920259a1e70d..ed0d08fb24add 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx @@ -37,7 +37,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUDisplayFrontendWayland::GPUDisplayFrontendWayland() { diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h index f13c6fe831e0d..6dfe0a361fbb6 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.h @@ -28,7 +28,7 @@ struct xkb_context; struct xkb_keymap; struct xkb_state; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayFrontendWayland : public GPUDisplayFrontend { @@ -85,6 +85,6 @@ class GPUDisplayFrontendWayland : public GPUDisplayFrontend int32_t mWidthRequested = 0; 
int32_t mHeightRequested = 0; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx index d8e05a3f03b9b..8d48536e0a351 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx @@ -24,7 +24,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; HDC hDC = nullptr; // Private GDI Device Context HGLRC hRC = nullptr; // Permanent Rendering Context diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h index 62b1a7437a58f..a8534f3f0fc1f 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.h @@ -17,7 +17,7 @@ #include "GPUDisplayFrontend.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayFrontendWindows : public GPUDisplayFrontend { @@ -35,6 +35,6 @@ class GPUDisplayFrontendWindows : public GPUDisplayFrontend private: int32_t FrontendMain() override; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx index 2ef2ccca92baf..96011aa064bac 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.cxx @@ -30,7 +30,7 @@ typedef GLXContext (*glXCreateContextAttribsARBProc)(Display*, GLXFBConfig, GLXContext, Bool, const int32_t*); -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUDisplayFrontendX11::GPUDisplayFrontendX11() { diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h index 
16920ce77460e..f14d05b3080bd 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendX11.h @@ -21,7 +21,7 @@ #include #include -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayFrontendX11 : public GPUDisplayFrontend { @@ -55,6 +55,6 @@ class GPUDisplayFrontendX11 : public GPUDisplayFrontend PFNGLXSWAPINTERVALEXTPROC mGlXSwapIntervalEXT = nullptr; bool vsync_supported = false; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx index a5ba968f9e50d..69d24538123c6 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx @@ -22,9 +22,9 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUDisplayGUIWrapperObjects { std::unique_ptr app; @@ -39,7 +39,7 @@ struct GPUDisplayGUIWrapperObjects { std::mutex mutex, mutexRet; std::condition_variable signal, signalRet; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu GPUDisplayGUIWrapper::GPUDisplayGUIWrapper() { diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h index ef632c6f2d54b..00542321d6a19 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUDisplayGUIWrapperObjects; @@ -39,5 +39,5 @@ class GPUDisplayGUIWrapper void guiThread(); }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif // GPUDISPLAYGUIWRAPPER_H diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx 
b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index 8dccdc60c0d93..431240e93b732 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -14,7 +14,7 @@ #include "GPUDisplay.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; const char* HelpText[] = { "[ESC] Quit", diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx index 77b7181a3a377..6c0595b073cd0 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx @@ -14,7 +14,7 @@ #include "GPUDisplay.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; constexpr hmm_mat4 MY_HMM_FROM(float (&v)[16]) { return {{{v[0], v[1], v[2], v[3]}, {v[4], v[5], v[6], v[7]}, {v[8], v[9], v[10], v[11]}, {v[12], v[13], v[14], v[15]}}}; } diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx index 16166baa9a91c..b04c93ab8496e 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx @@ -32,7 +32,7 @@ #include "shaders/GPUDisplayShaders.h" #include "GPUDisplay.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; // Runtime minimum version defined in GPUDisplayFrontend.h, keep in sync! 
#define GPUCA_BUILD_EVENT_DISPLAY_OPENGL diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc b/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc index 2994af86980d8..c10e0d3a55876 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc +++ b/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc @@ -14,7 +14,7 @@ #include "GPUDisplay.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; inline void GPUDisplay::ActivateColor() { diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx index d782898380281..866d4a59aab82 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx @@ -21,7 +21,7 @@ #include "bitmapfile.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; int32_t GPUDisplay::getNumThreads() { diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx index 3df61bfc81110..644995929acb7 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx @@ -15,7 +15,7 @@ #include #include "GPUDisplay.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUDisplay::opengl_spline::create(const vecpod& x, const vecpod& y) { diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx index 27f0355e95583..ee50f32e3c1ac 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx @@ -19,7 +19,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; extern "C" void* GPUTrackingDisplayLoader(const char*, void*); diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.cxx index 
352f029ab3648..7e23e6694d377 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.cxx @@ -28,7 +28,7 @@ #include "DetectorsBase/Propagator.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; GPUDisplayMagneticField::GPUDisplayMagneticField() { diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.h b/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.h index f8461e4c036f4..d4eb5b571fdc0 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.h +++ b/GPU/GPUTracking/display/helpers/GPUDisplayMagneticField.h @@ -23,7 +23,7 @@ #include #endif -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUDisplayMagneticField { @@ -141,6 +141,6 @@ class GPUDisplayMagneticField std::unique_ptr mDipoleParameterization; std::vector mFieldLineSeedPoints; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif // GPUDISPLAYMAGNETICFIELD_H diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayQuaternion.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayQuaternion.cxx index 6caaabe938df4..1b77a685c0242 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayQuaternion.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayQuaternion.cxx @@ -15,7 +15,7 @@ #include "GPUDisplay.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUDisplay::createQuaternionFromMatrix(float* v, const float* mat) { diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx index 4d99c3aa38cfc..dbeefc7bf9b07 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx @@ -17,7 +17,7 @@ #endif #include "GPUDisplay.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifndef GPUCA_NO_ROOT #include "Rtypes.h" // Include ROOT header first, to use ROOT and disable replacements diff --git 
a/GPU/GPUTracking/display/helpers/field-uniform-exporter.cxx b/GPU/GPUTracking/display/helpers/field-uniform-exporter.cxx index d8210979efa64..e57c7389e6d74 100644 --- a/GPU/GPUTracking/display/helpers/field-uniform-exporter.cxx +++ b/GPU/GPUTracking/display/helpers/field-uniform-exporter.cxx @@ -21,7 +21,7 @@ #include "GPUDisplayMagneticField.h" namespace bpo = boost::program_options; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template void saveSegments(std::ofstream& file, int32_t NSegDim1, int32_t NSegDim2, int32_t NSegDim3, GPUDisplayMagneticField::SegmentsUniform& segments) diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index ffebc373b253f..3d15077c8cf10 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -42,7 +42,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #define GET_CID(slice, i) (mParam->par.earlyTpcTransform ? 
mIOPtrs->clusterData[slice][i].id : (mIOPtrs->clustersNative->clusterOffset[slice][0] + i)) diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index 072119f7d528f..56ce3bef39082 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -35,7 +35,7 @@ #include "ITSMFTBase/DPLAlpideParam.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void GPUDisplay::DrawGLScene_updateEventData() { diff --git a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h index ff22006ceb37e..63673505f4732 100644 --- a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h +++ b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h @@ -16,7 +16,7 @@ #define GPUDISPLAYSHADERS_H #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -469,6 +469,6 @@ void main() { )"; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index bfa738201b637..c46419c439eb0 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -33,16 +33,16 @@ o2_gpu_kernel_file_list(MATLUT) o2_gpu_kernel_file_list(TPCMERGER) endif() -o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB_ALIR single) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB_ALIR single) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB_ALIR single) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB_ALIR single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "= TPCTRACKER" LB_ALIR single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB_ALIR single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB_ALIR both) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO_ALIR 
"simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO_ALIR "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking TPCTRACKER" NO_ALIR single int32_t n) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB both) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking TPCTRACKER" NO single int32_t n) o2_gpu_add_kernel("GPUTPCGlobalTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) o2_gpu_add_kernel("GPUTPCCreateSliceData" "= TPCTRACKER TPCSLICEDATA" LB single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) diff --git a/GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.cxx b/GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.cxx deleted file mode 100644 index 4cac2612e4cb2..0000000000000 --- a/GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.cxx +++ /dev/null @@ -1,425 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file AliHLT3DTrackParam.cxx -/// \author Sergey Gorbunov - -#include "AliHLT3DTrackParam.h" -#include "TMath.h" - -ClassImp(AliHLT3DTrackParam); - -//* Transport utilities - -double AliHLT3DTrackParam::GetDStoPoint(double Bz, const double xyz[3], const double* T0) const -{ - //* Get DS = Path/Momentum to a certain space point for Bz field - - double q = fSignQ; - if (!T0) { - T0 = mParam; - } else { - q = T0[6]; - } - - const double kCLight = 0.000299792458; - double bq = Bz * q * kCLight; - double pt2 = T0[3] * T0[3] + T0[4] * T0[4]; - if (pt2 < 1.e-4) { - return 0; - } - double dx = xyz[0] - T0[0]; - double dy = xyz[1] - T0[1]; - double a = dx * T0[3] + dy * T0[4]; - double dS = 0; - if (TMath::Abs(bq) < 1.e-8) { - dS = a / pt2; - } else { - dS = TMath::ATan2(bq * a, pt2 + bq * (dy * T0[3] - dx * T0[4])) / bq; - } - return dS; -} - -void AliHLT3DTrackParam::TransportToDS(double Bz, double DS, double* T0) -{ - //* Transport the particle on DS = Path/Momentum, for Bz field - - double tmp[7]; - if (!T0) { - T0 = tmp; - T0[0] = mParam[0]; - T0[1] = mParam[1]; - T0[2] = mParam[2]; - T0[3] = mParam[3]; - T0[4] = mParam[4]; - T0[5] = mParam[5]; - T0[6] = fSignQ; - } - const double kCLight = 0.000299792458; - Bz = Bz * T0[6] * kCLight; - double bs = Bz * DS; - double s = TMath::Sin(bs), c = TMath::Cos(bs); - double sB, cB; - if (TMath::Abs(bs) > 1.e-10) { - sB = s / Bz; - cB = (1 - c) / Bz; - } else { - const Double_t kOvSqr6 = 1. / TMath::Sqrt(6.); - sB = (1. - bs * kOvSqr6) * (1. 
+ bs * kOvSqr6) * DS; - cB = .5 * sB * bs; - } - - double px = T0[3]; - double py = T0[4]; - double pz = T0[5]; - - double d[6] = {mParam[0] - T0[0], mParam[1] - T0[1], mParam[2] - T0[2], mParam[3] - T0[3], mParam[4] - T0[4], mParam[5] - T0[5]}; - - T0[0] = T0[0] + sB * px + cB * py; - T0[1] = T0[1] - cB * px + sB * py; - T0[2] = T0[2] + DS * pz; - T0[3] = c * px + s * py; - T0[4] = -s * px + c * py; - T0[5] = T0[5]; - - // clang-format off - double mJ[6][6] = { {1, 0, 0, sB, cB, 0, }, - {0, 1, 0, -cB, sB, 0, }, - {0, 0, 1, 0, 0, DS, }, - {0, 0, 0, c, s, 0, }, - {0, 0, 0, -s, c, 0, }, - {0, 0, 0, 0, 0, 1, }}; - // clang-format on - - for (int32_t i = 0; i < 6; i++) { - mParam[i] = T0[i]; - for (int32_t j = 0; j < 6; j++) { - mParam[i] += mJ[i][j] * d[j]; - } - } - - double mA[6][6]; - for (int32_t k = 0, i = 0; i < 6; i++) { - for (int32_t j = 0; j <= i; j++, k++) { - mA[i][j] = mA[j][i] = fCov[k]; - } - } - - double mJC[6][6]; - for (int32_t i = 0; i < 6; i++) { - for (int32_t j = 0; j < 6; j++) { - mJC[i][j] = 0; - for (int32_t k = 0; k < 6; k++) { - mJC[i][j] += mJ[i][k] * mA[k][j]; - } - } - } - - for (int32_t k = 0, i = 0; i < 6; i++) { - for (int32_t j = 0; j <= i; j++, k++) { - fCov[k] = 0; - for (int32_t l = 0; l < 6; l++) { - fCov[k] += mJC[i][l] * mJ[j][l]; - } - } - } -} - -//* Fit utilities - -void AliHLT3DTrackParam::InitializeCovarianceMatrix() -{ - //* Initialization of covariance matrix - - for (int32_t i = 0; i < 21; i++) { - fCov[i] = 0; - } - fSignQ = 0; - fCov[0] = fCov[2] = fCov[5] = 100.; - fCov[9] = fCov[14] = fCov[20] = 10000.; - fChi2 = 0; - fNDF = -5; -} - -void AliHLT3DTrackParam::GetGlueMatrix(const double xyz[3], double G[6], const double* T0) const -{ - //* ! 
- - if (!T0) { - T0 = mParam; - } - - double dx = xyz[0] - T0[0], dy = xyz[1] - T0[1], dz = xyz[2] - T0[2]; - double px2 = T0[3] * T0[3], py2 = T0[4] * T0[4], pz2 = T0[5] * T0[5]; - double s2 = (dx * dx + dy * dy + dz * dz); - double p2 = px2 + py2 + pz2; - if (p2 > 1.e-4) { - s2 /= p2; - } - double x = T0[3] * s2; - double xx = px2 * s2, xy = x * T0[4], xz = x * T0[5], yy = py2 * s2, yz = T0[4] * T0[5] * s2; - G[0] = xx; - G[1] = xy; - G[2] = yy; - G[3] = xz; - G[4] = yz; - G[5] = pz2 * s2; -} - -void AliHLT3DTrackParam::Filter(const double m[3], const double V[6], const double G[6]) -{ - //* ! - - // clang-format off - double - c00 = fCov[0], - c10 = fCov[1], c11 = fCov[2], - c20 = fCov[3], c21 = fCov[4], c22 = fCov[5], - c30 = fCov[6], c31 = fCov[7], c32 = fCov[8], - c40 = fCov[10], c41 = fCov[11], c42 = fCov[12], - c50 = fCov[15], c51 = fCov[16], c52 = fCov[17]; - // clang-format on - - double z0 = m[0] - mParam[0], z1 = m[1] - mParam[1], z2 = m[2] - mParam[2]; - - double mS[6] = {c00 + V[0] + G[0], c10 + V[1] + G[1], c11 + V[2] + G[2], c20 + V[3] + G[3], c21 + V[4] + G[4], c22 + V[5] + G[5]}; - double mSi[6]; - mSi[0] = mS[4] * mS[4] - mS[2] * mS[5]; - mSi[1] = mS[1] * mS[5] - mS[3] * mS[4]; - mSi[3] = mS[2] * mS[3] - mS[1] * mS[4]; - double det = 1. 
/ (mS[0] * mSi[0] + mS[1] * mSi[1] + mS[3] * mSi[3]); - mSi[0] *= det; - mSi[1] *= det; - mSi[3] *= det; - mSi[2] = (mS[3] * mS[3] - mS[0] * mS[5]) * det; - mSi[4] = (mS[0] * mS[4] - mS[1] * mS[3]) * det; - mSi[5] = (mS[1] * mS[1] - mS[0] * mS[2]) * det; - - fNDF += 2; - fChi2 += (+(mSi[0] * z0 + mSi[1] * z1 + mSi[3] * z2) * z0 + (mSi[1] * z0 + mSi[2] * z1 + mSi[4] * z2) * z1 + (mSi[3] * z0 + mSi[4] * z1 + mSi[5] * z2) * z2); - - double k0, k1, k2; // k = CHtS - - k0 = c00 * mSi[0] + c10 * mSi[1] + c20 * mSi[3]; - k1 = c00 * mSi[1] + c10 * mSi[2] + c20 * mSi[4]; - k2 = c00 * mSi[3] + c10 * mSi[4] + c20 * mSi[5]; - - mParam[0] += k0 * z0 + k1 * z1 + k2 * z2; - fCov[0] -= k0 * c00 + k1 * c10 + k2 * c20; - - k0 = c10 * mSi[0] + c11 * mSi[1] + c21 * mSi[3]; - k1 = c10 * mSi[1] + c11 * mSi[2] + c21 * mSi[4]; - k2 = c10 * mSi[3] + c11 * mSi[4] + c21 * mSi[5]; - - mParam[1] += k0 * z0 + k1 * z1 + k2 * z2; - fCov[1] -= k0 * c00 + k1 * c10 + k2 * c20; - fCov[2] -= k0 * c10 + k1 * c11 + k2 * c21; - - k0 = c20 * mSi[0] + c21 * mSi[1] + c22 * mSi[3]; - k1 = c20 * mSi[1] + c21 * mSi[2] + c22 * mSi[4]; - k2 = c20 * mSi[3] + c21 * mSi[4] + c22 * mSi[5]; - - mParam[2] += k0 * z0 + k1 * z1 + k2 * z2; - fCov[3] -= k0 * c00 + k1 * c10 + k2 * c20; - fCov[4] -= k0 * c10 + k1 * c11 + k2 * c21; - fCov[5] -= k0 * c20 + k1 * c21 + k2 * c22; - - k0 = c30 * mSi[0] + c31 * mSi[1] + c32 * mSi[3]; - k1 = c30 * mSi[1] + c31 * mSi[2] + c32 * mSi[4]; - k2 = c30 * mSi[3] + c31 * mSi[4] + c32 * mSi[5]; - - mParam[3] += k0 * z0 + k1 * z1 + k2 * z2; - fCov[6] -= k0 * c00 + k1 * c10 + k2 * c20; - fCov[7] -= k0 * c10 + k1 * c11 + k2 * c21; - fCov[8] -= k0 * c20 + k1 * c21 + k2 * c22; - fCov[9] -= k0 * c30 + k1 * c31 + k2 * c32; - - k0 = c40 * mSi[0] + c41 * mSi[1] + c42 * mSi[3]; - k1 = c40 * mSi[1] + c41 * mSi[2] + c42 * mSi[4]; - k2 = c40 * mSi[3] + c41 * mSi[4] + c42 * mSi[5]; - - mParam[4] += k0 * z0 + k1 * z1 + k2 * z2; - fCov[10] -= k0 * c00 + k1 * c10 + k2 * c20; - fCov[11] -= k0 * c10 + k1 * c11 
+ k2 * c21; - fCov[12] -= k0 * c20 + k1 * c21 + k2 * c22; - fCov[13] -= k0 * c30 + k1 * c31 + k2 * c32; - fCov[14] -= k0 * c40 + k1 * c41 + k2 * c42; - - k0 = c50 * mSi[0] + c51 * mSi[1] + c52 * mSi[3]; - k1 = c50 * mSi[1] + c51 * mSi[2] + c52 * mSi[4]; - k2 = c50 * mSi[3] + c51 * mSi[4] + c52 * mSi[5]; - - mParam[5] += k0 * z0 + k1 * z1 + k2 * z2; - fCov[15] -= k0 * c00 + k1 * c10 + k2 * c20; - fCov[16] -= k0 * c10 + k1 * c11 + k2 * c21; - fCov[17] -= k0 * c20 + k1 * c21 + k2 * c22; - fCov[18] -= k0 * c30 + k1 * c31 + k2 * c32; - fCov[19] -= k0 * c40 + k1 * c41 + k2 * c42; - fCov[20] -= k0 * c50 + k1 * c51 + k2 * c52; - - // fit charge - - double px = mParam[3]; - double py = mParam[4]; - double pz = mParam[5]; - - double p = TMath::Sqrt(px * px + py * py + pz * pz); - double pi = 1. / p; - double qp = fSignQ * pi; - double qp3 = qp * pi * pi; - double c60 = qp3 * (c30 + c40 + c50), c61 = qp3 * (c31 + c41 + c51), c62 = qp3 * (c32 + c42 + c52); - - k0 = c60 * mSi[0] + c61 * mSi[1] + c62 * mSi[3]; - k1 = c60 * mSi[1] + c61 * mSi[2] + c62 * mSi[4]; - k2 = c60 * mSi[3] + c61 * mSi[4] + c62 * mSi[5]; - - qp += k0 * z0 + k1 * z1 + k2 * z2; - if (qp > 0) { - fSignQ = 1; - } else if (qp < 0) { - fSignQ = -1; - } else { - fSignQ = 0; - } -} - -//* Other utilities - -void AliHLT3DTrackParam::SetDirection(double Direction[3]) -{ - //* Change track direction - - if (mParam[3] * Direction[0] + mParam[4] * Direction[1] + mParam[5] * Direction[2] >= 0) { - return; - } - - mParam[3] = -mParam[3]; - mParam[4] = -mParam[4]; - mParam[5] = -mParam[5]; - fSignQ = -fSignQ; - - fCov[6] = -fCov[6]; - fCov[7] = -fCov[7]; - fCov[8] = -fCov[8]; - fCov[10] = -fCov[10]; - fCov[11] = -fCov[11]; - fCov[12] = -fCov[12]; - fCov[15] = -fCov[15]; - fCov[16] = -fCov[16]; - fCov[17] = -fCov[17]; -} - -void AliHLT3DTrackParam::RotateCoordinateSystem(double alpha) -{ - //* ! 
- - double cA = TMath::Cos(alpha); - double sA = TMath::Sin(alpha); - double x = mParam[0], y = mParam[1], px = mParam[3], py = mParam[4]; - mParam[0] = x * cA + y * sA; - mParam[1] = -x * sA + y * cA; - mParam[2] = mParam[2]; - mParam[3] = px * cA + py * sA; - mParam[4] = -px * sA + py * cA; - mParam[5] = mParam[5]; - - // clang-format off - double mJ[6][6] = { { cA, sA, 0, 0, 0, 0 }, - { -sA, cA, 0, 0, 0, 0 }, - { 0, 0, 1, 0, 0, 0 }, - { 0, 0, 0, cA, sA, 0 }, - { 0, 0, 0, -sA, cA, 0 }, - { 0, 0, 0, 0, 0, 1 }}; - // clang-format on - - double mA[6][6]; - for (int32_t k = 0, i = 0; i < 6; i++) { - for (int32_t j = 0; j <= i; j++, k++) { - mA[i][j] = mA[j][i] = fCov[k]; - } - } - - double mJC[6][6]; - for (int32_t i = 0; i < 6; i++) { - for (int32_t j = 0; j < 6; j++) { - mJC[i][j] = 0; - for (int32_t k = 0; k < 6; k++) { - mJC[i][j] += mJ[i][k] * mA[k][j]; - } - } - } - - for (int32_t k = 0, i = 0; i < 6; i++) { - for (int32_t j = 0; j <= i; j++, k++) { - fCov[k] = 0; - for (int32_t l = 0; l < 6; l++) { - fCov[k] += mJC[i][l] * mJ[j][l]; - } - } - } -} - -void AliHLT3DTrackParam::Get5Parameters(double alpha, double T[6], double C[15]) const -{ - //* ! - - AliHLT3DTrackParam t = *this; - t.RotateCoordinateSystem(alpha); - double x = t.mParam[0], y = t.mParam[1], z = t.mParam[2], px = t.mParam[3], py = t.mParam[4], pz = t.mParam[5], q = t.fSignQ; - - double p2 = px * px + py * py + pz * pz; - if (p2 < 1.e-8) { - p2 = 1; - } - double n2 = 1. / p2; - double n = sqrt(n2); - - T[5] = x; - T[0] = y; - T[1] = z; - T[2] = py / px; - T[3] = pz / px; - T[4] = q * n; - - // clang-format off - double mJ[5][6] = { { -T[2], 1, 0, 0, 0, 0 }, - { -T[3], 0, 1, 0, 0, 0 }, - { 0, 0, 0, -T[2] / px, 1. / px, 0 }, - { 0, 0, 0, -T[3] / px, 0, 1. 
/ px }, - { 0, 0, 0, -T[4]*n2*px, -T[4]*n2*py, -T[4]*n2*pz}}; - // clang-format on - - double mA[6][6]; - for (int32_t k = 0, i = 0; i < 6; i++) { - for (int32_t j = 0; j <= i; j++, k++) { - mA[i][j] = mA[j][i] = t.fCov[k]; - } - } - - double mJC[5][6]; - for (int32_t i = 0; i < 5; i++) { - for (int32_t j = 0; j < 6; j++) { - mJC[i][j] = 0; - for (int32_t k = 0; k < 6; k++) { - mJC[i][j] += mJ[i][k] * mA[k][j]; - } - } - } - - for (int32_t k = 0, i = 0; i < 5; i++) { - for (int32_t j = 0; j <= i; j++, k++) { - C[k] = 0; - for (int32_t l = 0; l < 6; l++) { - C[k] += mJC[i][l] * mJ[j][l]; - } - } - } -} diff --git a/GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.h b/GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.h deleted file mode 100644 index a762f22eb46dd..0000000000000 --- a/GPU/GPUTracking/oldFiles/AliHLT3DTrackParam.h +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file AliHLT3DTrackParam.h -/// \author Sergey Gorbunov - -#ifndef ALIHLT3DTRACKPARAM_H -#define ALIHLT3DTRACKPARAM_H - -#include "TObject.h" - -/** - * @class AliHLT3DTrackParam - */ -class AliHLT3DTrackParam : public TObject -{ - public: - //* - //* INITIALIZATION - //* - - //* Constructor - - AliHLT3DTrackParam() : fChi2(0), fNDF(0), fSignQ(0) {} - - //* Destructor (empty) - - virtual ~AliHLT3DTrackParam() {} - - //* - //* ACCESSORS - //* - - //* Simple accessors - - double GetX() const { return mParam[0]; } - double GetY() const { return mParam[1]; } - double GetZ() const { return mParam[2]; } - double GetPx() const { return mParam[3]; } - double GetPy() const { return mParam[4]; } - double GetPz() const { return mParam[5]; } - double GetChi2() const { return fChi2; } - int32_t GetNDF() const { return fNDF; } - int32_t GetCharge() const { return fSignQ; } - - double GetParameter(int32_t i) const { return mParam[i]; } - double GetCovariance(int32_t i) const { return fCov[i]; } - double GetCovariance(int32_t i, int32_t j) const { return fCov[(j <= i) ? 
i * (i + 1) / 2 + j : j * (j + 1) / 2 + i]; } - - //* - //* Accessors - //* - - const double* Param() const { return mParam; } - const double* Cov() const { return fCov; } - double X() const { return mParam[0]; } - double Y() const { return mParam[1]; } - double Z() const { return mParam[2]; } - double Px() const { return mParam[3]; } - double Py() const { return mParam[4]; } - double Pz() const { return mParam[5]; } - double Chi2() const { return fChi2; } - int32_t NDF() const { return fNDF; } - int32_t Charge() const { return fSignQ; } - - //* Accessors with calculations( &value, &estimated sigma ) - //* error flag returned (0 means no error during calculations) - - //* - //* MODIFIERS - //* - - void SetParam(int32_t i, double v) { mParam[i] = v; } - void SetCov(int32_t i, double v) { fCov[i] = v; } - void SetX(double v) { mParam[0] = v; } - void SetY(double v) { mParam[1] = v; } - void SetZ(double v) { mParam[2] = v; } - void SetPx(double v) { mParam[3] = v; } - void SetPy(double v) { mParam[4] = v; } - void SetPz(double v) { mParam[5] = v; } - void SetChi2(double v) { fChi2 = v; } - void SetNDF(int32_t v) { fNDF = v; } - void SetCharge(int32_t v) { fSignQ = v; } - - //* - //* UTILITIES - //* - - //* Transport utilities - - double GetDStoPoint(double Bz, const double xyz[3], const double* T0 = 0) const; - - void TransportToDS(double Bz, double DS, double* T0 = 0); - - void TransportToPoint(double Bz, const double xyz[3], double* T0 = 0) { TransportToDS(Bz, GetDStoPoint(Bz, xyz, T0), T0); } - - void TransportToPoint(double Bz, double x, double y, double z, const double* T0 = 0) - { - double xyz[3] = {x, y, z}; - TransportToPoint(Bz, xyz, T0); - } - - //* Fit utilities - - void InitializeCovarianceMatrix(); - - void GetGlueMatrix(const double p[3], double G[6], const double* T0 = 0) const; - - void Filter(const double m[3], const double V[6], const double G[6]); - - //* Other utilities - - void SetDirection(double Direction[3]); - - void 
RotateCoordinateSystem(double alpha); - - void Get5Parameters(double alpha, double T[6], double C[15]) const; - - protected: - double mParam[6]; // Parameters ( x, y, z, px, py, pz ): 3-position and 3-momentum - double fCov[21]; // Covariance matrix - double fChi2; // Chi^2 - int32_t fNDF; // Number of Degrees of Freedom - int32_t fSignQ; // Charge - - ClassDef(AliHLT3DTrackParam, 1); -}; - -#endif diff --git a/GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.cxx b/GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.cxx deleted file mode 100644 index efecae4fc9b47..0000000000000 --- a/GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.cxx +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCGMOfflineFitter.cxx -/// \author Sergey Gorbunov - -#if (defined(GPUCA_ALIROOT_LIB) && !defined(GPUCA_GPUCODE)) - -#include "GPUTPCGMOfflineFitter.h" - -#include "GPUCommonMath.h" -#include "GPUTPCGMMergedTrack.h" -#include "GPUTPCGMMergedTrackHit.h" -#include "AliHLTTPCGeometry.h" -#include -#include "AliTracker.h" -#include "AliMagF.h" -#include "AliExternalTrackParam.h" -#include "AliTPCtracker.h" -#include "AliTPCParam.h" -#include "AliTPCseed.h" -#include "AliTPCclusterMI.h" -#include "AliTPCcalibDB.h" -#include "AliTPCParamSR.h" -#include "GPUTPCGMPropagator.h" -#include "AliTPCReconstructor.h" -#include "AliHLTTPCClusterTransformation.h" - -#define DOUBLE 1 - -GPUTPCGMOfflineFitter::GPUTPCGMOfflineFitter() : fCAParam() {} - -GPUTPCGMOfflineFitter::~GPUTPCGMOfflineFitter() {} - -void GPUTPCGMOfflineFitter::Initialize(const GPUParam& hltParam, long TimeStamp, bool isMC) -{ - // - - AliHLTTPCClusterTransformation hltTransform; - hltTransform.Init(0., TimeStamp, isMC, 1); - - // initialisation of AliTPCtracker as it is done in AliTPCReconstructor.cxx - - AliTPCcalibDB* calib = AliTPCcalibDB::Instance(); - const AliMagF* field = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - calib->SetExBField(field); - - AliTPCParam* param = AliTPCcalibDB::Instance()->GetParameters(); - if (!param) { - AliWarning("Loading default TPC parameters !"); - param = new AliTPCParamSR; - } - param->ReadGeoMatrices(); - - AliTPCReconstructor* tpcRec = new AliTPCReconstructor(); - tpcRec->SetRecoParam(AliTPCcalibDB::Instance()->GetTransform()->GetCurrentRecoParam()); - - //(this)->~AliTPCtracker(); //call the destructor explicitly - // new (this) AliTPCtracker(param); // call the constructor - - AliTPCtracker::fSectors = AliTPCtracker::fInnerSec; - // AliTPCReconstructor::ParseOptions(tracker); : not important, it only set useHLTClusters flag - - fCAParam = hltParam; -} - -void GPUTPCGMOfflineFitter::RefitTrack(GPUTPCGMMergedTrack& track, const 
GPUTPCGMPolynomialField* field, GPUTPCGMMergedTrackHit* clusters) -{ - // copy of HLT RefitTrack() with calling of the offline fit utilities - - if (!track.OK()) { - return; - } - - int32_t nTrackHits = track.NClusters(); - cout << "call FitOffline .. " << endl; - bool ok = FitOffline(field, track, clusters + track.FirstClusterRef(), nTrackHits); - cout << ".. end of call FitOffline " << endl; - - GPUTPCGMTrackParam t = track.Param(); - float Alpha = track.Alpha(); - - if (fabsf(t.QPt()) < 1.e-4) { - t.QPt() = 1.e-4; - } - - track.SetOK(ok); - track.SetNClustersFitted(nTrackHits); - track.Param() = t; - track.Alpha() = Alpha; - - { - int32_t ind = track.FirstClusterRef(); - float alphaa = fCAParam.Alpha(clusters[ind].slice); - float xx = clusters[ind].fX; - float yy = clusters[ind].fY; - float zz = clusters[ind].fZ - track.Param().GetZOffset(); - float sinA = CAMath::Sin(alphaa - track.Alpha()); - float cosA = CAMath::Cos(alphaa - track.Alpha()); - track.SetLastX(xx * cosA - yy * sinA); - track.SetLastY(xx * sinA + yy * cosA); - track.SetLastZ(zz); - } -} - -int32_t GPUTPCGMOfflineFitter::CreateTPCclusterMI(const GPUTPCGMMergedTrackHit& h, AliTPCclusterMI& c) -{ - // Create AliTPCclusterMI for the HLT hit - - AliTPCclusterMI tmp; // everything is set to 0 by constructor - c = tmp; - - // add the information we have - - Int_t sector, row; - AliHLTTPCGeometry::Slice2Sector(h.slice, h.row, sector, row); - c.SetDetector(sector); - c.SetRow(row); // ?? is it right row numbering for the TPC tracker ?? 
- c.SetX(h.fX); - c.SetY(h.fY); - c.SetZ(h.fZ); - int32_t index = (((sector << 8) + row) << 16) + 0; - return index; -} - -bool GPUTPCGMOfflineFitter::FitOffline(const GPUTPCGMPolynomialField* field, GPUTPCGMMergedTrack& gmtrack, GPUTPCGMMergedTrackHit* clusters, int32_t& N) -{ - const float maxSinPhi = GPUCA_MAX_SIN_PHI; - - int32_t maxN = N; - float covYYUpd = 0.; - float lastUpdateX = -1.; - - const bool rejectChi2ThisRound = 0; - const bool markNonFittedClusters = 0; - const double kDeg2Rad = 3.14159265358979323846 / 180.; - const float maxSinForUpdate = CAMath::Sin(70. * kDeg2Rad); - - bool ok = 1; - - AliTPCtracker::SetIteration(2); - - AliTPCseed seed; - gmtrack.Param().GetExtParam(seed, gmtrack.Alpha()); - - AliTPCtracker::AddCovariance(&seed); - - N = 0; - lastUpdateX = -1; - - // find last leg - int32_t ihitStart = 0; - for (int32_t ihit = 0; ihit < maxN; ihit++) { - if (clusters[ihit].leg != clusters[ihitStart].leg) { - ihitStart = ihit; - } - } - - for (int32_t ihit = ihitStart; ihit < maxN; ihit++) { - if (clusters[ihit].fState < 0) { - continue; // hit is excluded from fit - } - float xx = clusters[ihit].fX; - float yy = clusters[ihit].fY; - float zz = clusters[ihit].fZ; - - if (DOUBLE && ihit + 1 >= 0 && ihit + 1 < maxN && clusters[ihit].row == clusters[ihit + 1].row) { - float count = 1.; - do { - if (clusters[ihit].slice != clusters[ihit + 1].slice || clusters[ihit].leg != clusters[ihit + 1].leg || fabsf(clusters[ihit].fY - clusters[ihit + 1].fY) > 4. || fabsf(clusters[ihit].fZ - clusters[ihit + 1].fZ) > 4.) 
{ - break; - } - ihit += 1; - xx += clusters[ihit].fX; - yy += clusters[ihit].fY; - zz += clusters[ihit].fZ; - count += 1.; - } while (ihit + 1 >= 0 && ihit + 1 < maxN && clusters[ihit].row == clusters[ihit + 1].row); - xx /= count; - yy /= count; - zz /= count; - } - - // Create AliTPCclusterMI for the hit - - AliTPCclusterMI cluster; - Int_t tpcindex = CreateTPCclusterMI(clusters[ihit], cluster); - if (tpcindex < 0) { - continue; - } - Double_t sy2 = 0, sz2 = 0; - AliTPCtracker::ErrY2Z2(&seed, &cluster, sy2, sz2); - cluster.SetSigmaY2(sy2); - cluster.SetSigmaZ2(sz2); - cluster.SetQ(10); - cluster.SetMax(10); - - Int_t iRow = clusters[ihit].row; - - if (iRow < AliHLTTPCGeometry::GetNRowLow()) { - AliTPCtracker::fSectors = AliTPCtracker::fInnerSec; - } else { - AliTPCtracker::fSectors = AliTPCtracker::fOuterSec; - } - - seed.SetClusterIndex2(iRow, tpcindex); - seed.SetClusterPointer(iRow, &cluster); - seed.SetCurrentClusterIndex1(tpcindex); - - int32_t retVal; - float threshold = 3. + (lastUpdateX >= 0 ? 
(fabsf(seed.GetX() - lastUpdateX) / 2) : 0.); - if (N > 2 && (fabsf(yy - seed.GetY()) > threshold || fabsf(zz - seed.GetZ()) > threshold)) { - retVal = 2; - } else { - Int_t err = !(AliTPCtracker::FollowToNext(seed, iRow)); - - const int32_t err2 = N > 0 && CAMath::Abs(seed.GetSnp()) >= maxSinForUpdate; - if (err || err2) { - if (markNonFittedClusters) { - if (N > 0 && (fabsf(yy - seed.GetY()) > 3 || fabsf(zz - seed.GetZ()) > 3)) { - clusters[ihit].fState = -2; - } else if (err && err >= -3) { - clusters[ihit].fState = -1; - } - } - continue; - } - - // retVal = prop.Update( yy, zz, clusters[ihit].row, param, rejectChi2ThisRound); - retVal = 0; - } - - if (retVal == 0) // track is updated - { - lastUpdateX = seed.GetX(); - covYYUpd = seed.GetCovariance()[0]; - ihitStart = ihit; - N++; - } else if (retVal == 2) { // cluster far away form the track - if (markNonFittedClusters) { - clusters[ihit].fState = -2; - } - } else { - break; // bad chi2 for the whole track, stop the fit - } - } // end loop over clusters - - GPUTPCGMTrackParam t; - t.SetExtParam(seed); - - float Alpha = seed.GetAlpha(); - - t.ConstrainSinPhi(); - - bool ok1 = N >= GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(t.GetQPt()) && t.CheckNumericalQuality(covYYUpd); - if (!ok1) { - return (false); - } - - // const float kDeg2Rad = 3.1415926535897 / 180.f; - const float kSectAngle = 2 * 3.1415926535897 / 18.f; - - if (fCAParam.GetTrackReferenceX() <= 500) { - GPUTPCGMPropagator prop; - prop.SetMaterialTPC(); - prop.SetPolynomialField(field); - prop.SetMaxSinPhi(maxSinPhi); - prop.SetToyMCEventsFlag(fCAParam.ToyMCEventsFlag()); - - for (int32_t k = 0; k < 3; k++) // max 3 attempts - { - int32_t err = prop.PropagateToXAlpha(fCAParam.GetTrackReferenceX(), Alpha, 0); - t.ConstrainSinPhi(); - if (fabsf(t.GetY()) <= t.GetX() * tan(kSectAngle / 2.f)) { - break; - } - float dAngle = floor(atan2(t.GetY(), t.GetX()) / kDeg2Rad / 20.f + 0.5f) * kSectAngle; - Alpha += dAngle; - if (err || k == 2) { - t.Rotate(dAngle); - 
break; - } - } - } else if (fabsf(t.GetY()) > t.GetX() * tan(kSectAngle / 2.f)) { - float dAngle = floor(atan2(t.GetY(), t.GetX()) / kDeg2Rad / 20.f + 0.5f) * kSectAngle; - t.Rotate(dAngle); - Alpha += dAngle; - } - if (Alpha > 3.1415926535897) { - Alpha -= 2 * 3.1415926535897; - } else if (Alpha <= -3.1415926535897) { - Alpha += 2 * 3.1415926535897; - } - - gmtrack.Param() = t; - gmtrack.Alpha() = Alpha; - - return (ok); -} - -#endif diff --git a/GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.h b/GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.h deleted file mode 100644 index 8daf8102a8d20..0000000000000 --- a/GPU/GPUTracking/oldFiles/GPUTPCGMOfflineFitter.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCGMOfflineFitter.h -/// \author Sergey Gorbunov - -#ifndef GPUTPCGMOfflineFitter_H -#define GPUTPCGMOfflineFitter_H - -#if (defined(GPUCA_ALIROOT_LIB) && !defined(GPUCA_GPUCODE)) - -#include "GPUParam.h" -#include "AliTPCtracker.h" - -class GPUTPCGMMergedTrack; -class GPUTPCGMMergedTrackHit; -class AliTPCclusterMI; -class GPUTPCGMPolynomialField; - -class GPUTPCGMOfflineFitter : public AliTPCtracker -{ - public: - GPUTPCGMOfflineFitter(); - ~GPUTPCGMOfflineFitter(); - - void Initialize(const GPUParam& hltParam, long TimeStamp, bool isMC); - - void RefitTrack(GPUTPCGMMergedTrack& track, const GPUTPCGMPolynomialField* field, GPUTPCGMMergedTrackHit* clusters); - - int32_t CreateTPCclusterMI(const GPUTPCGMMergedTrackHit& h, AliTPCclusterMI& c); - - bool FitOffline(const GPUTPCGMPolynomialField* field, GPUTPCGMMergedTrack& gmtrack, GPUTPCGMMergedTrackHit* clusters, int32_t& N); - - private: - GPUParam fCAParam; -}; - -#endif - -#endif diff --git a/GPU/GPUTracking/oldFiles/GPUTPCMCPoint.cxx b/GPU/GPUTracking/oldFiles/GPUTPCMCPoint.cxx deleted file mode 100644 index 83a9225afd86d..0000000000000 --- a/GPU/GPUTracking/oldFiles/GPUTPCMCPoint.cxx +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCMCPoint.cxx -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#include "GPUTPCMCPoint.h" - -GPUTPCMCPoint::GPUTPCMCPoint() : fX(0), fY(0), fZ(0), fSx(0), fSy(0), fSz(0), fTime(0), mISlice(0), fTrackID(0) -{ - //* Default constructor -} diff --git a/GPU/GPUTracking/oldFiles/GPUTPCMCPoint.h b/GPU/GPUTracking/oldFiles/GPUTPCMCPoint.h deleted file mode 100644 index be283433061e3..0000000000000 --- a/GPU/GPUTracking/oldFiles/GPUTPCMCPoint.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCMCPoint.h -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#ifndef GPUTPCMCPOINT_H -#define GPUTPCMCPOINT_H - -#include "GPUTPCDef.h" - -/** - * @class GPUTPCMCPoint - * store MC point information for GPUTPCPerformance - */ -class GPUTPCMCPoint -{ - public: - GPUTPCMCPoint(); - - float X() const { return fX; } - float Y() const { return fY; } - float Z() const { return fZ; } - float Sx() const { return fSx; } - float Sy() const { return fSy; } - float Sz() const { return fSz; } - float Time() const { return fTime; } - int32_t ISlice() const { return mISlice; } - int32_t TrackID() const { return fTrackID; } - - void SetX(float v) { fX = v; } - void SetY(float v) { fY = v; } - void SetZ(float v) { fZ = v; } - void SetSx(float v) { fSx = v; } - void SetSy(float v) { fSy = v; } - void SetSz(float v) { fSz = v; } - void SetTime(float v) { fTime = v; } - void SetISlice(int32_t v) { mISlice = v; } - void SetTrackID(int32_t v) { fTrackID = v; } - - static bool Compare(const GPUTPCMCPoint& p1, const GPUTPCMCPoint& p2) - { - if (p1.fTrackID != p2.fTrackID) { - return (p1.fTrackID < p2.fTrackID); - } - if (p1.mISlice != p2.mISlice) { - return (p1.mISlice < p2.mISlice); - } - return (p1.Sx() < p2.Sx()); - } - - static bool CompareSlice(const GPUTPCMCPoint& p, int32_t slice) { return (p.ISlice() < slice); } - - static bool CompareX(const GPUTPCMCPoint& p, float X) { return (p.Sx() < X); } - - protected: - float fX; //* global X position - float fY; //* global Y position - float fZ; //* global Z position - float fSx; //* slice X position - float fSy; //* slice Y position - float fSz; //* slice Z position - float fTime; //* time - int32_t mISlice; //* slice number - int32_t fTrackID; //* mc track number -}; - -#endif // GPUTPCMCPOINT_H diff --git a/GPU/GPUTracking/oldFiles/GPUTPCMCTrack.cxx b/GPU/GPUTracking/oldFiles/GPUTPCMCTrack.cxx deleted file mode 100644 index 12a52e0e04b4f..0000000000000 --- a/GPU/GPUTracking/oldFiles/GPUTPCMCTrack.cxx +++ 
/dev/null @@ -1,90 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCMCTrack.cxx -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#include "GPUTPCMCTrack.h" -#include "GPUCommonMath.h" -#include "TDatabasePDG.h" -#include "TParticle.h" - -GPUTPCMCTrack::GPUTPCMCTrack() : fPDG(0), fP(0), fPt(0), mNHits(0), fNMCPoints(0), fFirstMCPointID(0), fNReconstructed(0), fSet(0), fNTurns(0) -{ - //* Default constructor - for (int32_t i = 0; i < 7; i++) { - fPar[i] = 0; - fTPCPar[i] = 0; - } -} - -GPUTPCMCTrack::GPUTPCMCTrack(const TParticle* part) : fPDG(0), fP(0), fPt(0), mNHits(0), fNMCPoints(0), fFirstMCPointID(0), fNReconstructed(0), fSet(0), fNTurns(0) -{ - //* Constructor from TParticle - - for (int32_t i = 0; i < 7; i++) { - fPar[i] = 0; - } - for (int32_t i = 0; i < 7; i++) { - fTPCPar[i] = 0; - } - fP = 0; - fPt = 0; - - if (!part) { - return; - } - TLorentzVector mom, vtx; - part->ProductionVertex(vtx); - part->Momentum(mom); - fPar[0] = part->Vx(); - fPar[1] = part->Vy(); - fPar[2] = part->Vz(); - fP = part->P(); - fPt = part->Pt(); - double pi = (fP > 1.e-4) ? 1. 
/ fP : 0; - fPar[3] = part->Px() * pi; - fPar[4] = part->Py() * pi; - fPar[5] = part->Pz() * pi; - fPar[6] = 0; - fPDG = part->GetPdgCode(); - if (CAMath::Abs(fPDG) < 100000) { - TParticlePDG* pPDG = TDatabasePDG::Instance()->GetParticle(fPDG); - if (pPDG) { - fPar[6] = pPDG->Charge() / 3.0 * pi; - } - } -} - -void GPUTPCMCTrack::SetTPCPar(float X, float Y, float Z, float Px, float Py, float Pz) -{ - //* Set parameters at TPC entrance - - for (int32_t i = 0; i < 7; i++) { - fTPCPar[i] = 0; - } - - fTPCPar[0] = X; - fTPCPar[1] = Y; - fTPCPar[2] = Z; - double p = CAMath::Sqrt(Px * Px + Py * Py + Pz * Pz); - double pi = (p > 1.e-4) ? 1. / p : 0; - fTPCPar[3] = Px * pi; - fTPCPar[4] = Py * pi; - fTPCPar[5] = Pz * pi; - fTPCPar[6] = 0; - if (CAMath::Abs(fPDG) < 100000) { - TParticlePDG* pPDG = TDatabasePDG::Instance()->GetParticle(fPDG); - if (pPDG) { - fTPCPar[6] = pPDG->Charge() / 3.0 * pi; - } - } -} diff --git a/GPU/GPUTracking/oldFiles/GPUTPCMCTrack.h b/GPU/GPUTracking/oldFiles/GPUTPCMCTrack.h deleted file mode 100644 index 9f2e4da6ada54..0000000000000 --- a/GPU/GPUTracking/oldFiles/GPUTPCMCTrack.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCMCTrack.h -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#ifndef GPUTPCMCTRACK_H -#define GPUTPCMCTRACK_H - -#include "GPUTPCDef.h" - -class TParticle; - -/** - * @class GPUTPCMCTrack - * store MC track information for GPUTPCPerformance - */ -class GPUTPCMCTrack -{ - public: - GPUTPCMCTrack(); - GPUTPCMCTrack(const TParticle* part); - - void SetTPCPar(float X, float Y, float Z, float Px, float Py, float Pz); - - int32_t PDG() const { return fPDG; } - const double* Par() const { return fPar; } - const double* TPCPar() const { return fTPCPar; } - double P() const { return fP; } - double Pt() const { return fPt; } - - int32_t NHits() const { return mNHits; } - int32_t NMCPoints() const { return fNMCPoints; } - int32_t FirstMCPointID() const { return fFirstMCPointID; } - int32_t NReconstructed() const { return fNReconstructed; } - int32_t Set() const { return fSet; } - int32_t NTurns() const { return fNTurns; } - - void SetP(float v) { fP = v; } - void SetPt(float v) { fPt = v; } - void SetPDG(int32_t v) { fPDG = v; } - void SetPar(int32_t i, double v) { fPar[i] = v; } - void SetTPCPar(int32_t i, double v) { fTPCPar[i] = v; } - void SetNHits(int32_t v) { mNHits = v; } - void SetNMCPoints(int32_t v) { fNMCPoints = v; } - void SetFirstMCPointID(int32_t v) { fFirstMCPointID = v; } - void SetNReconstructed(int32_t v) { fNReconstructed = v; } - void SetSet(int32_t v) { fSet = v; } - void SetNTurns(int32_t v) { fNTurns = v; } - - protected: - int32_t fPDG; //* particle pdg code - double fPar[7]; //* x,y,z,ex,ey,ez,q/p - double fTPCPar[7]; //* x,y,z,ex,ey,ez,q/p at TPC entrance (x=y=0 means no information) - double fP, fPt; //* momentum and transverse momentum - int32_t mNHits; //* N TPC clusters - int32_t fNMCPoints; //* N MC points - int32_t fFirstMCPointID; //* id of the first MC point in the points array - int32_t fNReconstructed; //* how many times is reconstructed - int32_t fSet; //* set of tracks 0-OutSet, 1-ExtraSet, 2-RefSet - int32_t 
fNTurns; //* N of turns in the current sector -}; - -#endif // GPUTPCMCTrack diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 59293dbfd1812..34318a1bab613 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -78,7 +78,7 @@ #include "utils/qconfig.h" #include "utils/timer.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #ifdef GPUCA_MERGER_BY_MC_LABEL #define CHECK_CLUSTER_STATE_INIT_LEG_BY_MC() \ @@ -315,12 +315,12 @@ void GPUQA::createHist(T*& h, const char* name, Args... args) p.second->emplace_back(&h); } -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { struct GPUQAGarbageCollection { std::tuple>, std::vector>, std::vector>, std::vector>, std::vector>> v; }; -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu template T* GPUQA::createGarbageCollected(Args... args) @@ -1379,7 +1379,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx prop.SetTrack(¶m, alpha); bool inFlyDirection = 0; if (mConfig.strict) { - const float dx = param.X() - std::max(mclocal[0], TRACK_EXPECTED_REFERENCE_X_DEFAULT); // Limit distance check if the O2 MC position is farther inside than the AliRoot MC position. 
+ const float dx = param.X() - std::max(mclocal[0], TRACK_EXPECTED_REFERENCE_X_DEFAULT); // Limit distance check const float dy = param.Y() - mclocal[1]; const float dz = getdz(); if (dx * dx + dy * dy + dz * dz > 5.f * 5.f) { diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index b3175d9fd32c7..76774f740477f 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -33,7 +33,7 @@ typedef int16_t Color_t; #if !defined(GPUCA_BUILD_QA) || defined(GPUCA_GPUCODE) -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -60,7 +60,7 @@ class GPUQA void UpdateChain(GPUChainTracking* chain) {} }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #else @@ -84,7 +84,7 @@ struct ClusterNativeAccess; struct AliHLTTPCClusterMCLabel; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { class GPUChainTracking; struct GPUParam; @@ -352,7 +352,7 @@ inline bool GPUQA::SuppressTrack(int32_t iTrack) const { return (mConfig.matchMC inline bool GPUQA::SuppressHit(int32_t iHit) const { return (mConfig.matchMCLabels.size() && !mGoodHits[mNEvents - 1][iHit]); } inline int32_t GPUQA::HitAttachStatus(int32_t iHit) const { return (mClusterParam.size() && mClusterParam[iHit].fakeAttached ? (mClusterParam[iHit].attached ? 
1 : 2) : 0); } -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif #endif diff --git a/GPU/GPUTracking/qa/genEvents.cxx b/GPU/GPUTracking/qa/genEvents.cxx index 20c9b7bec096c..3bd4779dd13f0 100644 --- a/GPU/GPUTracking/qa/genEvents.cxx +++ b/GPU/GPUTracking/qa/genEvents.cxx @@ -40,9 +40,9 @@ #include "../utils/qconfig.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace std; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { extern GPUSettingsStandalone configStandalone; } diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index 943fa1e787674..43c091099bcf0 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -76,6 +76,6 @@ class genEvents #endif } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/GPUTracking/utils/qlibload.h b/GPU/GPUTracking/utils/qlibload.h index 85d81236fde3c..a0dfca8ef3f1e 100644 --- a/GPU/GPUTracking/utils/qlibload.h +++ b/GPU/GPUTracking/utils/qlibload.h @@ -29,9 +29,7 @@ #define LIBRARY_FUNCTION dlsym #endif -#if defined(GPUCA_ALIROOT_LIB) -#define LIBRARY_PREFIX "Ali" -#elif defined(GPUCA_O2_LIB) +#if defined(GPUCA_O2_LIB) #define LIBRARY_PREFIX "O2" #else #define LIBRARY_PREFIX "" diff --git a/GPU/TPCFastTransformation/BandMatrixSolver.cxx b/GPU/TPCFastTransformation/BandMatrixSolver.cxx index 680216d6e14de..d05d1ed221b4e 100644 --- a/GPU/TPCFastTransformation/BandMatrixSolver.cxx +++ b/GPU/TPCFastTransformation/BandMatrixSolver.cxx @@ -21,9 +21,9 @@ #include using namespace std; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -templateClassImp(GPUCA_NAMESPACE::gpu::BandMatrixSolver); +templateClassImp(o2::gpu::BandMatrixSolver); template <> int32_t BandMatrixSolver<0>::test(bool prn) @@ -193,4 +193,4 @@ int32_t BandMatrixSolver<0>::test(bool prn) return ok && ok1; } 
-template class GPUCA_NAMESPACE::gpu::BandMatrixSolver<0>; +template class o2::gpu::BandMatrixSolver<0>; diff --git a/GPU/TPCFastTransformation/BandMatrixSolver.h b/GPU/TPCFastTransformation/BandMatrixSolver.h index b548ad60f58e8..f11f538e49275 100644 --- a/GPU/TPCFastTransformation/BandMatrixSolver.h +++ b/GPU/TPCFastTransformation/BandMatrixSolver.h @@ -25,7 +25,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -113,9 +113,7 @@ class BandMatrixSolver std::vector mA; std::vector mB; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(BandMatrixSolver, 0); -#endif }; template <> @@ -260,6 +258,6 @@ inline void BandMatrixSolver::solveType1() } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index b338e1492cc6c..32c22c5193603 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -119,60 +119,6 @@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") endif() -if(${ALIGPU_BUILD_TYPE} STREQUAL "ALIROOT") - add_definitions(-DGPUCA_ALIROOT_LIB -DGPUCA_NO_FMT) - - set(SRCS ${SRCS} TPCFastTransformManager.cxx TPCFastTransformQA.cxx - ${AliRoot_SOURCE_DIR}/HLT/TPCLib/AliHLTTPCGeometry.cxx - ${AliRoot_SOURCE_DIR}/HLT/TPCLib/AliHLTTPCLog.cxx) - #set(HDRS_CINT ${HDRS_CINT_O2} TPCFastTransformManager.h TPCFastTransformQA.h ) - set(HDRS_CINT TPCFastTransformManager.h TPCFastTransformQA.h ) - - # Enable Vc - alice_usevc() - - include_directories(SYSTEM ${ROOT_INCLUDE_DIR}) - include_directories(${AliRoot_SOURCE_DIR}/GPU/TPCFastTransformation - ${AliRoot_SOURCE_DIR}/GPU/TPCFastTransformation/devtools - ${AliRoot_SOURCE_DIR}/GPU/Common - ${AliRoot_SOURCE_DIR}/GPU/Utils - ${AliRoot_SOURCE_DIR}/HLT/BASE - ${AliRoot_SOURCE_DIR}/HLT/TPCLib - ${AliRoot_SOURCE_DIR}/TPC/TPCbase - ${AliRoot_SOURCE_DIR}/STEER/STEERBase) - - # Generate the dictionary - get_directory_property(incdirs INCLUDE_DIRECTORIES) - 
generate_dictionary_flat("Ali${MODULE}" "TPCFastTransformationLinkDef_AliRoot.h" - "${HDRS_CINT}" "${incdirs}") - - # Generate the ROOT map Dependecies - set(LIBDEPS STEERBase HLTbase TPCbase) - generate_rootmap( - "Ali${MODULE}" "${LIBDEPS}" - "${CMAKE_CURRENT_SOURCE_DIR}/TPCFastTransformationLinkDef_AliRoot.h") - # Don't pass Vc to root - set(LIBDEPS ${LIBDEPS} Vc) - - # Add a library to the project using the specified source files - add_library_tested(Ali${MODULE} SHARED ${SRCS} G__Ali${MODULE}.cxx) - target_link_libraries(Ali${MODULE} ${LIBDEPS}) - - # Additional compilation flags - set_target_properties(Ali${MODULE} PROPERTIES COMPILE_FLAGS "") - - # System dependent: Modify the way the library is build - if(${CMAKE_SYSTEM} MATCHES Darwin) - set_target_properties(Ali${MODULE} - PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") - endif(${CMAKE_SYSTEM} MATCHES Darwin) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS_CINT_O2} DESTINATION include) -endif() - if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS}) endif() diff --git a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx index 011243f5e158f..d709e5b9af92d 100644 --- a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx +++ b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx @@ -20,7 +20,7 @@ #include "GPUCommonLogger.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; void ChebyshevFit1D::reset(int32_t order, double xMin, double xMax) { diff --git a/GPU/TPCFastTransformation/ChebyshevFit1D.h b/GPU/TPCFastTransformation/ChebyshevFit1D.h index f1726ce063f64..591641839dd20 100644 --- a/GPU/TPCFastTransformation/ChebyshevFit1D.h +++ b/GPU/TPCFastTransformation/ChebyshevFit1D.h @@ -20,7 +20,7 @@ #include "GPUCommonDef.h" #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -106,6 +106,6 @@ inline double ChebyshevFit1D::eval(double x) } 
} // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx b/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx index 26f565b15dab4..e12c98a8a400b 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.cxx @@ -12,7 +12,7 @@ #include "CorrectionMapsHelper.h" #include "GPUCommonLogger.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; //________________________________________________________ void CorrectionMapsHelper::clear() diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.h b/GPU/TPCFastTransformation/CorrectionMapsHelper.h index 100f871f2ec63..32ff6e1f06b10 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.h +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.h @@ -23,7 +23,7 @@ #include "GPUCommonDef.h" #include "TPCFastTransform.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -57,15 +57,15 @@ class CorrectionMapsHelper mCorrMap->InverseTransformYZtoNominalYZ(slice, row, y, z, ny, nz, mCorrMapRef, mCorrMapMShape, (mScaleInverse ? mLumiScale : 0), (mScaleInverse ? 
1 : 0), mLumiScaleMode); } - GPUd() const GPUCA_NAMESPACE::gpu::TPCFastTransform* getCorrMap() const { return mCorrMap; } - GPUd() const GPUCA_NAMESPACE::gpu::TPCFastTransform* getCorrMapRef() const { return mCorrMapRef; } - GPUd() const GPUCA_NAMESPACE::gpu::TPCFastTransform* getCorrMapMShape() const { return mCorrMapMShape; } + GPUd() const o2::gpu::TPCFastTransform* getCorrMap() const { return mCorrMap; } + GPUd() const o2::gpu::TPCFastTransform* getCorrMapRef() const { return mCorrMapRef; } + GPUd() const o2::gpu::TPCFastTransform* getCorrMapMShape() const { return mCorrMapMShape; } bool getOwner() const { return mOwner; } - void setCorrMap(GPUCA_NAMESPACE::gpu::TPCFastTransform* m); - void setCorrMapRef(GPUCA_NAMESPACE::gpu::TPCFastTransform* m); - void setCorrMapMShape(GPUCA_NAMESPACE::gpu::TPCFastTransform* m); + void setCorrMap(o2::gpu::TPCFastTransform* m); + void setCorrMapRef(o2::gpu::TPCFastTransform* m); + void setCorrMapMShape(o2::gpu::TPCFastTransform* m); void reportScaling(); void setInstLumiCTP(float v) { @@ -125,9 +125,9 @@ class CorrectionMapsHelper void setUpdatedLumi() { mUpdatedFlags |= UpdateFlags::LumiBit; } #if !defined(GPUCA_GPUCODE_DEVICE) - void setCorrMap(std::unique_ptr&& m); - void setCorrMapRef(std::unique_ptr&& m); - void setCorrMapMShape(std::unique_ptr&& m); + void setCorrMap(std::unique_ptr&& m); + void setCorrMapRef(std::unique_ptr&& m); + void setCorrMapMShape(std::unique_ptr&& m); #endif void setOwner(bool v); void acknowledgeUpdate() { mUpdatedFlags = 0; } @@ -181,15 +181,13 @@ class CorrectionMapsHelper float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP bool mEnableMShape = false; ///< use v shape correction bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again - GPUCA_NAMESPACE::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform - GPUCA_NAMESPACE::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform - 
GPUCA_NAMESPACE::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side -#ifndef GPUCA_ALIROOT_LIB + o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform + o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform + o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side ClassDefNV(CorrectionMapsHelper, 6); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/MultivariatePolynomial.h b/GPU/TPCFastTransformation/MultivariatePolynomial.h index 8da69b6978134..4fd2157409133 100644 --- a/GPU/TPCFastTransformation/MultivariatePolynomial.h +++ b/GPU/TPCFastTransformation/MultivariatePolynomial.h @@ -28,7 +28,7 @@ #endif #endif -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { /// Class for multivariate polynomials. @@ -158,9 +158,7 @@ class MultivariatePolynomial : public FlatObject, public MultivariatePolynomialH void construct(); #endif -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(MultivariatePolynomial, 1); -#endif }; //================================================================================= @@ -177,9 +175,7 @@ void MultivariatePolynomial::loadFromFile(TFile& i setFromContainer(*polTmp); delete polTmp; } else { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "couldnt load object {} from input file", name); -#endif } } @@ -188,21 +184,15 @@ void MultivariatePolynomial::setFromContainer(cons { if constexpr (Dim > 0 && Degree > 0) { if (this->getDim() != container.mDim) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "wrong number of dimensions! this {} container {}", this->getDim(), container.mDim); -#endif return; } if (this->getDegree() != container.mDegree) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "wrong number of degrees! 
this {} container {}", this->getDegree(), container.mDegree); -#endif return; } if (this->isInteractionOnly() != container.mInteractionOnly) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "InteractionOnly is set for this object to {}, but stored as {} in the container", this->isInteractionOnly(), container.mInteractionOnly); -#endif return; } setParams(container.mParams.data()); @@ -279,6 +269,6 @@ void MultivariatePolynomial::setFutureBufferAddres FlatObject::setFutureBufferAddress(futureFlatBufferPtr); } -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/TPCFastTransformation/MultivariatePolynomialHelper.cxx b/GPU/TPCFastTransformation/MultivariatePolynomialHelper.cxx index 80cb691d80fad..7ccc82b3512d3 100644 --- a/GPU/TPCFastTransformation/MultivariatePolynomialHelper.cxx +++ b/GPU/TPCFastTransformation/MultivariatePolynomialHelper.cxx @@ -20,7 +20,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) void MultivariatePolynomialHelper<0, 0, false>::print() const @@ -156,14 +156,12 @@ Type MultivariatePolynomialHelper<0, 0, false>::combination_with_repetiton(const } val += term; } else { -#if !defined(GPUCA_ALIROOT_LIB) std::string term{}; for (size_t i = 1; i <= degree; ++i) { term += fmt::format("x[{}] * ", pos[i]); } term += fmt::format("par[{}]", indexPar++); val.emplace_back(term); -#endif } } diff --git a/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h b/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h index 69c2052f83138..52c30b3241adc 100644 --- a/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h +++ b/GPU/TPCFastTransformation/MultivariatePolynomialHelper.h @@ -28,7 +28,7 @@ class TLinearFitter; -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { #if !defined(GPUCA_GPUCODE) @@ -325,6 +325,6 @@ GPUd() constexpr float MultivariatePolynomialHelper void NDPiecewisePolynomials::setFromContainer(const 
NDPiecewisePolynomialContainer& container) { if (Dim != container.mDim) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "wrong number of dimensions! this {} container {}", Dim, container.mDim); -#endif return; } if (Degree != container.mDegree) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "wrong number of degrees! this {} container {}", Degree, container.mDegree); -#endif return; } if (InteractionOnly != container.mInteractionOnly) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "InteractionOnly is set for this object to {}, but stored as {} in the container", InteractionOnly, container.mInteractionOnly); -#endif return; } init(container.mMin.data(), container.mMax.data(), container.mN.data()); @@ -479,6 +471,6 @@ GPUdi() void NDPiecewisePolynomials::clamp(float x } } -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.inc b/GPU/TPCFastTransformation/NDPiecewisePolynomials.inc index d7bb9d702e96f..2538e30056448 100644 --- a/GPU/TPCFastTransformation/NDPiecewisePolynomials.inc +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.inc @@ -20,10 +20,9 @@ #include "CommonUtils/TreeStreamRedirector.h" #include "NDPiecewisePolynomials.h" -namespace GPUCA_NAMESPACE::gpu +namespace o2::gpu { -#ifndef GPUCA_ALIROOT_LIB template void NDPiecewisePolynomials::dumpToTree(const uint32_t nSamplingPoints[/* Dim */], const char* outName, const char* treeName, const bool recreateFile) const { @@ -61,7 +60,6 @@ void NDPiecewisePolynomials::dumpToTree(const uint } pcstream.Close(); } -#endif // GPUCA_ALIROOT_LIB #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) @@ -74,9 +72,7 @@ void NDPiecewisePolynomials::loadFromFile(TFile& i setFromContainer(*gridTmp); delete gridTmp; } else { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "couldnt load object {} from input file", name); -#endif } } @@ -98,9 +94,7 @@ template void NDPiecewisePolynomials::performFits(const std::function& func, const uint32_t nAuxiliaryPoints[/* Dim */]) { 
const int32_t nTotalFits = getNPolynomials(); -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "Perform fitting of {}D-Polynomials of degree {} for a total of {} fits.", Dim, Degree, nTotalFits); -#endif MultivariatePolynomialHelper<0, 0, false> pol(Dim, Degree, InteractionOnly); TLinearFitter fitter = pol.getTLinearFitter(); @@ -127,9 +121,7 @@ void NDPiecewisePolynomials::performFits(const std for (;;) { const bool debug = !(++counter % printDebugForNFits); if (debug) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "Performing fit {} out of {}", counter, nTotalFits); -#endif } checkPos(nPolynomials, pos); @@ -149,9 +141,7 @@ template void NDPiecewisePolynomials::performFits(const std::vector& x, const std::vector& y) { const int32_t nTotalFits = getNPolynomials(); -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "Perform fitting of {}D-Polynomials of degree {} for a total of {} fits.", Dim, Degree, nTotalFits); -#endif // approximate number of points uint32_t nPoints = 2 * y.size() / nTotalFits; @@ -196,16 +186,12 @@ void NDPiecewisePolynomials::performFits(const std for (int32_t i = 0; i < nTotalFits; ++i) { const bool debug = !(++counter % printDebugForNFits); if (debug) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "Performing fit {} out of {}", counter, nTotalFits); -#endif } // store values for fitting if (dataPointsIndices[i].empty()) { -#ifndef GPUCA_ALIROOT_LIB LOGP(info, "No data points to fit"); -#endif continue; } @@ -269,7 +255,7 @@ void NDPiecewisePolynomials::fitInnerGrid(const st std::copy(params.begin(), params.end(), &mParams[index]); } -} // namespace GPUCA_NAMESPACE::gpu +} // namespace o2::gpu #endif // !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) diff --git a/GPU/TPCFastTransformation/Spline.cxx b/GPU/TPCFastTransformation/Spline.cxx index 01cb96bc28482..70d69c465dd9e 100644 --- a/GPU/TPCFastTransformation/Spline.cxx +++ b/GPU/TPCFastTransformation/Spline.cxx @@ -17,8 +17,8 @@ #include "Spline.h" #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code 
invisible on GPU and in the standalone compilation -templateClassImp(GPUCA_NAMESPACE::gpu::Spline); +templateClassImp(o2::gpu::Spline); #endif -template class GPUCA_NAMESPACE::gpu::Spline; -template class GPUCA_NAMESPACE::gpu::Spline; +template class o2::gpu::Spline; +template class o2::gpu::Spline; diff --git a/GPU/TPCFastTransformation/Spline.h b/GPU/TPCFastTransformation/Spline.h index 9b514c984785d..ee3625a3793c1 100644 --- a/GPU/TPCFastTransformation/Spline.h +++ b/GPU/TPCFastTransformation/Spline.h @@ -19,7 +19,7 @@ #include "SplineSpec.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -99,7 +99,7 @@ class Spline Spline(const Spline&) = delete; #endif -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// read a class object from the file static Spline* readFromFile(TFile& inpf, const char* name) { @@ -107,12 +107,10 @@ class Spline } #endif -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline, 0); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/Spline1D.cxx b/GPU/TPCFastTransformation/Spline1D.cxx index c1ef8a45346ef..1ac8df93ab263 100644 --- a/GPU/TPCFastTransformation/Spline1D.cxx +++ b/GPU/TPCFastTransformation/Spline1D.cxx @@ -17,8 +17,8 @@ #include "Spline1D.h" #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation -templateClassImp(GPUCA_NAMESPACE::gpu::Spline1D); +templateClassImp(o2::gpu::Spline1D); #endif -template class GPUCA_NAMESPACE::gpu::Spline1D; -template class GPUCA_NAMESPACE::gpu::Spline1D; \ No newline at end of file +template class o2::gpu::Spline1D; +template class o2::gpu::Spline1D; \ No newline at end of file diff --git a/GPU/TPCFastTransformation/Spline1D.h b/GPU/TPCFastTransformation/Spline1D.h index 62c6f82ad166a..ccadaeed23b79 100644 --- a/GPU/TPCFastTransformation/Spline1D.h +++ 
b/GPU/TPCFastTransformation/Spline1D.h @@ -19,7 +19,7 @@ #include "Spline1DSpec.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -165,7 +165,7 @@ class Spline1D Spline1D(const Spline1D&) = delete; #endif -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// read a class object from the file static Spline1D* readFromFile(TFile& inpf, const char* name) { @@ -173,12 +173,10 @@ class Spline1D } #endif -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline1D, 0); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/Spline1DHelper.cxx b/GPU/TPCFastTransformation/Spline1DHelper.cxx index bfa9614b2abb8..938604bb9172d 100644 --- a/GPU/TPCFastTransformation/Spline1DHelper.cxx +++ b/GPU/TPCFastTransformation/Spline1DHelper.cxx @@ -33,9 +33,9 @@ #include "GPUCommonMath.h" #include -templateClassImp(GPUCA_NAMESPACE::gpu::Spline1DHelper); +templateClassImp(o2::gpu::Spline1DHelper); -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template Spline1DHelper::Spline1DHelper() : mError(), mSpline() @@ -502,7 +502,6 @@ void Spline1DHelper::setSpline(const Spline1DContainer& spline) mSpline.setXrange(spline.getXmin(), spline.getXmax()); } -#ifndef GPUCA_ALIROOT_LIB template int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) { @@ -754,7 +753,6 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) } return 0; } -#endif -template class GPUCA_NAMESPACE::gpu::Spline1DHelper; -template class GPUCA_NAMESPACE::gpu::Spline1DHelper; +template class o2::gpu::Spline1DHelper; +template class o2::gpu::Spline1DHelper; diff --git a/GPU/TPCFastTransformation/Spline1DHelper.h b/GPU/TPCFastTransformation/Spline1DHelper.h index 31a100d28c319..61a3e648ccc01 100644 --- a/GPU/TPCFastTransformation/Spline1DHelper.h +++ b/GPU/TPCFastTransformation/Spline1DHelper.h @@ -24,7 
+24,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -99,7 +99,7 @@ class Spline1DHelper /// Gives error string const char* getLastError() const { return mError.c_str(); } -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) // code invisible on GPU and in the standalone compilation +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation /// Test the Spline1D class functionality static int32_t test(const bool draw = 0, const bool drawDataPoints = 1); #endif @@ -119,12 +119,10 @@ class Spline1DHelper Spline1D mSpline; ///< copy of the spline grid -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline1DHelper, 0); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/Spline1DHelperOld.cxx b/GPU/TPCFastTransformation/Spline1DHelperOld.cxx index 7b75a8409eada..2ace188aa57f1 100644 --- a/GPU/TPCFastTransformation/Spline1DHelperOld.cxx +++ b/GPU/TPCFastTransformation/Spline1DHelperOld.cxx @@ -32,9 +32,9 @@ #include "GPUCommonMath.h" #include -templateClassImp(GPUCA_NAMESPACE::gpu::Spline1DHelperOld); +templateClassImp(o2::gpu::Spline1DHelperOld); -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template Spline1DHelperOld::Spline1DHelperOld() : mError(), mSpline(), mFdimensions(0) @@ -857,7 +857,6 @@ void Spline1DHelperOld::approximateDerivatives( } } -#ifndef GPUCA_ALIROOT_LIB template int32_t Spline1DHelperOld::test(const bool draw, const bool drawDataPoints) { @@ -1108,9 +1107,8 @@ int32_t Spline1DHelperOld::test(const bool draw, const bool drawDataPoint } return 0; } -#endif -template class GPUCA_NAMESPACE::gpu::Spline1DHelperOld; -template class GPUCA_NAMESPACE::gpu::Spline1DHelperOld; +template class o2::gpu::Spline1DHelperOld; +template class o2::gpu::Spline1DHelperOld; #endif diff --git a/GPU/TPCFastTransformation/Spline1DHelperOld.h 
b/GPU/TPCFastTransformation/Spline1DHelperOld.h index 013b4974c8c60..fc8d33ad64f87 100644 --- a/GPU/TPCFastTransformation/Spline1DHelperOld.h +++ b/GPU/TPCFastTransformation/Spline1DHelperOld.h @@ -26,7 +26,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -147,7 +147,7 @@ class Spline1DHelperOld /// Gives error string const char* getLastError() const { return mError.c_str(); } -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) // code invisible on GPU and in the standalone compilation +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation /// Test the Spline1D class functionality static int32_t test(const bool draw = 0, const bool drawDataPoints = 1); #endif @@ -168,12 +168,10 @@ class Spline1DHelperOld std::vector mLSMmatrixSderivatives; std::vector mLSMmatrixSvalues; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline1DHelperOld, 0); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/Spline1DSpec.cxx b/GPU/TPCFastTransformation/Spline1DSpec.cxx index 7a5b76a71678e..603013d5e0808 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline1DSpec.cxx @@ -25,12 +25,12 @@ #include "Spline1DHelper.h" #include "TFile.h" #include "GPUCommonMath.h" -templateClassImp(GPUCA_NAMESPACE::gpu::Spline1DContainer); -templateClassImp(GPUCA_NAMESPACE::gpu::Spline1DSpec); +templateClassImp(o2::gpu::Spline1DContainer); +templateClassImp(o2::gpu::Spline1DSpec); #endif using namespace std; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; #if !defined(GPUCA_GPUCODE) @@ -173,7 +173,6 @@ void Spline1DContainer::approximateFunction( helper.approximateFunction(*reinterpret_cast*>(this), xMin, xMax, F, nAxiliaryDataPoints); } -#ifndef GPUCA_ALIROOT_LIB template int32_t Spline1DContainer::writeToFile(TFile& outf, const char* name) { @@ 
-189,7 +188,6 @@ Spline1DContainer* Spline1DContainer::readFromFile( return FlatObject::readFromFile>(inpf, name); } -#endif #endif #if !defined(GPUCA_GPUCODE) @@ -261,7 +259,7 @@ void Spline1DContainer::setFutureBufferAddress(char* futureFlatBufferPtr) FlatObject::setFutureBufferAddress(futureFlatBufferPtr); } -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) template int32_t Spline1DContainer::test(const bool draw, const bool drawDataPoints) { @@ -269,7 +267,7 @@ int32_t Spline1DContainer::test(const bool draw, const bool drawDataPoint } #endif // GPUCA_GPUCODE -template class GPUCA_NAMESPACE::gpu::Spline1DContainer; -template class GPUCA_NAMESPACE::gpu::Spline1DContainer; -template class GPUCA_NAMESPACE::gpu::Spline1DSpec; -template class GPUCA_NAMESPACE::gpu::Spline1DSpec; +template class o2::gpu::Spline1DContainer; +template class o2::gpu::Spline1DContainer; +template class o2::gpu::Spline1DSpec; +template class o2::gpu::Spline1DSpec; diff --git a/GPU/TPCFastTransformation/Spline1DSpec.h b/GPU/TPCFastTransformation/Spline1DSpec.h index 65223d551e2ee..1ed1cc322ede3 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.h +++ b/GPU/TPCFastTransformation/Spline1DSpec.h @@ -27,7 +27,7 @@ class TFile; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -173,7 +173,7 @@ class Spline1DContainer : public FlatObject ///_______________ Test tools _______________ -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) // code invisible on GPU and in the standalone compilation +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation /// Test the class functionality static int32_t test(const bool draw = 0, const bool drawDataPoints = 1); #endif @@ -219,9 +219,7 @@ class Spline1DContainer : public FlatObject int32_t* mUtoKnotMap = nullptr; //! (transient!!) 
pointer to (integer U -> knot index) map inside the mFlatBufferPtr array DataT* mParameters = nullptr; //! (transient!!) pointer to F-dependent parameters inside the mFlatBufferPtr array -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline1DContainer, 1); -#endif }; template @@ -365,9 +363,7 @@ class Spline1DSpec : public Spline1DContainer using TBase::mParameters; using TBase::mYdim; using TBase::TBase; // inherit constructors and hide them -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline1DSpec, 0); -#endif }; /// ================================================================================================== @@ -507,9 +503,7 @@ class Spline1DSpec /// _______ Expert tools: interpolation with given nYdim and external Parameters _______ using TBase::interpolateU; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline1DSpec, 0); -#endif }; /// ================================================================================================== @@ -534,6 +528,6 @@ class Spline1DSpec }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/Spline2D.cxx b/GPU/TPCFastTransformation/Spline2D.cxx index 3055bcaccbbc1..70e9f85a6f6e3 100644 --- a/GPU/TPCFastTransformation/Spline2D.cxx +++ b/GPU/TPCFastTransformation/Spline2D.cxx @@ -17,8 +17,8 @@ #include "Spline2D.h" #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation -templateClassImp(GPUCA_NAMESPACE::gpu::Spline2D); +templateClassImp(o2::gpu::Spline2D); #endif -template class GPUCA_NAMESPACE::gpu::Spline2D; -template class GPUCA_NAMESPACE::gpu::Spline2D; +template class o2::gpu::Spline2D; +template class o2::gpu::Spline2D; diff --git a/GPU/TPCFastTransformation/Spline2D.h b/GPU/TPCFastTransformation/Spline2D.h index 64c1b487987fc..788816f73d6dd 100644 --- a/GPU/TPCFastTransformation/Spline2D.h +++ b/GPU/TPCFastTransformation/Spline2D.h @@ -29,7 +29,7 @@ class TFile; -namespace GPUCA_NAMESPACE +namespace o2 { namespace 
gpu { @@ -96,7 +96,7 @@ class Spline2D Spline2D(const Spline2D&) = delete; #endif -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// read a class object from the file static Spline2D* readFromFile(TFile& inpf, const char* name) { @@ -104,12 +104,10 @@ class Spline2D } #endif -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline2D, 0); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/Spline2DHelper.cxx b/GPU/TPCFastTransformation/Spline2DHelper.cxx index 0801d3b134e88..03ecf4a3f1707 100644 --- a/GPU/TPCFastTransformation/Spline2DHelper.cxx +++ b/GPU/TPCFastTransformation/Spline2DHelper.cxx @@ -36,7 +36,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template Spline2DHelper::Spline2DHelper() : mError(), mFdimensions(0), mHelperU1(), mHelperU2() @@ -405,7 +405,6 @@ void Spline2DHelper::approximateDataPoints( } } -#ifndef GPUCA_ALIROOT_LIB template int32_t Spline2DHelper::test(const bool draw, const bool drawDataPoints) { @@ -668,7 +667,6 @@ int32_t Spline2DHelper::test(const bool draw, const bool drawDataPoints) return 0; } -#endif -template class GPUCA_NAMESPACE::gpu::Spline2DHelper; -template class GPUCA_NAMESPACE::gpu::Spline2DHelper; +template class o2::gpu::Spline2DHelper; +template class o2::gpu::Spline2DHelper; diff --git a/GPU/TPCFastTransformation/Spline2DHelper.h b/GPU/TPCFastTransformation/Spline2DHelper.h index 19630adacd581..7195bab925f85 100644 --- a/GPU/TPCFastTransformation/Spline2DHelper.h +++ b/GPU/TPCFastTransformation/Spline2DHelper.h @@ -28,7 +28,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -110,7 +110,7 @@ class Spline2DHelper /// Gives error string const char* getLastError() const { return mError.c_str(); } -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) // 
code invisible on GPU and in the standalone compilation +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation /// Test the Spline2D class functionality static int32_t test(const bool draw = 0, const bool drawDataPoints = 1); #endif @@ -130,9 +130,7 @@ class Spline2DHelper Spline1D fGridU; Spline1D fGridV; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline2DHelper, 0); -#endif }; template @@ -168,6 +166,6 @@ int32_t Spline2DHelper::setSpline( } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/Spline2DSpec.cxx b/GPU/TPCFastTransformation/Spline2DSpec.cxx index aabf63f3252fe..4571110bdedaa 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline2DSpec.cxx @@ -34,13 +34,13 @@ #include "TFile.h" #include "GPUCommonMath.h" -templateClassImp(GPUCA_NAMESPACE::gpu::Spline2DContainer); -templateClassImp(GPUCA_NAMESPACE::gpu::Spline2DSpec); +templateClassImp(o2::gpu::Spline2DContainer); +templateClassImp(o2::gpu::Spline2DSpec); #endif using namespace std; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template void Spline2DContainer::destroy() @@ -215,7 +215,6 @@ void Spline2DContainer::approximateFunctionViaDataPoints( helper.approximateFunctionViaDataPoints(*reinterpret_cast*>(this), x1Min, x1Max, x2Min, x2Max, F, nAuxiliaryDataPointsX1, nAuxiliaryDataPointsX2); } -#ifndef GPUCA_ALIROOT_LIB template int32_t Spline2DContainer::writeToFile(TFile& outf, const char* name) { @@ -236,9 +235,8 @@ int32_t Spline2DContainer::test(const bool draw, const bool drawDataPoint { return Spline2DHelper::test(draw, drawDataPoints); } -#endif #endif // GPUCA_GPUCODE && !GPUCA_STANDALONE -template class GPUCA_NAMESPACE::gpu::Spline2DContainer; -template class GPUCA_NAMESPACE::gpu::Spline2DContainer; +template class o2::gpu::Spline2DContainer; +template class o2::gpu::Spline2DContainer; diff --git 
a/GPU/TPCFastTransformation/Spline2DSpec.h b/GPU/TPCFastTransformation/Spline2DSpec.h index b626df3fa7e51..e7a9db8e6ae9c 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.h +++ b/GPU/TPCFastTransformation/Spline2DSpec.h @@ -29,7 +29,7 @@ class TFile; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -159,7 +159,7 @@ class Spline2DContainer : public FlatObject ///_______________ Test tools _______________ -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) // code invisible on GPU and in the standalone compilation +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation /// Test the class functionality static int32_t test(const bool draw = 0, const bool drawDataPoints = 1); #endif @@ -196,9 +196,7 @@ class Spline2DContainer : public FlatObject Spline1D mGridX2; ///< grid for V axis DataT* mParameters = nullptr; //! (transient!!) F-dependent parameters of the spline -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(Spline2DContainer, 1); -#endif }; /// ================================================================================================== @@ -537,6 +535,6 @@ class Spline2DSpec using TBase::interpolate; }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/SplineHelper.cxx b/GPU/TPCFastTransformation/SplineHelper.cxx index b79ba08f8fd4c..b0d1f4348ca60 100644 --- a/GPU/TPCFastTransformation/SplineHelper.cxx +++ b/GPU/TPCFastTransformation/SplineHelper.cxx @@ -33,7 +33,7 @@ #include "GPUCommonMath.h" #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template SplineHelper::SplineHelper() : mError(), mXdimensions(0), mFdimensions(0), mNumberOfDataPoints(0), mHelpers() @@ -578,7 +578,7 @@ int32_t SplineHelper::test(const bool draw, const bool drawDataPoints) return 0; } -template class GPUCA_NAMESPACE::gpu::SplineHelper; -template class GPUCA_NAMESPACE::gpu::SplineHelper; 
+template class o2::gpu::SplineHelper; +template class o2::gpu::SplineHelper; #endif diff --git a/GPU/TPCFastTransformation/SplineHelper.h b/GPU/TPCFastTransformation/SplineHelper.h index ab558f82eaa17..986297e368aab 100644 --- a/GPU/TPCFastTransformation/SplineHelper.h +++ b/GPU/TPCFastTransformation/SplineHelper.h @@ -28,7 +28,7 @@ #include "Spline1DHelperOld.h" #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -160,6 +160,6 @@ int32_t SplineHelper::setSpline( } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/SplineSpec.cxx b/GPU/TPCFastTransformation/SplineSpec.cxx index 521b418e3f099..aa1af8ad30ba5 100644 --- a/GPU/TPCFastTransformation/SplineSpec.cxx +++ b/GPU/TPCFastTransformation/SplineSpec.cxx @@ -34,13 +34,13 @@ #include "TFile.h" #include "GPUCommonMath.h" -templateClassImp(GPUCA_NAMESPACE::gpu::SplineContainer); -templateClassImp(GPUCA_NAMESPACE::gpu::SplineSpec); +templateClassImp(o2::gpu::SplineContainer); +templateClassImp(o2::gpu::SplineSpec); #endif using namespace std; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; template void SplineContainer::destroy() @@ -206,7 +206,6 @@ void SplineContainer:: helper.approximateFunction(*reinterpret_cast*>(this), xMin, xMax, F, nAuxiliaryDataPoints); } -#ifndef GPUCA_ALIROOT_LIB template int32_t SplineContainer::writeToFile(TFile& outf, const char* name) { @@ -227,9 +226,8 @@ int32_t SplineContainer::test(const bool draw, const bool drawDataPoints) { return SplineHelper::test(draw, drawDataPoints); } -#endif #endif // GPUCA_GPUCODE && !GPUCA_STANDALONE -template class GPUCA_NAMESPACE::gpu::SplineContainer; -template class GPUCA_NAMESPACE::gpu::SplineContainer; +template class o2::gpu::SplineContainer; +template class o2::gpu::SplineContainer; diff --git a/GPU/TPCFastTransformation/SplineSpec.h b/GPU/TPCFastTransformation/SplineSpec.h index f9d3ec0613f64..dae17b22f42ea 100644 --- 
a/GPU/TPCFastTransformation/SplineSpec.h +++ b/GPU/TPCFastTransformation/SplineSpec.h @@ -29,7 +29,7 @@ class TFile; -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -150,7 +150,7 @@ class SplineContainer : public FlatObject ///_______________ Test tools _______________ -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) // code invisible on GPU and in the standalone compilation +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation /// Test the class functionality static int32_t test(const bool draw = 0, const bool drawDataPoints = 1); #endif @@ -189,9 +189,7 @@ class SplineContainer : public FlatObject Spline1D* mGrid; //! (transient!!) mXdim grids DataT* mParameters; //! (transient!!) F-dependent parameters of the spline -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(SplineContainer, 1); -#endif }; template @@ -550,6 +548,6 @@ class SplineSpec }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/SplineUtil.h b/GPU/TPCFastTransformation/SplineUtil.h index b2226eb8e383a..23c786f33dc6f 100644 --- a/GPU/TPCFastTransformation/SplineUtil.h +++ b/GPU/TPCFastTransformation/SplineUtil.h @@ -17,7 +17,7 @@ #ifndef ALICEO2_GPUCOMMON_TPCFASTTRANSFORMATION_SPLINEUTIL_H #define ALICEO2_GPUCOMMON_TPCFASTTRANSFORMATION_SPLINEUTIL_H -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -39,7 +39,6 @@ class SplineUtil // 2 - at least one of the dimensions must be set during runtime // 3 - specialization where nYdim==1 (a small add-on on top of the other specs) - // calculate it as one return statement to make the AliRoot compiler happy return (nYdim == 1) ? 3 : ((nXdim > 0 && nYdim > 0) ? 
1 : 2); /* if (nYdim == 1) { @@ -111,6 +110,6 @@ class SplineUtil::Switch }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/SymMatrixSolver.cxx b/GPU/TPCFastTransformation/SymMatrixSolver.cxx index 1991dd76e7ad0..ea574eeec01b6 100644 --- a/GPU/TPCFastTransformation/SymMatrixSolver.cxx +++ b/GPU/TPCFastTransformation/SymMatrixSolver.cxx @@ -23,11 +23,9 @@ #include using namespace std; -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -#ifndef GPUCA_ALIROOT_LIB -ClassImp(GPUCA_NAMESPACE::gpu::SymMatrixSolver); -#endif +ClassImp(o2::gpu::SymMatrixSolver); void SymMatrixSolver::solve() { diff --git a/GPU/TPCFastTransformation/SymMatrixSolver.h b/GPU/TPCFastTransformation/SymMatrixSolver.h index c84c1699c07f3..4ccb75bfa6888 100644 --- a/GPU/TPCFastTransformation/SymMatrixSolver.h +++ b/GPU/TPCFastTransformation/SymMatrixSolver.h @@ -23,7 +23,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -75,12 +75,10 @@ class SymMatrixSolver int32_t mShift = 0; std::vector mA; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(SymMatrixSolver, 0); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 -#endif \ No newline at end of file +#endif diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.cxx b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.cxx index ec32eca959d4f..5a7dffd2a753b 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.cxx +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.cxx @@ -23,11 +23,9 @@ #include "Spline2DHelper.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -#ifndef GPUCA_ALIROOT_LIB ClassImp(TPCFastSpaceChargeCorrection); -#endif TPCFastSpaceChargeCorrection::TPCFastSpaceChargeCorrection() : FlatObject(), diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h index 
70ca6c99ed853..c353f3f3329e7 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h @@ -23,7 +23,7 @@ #include "GPUCommonDef.h" #include "GPUCommonMath.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -45,9 +45,7 @@ class TPCFastSpaceChargeCorrection : public FlatObject struct RowInfo { int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) size_t dataOffsetBytes[3]{0}; ///< offset for the spline data withing a TPC slice -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(RowInfo, 1); -#endif }; struct RowActiveArea { @@ -56,9 +54,7 @@ class TPCFastSpaceChargeCorrection : public FlatObject float cuMin{0.f}; float cuMax{0.f}; float cvMax{0.f}; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(RowActiveArea, 1); -#endif }; struct SliceRowInfo { @@ -68,16 +64,12 @@ class TPCFastSpaceChargeCorrection : public FlatObject float scaleCorrUtoGrid{0.f}; ///< scale corrected U to U-grid coordinate float scaleCorrVtoGrid{0.f}; ///< scale corrected V to V-grid coordinate RowActiveArea activeArea; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(SliceRowInfo, 1); -#endif }; struct SliceInfo { float vMax{0.f}; ///< Max value of V coordinate -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(SliceInfo, 1); -#endif }; typedef Spline2D SplineType; @@ -277,9 +269,7 @@ class TPCFastSpaceChargeCorrection : public FlatObject float fInterpolationSafetyMargin{0.1f}; // 10% area around the TPC row. Outside of this area the interpolation returns the boundary values. 
-#ifndef GPUCA_ALIROOT_LIB ClassDefNV(TPCFastSpaceChargeCorrection, 3); -#endif }; /// ==================================================== @@ -504,6 +494,6 @@ GPUdi() float TPCFastSpaceChargeCorrection::getMaxDriftLength(int32_t slice) con } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.cxx b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.cxx index 92cbc2ba49a11..12d0c37f5f12f 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.cxx +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.cxx @@ -17,8 +17,6 @@ #include "TPCFastSpaceChargeCorrectionMap.h" #include "GPUCommonLogger.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; -#ifndef GPUCA_ALIROOT_LIB ClassImp(TPCFastSpaceChargeCorrectionMap); -#endif \ No newline at end of file diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.h index 998c810300098..97b824aa6da32 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.h +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrectionMap.h @@ -21,7 +21,7 @@ #include "GPUCommonRtypes.h" #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -97,12 +97,10 @@ class TPCFastSpaceChargeCorrectionMap int32_t mNrows{0}; std::vector> fDataPoints; //! (transient!!) 
points with space charge correction -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(TPCFastSpaceChargeCorrectionMap, 0); -#endif }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/TPCFastTransform.cxx b/GPU/TPCFastTransformation/TPCFastTransform.cxx index eee8527962a6b..aea6589761403 100644 --- a/GPU/TPCFastTransformation/TPCFastTransform.cxx +++ b/GPU/TPCFastTransformation/TPCFastTransform.cxx @@ -30,11 +30,11 @@ #include "GPUCommonLogger.h" #endif -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) #include "TPCSpaceCharge/SpaceCharge.h" #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; TPCFastTransform::TPCFastTransform() : FlatObject(), mTimeStamp(0), mCorrection(), mApplyCorrection(1), mT0(0.f), mVdrift(0.f), mVdriftCorrY(0.f), mLdriftCorr(0.f), mTOFcorr(0.f), mPrimVtxZ(0.f), mLumi(0.f), mLumiError(0.f), mLumiScaleFactor(1.0f) @@ -165,7 +165,7 @@ void TPCFastTransform::print() const #endif } -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) int32_t TPCFastTransform::writeToFile(std::string outFName, std::string name) { @@ -231,7 +231,7 @@ TPCFastTransform* TPCFastTransform::loadFromFile(std::string inpFName, std::stri #endif -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) TPCSlowSpaceChargeCorrection::~TPCSlowSpaceChargeCorrection() { delete mCorr; diff --git a/GPU/TPCFastTransformation/TPCFastTransform.h b/GPU/TPCFastTransformation/TPCFastTransform.h index 936a19d3f30fa..4e0403422ee06 100644 --- a/GPU/TPCFastTransformation/TPCFastTransform.h +++ b/GPU/TPCFastTransformation/TPCFastTransform.h @@ -33,7 +33,7 @@ template class SpaceCharge; } -namespace GPUCA_NAMESPACE +namespace o2 { 
namespace gpu { @@ -41,7 +41,7 @@ namespace gpu /// simple struct to hold the space charge object which can be used for CPU reconstruction only struct TPCSlowSpaceChargeCorrection { -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) /// destructor ~TPCSlowSpaceChargeCorrection(); @@ -61,9 +61,7 @@ struct TPCSlowSpaceChargeCorrection { } #endif -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(TPCSlowSpaceChargeCorrection, 2); -#endif }; /// @@ -268,7 +266,7 @@ class TPCFastTransform : public FlatObject /// maximal possible drift time of the active area GPUd() float getMaxDriftTime(int32_t slice) const; -#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && !defined(GPUCA_ALIROOT_LIB) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) int32_t writeToFile(std::string outFName = "", std::string name = ""); @@ -341,9 +339,7 @@ class TPCFastTransform : public FlatObject GPUd() void TransformInternal(int32_t slice, int32_t row, float& u, float& v, float& x, const TPCFastTransform* ref, const TPCFastTransform* ref2, float scale, float scale2, int32_t scaleMode) const; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(TPCFastTransform, 3); -#endif }; // ======================================================================= @@ -887,6 +883,6 @@ GPUdi() void TPCFastTransform::InverseTransformXYZtoNominalXYZ(int32_t slice, in } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/TPCFastTransformGeo.cxx b/GPU/TPCFastTransformation/TPCFastTransformGeo.cxx index b4cfc7e7eb5b6..3c624b3222d77 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformGeo.cxx +++ b/GPU/TPCFastTransformation/TPCFastTransformGeo.cxx @@ -23,7 +23,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; TPCFastTransformGeo::TPCFastTransformGeo() { diff --git a/GPU/TPCFastTransformation/TPCFastTransformGeo.h 
b/GPU/TPCFastTransformation/TPCFastTransformGeo.h index a24dcbf1e80c2..5eddada1e9acc 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformGeo.h +++ b/GPU/TPCFastTransformation/TPCFastTransformGeo.h @@ -23,7 +23,7 @@ #include "GPUCommonRtypes.h" #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -38,9 +38,7 @@ class TPCFastTransformGeo struct SliceInfo { float sinAlpha; float cosAlpha; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(SliceInfo, 1); -#endif }; /// The struct contains necessary info about TPC padrow @@ -54,9 +52,7 @@ class TPCFastTransformGeo /// get width in U GPUd() float getUwidth() const { return -2.f * u0; } -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(RowInfo, 1); -#endif }; /// _____________ Constructors / destructors __________________________ @@ -209,9 +205,7 @@ class TPCFastTransformGeo SliceInfo mSliceInfos[NumberOfSlices + 1]; ///< array of slice information [fixed size] RowInfo mRowInfos[MaxNumberOfRows + 1]; ///< array of row information [fixed size] -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(TPCFastTransformGeo, 1); -#endif }; // ======================================================================= @@ -338,6 +332,6 @@ GPUdi() float TPCFastTransformGeo::convUtoPad(int32_t row, float u) const } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/TPCFastTransformManager.cxx b/GPU/TPCFastTransformation/TPCFastTransformManager.cxx index f16a84c96d565..7d0aa29545578 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformManager.cxx +++ b/GPU/TPCFastTransformation/TPCFastTransformManager.cxx @@ -23,7 +23,7 @@ #include "TPCFastTransform.h" #include "Spline2DHelper.h" -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; TPCFastTransformManager::TPCFastTransformManager() : mError(), mOrigTransform(nullptr), fLastTimeBin(0) {} diff --git a/GPU/TPCFastTransformation/TPCFastTransformManager.h b/GPU/TPCFastTransformation/TPCFastTransformManager.h index 
14a85f1030bd8..f981b05bec241 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformManager.h +++ b/GPU/TPCFastTransformation/TPCFastTransformManager.h @@ -24,7 +24,7 @@ #include "TString.h" #include "AliTPCTransform.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -81,6 +81,6 @@ inline int32_t TPCFastTransformManager::storeError(int32_t code, const char* msg return code; } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/TPCFastTransformQA.cxx b/GPU/TPCFastTransformation/TPCFastTransformQA.cxx index cbe9e10060c36..8616311038952 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformQA.cxx +++ b/GPU/TPCFastTransformation/TPCFastTransformQA.cxx @@ -31,7 +31,7 @@ #include #include -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; using namespace std; TPCFastTransformQA::TPCFastTransformQA() {} diff --git a/GPU/TPCFastTransformation/TPCFastTransformQA.h b/GPU/TPCFastTransformation/TPCFastTransformQA.h index 6030ceedc5c94..727a8a87dbbc0 100644 --- a/GPU/TPCFastTransformation/TPCFastTransformQA.h +++ b/GPU/TPCFastTransformation/TPCFastTransformQA.h @@ -28,7 +28,7 @@ #include "TString.h" #include "AliTPCTransform.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -75,6 +75,6 @@ inline int32_t TPCFastTransformQA::storeError(int32_t code, const char* msg) return code; } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h b/GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h deleted file mode 100644 index acdd2d701bb86..0000000000000 --- a/GPU/TPCFastTransformation/TPCFastTransformationLinkDef_AliRoot.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file TPCFastTransformationLinkDef_AliRoot.h -/// \author Sergey Gorbunov - -#if defined(__CLING__) - -#pragma link off all globals; -#pragma link off all classes; -#pragma link off all functions; - -#endif diff --git a/GPU/TPCFastTransformation/alirootMacro/createTPCFastTransform.C b/GPU/TPCFastTransformation/alirootMacro/createTPCFastTransform.C deleted file mode 100644 index 3af3be7760ecf..0000000000000 --- a/GPU/TPCFastTransformation/alirootMacro/createTPCFastTransform.C +++ /dev/null @@ -1,69 +0,0 @@ -// $Id$ -/** - * @file createTPCFastTransform.C - * @brief A macro to create TPCFastTransform object - * - *
- * Usage:
- *
- * aliroot
- * .x initTPCcalibration.C("alien://Folder=/alice/data/2015/OCDB",246984,1)
- *  gSystem->Load("libAliTPCFastTransformation")
- * .L createTPCFastTransform.C++
- * GPUCA_NAMESPACE::gpu::TPCFastTransform fastTransform;
- * createTPCFastTransform(fastTransform);
- *
- * 
- * - * @author sergey gorbunov - * - */ - -#include "AliTPCcalibDB.h" -#include "Riostream.h" -#include "TStopwatch.h" - -#define GPUCA_ALIROOT_LIB - -#include "TPCFastTransform.h" -#include "TPCFastTransformManager.h" -#include "TPCFastTransformQA.h" - -using namespace std; -using namespace GPUCA_NAMESPACE::gpu; - -int32_t createTPCFastTransform(TPCFastTransform& fastTransform) -{ - - AliTPCcalibDB* tpcCalib = AliTPCcalibDB::Instance(); - if (!tpcCalib) { - cerr << "AliTPCcalibDB does not exist" << endl; - return -1; - } - AliTPCTransform* origTransform = tpcCalib->GetTransform(); - uint32_t timeStamp = origTransform->GetCurrentTimeStamp(); - - TPCFastTransformManager manager; - - TStopwatch timer; - timer.Start(); - - int32_t err = manager.create(fastTransform, origTransform, timeStamp); - - timer.Stop(); - - cout << "\n\n Initialisation: " << timer.CpuTime() << " / " << timer.RealTime() << " sec.\n\n" - << endl; - - if (err != 0) { - cerr << "Cannot create fast transformation object from AliTPCcalibDB, TPCFastTransformManager returns " << err << endl; - return -1; - } - - // qa - - // GPUCA_NAMESPACE::gpu::TPCFastTransformQA qa; - // qa.doQA( timeStamp ); - - return 0; -} diff --git a/GPU/TPCFastTransformation/alirootMacro/generateTPCDistortionNTupleAliRoot.C b/GPU/TPCFastTransformation/alirootMacro/generateTPCDistortionNTupleAliRoot.C deleted file mode 100644 index e63e045373b11..0000000000000 --- a/GPU/TPCFastTransformation/alirootMacro/generateTPCDistortionNTupleAliRoot.C +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file generateTPCDistortionNTupleAliRoot.C -/// \brief A developer macro for generating TPC distortion ntuple to test the TPCFastTransformation class -/// Works only with AliRoot, not with O2 -/// -/// \author Sergey Gorbunov -/// - -/* - Run the macro: - uncomment the first #define - - aliroot - .x initTPCcalibration.C("alien://Folder=/alice/data/2015/OCDB",246984,1) - gSystem->Load("libAliTPCFastTransformation") - .x generateTPCDistortionNTupleAliRoot.C+ -*/ - -// A developer code. -// It is hidden inside #ifdef in order to avoid an automatic compilation during the O2 build -// Uncomment the #define for compiling the code - -//#define FASTTRANSFORM_DEVELOPING - -#include "AliTPCcalibDB.h" -#include "AliTPCRecoParam.h" -#include "Riostream.h" -#include "TStopwatch.h" -#include "TFile.h" -#include "TNtuple.h" - -#define GPUCA_ALIROOT_LIB - -#include "TPCFastTransform.h" -#include "TPCFastTransformManager.h" -#include "TPCFastTransformQA.h" -#include "AliHLTTPCGeometry.h" - -using namespace std; -using namespace GPUCA_NAMESPACE::gpu; - -int32_t generateTPCDistortionNTupleAliRoot() -{ - AliTPCcalibDB* tpcCalib = AliTPCcalibDB::Instance(); - if (!tpcCalib) { - cerr << "AliTPCcalibDB does not exist" << endl; - return -1; - } - AliTPCTransform* origTransform = tpcCalib->GetTransform(); - - AliTPCRecoParam* recoParam = origTransform->GetCurrentRecoParamNonConst(); - if (!recoParam) { - std::cout << "TPCFastTransformManager::Init: No TPC Reco Param set in transformation" << std::endl; - return -1; - } - - uint32_t timeStamp = origTransform->GetCurrentTimeStamp(); - - TPCFastTransformManager manager; - TPCFastTransform fastTransform; - - int32_t err = manager.create(fastTransform, origTransform, timeStamp); - - if (err != 0) { - cerr << "Cannot create fast 
transformation object from AliTPCcalibDB, TPCFastTransformManager returns " << err << endl; - return -1; - } - - const TPCFastTransformGeo& geo = fastTransform.getGeometry(); - - recoParam->SetUseTOFCorrection(kFALSE); - - cout << " generate NTuple " << endl; - - TFile* f = new TFile("tpcDistortionNTuple.root", "RECREATE"); - TNtuple* nt = new TNtuple("dist", "dist", "slice:row:su:sv:dx:du:dv"); - - int32_t nSlices = 1; // fastTransform.getNumberOfSlices(); - // for( int32_t slice=0; slice (x,y,z) without time-of-flight correction - float ox = 0, oy = 0, oz = 0; - { - int32_t sector = 0, secrow = 0; - AliHLTTPCGeometry::Slice2Sector(slice, row, sector, secrow); - int32_t is[] = {sector}; - double xx[] = {static_cast(secrow), pad, time}; - origTransform->Transform(xx, is, 0, 1); - ox = xx[0]; - oy = xx[1]; - oz = xx[2]; - } - - // convert to u,v - float ou = 0, ov = 0; - geo.convLocalToUV(slice, oy, oz, ou, ov); - - // distortions in x,u,v: - float dx = ox - x; - float du = ou - u; - float dv = ov - v; - - cout << slice << " " << row << " " << su << " " << sv << " " << dx << " " << du << " " << dv << endl; - nt->Fill(slice, row, su, sv, dx, du, dv); - } - } - } - } - nt->Write(); - f->Write(); - recoParam->SetUseTOFCorrection(kTRUE); - - return 0; -} diff --git a/GPU/TPCFastTransformation/alirootMacro/initTPCcalibration.C b/GPU/TPCFastTransformation/alirootMacro/initTPCcalibration.C deleted file mode 100644 index 9ae938f67907c..0000000000000 --- a/GPU/TPCFastTransformation/alirootMacro/initTPCcalibration.C +++ /dev/null @@ -1,198 +0,0 @@ -/** - * @file initTPCcalibration.C - * @brief A macro to initialize AliTPCTransform cluster transformation in AliTPCcalib - * - *
- * Usage:
- *
- * aliroot $ALICE_ROOT/GPU/TPCFastTransformation/macro/initTPCcalibration.C'("uri", runNumber, isMC)'
- * uri == "alien://Folder=/alice/data/2015/OCDB"
- * uri == "local://$HOME/alice/OCDB"
- * uri == "OCDBsim.root"
- *
- * 
- * - * Parameters:
- * - uri the OCDB URI. When ==nullptr, AliCDBManager::Instance().IsDefaultStorageSet() should be 1 - * - runNumber run number - * - isMC initialize for Monte Carlo - * - * @author sergey gorbunov - * - */ - -/* - aliroot - .L initTPCcalibration.C - initTPCcalibration("alien://Folder=/alice/data/2015/OCDB",246984,1) - initTPCcalibration("$ALICE_ROOT/../aliceEventsPbPb/OCDBsim.root",246984,1) - */ - -#include "AliTPCcalibDB.h" -#include "Riostream.h" -#include "TGeoGlobalMagField.h" -#include "AliGRPObject.h" -#include "AliGRPManager.h" -#include "AliGeomManager.h" -#include "AliTracker.h" -#include "AliCDBRunRange.h" -#include "AliCDBManager.h" -#include "AliCDBStorage.h" -#include "AliTPCRecoParam.h" -#include "AliCDBEntry.h" -#include "TMap.h" -#include "AliRawEventHeaderBase.h" -#include "AliEventInfo.h" -#include "AliRunInfo.h" -#include "AliTPCTransform.h" - -using namespace std; - -int32_t initTPCcalibration(const Char_t* cdbUri, int32_t runNumber, bool isMC) -{ - - // -------------------------------------- - // -- Setup CDB - // -------------------------------------- - - // cdbUri = "local://$ALICE_ROOT/OCDB"; - // cdbUri = "alien://Folder=/alice/data/2015/OCDB"; - // cdbUri = "OCDBsim.root"; - // cdbUri = "$ALICE_ROOT/../aliceEventsPbPb/OCDBsim.root"; - // cdbUri="/home/gorbunov/alice/aliceEventsPbPb/OCDB.root"; - - AliCDBManager* cdbm = AliCDBManager::Instance(); - if (!cdbm) { - cerr << "Error : Can not get AliCDBManager" << endl; - return -1; - } - - if (cdbUri != 0) { - TString storage = cdbUri; - cout << storage.Data() << endl; - if (storage.Contains(".root")) { - // local file - cout << "Snapshot mode" << endl; - cdbm->SetSnapshotMode(cdbUri); - cdbm->SetDefaultStorage("local://$ALICE_ROOT/OCDB"); - } else { - if (!storage.Contains("://")) { // add prefix to local path - storage = "local://"; - storage += cdbUri; - } - cdbm->SetDefaultStorage(storage); - } - } - - if (!cdbm->IsDefaultStorageSet()) { - cerr << "OCDB storage is not set!!" 
<< endl; - return -1; - } - - cdbm->SetRun(runNumber); - - AliGRPManager grp; - grp.ReadGRPEntry(); - grp.SetMagField(); - - const AliGRPObject* grpObj = grp.GetGRPData(); - - if (!grpObj) { - cerr << "No GRP object found!!" << endl; - return -1; - } - - if (!AliGeomManager::GetGeometry()) { - AliGeomManager::LoadGeometry(); - } - if (!AliGeomManager::GetGeometry()) { - cerr << "Can not initialise geometry" << endl; - return -1; - } - - AliTPCcalibDB* tpcCalib = AliTPCcalibDB::Instance(); - if (!tpcCalib) { - cerr << "AliTPCcalibDB does not exist" << endl; - return -1; - } - - const AliMagF* field = (AliMagF*)TGeoGlobalMagField::Instance()->GetField(); - - if (!field) { - cerr << "no magnetic field found " << endl; - return -1; - } - - tpcCalib->SetExBField(field); - tpcCalib->SetRun(runNumber); - tpcCalib->UpdateRunInformations(runNumber); - - if (!tpcCalib->GetTransform()) { - cerr << "No TPC transformation found" << endl; - return -1; - } - - // -- Get AliRunInfo variables - - AliRunInfo runInfo(grpObj->GetLHCState(), grpObj->GetBeamType(), grpObj->GetBeamEnergy(), grpObj->GetRunType(), grpObj->GetDetectorMask()); - AliEventInfo evInfo; - evInfo.SetEventType(AliRawEventHeaderBase::kPhysicsEvent); - - AliCDBEntry* entry = AliCDBManager::Instance()->Get("TPC/Calib/RecoParam"); - - if (!entry) { - cerr << "No TPC reco param entry found in data base" << endl; - return -1; - } - - TObject* aliRecoParamObj = entry->GetObject(); - if (!aliRecoParamObj) { - cerr << " Empty TPC reco param entry in data base" << endl; - return -1; - } - - AliRecoParam aliRecoParam; - - if (dynamic_cast(aliRecoParamObj)) { - // cout<<"\n\nSet reco param from AliHLTTPCClusterTransformation: TObjArray found \n"<(aliRecoParamObj)->Clone()); - aliRecoParam.AddDetRecoParamArray(1, copy); - } else if (dynamic_cast(aliRecoParamObj)) { - // cout<<"\n\nSet reco param from AliHLTTPCClusterTransformation: AliDetectorRecoParam found \n"<(aliRecoParamObj)->Clone(); - aliRecoParam.AddDetRecoParam(1, 
copy); - } else { - cerr << "Unknown format of the TPC Reco Param entry in the data base" << endl; - return -1; - } - - aliRecoParam.SetEventSpecie(&runInfo, evInfo, 0); - - // - - AliTPCRecoParam* recParam = (AliTPCRecoParam*)aliRecoParam.GetDetRecoParam(1); - - if (!recParam) { - cerr << "No TPC Reco Param entry found for the given event specification" << endl; - return -1; - } - - recParam = new AliTPCRecoParam(*recParam); - - uint32_t timeStamp = grpObj->GetTimeStart(); - - if (isMC && !recParam->GetUseCorrectionMap()) { - timeStamp = 0; - } - - tpcCalib->GetTransform()->SetCurrentRecoParam(recParam); - - AliTPCTransform* origTransform = tpcCalib->GetTransform(); - origTransform->SetCurrentTimeStamp(static_cast(timeStamp)); - - Double_t bz = AliTracker::GetBz(); - cout << "\n\nBz field is set to " << bz << ", time stamp is set to " << timeStamp << endl - << endl; - - return 0; -} diff --git a/GPU/TPCFastTransformation/alirootMacro/moveTPCFastTransform.C b/GPU/TPCFastTransformation/alirootMacro/moveTPCFastTransform.C deleted file mode 100644 index 8d34d973210d7..0000000000000 --- a/GPU/TPCFastTransformation/alirootMacro/moveTPCFastTransform.C +++ /dev/null @@ -1,69 +0,0 @@ -// $Id$ -/** - * @file moveTPCFastTransform.C - * @brief Example of creation of TPCFastTransform object and moving it to another place - * - *
- * Usage:
- *
- * aliroot
- *  gSystem->Load("libAliTPCFastTransformation")
- * .L initTPCcalibration.C++
- * .L createTPCFastTransform.C++
- * .x moveTPCFastTransform.C
- *
- * 
- * - * @author sergey gorbunov - * - */ - -#include "TPCFastTransform.h" - -using namespace std; -using namespace GPUCA_NAMESPACE::gpu; - -int32_t moveTPCFastTransform() -{ - - // gSystem->Load("libAliTPCFastTransformation"); - // gROOT->LoadMacro("initTPCcalibration.C++"); - // gROOT->LoadMacro("createTPCFastTransform.C++"); - - initTPCcalibration("alien://Folder=/alice/data/2015/OCDB", 246984, 1); - - TPCFastTransform fastTransform; - createTPCFastTransform(fastTransform); - - // make flat buffer external - - std::unique_ptr buff(fastTransform.releaseInternalBuffer()); - - // example of moving the transformation object to another place - - { - char* newBuff = new char[fastTransform.getFlatBufferSize()]; - char* newObj = new char[sizeof(TPCFastTransform)]; - - memcpy((void*)newObj, (void*)&fastTransform, sizeof(fastTransform)); - memcpy((void*)newBuff, (void*)buff.get(), fastTransform.getFlatBufferSize()); - - TPCFastTransform& newTransform = *(TPCFastTransform*)newObj; - newTransform.setActualBufferAddress(newBuff); - } - - // another example of moving the transformation object to another place - { - char* newBuff = new char[fastTransform.getFlatBufferSize()]; - char* newObj = new char[sizeof(TPCFastTransform)]; - - fastTransform.setFutureBufferAddress(newBuff); - - memcpy((void*)newObj, (void*)&fastTransform, sizeof(fastTransform)); - memcpy((void*)newBuff, (void*)buff.get(), fastTransform.getFlatBufferSize()); - - TPCFastTransform& newTransform = *(TPCFastTransform*)newObj; - } - - return 0; -} diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline1D.cxx b/GPU/TPCFastTransformation/devtools/IrregularSpline1D.cxx index 3a125628d898c..456447720b2c1 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline1D.cxx +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline1D.cxx @@ -24,7 +24,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; IrregularSpline1D::IrregularSpline1D() : FlatObject(), mNumberOfKnots(0), 
mNumberOfAxisBins(0), mBin2KnotMapOffset(0) { diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h b/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h index 62229c2afe087..39c090dd9bd94 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline1D.h @@ -27,7 +27,7 @@ #include #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -266,9 +266,7 @@ class IrregularSpline1D : public FlatObject int32_t mNumberOfAxisBins; ///< number of axis bins uint32_t mBin2KnotMapOffset; ///< pointer to (axis bin) -> (knot) map in mFlatBufferPtr array -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(IrregularSpline1D, 1); -#endif }; /// ==================================================== @@ -401,6 +399,6 @@ GPUdi() void IrregularSpline1D::correctEdges(T* data) const data[i] = c0 * data[i - 0] + c1 * data[i - 1] + c2 * data[i - 2] + c3 * data[i - 3]; } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline1DTest.C b/GPU/TPCFastTransformation/devtools/IrregularSpline1DTest.C index a5e2dfc2261eb..086a6ce636698 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline1DTest.C +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline1DTest.C @@ -42,7 +42,7 @@ typedef double myfloat; int32_t IrregularSpline1DTest() { - using namespace GPUCA_NAMESPACE::gpu; + using namespace o2::gpu; std::cout << "Test roundf(): " << std::endl; for (float x = 0.; x <= 1.; x += 0.1) { diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.cxx b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.cxx index fca4b2da84c2b..1e5a7f7cd80f7 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.cxx +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.cxx @@ -21,7 +21,7 @@ #include #endif -using namespace GPUCA_NAMESPACE::gpu; +using namespace o2::gpu; IrregularSpline2D3D::IrregularSpline2D3D() : 
FlatObject(), mGridU(), mGridV() { diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h index 026338b3380fa..d60326ddf76ab 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3D.h @@ -26,7 +26,7 @@ #include #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -194,9 +194,7 @@ class IrregularSpline2D3D : public FlatObject IrregularSpline1D mGridU; ///< grid for U axis IrregularSpline1D mGridV; ///< grid for V axis -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(IrregularSpline2D3D, 1); -#endif }; /// ==================================================== @@ -374,6 +372,6 @@ GPUdi() void IrregularSpline2D3D::getSplineVec(const float* correctedData, float #endif } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx index ffce9376fe60b..27500d12d9d5d 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx @@ -22,7 +22,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -443,4 +443,4 @@ double IrregularSpline2D3DCalibrator::getIntegralDeviationArea(const IrregularSp } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 diff --git a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h index 4b3ba8f787a79..8ba8a2c22fa5e 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.h @@ -24,7 +24,7 @@ #include #include -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -142,6 +142,6 @@ class 
IrregularSpline2D3DCalibrator }; } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h index b97903f7d5aac..2398ff4cd1cbc 100644 --- a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h @@ -20,7 +20,7 @@ #include "GPUCommonDef.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -174,6 +174,6 @@ inline void RegularSpline1D::correctEdges(T* data) const } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx index 2401a16dc6f16..076e4ee0ed780 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx @@ -17,7 +17,7 @@ #include "SemiregularSpline2D3D.h" -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -164,4 +164,4 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h index 4e14b69583d74..4da954c8096ac 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h @@ -28,7 +28,7 @@ #include #endif -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -183,9 +183,7 @@ class SemiregularSpline2D3D : public FlatObject int32_t mNumberOfKnots; int32_t mDataIndexMapOffset; -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(SemiregularSpline2D3D, 1); -#endif }; /// ==================================================== @@ -472,6 +470,6 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, 
floa #endif } } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/GPU/Utils/CMakeLists.txt b/GPU/Utils/CMakeLists.txt index c90ddb929e689..e4612e6a9490f 100644 --- a/GPU/Utils/CMakeLists.txt +++ b/GPU/Utils/CMakeLists.txt @@ -33,29 +33,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAVE_O2HEADERS) - - install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include/GPU) -endif() - -if(ALIGPU_BUILD_TYPE STREQUAL "ALIROOT") - add_definitions(-DGPUCA_ALIROOT_LIB) - - set(SRCS ${SRCS} ../GPUTracking/utils/EmptyFile.cxx) - - # Add a library to the project using the specified source files - add_library_tested(Ali${MODULE} SHARED ${SRCS}) - # Additional compilation flags - set_target_properties(Ali${MODULE} PROPERTIES COMPILE_FLAGS "") - - # System dependent: Modify the way the library is build - if(${CMAKE_SYSTEM} MATCHES Darwin) - set_target_properties(Ali${MODULE} - PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") - endif(${CMAKE_SYSTEM} MATCHES Darwin) - - # Installation - install(TARGETS Ali${MODULE} ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) - - install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include) + install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include/GPU) endif() diff --git a/GPU/Utils/FlatObject.h b/GPU/Utils/FlatObject.h index 99fcdab8a6cae..eba81a2ba06a2 100644 --- a/GPU/Utils/FlatObject.h +++ b/GPU/Utils/FlatObject.h @@ -30,7 +30,7 @@ //#define GPUCA_GPUCODE // uncomment to test "GPU" mode -namespace GPUCA_NAMESPACE +namespace o2 { namespace gpu { @@ -324,9 +324,7 @@ class FlatObject char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer char* mFlatBufferPtr = nullptr; //! 
Pointer to the flat buffer -#ifndef GPUCA_ALIROOT_LIB ClassDefNV(FlatObject, 1); -#endif }; /// ======================================================================================================== @@ -574,6 +572,6 @@ inline void FlatObject::setFutureBufferAddress(char* futureFlatBufferPtr) #endif //GPUCA_GPUCODE_DEVICE } // namespace gpu -} // namespace GPUCA_NAMESPACE +} // namespace o2 #endif diff --git a/cmake/O2RootMacroExclusionList.cmake b/cmake/O2RootMacroExclusionList.cmake index 6d286df72b12f..4b87da5b4e42e 100644 --- a/cmake/O2RootMacroExclusionList.cmake +++ b/cmake/O2RootMacroExclusionList.cmake @@ -45,11 +45,7 @@ list(APPEND O2_ROOT_MACRO_EXCLUSION_LIST GPU/GPUTracking/Standalone/tools/dump.C # Needs AliRoot ALiHLTSystem GPU/GPUTracking/Standalone/tools/dumpTRDClusterMatrices.C # Needs AliRoot AliCDBManager, AliGeomManager and AliTRDgeometry GPU/GPUTracking/TRDTracking/macros/checkDbgOutput.C # Needs AliRoot TStatToolkit - GPU/TPCFastTransformation/alirootMacro/createTPCFastTransform.C # Needs AliTPCCalibDB - GPU/TPCFastTransformation/alirootMacro/generateTPCDistortionNTupleAliRoot.C # Needs AliTPCCalibDB - GPU/TPCFastTransformation/alirootMacro/initTPCcalibration.C # Needs AliTPCCalibDB GPU/TPCFastTransformation/devtools/loadlibs.C # Special macro - GPU/TPCFastTransformation/alirootMacro/moveTPCFastTransform.C # Relies on initTPCcalibration.C GPU/GPUTracking/TRDTracking/macros/run_trd_tracker.C # Not yet ready Detectors/TOF/prototyping/ConvertRun2CalibrationToO2.C Generators/share/external/hijing.C From ff52fe20b1144ed633494d1c40ad1840687f4f6e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 20 Jan 2025 12:35:03 +0100 Subject: [PATCH 0041/1914] O2: Remove obsolete functionality to build standalone benchmark without O2 sources --- GPU/Common/CMakeLists.txt | 3 +- GPU/Common/GPUCommonDef.h | 5 +- GPU/Common/GPUDebugStreamer.h | 2 +- GPU/GPUTracking/Base/GPUConstantMem.h | 17 -- GPU/GPUTracking/Base/GPUParam.cxx | 4 +- 
GPU/GPUTracking/Base/GPUReconstruction.cxx | 14 -- .../Base/GPUReconstructionConvert.cxx | 22 --- .../Base/GPUReconstructionIncludesITS.h | 2 +- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 12 +- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 10 -- GPU/GPUTracking/Base/hip/CMakeLists.txt | 18 +-- GPU/GPUTracking/CMakeLists.txt | 123 +++++++-------- .../DataCompression/GPUTPCClusterStatistics.h | 5 - .../DataCompression/GPUTPCCompression.h | 13 -- .../DataCompression/GPUTPCDecompression.h | 13 -- .../GPUTPCDecompressionKernels.h | 9 -- .../DataTypes/CalibdEdxTrackTopologyPol.h | 6 +- .../DataTypes/CalibdEdxTrackTopologySpline.h | 4 - GPU/GPUTracking/DataTypes/GPUO2DataTypes.h | 5 - GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h | 148 ------------------ GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 2 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx | 2 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.h | 2 - GPU/GPUTracking/DataTypes/GPUTRDTrack.inc | 3 - GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h | 7 +- GPU/GPUTracking/DataTypes/GPUdEdxInfo.h | 7 - GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx | 2 - .../Definitions/GPUDefConstantsAndSettings.h | 2 +- .../GPUTrackingLinkDef_O2_DataTypes.h | 2 - GPU/GPUTracking/Global/GPUChainTracking.cxx | 27 +--- .../Global/GPUChainTrackingClusterizer.cxx | 4 - .../Global/GPUChainTrackingCompression.cxx | 10 +- .../GPUChainTrackingDebugAndProfiling.cxx | 8 - GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 12 -- .../Global/GPUChainTrackingRefit.cxx | 2 - .../Global/GPUChainTrackingSliceTracker.cxx | 2 - .../Global/GPUChainTrackingTRD.cxx | 6 - .../Global/GPUChainTrackingTransformation.cxx | 10 -- GPU/GPUTracking/Interface/GPUO2Interface.h | 3 - .../GPUO2InterfaceConfigurableParam.h | 3 - .../Interface/GPUO2InterfaceConfiguration.h | 3 - .../Interface/GPUO2InterfaceDisplay.h | 3 - GPU/GPUTracking/Interface/GPUO2InterfaceQA.h | 3 - .../Interface/GPUO2InterfaceRefit.h | 3 - GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 - 
GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 6 - GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 6 - .../GPUTPCTrackletConstructor.cxx | 4 - .../Standalone/Benchmark/standalone.cxx | 15 -- GPU/GPUTracking/Standalone/CMakeLists.txt | 66 ++++---- GPU/GPUTracking/Standalone/cmake/config.cmake | 1 - .../GPUTPCCFChargeMapFiller.h | 2 - .../GPUTPCCFCheckPadBaseline.h | 2 - .../TPCClusterFinder/GPUTPCCFClusterizer.h | 2 - .../TPCClusterFinder/GPUTPCCFDecodeZS.h | 4 - .../TPCClusterFinder/GPUTPCCFDeconvolution.h | 2 - .../TPCClusterFinder/GPUTPCCFGather.h | 2 - .../GPUTPCCFMCLabelFlattener.h | 2 - .../GPUTPCCFNoiseSuppression.h | 2 - .../TPCClusterFinder/GPUTPCCFPeakFinder.h | 2 - .../GPUTPCCFStreamCompaction.h | 2 - GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h | 73 --------- .../TRDTracking/GPUTRDInterfaces.h | 8 - GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 4 - .../TRDTracking/GPUTRDTrackerKernels.cxx | 2 - GPU/GPUTracking/dEdx/GPUdEdx.h | 15 -- .../display/render/GPUDisplayDraw.cxx | 13 +- .../display/render/GPUDisplayImportEvent.cxx | 10 -- GPU/GPUTracking/kernels.cmake | 9 -- GPU/GPUTracking/qa/GPUQA.cxx | 2 - GPU/TPCFastTransformation/CMakeLists.txt | 2 - GPU/Utils/CMakeLists.txt | 3 +- 72 files changed, 115 insertions(+), 713 deletions(-) delete mode 100644 GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 21253f7f9b815..8466035d74ef7 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -36,8 +36,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PUBLIC $ $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB - GPUCA_TPC_GEOMETRY_O2 GPUCA_HAVE_O2HEADERS) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) # cuda test, only compile if CUDA if(CUDA_ENABLED) diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index 62d665d36c285..ae8c48b500b69 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -49,11 
+49,8 @@ #elif defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM #endif -#if !defined(GPUCA_HAVE_O2HEADERS) && (defined(GPUCA_O2_LIB) || !defined(GPUCA_STANDALONE)) - #define GPUCA_HAVE_O2HEADERS -#endif -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) #define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ #else #define GPUCA_DEBUG_STREAMER_CHECK(...) diff --git a/GPU/Common/GPUDebugStreamer.h b/GPU/Common/GPUDebugStreamer.h index 10a94cfd9bfae..9d048a60bf96b 100644 --- a/GPU/Common/GPUDebugStreamer.h +++ b/GPU/Common/GPUDebugStreamer.h @@ -15,7 +15,7 @@ #define GPUDEBUGSTREAMER_H #include "GPUCommonDef.h" -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) +#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) #include "CommonUtils/DebugStreamer.h" #endif diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 2a9a15e10f491..3396219634587 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -23,17 +23,12 @@ #include "GPUTPCGMMerger.h" #include "GPUTRDTracker.h" -// Dummies for stuff not suppored in legacy code, or for what requires O2 headers while not available -#if defined(GPUCA_HAVE_O2HEADERS) #include "GPUTPCConvert.h" #include "GPUTPCCompression.h" #include "GPUTPCDecompression.h" #include "GPUITSFitter.h" #include "GPUTPCClusterFinder.h" #include "GPUTrackingRefit.h" -#else -#include "GPUO2FakeClasses.h" -#endif #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT #include "GPUKernelDebugOutput.h" @@ -52,9 +47,7 @@ struct GPUConstantMem { GPUTPCDecompression tpcDecompressor; GPUTPCGMMerger tpcMerger; GPUTRDTrackerGPU trdTrackerGPU; -#ifdef GPUCA_HAVE_O2HEADERS GPUTRDTracker trdTrackerO2; -#endif 
GPUTPCClusterFinder tpcClusterer[GPUCA_NSLICES]; GPUITSFitter itsFitter; GPUTrackingRefitProcessor trackingRefit; @@ -65,19 +58,10 @@ struct GPUConstantMem { GPUKernelDebugOutput debugOutput; #endif -#if defined(GPUCA_HAVE_O2HEADERS) template GPUd() auto& getTRDTracker(); -#else // GPUCA_HAVE_O2HEADERS - template - GPUdi() GPUTRDTrackerGPU& getTRDTracker() - { - return trdTrackerGPU; - } -#endif // !GPUCA_HAVE_O2HEADERS }; -#if defined(GPUCA_HAVE_O2HEADERS) template <> GPUdi() auto& GPUConstantMem::getTRDTracker<0>() { @@ -88,7 +72,6 @@ GPUdi() auto& GPUConstantMem::getTRDTracker<1>() { return trdTrackerO2; } -#endif union GPUConstantMemCopyable { #if !defined(__OPENCL__) || defined(__OPENCL_HOST__) diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index 6726c022e5ddb..d5c1149b0ab29 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -20,14 +20,12 @@ #include "GPUTPCGMPolynomialFieldManager.h" #include "GPUDataTypes.h" #include "GPUConstantMem.h" +#include "DetectorsBase/Propagator.h" using namespace o2::gpu; #include #include -#ifdef GPUCA_HAVE_O2HEADERS -#include "DetectorsBase/Propagator.h" -#endif #include "utils/qconfigrtc.h" diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 1fa8af0adffe8..5df69c416e858 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -94,9 +94,7 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos mMemoryScalers.reset(new GPUMemorySizeScalers); for (uint32_t i = 0; i < NSLICES; i++) { processors()->tpcTrackers[i].SetSlice(i); // TODO: Move to a better place -#ifdef GPUCA_HAVE_O2HEADERS processors()->tpcClusterer[i].mISlice = i; -#endif } #ifndef GPUCA_NO_ROOT mROOTDump = GPUROOTDumpCore::getAndCreate(); @@ -213,14 +211,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } GPUConfigDump::dumpConfig(¶m().rec, 
&mProcessingSettings, chTrk ? chTrk->GetQAConfig() : nullptr, chTrk ? chTrk->GetEventDisplayConfig() : nullptr, &mDeviceBackendSettings, &mRecoSteps); } -#ifndef GPUCA_HAVE_O2HEADERS - mRecoSteps.steps.setBits(RecoStep::ITSTracking, false); - mRecoSteps.steps.setBits(RecoStep::TRDTracking, false); - mRecoSteps.steps.setBits(RecoStep::TPCConversion, false); - mRecoSteps.steps.setBits(RecoStep::TPCCompression, false); - mRecoSteps.steps.setBits(RecoStep::TPCdEdx, false); - mProcessingSettings.createO2Output = false; -#endif mRecoSteps.stepsGPUMask &= mRecoSteps.steps; mRecoSteps.stepsGPUMask &= AvailableGPURecoSteps(); if (!IsGPU()) { @@ -258,7 +248,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() #ifndef GPUCA_NO_FAST_MATH GPUError("Warning, deterministicGPUReconstruction needs GPUCA_NO_FAST_MATH, otherwise results will never be deterministic!"); #endif -#ifdef GPUCA_HAVE_O2HEADERS mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; if (param().rec.tpc.looperInterpolationInExtraPass == -1) { @@ -267,7 +256,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.createO2Output > 1) { mProcessingSettings.createO2Output = 1; } -#endif } if (mProcessingSettings.deterministicGPUReconstruction && mProcessingSettings.debugLevel >= 6) { mProcessingSettings.nTPCClustererLanes = 1; @@ -347,7 +335,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mNStreams = std::max(mProcessingSettings.nStreams, 3); } -#ifdef GPUCA_HAVE_O2HEADERS if (mProcessingSettings.nTPCClustererLanes == -1) { mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSLICES, mProcessingSettings.ompKernels ? (mProcessingSettings.ompThreads >= 4 ? std::min(mProcessingSettings.ompThreads / 2, mProcessingSettings.ompThreads >= 32 ? 
GPUCA_NSLICES : 4) : 1) : mProcessingSettings.ompThreads)); } @@ -358,7 +345,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes); mProcessingSettings.nTPCClustererLanes = GPUCA_NSLICES; } -#endif if (mProcessingSettings.doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || mProcessingSettings.memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { GPUError("Must use double pipeline mode only with exactly one chain that must support it"); diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index a21bdcf28dd35..ca1c46766b9da 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -30,7 +30,6 @@ #include #include -#ifdef GPUCA_HAVE_O2HEADERS #include "clusterFinderDefs.h" #include "DataFormatsTPC/ZeroSuppression.h" #include "DataFormatsTPC/ZeroSuppressionLinkBased.h" @@ -40,7 +39,6 @@ #include "TPCBase/RDHUtils.h" #include "TPCBase/CRU.h" #include "DetectorsRaw/RDHUtils.h" -#endif using namespace o2::gpu; using namespace o2::tpc; @@ -49,7 +47,6 @@ using namespace std::string_literals; void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr* clusters, uint32_t* nClusters, const TPCFastTransform* transform, int32_t continuousMaxTimeBin) { -#ifdef GPUCA_HAVE_O2HEADERS memset(nClusters, 0, NSLICES * sizeof(nClusters[0])); uint32_t offset = 0; for (uint32_t i = 0; i < NSLICES; i++) { @@ -83,12 +80,10 @@ void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNative offset += native->nClusters[i][j]; } } -#endif } void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, uint32_t* nRawClusters) { -#ifdef 
GPUCA_HAVE_O2HEADERS memset((void*)&native, 0, sizeof(native)); for (uint32_t i = 0; i < NSLICES; i++) { for (uint32_t j = 0; j < nRawClusters[i]; j++) { @@ -115,12 +110,10 @@ void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAcce c.qTot = org.GetCharge(); } } -#endif } int32_t GPUReconstructionConvert::GetMaxTimeBin(const ClusterNativeAccess& native) { -#ifdef GPUCA_HAVE_O2HEADERS float retVal = 0; for (uint32_t i = 0; i < NSLICES; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { @@ -132,14 +125,10 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const ClusterNativeAccess& nativ } } return ceil(retVal); -#else - return 0; -#endif } int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutDigits& digits) { -#ifdef GPUCA_HAVE_O2HEADERS float retVal = 0; for (uint32_t i = 0; i < NSLICES; i++) { for (uint32_t k = 0; k < digits.nTPCDigits[i]; k++) { @@ -149,14 +138,10 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutDigits& di } } return ceil(retVal); -#else - return 0; -#endif } int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutZS& zspages) { -#ifdef GPUCA_HAVE_O2HEADERS float retVal = 0; for (uint32_t i = 0; i < NSLICES; i++) { int32_t firstHBF = zspages.slice[i].count[0] ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)zspages.slice[i].zsPtr[0][0]) : 0; @@ -182,9 +167,6 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutZS& zspage } } return ceil(retVal); -#else - return 0; -#endif } // ------------------------------------------------- TPC ZS ------------------------------------------------- @@ -1413,12 +1395,10 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr(const GPUTrackingInOutDigits&, std::unique_ptr*, uint32_t*, o2::raw::RawFileWriter*, const o2::InteractionRecord*, const GPUParam&, int32_t, bool, float, bool, std::function&)> digitsFilter); #ifdef GPUCA_O2_LIB template void GPUReconstructionConvert::RunZSEncoder(const DigitArray&, std::unique_ptr*, uint32_t*, o2::raw::RawFileWriter*, const o2::InteractionRecord*, const GPUParam&, int32_t, bool, float, bool, std::function&)> digitsFilter); #endif -#endif void GPUReconstructionConvert::RunZSEncoderCreateMeta(const uint64_t* buffer, const uint32_t* sizes, void** ptrs, GPUTrackingInOutZS* out) { @@ -1436,7 +1416,6 @@ void GPUReconstructionConvert::RunZSEncoderCreateMeta(const uint64_t* buffer, co void GPUReconstructionConvert::RunZSFilter(std::unique_ptr* buffers, const o2::tpc::Digit* const* ptrs, size_t* nsb, const size_t* ns, const GPUParam& param, bool zs12bit, float threshold) { -#ifdef GPUCA_HAVE_O2HEADERS for (uint32_t i = 0; i < NSLICES; i++) { if (buffers[i].get() != ptrs[i] || nsb != ns) { throw std::runtime_error("Not owning digits"); @@ -1459,7 +1438,6 @@ void GPUReconstructionConvert::RunZSFilter(std::unique_ptr* bu } nsb[i] = j; } -#endif } #ifdef GPUCA_O2_LIB diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludesITS.h b/GPU/GPUTracking/Base/GPUReconstructionIncludesITS.h index faf9e0afdf18b..5891891d9da24 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludesITS.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludesITS.h @@ -15,7 +15,7 @@ #ifndef GPURECONSTRUCTIONINCLDUESITS_H 
#define GPURECONSTRUCTIONINCLDUESITS_H -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(GPUCA_STANDALONE) +#if !defined(GPUCA_STANDALONE) #include "ITStracking/TrackerTraits.h" #include "ITStracking/VertexerTraits.h" #include "ITStracking/TimeFrame.h" diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 7c382e9b9374d..bd6b3b6e51928 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -191,10 +191,8 @@ if(NOT GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc") set_target_properties(${targetName} PROPERTIES LINKER_LANGUAGE CXX) endif() -if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR GPUCA_CONFIG_O2_EXTENSIONS) - add_library(GPUTrackingCUDAExternalProvider OBJECT GPUReconstructionCUDAExternalProvider.cu) - add_library(O2::GPUTrackingCUDAExternalProvider ALIAS GPUTrackingCUDAExternalProvider) - set_property(TARGET GPUTrackingCUDAExternalProvider PROPERTY CUDA_SEPARABLE_COMPILATION ON) - target_compile_definitions(GPUTrackingCUDAExternalProvider PRIVATE $) - target_include_directories(GPUTrackingCUDAExternalProvider PRIVATE $) -endif() +add_library(GPUTrackingCUDAExternalProvider OBJECT GPUReconstructionCUDAExternalProvider.cu) +add_library(O2::GPUTrackingCUDAExternalProvider ALIAS GPUTrackingCUDAExternalProvider) +set_property(TARGET GPUTrackingCUDAExternalProvider PROPERTY CUDA_SEPARABLE_COMPILATION ON) +target_compile_definitions(GPUTrackingCUDAExternalProvider PRIVATE $) +target_include_directories(GPUTrackingCUDAExternalProvider PRIVATE $) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 7114d37380afc..62ad57ae3497a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -20,9 +20,7 @@ #include "GPUParamRTC.h" #include "GPUDefMacros.h" #include -#ifdef GPUCA_HAVE_O2HEADERS #include "Framework/SHA1.h" -#endif #include 
#include #include @@ -55,7 +53,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len)); baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); -#ifdef GPUCA_HAVE_O2HEADERS char shasource[21], shaparam[21], shacmd[21], shakernels[21]; if (mProcessingSettings.rtc.cacheOutput) { o2::framework::internal::SHA1(shasource, _binary_GPUReconstructionCUDArtc_src_start, _binary_GPUReconstructionCUDArtc_src_len); @@ -63,7 +60,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) o2::framework::internal::SHA1(shacmd, baseCommand.c_str(), baseCommand.size()); o2::framework::internal::SHA1(shakernels, kernelsall.c_str(), kernelsall.size()); } -#endif nCompile = mProcessingSettings.rtc.compilePerKernel ? 
kernels.size() : 1; bool cacheLoaded = false; @@ -72,9 +68,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) if (mProcessingSettings.RTCcacheFolder != ".") { std::filesystem::create_directories(mProcessingSettings.RTCcacheFolder); } -#ifndef GPUCA_HAVE_O2HEADERS - throw std::runtime_error("Cannot use RTC cache without O2 headers"); -#else if (mProcessingSettings.rtc.cacheMutex) { mode_t mask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; fd = open((mProcessingSettings.RTCcacheFolder + "/cache.lock").c_str(), O_RDWR | O_CREAT | O_CLOEXEC, mask); @@ -153,7 +146,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) }; fclose(fp); } -#endif } if (!cacheLoaded) { if (mProcessingSettings.debugLevel >= 0) { @@ -203,7 +195,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) if (mProcessingSettings.debugLevel >= 0) { GPUInfo("RTC Compilation finished (%f seconds)", rtcTimer.GetCurrentElapsedTime()); } -#ifdef GPUCA_HAVE_O2HEADERS if (mProcessingSettings.rtc.cacheOutput) { FILE* fp = fopen((mProcessingSettings.RTCcacheFolder + "/rtc.cuda.cache").c_str(), "w+b"); if (fp == nullptr) { @@ -241,7 +232,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } fclose(fp); } -#endif } if (mProcessingSettings.rtc.cacheOutput && mProcessingSettings.rtc.cacheMutex) { if (lockf(fd, F_ULOCK, 0)) { diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 729ac3cf483e9..727019fa13755 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -249,14 +249,12 @@ if(NOT GPUCA_HIP_COMPILE_MODE STREQUAL "rdc") target_link_options(${targetName} PRIVATE $<$:-fno-gpu-rdc>) endif() -if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR GPUCA_CONFIG_O2_EXTENSIONS) - add_library(GPUTrackingHIPExternalProvider OBJECT ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPExternalProvider.hip) - 
add_library(O2::GPUTrackingHIPExternalProvider ALIAS GPUTrackingHIPExternalProvider) - target_compile_options(GPUTrackingHIPExternalProvider PRIVATE $<$:-fgpu-rdc>) - target_link_options(GPUTrackingHIPExternalProvider PRIVATE $<$:-fgpu-rdc>) - target_compile_definitions(GPUTrackingHIPExternalProvider PRIVATE $) - target_include_directories(GPUTrackingHIPExternalProvider PRIVATE $) - if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") - add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) - endif() +add_library(GPUTrackingHIPExternalProvider OBJECT ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPExternalProvider.hip) +add_library(O2::GPUTrackingHIPExternalProvider ALIAS GPUTrackingHIPExternalProvider) +target_compile_options(GPUTrackingHIPExternalProvider PRIVATE $<$:-fgpu-rdc>) +target_link_options(GPUTrackingHIPExternalProvider PRIVATE $<$:-fgpu-rdc>) +target_compile_definitions(GPUTrackingHIPExternalProvider PRIVATE $) +target_include_directories(GPUTrackingHIPExternalProvider PRIVATE $) +if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") + add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index aac689a2e01c8..5dd92d41db29b 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -115,7 +115,6 @@ set(HDRS_INSTALL DataTypes/GPUdEdxInfo.h DataTypes/GPUHostDataTypes.h DataTypes/GPUO2DataTypes.h - DataTypes/GPUO2FakeClasses.h DataTypes/GPUOutputControl.h DataTypes/GPUSettings.h DataTypes/GPUTPCGeometry.h @@ -159,67 +158,62 @@ set(HDRS_INSTALL DataTypes/GPUTPCGMPolynomialField.h ) -# Sources for O2 and for Standalone if requested in config file -if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR ALIGPU_BUILD_TYPE STREQUAL "Standalone") - set(SRCS_NO_CINT ${SRCS_NO_CINT} display/GPUDisplayInterface.cxx) -endif() -if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR GPUCA_CONFIG_O2_EXTENSIONS) - 
set(SRCS_NO_CINT - ${SRCS_NO_CINT} - Global/GPUChainITS.cxx - ITS/GPUITSFitter.cxx - ITS/GPUITSFitterKernels.cxx - dEdx/GPUdEdx.cxx - TPCConvert/GPUTPCConvert.cxx - TPCConvert/GPUTPCConvertKernel.cxx - DataCompression/GPUTPCCompression.cxx - DataCompression/GPUTPCCompressionTrackModel.cxx - DataCompression/GPUTPCCompressionKernels.cxx - DataCompression/GPUTPCDecompression.cxx - DataCompression/GPUTPCDecompressionKernels.cxx - DataCompression/TPCClusterDecompressor.cxx - DataCompression/GPUTPCClusterStatistics.cxx - TPCClusterFinder/GPUTPCClusterFinder.cxx - TPCClusterFinder/ClusterAccumulator.cxx - TPCClusterFinder/MCLabelAccumulator.cxx - TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx - TPCClusterFinder/GPUTPCCFStreamCompaction.cxx - TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx - TPCClusterFinder/GPUTPCCFPeakFinder.cxx - TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx - TPCClusterFinder/GPUTPCCFClusterizer.cxx - TPCClusterFinder/GPUTPCCFDeconvolution.cxx - TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx - TPCClusterFinder/GPUTPCCFDecodeZS.cxx - TPCClusterFinder/GPUTPCCFGather.cxx - Refit/GPUTrackingRefit.cxx - Refit/GPUTrackingRefitKernel.cxx - Merger/GPUTPCGMO2Output.cxx) - - set(SRCS_DATATYPES - ${SRCS_DATATYPES} - DataTypes/TPCPadGainCalib.cxx - DataTypes/TPCPadBitMap.cxx - DataTypes/TPCZSLinkMapping.cxx - DataTypes/CalibdEdxContainer.cxx - DataTypes/CalibdEdxTrackTopologyPol.cxx - DataTypes/CalibdEdxTrackTopologySpline.cxx - DataTypes/GPUTRDTrackO2.cxx) - - set(SRCS_NO_H ${SRCS_NO_H} - TPCClusterFinder/GPUTPCClusterFinderDump.cxx) - - set(HDRS_INSTALL ${HDRS_INSTALL} - ITS/GPUITSTrack.h - TPCClusterFinder/Array2D.h - TPCClusterFinder/CfConsts.h - TPCClusterFinder/CfFragment.h - TPCClusterFinder/CfUtils.h - TPCClusterFinder/ChargePos.h - Definitions/clusterFinderDefs.h - TPCClusterFinder/PackedCharge.h - TPCClusterFinder/GPUTPCCFChainContext.h) -endif() +set(SRCS_NO_CINT ${SRCS_NO_CINT} display/GPUDisplayInterface.cxx) +set(SRCS_NO_CINT + ${SRCS_NO_CINT} + 
Global/GPUChainITS.cxx + ITS/GPUITSFitter.cxx + ITS/GPUITSFitterKernels.cxx + dEdx/GPUdEdx.cxx + TPCConvert/GPUTPCConvert.cxx + TPCConvert/GPUTPCConvertKernel.cxx + DataCompression/GPUTPCCompression.cxx + DataCompression/GPUTPCCompressionTrackModel.cxx + DataCompression/GPUTPCCompressionKernels.cxx + DataCompression/GPUTPCDecompression.cxx + DataCompression/GPUTPCDecompressionKernels.cxx + DataCompression/TPCClusterDecompressor.cxx + DataCompression/GPUTPCClusterStatistics.cxx + TPCClusterFinder/GPUTPCClusterFinder.cxx + TPCClusterFinder/ClusterAccumulator.cxx + TPCClusterFinder/MCLabelAccumulator.cxx + TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx + TPCClusterFinder/GPUTPCCFStreamCompaction.cxx + TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx + TPCClusterFinder/GPUTPCCFPeakFinder.cxx + TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx + TPCClusterFinder/GPUTPCCFClusterizer.cxx + TPCClusterFinder/GPUTPCCFDeconvolution.cxx + TPCClusterFinder/GPUTPCCFMCLabelFlattener.cxx + TPCClusterFinder/GPUTPCCFDecodeZS.cxx + TPCClusterFinder/GPUTPCCFGather.cxx + Refit/GPUTrackingRefit.cxx + Refit/GPUTrackingRefitKernel.cxx + Merger/GPUTPCGMO2Output.cxx) + +set(SRCS_DATATYPES + ${SRCS_DATATYPES} + DataTypes/TPCPadGainCalib.cxx + DataTypes/TPCPadBitMap.cxx + DataTypes/TPCZSLinkMapping.cxx + DataTypes/CalibdEdxContainer.cxx + DataTypes/CalibdEdxTrackTopologyPol.cxx + DataTypes/CalibdEdxTrackTopologySpline.cxx + DataTypes/GPUTRDTrackO2.cxx) + +set(SRCS_NO_H ${SRCS_NO_H} + TPCClusterFinder/GPUTPCClusterFinderDump.cxx) + +set(HDRS_INSTALL ${HDRS_INSTALL} + ITS/GPUITSTrack.h + TPCClusterFinder/Array2D.h + TPCClusterFinder/CfConsts.h + TPCClusterFinder/CfFragment.h + TPCClusterFinder/CfUtils.h + TPCClusterFinder/ChargePos.h + Definitions/clusterFinderDefs.h + TPCClusterFinder/PackedCharge.h + TPCClusterFinder/GPUTPCCFChainContext.h) if(ALIGPU_BUILD_TYPE STREQUAL "O2") set(SRCS_DATATYPES @@ -278,7 +272,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::TPCFastTransformation PRIVATE_LINK_LIBRARIES 
O2::DataFormatsTPC SOURCES ${SRCS_DATATYPES}) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAVE_O2HEADERS) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) o2_target_root_dictionary(GPUDataTypes HEADERS ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL} LINKDEF GPUTrackingLinkDef_O2_DataTypes.h) @@ -323,8 +317,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${targetName} PRIVATE $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB - GPUCA_TPC_GEOMETRY_O2 GPUCA_HAVE_O2HEADERS) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) o2_target_root_dictionary(${MODULE} HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL} diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h index 4728f97ef704a..7c873fa67f522 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h @@ -29,10 +29,6 @@ namespace o2::gpu class GPUTPCClusterStatistics { public: -#ifndef GPUCA_HAVE_O2HEADERS - void RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param){}; - void Finish(){}; -#else static constexpr uint32_t NSLICES = GPUCA_NSLICES; void RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param); void Finish(); @@ -87,7 +83,6 @@ class GPUTPCClusterStatistics double mEntropy = 0; double mHuffman = 0; size_t mNTotalClusters = 0; -#endif }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index 3244a0ab2640a..9a5d6436f06af 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -19,20 +19,7 @@ 
#include "GPUProcessor.h" #include "GPUCommonMath.h" #include "GPUParam.h" - -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/CompressedClusters.h" -#else -namespace o2::tpc -{ -struct CompressedClustersPtrs { -}; -struct CompressedClusters { -}; -struct CompressedClustersFlat { -}; -} // namespace o2::tpc -#endif namespace o2::gpu { diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h index c15564426f464..d39eba6a08e2d 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h @@ -20,20 +20,7 @@ #include "GPUCommonMath.h" #include "GPUParam.h" #include "GPUO2DataTypes.h" - -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/CompressedClusters.h" -#else -namespace o2::tpc -{ -struct CompressedClustersPtrs { -}; -struct CompressedClusters { -}; -struct CompressedClustersFlat { -}; -} // namespace o2::tpc -#endif namespace o2::gpu { diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h index 52ec4c0eef403..0bd69653fdbd4 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h @@ -19,16 +19,7 @@ #include "GPUO2DataTypes.h" #include "GPUParam.h" #include "GPUConstantMem.h" - -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/CompressedClusters.h" -#else -namespace o2::tpc -{ -struct CompressedClusters { -}; -} // namespace o2::tpc -#endif namespace o2::gpu { diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h index 20d53ff80a9c8..a50cf63698a78 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.h @@ -19,9 +19,7 @@ #include "NDPiecewisePolynomials.h" #include "GPUCommonDef.h" #include "FlatObject.h" -#ifdef 
GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/Defs.h" -#endif #ifndef GPUCA_ALIGPUCODE #include #endif @@ -60,7 +58,6 @@ class CalibdEdxTrackTopologyPol : public o2::gpu::FlatObject ~CalibdEdxTrackTopologyPol() = default; #endif -#ifdef GPUCA_HAVE_O2HEADERS /// \return returns the track topology correction /// \param region region of the TPC /// \param charge correction for maximum or total charge @@ -82,7 +79,6 @@ class CalibdEdxTrackTopologyPol : public o2::gpu::FlatObject const float corr = (chargeT == ChargeType::Tot) ? getCorrectionqTot(region, tanTheta, sinPhi, z, threshold, charge) : getCorrectionqMax(region, tanTheta, sinPhi, z, relPad, relTime); return corr; } -#endif /// \return returns the track topology correction for qTot /// \param region region of the TPC @@ -128,7 +124,7 @@ class CalibdEdxTrackTopologyPol : public o2::gpu::FlatObject /// \param region region of the scaling factor GPUd() float getScalingFactorqMax(const int32_t region) const { return mScalingFactorsqMax[region]; }; -#if !defined(GPUCA_GPUCODE) && defined(GPUCA_HAVE_O2HEADERS) +#if !defined(GPUCA_GPUCODE) /// \return returns polynomial for qTot /// \param region region of the TPC const auto& getPolyqTot(const int32_t region) const { return mCalibPolsqTot[region]; } diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h index 9d7cc1d3b8dfc..106bbe93c27f5 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologySpline.h @@ -20,9 +20,7 @@ #include "FlatObject.h" #include "Spline.h" #include "GPUCommonRtypes.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/Defs.h" -#endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) // code invisible on GPU and in the standalone compilation #include @@ -185,7 +183,6 @@ class CalibdEdxTrackTopologySpline : public o2::gpu::FlatObject return mScalingFactorsqTot[region] * 
mCalibSplinesqTot[region].interpolate(x); }; -#ifdef GPUCA_HAVE_O2HEADERS /// \return returns the track topology correction /// \param region region of the TPC /// \param charge correction for maximum or total charge @@ -199,7 +196,6 @@ class CalibdEdxTrackTopologySpline : public o2::gpu::FlatObject /// \param charge correction for maximum or total charge /// \param x coordinates where the correction is evaluated GPUd() float getCorrection(const int32_t region, const ChargeType charge, const float x[/*inpXdim*/]) const { return (charge == ChargeType::Tot) ? mCalibSplinesqTot[region].interpolate(x) : mCalibSplinesqMax[region].interpolate(x); } -#endif /// \param region index of the spline (region) /// \return returns the spline for qMax diff --git a/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h b/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h index 810e4dd58ca0e..91a72fb0b6031 100644 --- a/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUO2DataTypes.h @@ -17,16 +17,11 @@ // Pull in several O2 headers with basic data types, or load a header with empty fake classes if O2 headers not available -#if defined(GPUCA_HAVE_O2HEADERS) #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/Digit.h" #include "DetectorsBase/MatLayerCylSet.h" #include "DetectorsBase/Propagator.h" #include "TRDBase/GeometryFlat.h" -#else -#include "GPUO2FakeClasses.h" -#endif - #include "GPUdEdxInfo.h" #endif diff --git a/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h b/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h deleted file mode 100644 index 40222021126fd..0000000000000 --- a/GPU/GPUTracking/DataTypes/GPUO2FakeClasses.h +++ /dev/null @@ -1,148 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUO2FakeClasses.h -/// \author David Rohr - -#ifndef O2_GPU_GPUO2FAKECLASSES_H -#define O2_GPU_GPUO2FAKECLASSES_H - -#include "GPUCommonDef.h" -#include "GPUDataTypes.h" - -// These are some dummies of O2 classes needed by AliGPU, to be used when O2 header unavailable - -namespace o2 -{ -class MCCompLabel -{ -}; -namespace gpu -{ -} // namespace gpu -namespace tpc -{ -class Digit -{ -}; -class TrackTPC -{ -}; -class CalibdEdxContainer -{ - public: - static bool isDead(int32_t slice, int32_t row, int32_t pad) { return false; } -}; -struct ClusterNative { - GPUd() static float getTime() { return 0.f; } - GPUd() static float getPad() { return 0.f; } - GPUd() static int32_t getFlags() { return 0; } - GPUd() static void setTimeFlags(float t, int32_t f) {} - GPUd() static void setPad(float p) {} - GPUd() static void setSigmaTime(float s) {} - GPUd() static void setSigmaPad(float s) {} - - uint8_t qTot, qMax; -}; -struct ClusterNativeAccess { - const ClusterNative* clustersLinear; - const ClusterNative* clusters[GPUCA_NSLICES][GPUCA_ROW_COUNT]; - uint32_t nClusters[GPUCA_NSLICES][GPUCA_ROW_COUNT]; - uint32_t nClustersSector[GPUCA_NSLICES]; - uint32_t clusterOffset[GPUCA_NSLICES][GPUCA_ROW_COUNT]; - uint32_t nClustersTotal; - void setOffsetPtrs() {} -}; -#ifndef __OPENCL__ -struct TPCZSHDR { - static const uint32_t TPC_ZS_PAGE_SIZE = 8192; -}; -#endif -} // namespace tpc -namespace base -{ -struct MatBudget { -}; -class MatLayerCylSet -{ -}; -} // namespace base -namespace trd -{ -class GeometryFlat -{ -}; -} // namespace trd -namespace dataformats -{ -template -class ConstMCTruthContainerView -{ -}; -template 
-class ConstMCTruthContainer -{ -}; -} // namespace dataformats -} // namespace o2 - -namespace o2 -{ -namespace gpu -{ -class TPCCFCalibration -{ -}; -class TPCdEdxCalibrationSplines -{ -}; -class GPUFakeEmpty -{ -}; -class GPUITSFitter -{ -}; -class GPUTPCConvert -{ -}; -class GPUTPCCompression -{ - public: - GPUFakeEmpty* mOutput; -}; -class GPUTPCDecompression -{ -}; -class GPUTPCClusterFinder -{ -}; -class GPUTrackingRefitProcessor -{ -}; -struct GPUTPCCFChainContext { -}; -struct GPUTPCDigitsMCInput { -}; -struct TPCPadGainCalib { -}; -#ifndef __OPENCL__ -struct GPUParam; -class GPUTPCClusterStatistics -{ - public: - void Finish() {} - void RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const GPUFakeEmpty* clustersCompressed, const GPUParam& param) {} -}; -#endif -} // namespace gpu -} // namespace o2 - -#endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index 68a85e36c97bc..da9a66fa57301 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -17,7 +17,7 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_NSLICES) && !defined(GPUCA_ROW_COUNT) && defined(GPUCA_HAVE_O2HEADERS) +#if !defined(GPUCA_NSLICES) && !defined(GPUCA_ROW_COUNT) #include "DataFormatsTPC/Constants.h" #define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR #define GPUCA_ROW_COUNT o2::tpc::constants::MAXGLOBALPADROW diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx index d9b7e2f3e829e..54f28ec21d30e 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx @@ -23,7 +23,7 @@ namespace o2 { namespace gpu { -#if defined(GPUCA_HAVE_O2HEADERS) && !defined(GPUCA_O2_LIB) // Instantiate O2 track version, for O2 this happens in GPUTRDTrackO2.cxx +#if !defined(GPUCA_O2_LIB) // Instantiate O2 track version, for O2 this happens in GPUTRDTrackO2.cxx template class GPUTRDTrack_t>; #endif 
template class GPUTRDTrack_t>; // Always instatiate GM track version diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 7327eca1bfa56..66cf31fc8e8d0 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -36,9 +36,7 @@ class GlobalTrackID; } // namespace o2 //_____________________________________________________________________________ -#if (!defined(GPUCA_STANDALONE)) || defined(GPUCA_HAVE_O2HEADERS) #include "GPUTRDInterfaceO2Track.h" -#endif namespace o2 { diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc b/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc index 48fd3fc42d22f..d7109620eba29 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.inc @@ -39,7 +39,6 @@ GPUd() void GPUTRDTrack_t::initialize() } } -#if defined(GPUCA_HAVE_O2HEADERS) #include "ReconstructionDataFormats/TrackTPCITS.h" #include "DataFormatsTPC/TrackTPC.h" @@ -55,8 +54,6 @@ GPUd() GPUTRDTrack_t::GPUTRDTrack_t(const o2::tpc::TrackTPC& t) : T(t) initialize(); } -#endif - template GPUd() GPUTRDTrack_t::GPUTRDTrack_t(const GPUTRDTrack_t& t) : T(t), mChi2(t.mChi2), mSignal(t.mSignal), mRefGlobalTrackId(t.mRefGlobalTrackId), mCollisionId(t.mCollisionId), mFlags(t.mFlags), mIsCrossingNeighbor(t.mIsCrossingNeighbor) diff --git a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h index 7c610403ad62e..01b61928be922 100644 --- a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h +++ b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h @@ -16,11 +16,10 @@ #define GPUTRIGGEROUTPUTS_H #include "GPUCommonDef.h" +#include "DataFormatsTPC/ZeroSuppression.h" + #include #include -#ifdef GPUCA_HAVE_O2HEADERS -#include "DataFormatsTPC/ZeroSuppression.h" -#endif namespace o2 { @@ -28,7 +27,6 @@ namespace gpu { struct GPUTriggerOutputs { -#ifdef GPUCA_HAVE_O2HEADERS struct hasher { size_t operator()(const o2::tpc::TriggerInfoDLBZS& key) const 
{ @@ -52,7 +50,6 @@ struct GPUTriggerOutputs { std::unordered_set triggers; static_assert(sizeof(o2::tpc::TriggerInfoDLBZS) % sizeof(uint32_t) == 0); -#endif }; } // namespace gpu diff --git a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h index b4af969de46b3..f3a7f4b2a0724 100644 --- a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h +++ b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h @@ -15,20 +15,13 @@ #ifndef GPUDEDXINFO_H #define GPUDEDXINFO_H -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/dEdxInfo.h" -#endif namespace o2 { namespace gpu { -#ifdef GPUCA_HAVE_O2HEADERS using GPUdEdxInfo = o2::tpc::dEdxInfo; -#else -struct GPUdEdxInfo { -}; -#endif } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx index e513162aad87c..cdd0e4879f949 100644 --- a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx +++ b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx @@ -13,7 +13,6 @@ /// \author David Rohr #include "GPUTPCClusterFilter.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/ClusterNative.h" using namespace o2::gpu; @@ -30,4 +29,3 @@ bool GPUTPCClusterFilter::filter(uint32_t sector, uint32_t row, o2::tpc::Cluster // Note that this function might be called multiple times for the same cluster, in which case the final modified cl reference goes into the output clusters. return true; } -#endif diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index a5fc85eb80713..75b0169a660a5 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -51,7 +51,7 @@ #if defined(GPUCA_NSLICES) || defined(GPUCA_ROW_COUNT) #error GPUCA_NSLICES or GPUCA_ROW_COUNT already defined, do not include GPUTPCGeometry.h before! 
#endif -#if defined(GPUCA_HAVE_O2HEADERS) && defined(GPUCA_TPC_GEOMETRY_O2) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) +#if defined(GPUCA_TPC_GEOMETRY_O2) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) //Use definitions from the O2 headers if available for nicer code and type safety #include "DataFormatsTPC/Constants.h" #define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR diff --git a/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h b/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h index 6ed4e036c6597..ab60827655a43 100644 --- a/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h +++ b/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h @@ -18,11 +18,9 @@ #pragma link off all classes; #pragma link off all functions; -#ifdef GPUCA_HAVE_O2HEADERS #pragma link C++ class o2::gpu::trackInterface < o2::track::TrackParCov> + ; #pragma link C++ class o2::gpu::GPUTRDTrack_t < o2::gpu::trackInterface < o2::track::TrackParCov>> + ; #pragma link C++ class std::vector < o2::gpu::GPUTRDTrack_t < o2::gpu::trackInterface < o2::track::TrackParCov>>> + ; -#endif #ifdef GPUCA_O2_LIB #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsO2 + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsRec + ; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 161dbcd8689d9..1aa5f9ca0dad8 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -12,10 +12,8 @@ /// \file GPUChainTracking.cxx /// \author David Rohr -#ifdef GPUCA_HAVE_O2HEADERS #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" -#endif #include #include @@ -44,15 +42,11 @@ #include "GPUNewCalibValues.h" #include "GPUTriggerOutputs.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "GPUTPCClusterStatistics.h" #include "GPUHostDataTypes.h" #include "GPUTPCCFChainContext.h" #include "GPUTrackingRefit.h" #include "CalibdEdxContainer.h" 
-#else -#include "GPUO2FakeClasses.h" -#endif #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" @@ -95,7 +89,6 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() if (GetRecoSteps() & RecoStep::TRDTracking) { mRec->RegisterGPUProcessor(&processors()->trdTrackerGPU, GetRecoStepsGPU() & RecoStep::TRDTracking); } -#ifdef GPUCA_HAVE_O2HEADERS if (GetRecoSteps() & RecoStep::TRDTracking) { mRec->RegisterGPUProcessor(&processors()->trdTrackerO2, GetRecoStepsGPU() & RecoStep::TRDTracking); } @@ -116,7 +109,6 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() if (GetRecoSteps() & RecoStep::Refit) { mRec->RegisterGPUProcessor(&processors()->trackingRefit, GetRecoStepsGPU() & RecoStep::Refit); } -#endif #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT mRec->RegisterGPUProcessor(&processors()->debugOutput, true); #endif @@ -141,7 +133,6 @@ void GPUChainTracking::RegisterGPUProcessors() mRec->RegisterGPUDeviceProcessor(&processorsShadow()->trdTrackerGPU, &processors()->trdTrackerGPU); } -#ifdef GPUCA_HAVE_O2HEADERS memcpy((void*)&processorsShadow()->trdTrackerO2, (const void*)&processors()->trdTrackerO2, sizeof(processors()->trdTrackerO2)); if (GetRecoStepsGPU() & RecoStep::TRDTracking) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->trdTrackerO2, &processors()->trdTrackerO2); @@ -163,7 +154,6 @@ void GPUChainTracking::RegisterGPUProcessors() if (GetRecoStepsGPU() & RecoStep::Refit) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->trackingRefit, &processors()->trackingRefit); } -#endif #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT mRec->RegisterGPUDeviceProcessor(&processorsShadow()->debugOutput, &processors()->debugOutput); #endif @@ -434,7 +424,6 @@ void GPUChainTracking::UpdateGPUCalibObjects(int32_t stream, const GPUCalibObjec mFlatObjectsShadow.mCalibObjects.fastTransformHelper->setCorrMapRef(mFlatObjectsShadow.mCalibObjects.fastTransformRef); 
mFlatObjectsShadow.mCalibObjects.fastTransformHelper->setCorrMapMShape(mFlatObjectsShadow.mCalibObjects.fastTransformMShape); } -#ifdef GPUCA_HAVE_O2HEADERS if (processors()->calibObjects.dEdxCalibContainer && (ptrMask == nullptr || ptrMask->dEdxCalibContainer)) { memcpy((void*)mFlatObjectsShadow.mCalibObjects.dEdxCalibContainer, (const void*)processors()->calibObjects.dEdxCalibContainer, sizeof(*processors()->calibObjects.dEdxCalibContainer)); memcpy((void*)mFlatObjectsShadow.mdEdxSplinesBuffer, (const void*)processors()->calibObjects.dEdxCalibContainer->getFlatBufferPtr(), processors()->calibObjects.dEdxCalibContainer->getFlatBufferSize()); @@ -464,7 +453,6 @@ void GPUChainTracking::UpdateGPUCalibObjects(int32_t stream, const GPUCalibObjec mFlatObjectsShadow.mCalibObjects.o2Propagator->setGPUField(&processorsDevice()->param.polynomialField); mFlatObjectsShadow.mCalibObjects.o2Propagator->setMatLUT(mFlatObjectsShadow.mCalibObjects.matLUT); } -#endif TransferMemoryResourceLinkToGPU(RecoStep::NoRecoStep, mFlatObjectsShadow.mMemoryResFlat, stream); memcpy((void*)&processorsShadow()->calibObjects, (void*)&mFlatObjectsDevice.mCalibObjects, sizeof(mFlatObjectsDevice.mCalibObjects)); } @@ -541,7 +529,6 @@ void* GPUChainTracking::GPUTrackingFlatObjects::SetPointersFlatObjects(void* mem if (mChainTracking->processors()->calibObjects.tpcZSLinkMapping) { computePointerWithAlignment(mem, mCalibObjects.tpcZSLinkMapping, 1); } -#ifdef GPUCA_HAVE_O2HEADERS char* dummyPtr; if (mChainTracking->processors()->calibObjects.matLUT) { computePointerWithAlignment(mem, mCalibObjects.matLUT, 1); @@ -560,7 +547,6 @@ void* GPUChainTracking::GPUTrackingFlatObjects::SetPointersFlatObjects(void* mem if (!mChainTracking->processors()->calibObjects.o2Propagator) { mCalibObjects.o2Propagator = nullptr; // Always reserve memory for o2::Propagator, since it may be propagatred only during run() not during init(). 
} -#endif if (!mChainTracking->mUpdateNewCalibObjects) { mem = (char*)mem + mChainTracking->GetProcessingSettings().calibObjectsExtraMemorySize; // TODO: Fixme and do proper dynamic allocation } @@ -648,11 +634,9 @@ int32_t GPUChainTracking::DoQueuedUpdates(int32_t stream, bool updateSlave) retVal = 1; } if (mUpdateNewCalibObjects) { -#ifdef GPUCA_HAVE_O2HEADERS if (mNewCalibObjects->o2Propagator && ((mNewCalibObjects->o2Propagator->getGPUField() != nullptr) ^ GetProcessingSettings().o2PropagatorUseGPUField)) { GPUFatal("GPU magnetic field for propagator requested, but received an O2 propagator without GPU field"); } -#endif void* const* pSrc = (void* const*)mNewCalibObjects.get(); void** pDst = (void**)&processors()->calibObjects; for (uint32_t i = 0; i < sizeof(processors()->calibObjects) / sizeof(void*); i++) { @@ -661,15 +645,12 @@ int32_t GPUChainTracking::DoQueuedUpdates(int32_t stream, bool updateSlave) } } if (mNewCalibObjects->trdGeometry && (GetRecoSteps() & GPUDataTypes::RecoStep::TRDTracking)) { -#ifdef GPUCA_HAVE_O2HEADERS if (GetProcessingSettings().trdTrackModelO2) { processors()->trdTrackerO2.UpdateGeometry(); if (mRec->IsGPU()) { TransferMemoryResourceLinkToGPU(RecoStep::NoRecoStep, processors()->trdTrackerO2.MemoryPermanent(), stream); } - } else -#endif - { + } else { processors()->trdTrackerGPU.UpdateGeometry(); if (mRec->IsGPU()) { TransferMemoryResourceLinkToGPU(RecoStep::NoRecoStep, processors()->trdTrackerGPU.MemoryPermanent(), stream); @@ -791,11 +772,9 @@ int32_t GPUChainTracking::RunChain() } } -#ifdef GPUCA_HAVE_O2HEADERS if (GetProcessingSettings().trdTrackModelO2 ? 
runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking) : runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking)) { return 1; } -#endif if (runRecoStep(RecoStep::Refit, &GPUChainTracking::RunRefit)) { return 1; @@ -826,12 +805,10 @@ int32_t GPUChainTracking::RunChain() int32_t GPUChainTracking::RunChainFinalize() { -#ifdef GPUCA_HAVE_O2HEADERS if (mIOPtrs.clustersNative && (GetRecoSteps() & RecoStep::TPCCompression) && GetProcessingSettings().runCompressionStatistics) { CompressedClusters c = *mIOPtrs.tpcCompressedClusters; mCompressionStatistics->RunStatistics(mIOPtrs.clustersNative, &c, param()); } -#endif if (GetProcessingSettings().outputSanityCheck) { SanityCheck(); @@ -1008,9 +985,7 @@ const o2::base::Propagator* GPUChainTracking::GetDeviceO2Propagator() void GPUChainTracking::SetO2Propagator(const o2::base::Propagator* prop) { processors()->calibObjects.o2Propagator = prop; -#ifdef GPUCA_HAVE_O2HEADERS if ((prop->getGPUField() != nullptr) ^ GetProcessingSettings().o2PropagatorUseGPUField) { GPUFatal("GPU magnetic field for propagator requested, but received an O2 propagator without GPU field"); } -#endif } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index f17c2f7de4720..bec61d6b76f1e 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -24,7 +24,6 @@ #ifdef GPUCA_O2_LIB #include "CommonDataFormat/InteractionRecord.h" #endif -#ifdef GPUCA_HAVE_O2HEADERS #include "GPUTriggerOutputs.h" #include "GPUHostDataTypes.h" #include "GPUTPCCFChainContext.h" @@ -33,9 +32,6 @@ #include "DataFormatsTPC/Digit.h" #include "DataFormatsTPC/Constants.h" #include "TPCBase/RDHUtils.h" -#else -#include "GPUO2FakeClasses.h" -#endif #include "utils/strtag.h" diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 
9878ad032cd3b..f3f3627573339 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -16,20 +16,17 @@ #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUTrackingInputProvider.h" -#include - -#ifdef GPUCA_HAVE_O2HEADERS #include "GPUTPCCFChainContext.h" #include "TPCClusterDecompressor.h" -#endif #include "utils/strtag.h" +#include + using namespace o2::gpu; using namespace o2::tpc; int32_t GPUChainTracking::RunTPCCompression() { -#ifdef GPUCA_HAVE_O2HEADERS mRec->PushNonPersistentMemory(qStr2Tag("TPCCOMPR")); RecoStep myStep = RecoStep::TPCCompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCCompression; @@ -199,13 +196,11 @@ int32_t GPUChainTracking::RunTPCCompression() ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec); } mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR")); -#endif return 0; } int32_t GPUChainTracking::RunTPCDecompression() { -#ifdef GPUCA_HAVE_O2HEADERS if (GetProcessingSettings().tpcUseOldCPUDecoding) { const auto& threadContext = GetThreadContext(); TPCClusterDecompressor decomp; @@ -419,6 +414,5 @@ int32_t GPUChainTracking::RunTPCDecompression() } mRec->PopNonPersistentMemory(RecoStep::TPCDecompression, qStr2Tag("TPCDCMPR")); } -#endif return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 06ba08527bfdc..96bc8a3083067 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -23,9 +23,7 @@ #include "bitmapfile.h" #endif -#ifdef GPUCA_HAVE_O2HEADERS #include "GPUTPCClusterFilter.h" -#endif #define PROFILE_MAX_SIZE (100 * 1024 * 1024) @@ -237,11 +235,9 @@ void GPUChainTracking::PrintOutputStat() int32_t nTRDTracklets = 0; for (uint32_t k = 0; k < mIOPtrs.nTRDTracks; k++) { if (mIOPtrs.trdTracksO2) { -#ifdef 
GPUCA_HAVE_O2HEADERS auto& trk = mIOPtrs.trdTracksO2[k]; nTRDTracklets += trk.getNtracklets(); nTRDTracks += trk.getNtracklets() != 0; -#endif } else { auto& trk = mIOPtrs.trdTracks[k]; nTRDTracklets += trk.getNtracklets(); @@ -255,7 +251,6 @@ void GPUChainTracking::PrintOutputStat() void GPUChainTracking::SanityCheck() { -#ifdef GPUCA_HAVE_O2HEADERS size_t nErrors = 0; for (uint32_t i = 0; i < mIOPtrs.nOutputTracksTPCO2; i++) { @@ -296,12 +291,10 @@ void GPUChainTracking::SanityCheck() } else { GPUError("Sanity check found %lu errors", nErrors); } -#endif } void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts) { -#ifdef GPUCA_HAVE_O2HEADERS GPUTPCClusterFilter clusterFilter(*clusters); o2::tpc::ClusterNative* outputBuffer = nullptr; for (int32_t iPhase = 0; iPhase < 2; iPhase++) { @@ -338,5 +331,4 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster outputBuffer = allocator(countTotal); } } -#endif } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 106f71cd745d7..229469af801f6 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -35,7 +35,6 @@ #include "TPCZSLinkMapping.h" #include "GPUTriggerOutputs.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" #include "GPUTPCClusterStatistics.h" @@ -43,9 +42,6 @@ #include "GPUHostDataTypes.h" #include "DataFormatsTPC/Digit.h" #include "CalibdEdxContainer.h" -#else -#include "GPUO2FakeClasses.h" -#endif #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" @@ -78,7 +74,6 @@ void GPUChainTracking::DumpData(const char* filename) fwrite(&GPUReconstruction::geometryType, sizeof(GPUReconstruction::geometryType), 1, fp); DumpData(fp, mIOPtrs.clusterData, mIOPtrs.nClusterData, InOutPointerType::CLUSTER_DATA); 
DumpData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, InOutPointerType::RAW_CLUSTERS); -#ifdef GPUCA_HAVE_O2HEADERS if (mIOPtrs.clustersNative) { if (DumpData(fp, &mIOPtrs.clustersNative->clustersLinear, &mIOPtrs.clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp); @@ -145,7 +140,6 @@ void GPUChainTracking::DumpData(const char* filename) uint32_t n = 1; DumpData(fp, &mIOPtrs.settingsTF, &n, InOutPointerType::TF_SETTINGS); } -#endif DumpData(fp, mIOPtrs.sliceTracks, mIOPtrs.nSliceTracks, InOutPointerType::SLICE_OUT_TRACK); DumpData(fp, mIOPtrs.sliceClusters, mIOPtrs.nSliceClusters, InOutPointerType::SLICE_OUT_CLUSTER); DumpData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); @@ -191,7 +185,6 @@ int32_t GPUChainTracking::ReadData(const char* filename) AliHLTTPCRawCluster* ptrRawClusters[NSLICES]; ReadData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, mIOMem.rawClusters, InOutPointerType::RAW_CLUSTERS, ptrRawClusters); int32_t nClustersTotal = 0; -#ifdef GPUCA_HAVE_O2HEADERS mIOMem.clusterNativeAccess.reset(new ClusterNativeAccess); if (ReadData(fp, &mIOMem.clusterNativeAccess->clustersLinear, &mIOMem.clusterNativeAccess->nClustersTotal, &mIOMem.clustersNative, InOutPointerType::CLUSTERS_NATIVE)) { r = fread(&mIOMem.clusterNativeAccess->nClusters[0][0], sizeof(mIOMem.clusterNativeAccess->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp); @@ -248,7 +241,6 @@ int32_t GPUChainTracking::ReadData(const char* filename) } uint32_t n; ReadData(fp, &mIOPtrs.settingsTF, &n, &mIOMem.settingsTF, InOutPointerType::TF_SETTINGS); -#endif ReadData(fp, mIOPtrs.sliceTracks, mIOPtrs.nSliceTracks, mIOMem.sliceTracks, InOutPointerType::SLICE_OUT_TRACK); ReadData(fp, mIOPtrs.sliceClusters, mIOPtrs.nSliceClusters, mIOMem.sliceClusters, InOutPointerType::SLICE_OUT_CLUSTER); ReadData(fp, &mIOPtrs.mcLabelsTPC, 
&mIOPtrs.nMCLabelsTPC, &mIOMem.mcLabelsTPC, InOutPointerType::MC_LABEL_TPC); @@ -329,7 +321,6 @@ void GPUChainTracking::DumpSettings(const char* dir) f += "tpczslinkmapping.dump"; DumpStructToFile(processors()->calibObjects.tpcZSLinkMapping, f.c_str()); } -#ifdef GPUCA_HAVE_O2HEADERS if (processors()->calibObjects.dEdxCalibContainer != nullptr) { f = dir; f += "dEdxCalibContainer.dump"; @@ -345,7 +336,6 @@ void GPUChainTracking::DumpSettings(const char* dir) f += "trdgeometry.dump"; DumpStructToFile(processors()->calibObjects.trdGeometry, f.c_str()); } -#endif } void GPUChainTracking::ReadSettings(const char* dir) @@ -379,7 +369,6 @@ void GPUChainTracking::ReadSettings(const char* dir) f += "tpczslinkmapping.dump"; mTPCZSLinkMappingU = ReadStructFromFile(f.c_str()); processors()->calibObjects.tpcZSLinkMapping = mTPCZSLinkMappingU.get(); -#ifdef GPUCA_HAVE_O2HEADERS f = dir; f += "dEdxCalibContainer.dump"; mdEdxCalibContainerU = ReadFlatObjectFromFile(f.c_str()); @@ -392,5 +381,4 @@ void GPUChainTracking::ReadSettings(const char* dir) f += "trdgeometry.dump"; mTRDGeometryU = ReadStructFromFile(f.c_str()); processors()->calibObjects.trdGeometry = mTRDGeometryU.get(); -#endif } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 1d53177942b54..9e7085b31849e 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -20,7 +20,6 @@ using namespace o2::gpu; int32_t GPUChainTracking::RunRefit() { -#ifdef GPUCA_HAVE_O2HEADERS bool doGPU = GetRecoStepsGPU() & RecoStep::Refit; GPUTrackingRefitProcessor& Refit = processors()->trackingRefit; GPUTrackingRefitProcessor& RefitShadow = doGPU ? 
processorsShadow()->trackingRefit : Refit; @@ -40,6 +39,5 @@ int32_t GPUChainTracking::RunRefit() } //TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); SynchronizeStream(0); -#endif return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index b21745f64af0d..ba6ba03fca8a1 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -83,11 +83,9 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() int32_t offset = 0; for (uint32_t i = 0; i < NSLICES; i++) { processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clusterData[i], mIOPtrs.nClusterData[i], offset); -#ifdef GPUCA_HAVE_O2HEADERS if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) { processorsShadow()->tpcTrackers[i].Data().SetClusterData(processorsShadow()->tpcConverter.mClusters + processors()->tpcTrackers[i].Data().ClusterIdOffset(), processors()->tpcTrackers[i].NHitsTotal(), processors()->tpcTrackers[i].Data().ClusterIdOffset()); } -#endif offset += mIOPtrs.nClusterData[i]; } mRec->MemoryScalers()->nTPCHits = offset; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx index d2e1ae295de05..0f17bbcc26842 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx @@ -82,7 +82,6 @@ int32_t GPUChainTracking::RunTRDTracking() } } } else { -#ifdef GPUCA_HAVE_O2HEADERS for (uint32_t i = 0; i < mIOPtrs.nOutputTracksTPCO2; i++) { const auto& trk = mIOPtrs.outputTracksTPCO2[i]; @@ -111,7 +110,6 @@ int32_t GPUChainTracking::RunTRDTracking() return 1; } } -#endif } DoTRDGPUTracking(); @@ -121,10 +119,8 @@ int32_t GPUChainTracking::RunTRDTracking() mIOPtrs.trdTracks = Tracker.Tracks(); mIOPtrs.trdTracksO2 = nullptr; } else { -#ifdef GPUCA_HAVE_O2HEADERS mIOPtrs.trdTracks = nullptr; mIOPtrs.trdTracksO2 = Tracker.Tracks(); 
-#endif } mRec->PopNonPersistentMemory(RecoStep::TRDTracking, qStr2Tag("TRDTRACK")); @@ -134,7 +130,6 @@ int32_t GPUChainTracking::RunTRDTracking() template int32_t GPUChainTracking::DoTRDGPUTracking(T* externalInstance) { -#ifdef GPUCA_HAVE_O2HEADERS bool doGPU = GetRecoStepsGPU() & RecoStep::TRDTracking; auto* Tracker = &processors()->getTRDTracker(); auto* TrackerShadow = doGPU ? &processorsShadow()->getTRDTracker() : Tracker; @@ -191,7 +186,6 @@ int32_t GPUChainTracking::DoTRDGPUTracking(T* externalInstance) if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("GPU TRD tracker Finished"); } -#endif return (0); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx index 326bfbb4d2313..d91fed4046de0 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx @@ -21,12 +21,8 @@ #include "GPUMemorySizeScalers.h" #include "AliHLTTPCRawCluster.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/ClusterNative.h" #include "CommonDataFormat/InteractionRecord.h" -#else -#include "GPUO2FakeClasses.h" -#endif #include "utils/strtag.h" using namespace o2::gpu; @@ -39,7 +35,6 @@ bool GPUChainTracking::NeedTPCClustersOnGPU() int32_t GPUChainTracking::ConvertNativeToClusterData() { -#ifdef GPUCA_HAVE_O2HEADERS mRec->PushNonPersistentMemory(qStr2Tag("TPCTRANS")); const auto& threadContext = GetThreadContext(); bool doGPU = GetRecoStepsGPU() & RecoStep::TPCConversion; @@ -84,7 +79,6 @@ int32_t GPUChainTracking::ConvertNativeToClusterData() mIOPtrs.clusterData[i] = convert.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0]; } mRec->PopNonPersistentMemory(RecoStep::TPCConversion, qStr2Tag("TPCTRANS")); -#endif return 0; } @@ -128,7 +122,6 @@ void GPUChainTracking::ConvertRun2RawToNative() void GPUChainTracking::ConvertZSEncoder(int32_t version) { -#ifdef GPUCA_HAVE_O2HEADERS mIOMem.tpcZSmeta2.reset(new 
GPUTrackingInOutZS::GPUTrackingInOutZSMeta); mIOMem.tpcZSmeta.reset(new GPUTrackingInOutZS); o2::InteractionRecord ir{0, mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit ? mIOPtrs.settingsTF->tfStartOrbit : 0u}; @@ -146,7 +139,6 @@ void GPUChainTracking::ConvertZSEncoder(int32_t version) } } } -#endif } void GPUChainTracking::ConvertZSFilter(bool zs12bit) @@ -156,7 +148,6 @@ void GPUChainTracking::ConvertZSFilter(bool zs12bit) int32_t GPUChainTracking::ForwardTPCDigits() { -#ifdef GPUCA_HAVE_O2HEADERS if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) { throw std::runtime_error("Cannot forward TPC digits with Clusterizer on GPU"); } @@ -193,6 +184,5 @@ int32_t GPUChainTracking::ForwardTPCDigits() mIOPtrs.clustersNative = mClusterNativeAccess.get(); GPUInfo("Forwarded %u TPC clusters", nTotal); mRec->MemoryScalers()->nTPCHits = nTotal; -#endif return 0; } diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.h b/GPU/GPUTracking/Interface/GPUO2Interface.h index aab3c1562c67d..3a819de7c7b7c 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.h +++ b/GPU/GPUTracking/Interface/GPUO2Interface.h @@ -16,9 +16,6 @@ #define GPUO2INTERFACE_H // Some defines denoting that we are compiling for O2 -#ifndef GPUCA_HAVE_O2HEADERS -#define GPUCA_HAVE_O2HEADERS -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 #endif diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h index bb92eca425336..425c8b880b4e3 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h @@ -23,9 +23,6 @@ #define GPUO2INTERFACECONFIGURABLEPARAM_H // Some defines denoting that we are compiling for O2 -#ifndef GPUCA_HAVE_O2HEADERS -#define GPUCA_HAVE_O2HEADERS -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 #endif diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h 
b/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h index dd819f7ef7c05..af597fc4bf6b9 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfiguration.h @@ -15,9 +15,6 @@ #ifndef GPUO2INTERFACECONFIGURATION_H #define GPUO2INTERFACECONFIGURATION_H -#ifndef GPUCA_HAVE_O2HEADERS -#define GPUCA_HAVE_O2HEADERS -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 #endif diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.h b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.h index 5b7a8672e746d..e6d9cb76aed79 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.h @@ -16,9 +16,6 @@ #define GPUO2INTERFACEDisplay_H // Some defines denoting that we are compiling for O2 -#ifndef GPUCA_HAVE_O2HEADERS -#define GPUCA_HAVE_O2HEADERS -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 #endif diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceQA.h b/GPU/GPUTracking/Interface/GPUO2InterfaceQA.h index e046183e646cd..18af81b6cedc7 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceQA.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceQA.h @@ -16,9 +16,6 @@ #define GPUO2INTERFACEQA_H // Some defines denoting that we are compiling for O2 -#ifndef GPUCA_HAVE_O2HEADERS -#define GPUCA_HAVE_O2HEADERS -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 #endif diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceRefit.h b/GPU/GPUTracking/Interface/GPUO2InterfaceRefit.h index c3a253b647df3..9f743c89d5743 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceRefit.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceRefit.h @@ -16,9 +16,6 @@ #define GPUO2INTERFACEREFIT_H // Some defines denoting that we are compiling for O2 -#ifndef GPUCA_HAVE_O2HEADERS -#define GPUCA_HAVE_O2HEADERS -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx 
b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 2278afd112384..fab4469eeb488 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -51,16 +51,12 @@ #include "GPUTPCGMSliceTrack.h" #include "GPUTPCGMBorderTrack.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/TrackTPC.h" #ifndef GPUCA_GPUCODE #include "SimulationDataFormat/ConstMCTruthContainer.h" #include "SimulationDataFormat/MCCompLabel.h" #endif -#else -#include "GPUO2FakeClasses.h" -#endif using namespace o2::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index b0a150bbd6a92..a59af7529a97d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -35,10 +35,8 @@ #include "GPUReconstruction.h" #include "GPUDebugStreamer.h" #include "GPUTPCClusterOccupancyMap.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "GPUTrackingRefit.h" #include "CorrectionMapsHelper.h" -#endif using namespace o2::gpu; using namespace gputpcgmmergertypes; @@ -205,9 +203,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() -#ifdef GPUCA_HAVE_O2HEADERS << " dEdx " << (trk.OK() ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() ? 
mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) -#endif << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << "\n"; } out << std::setprecision(ss); @@ -299,7 +295,6 @@ const GPUTPCGMBorderTrack& GPUTPCGMMerger::MergedTrackStreamerFindBorderTrack(co void GPUTPCGMMerger::DebugRefitMergedTrack(const GPUTPCGMMergedTrack& track) const { -#ifdef GPUCA_HAVE_O2HEADERS GPUTPCGMMergedTrack trk = track; GPUTrackingRefit refit; ((GPUConstantMem*)GetConstantMem())->ioPtrs.mergedTrackHitStates = ClusterStateExt(); @@ -326,7 +321,6 @@ void GPUTPCGMMerger::DebugRefitMergedTrack(const GPUTPCGMMergedTrack& track) con } else { printf("REFIT ERROR\n"); } -#endif } std::vector GPUTPCGMMerger::StreamerOccupancyBin(int32_t iSlice, int32_t iRow, float time) const diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 6355db9483b05..9f344a04739fd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -1084,16 +1084,11 @@ GPUd() void GPUTPCGMPropagator::Mirror(bool inFlyDirection) GPUd() o2::base::MatBudget GPUTPCGMPropagator::getMatBudget(const float* p1, const float* p2) { -#ifdef GPUCA_HAVE_O2HEADERS return mMatLUT->getMatBudget(p1[0], p1[1], p1[2], p2[0], p2[1], p2[2]); -#else - return o2::base::MatBudget(); -#endif } GPUdic(0, 1) void GPUTPCGMPropagator::UpdateMaterial(const GPUTPCGMPhysicalTrackModel& GPUrestrict() t0e) { -#ifdef GPUCA_HAVE_O2HEADERS float xyz1[3] = {getGlobalX(mT0.GetX(), mT0.GetY()), getGlobalY(mT0.GetX(), mT0.GetY()), mT0.GetZ()}; float xyz2[3] = {getGlobalX(t0e.GetX(), t0e.GetY()), getGlobalY(t0e.GetX(), t0e.GetY()), t0e.GetZ()}; o2::base::MatBudget mat = getMatBudget(xyz1, xyz2); @@ -1102,5 +1097,4 @@ GPUdic(0, 1) void GPUTPCGMPropagator::UpdateMaterial(const GPUTPCGMPhysicalTrack } else { SetMaterialTPC(); } -#endif } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx 
b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index 27d531543bf6d..8e8c82393d659 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -23,9 +23,7 @@ #include "GPUTPCTrackletConstructor.h" #include "GPUTPCGlobalTracking.h" #include "CorrectionMapsHelper.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "CalibdEdxContainer.h" -#endif // GPUCA_HAVE_O2HEADERS #include "GPUParam.inc" #include "GPUCommonMath.h" @@ -378,7 +376,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } while (false); (void)found; -#if defined(GPUCA_HAVE_O2HEADERS) if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected)); if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), iRow, pad)) { @@ -386,7 +383,6 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, rowHit = CALINK_DEAD_CHANNEL; } } -#endif } while (0); } if (r.mNHits == 8 && r.mNMissed == 0 && rowHit != CALINK_INVAL && rowHit != CALINK_DEAD_CHANNEL && rowHits && tracker.Param().par.continuousTracking && rowHits[r.mFirstRow] != CALINK_INVAL && rowHits[r.mFirstRow] != CALINK_DEAD_CHANNEL && rowHits[r.mLastRow] != CALINK_INVAL && rowHits[r.mLastRow] != CALINK_DEAD_CHANNEL) { diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index ae92f8a380bda..4bfcc312e27e7 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -58,9 +58,7 @@ #endif #include "GPUO2DataTypes.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "GPUChainITS.h" -#endif using namespace o2::gpu; @@ -73,9 +71,7 @@ extern GPUSettingsStandalone configStandalone; GPUReconstruction *rec, 
*recAsync, *recPipeline; GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline; -#ifdef GPUCA_HAVE_O2HEADERS GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline; -#endif void unique_ptr_aligned_delete(char* v) { operator delete(v GPUCA_OPERATOR_NEW_ALIGNMENT); @@ -167,11 +163,6 @@ int32_t ReadConfiguration(int argc, char** argv) return 1; } #endif -#ifndef GPUCA_HAVE_O2HEADERS - configStandalone.runTRD = configStandalone.rundEdx = configStandalone.runCompression = configStandalone.runTransformation = configStandalone.testSyncAsync = configStandalone.testSync = 0; - configStandalone.rec.tpc.forceEarlyTransform = 1; - configStandalone.runRefit = false; -#endif #ifndef GPUCA_TPC_GEOMETRY_O2 configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0; configStandalone.proc.ompKernels = false; @@ -486,7 +477,6 @@ int32_t SetupReconstruction() } } -#ifdef GPUCA_HAVE_O2HEADERS o2::base::Propagator* prop = nullptr; prop = o2::base::Propagator::Instance(true); prop->setGPUField(&rec->GetParam().polynomialField); @@ -500,7 +490,6 @@ int32_t SetupReconstruction() chainTrackingPipeline->SetO2Propagator(prop); } procSet.o2PropagatorUseGPUField = true; -#endif if (rec->Init()) { printf("Error initializing GPUReconstruction!\n"); @@ -680,7 +669,6 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU } } -#ifdef GPUCA_HAVE_O2HEADERS if (tmpRetVal == 0 && configStandalone.testSyncAsync) { if (configStandalone.testSyncAsync) { printf("Running asynchronous phase\n"); @@ -716,7 +704,6 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU } recAsync->ClearAllocatedMemory(); } -#endif if (!configStandalone.proc.doublePipeline) { recUse->ClearAllocatedMemory(); } @@ -787,14 +774,12 @@ int32_t main(int argc, char** argv) chainTrackingPipeline = recPipeline->AddChain(); chainTrackingPipeline->SetQAFromForeignChain(chainTracking); } -#ifdef GPUCA_HAVE_O2HEADERS if (!configStandalone.proc.doublePipeline) { 
chainITS = rec->AddChain(0); if (configStandalone.testSyncAsync) { chainITSAsync = recAsync->AddChain(0); } } -#endif if (SetupReconstruction()) { return 1; diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 1857d77c11b1a..1f11f0bacffac 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -122,10 +122,7 @@ if(GPUCA_CONFIG_ROOT) else() add_definitions(-DGPUCA_NO_ROOT) endif() -if(GPUCA_CONFIG_O2_EXTENSIONS) - add_definitions(-DGPUCA_HAVE_O2HEADERS) - find_package(Microsoft.GSL REQUIRED HINTS "$ENV{MS_GSL_ROOT}/share/cmake") -endif() +find_package(Microsoft.GSL REQUIRED HINTS "$ENV{MS_GSL_ROOT}/share/cmake") if(GPUCA_CONFIG_FMT) find_package(fmt REQUIRED HINTS $ENV{FMT_ROOT}) @@ -154,10 +151,8 @@ include_directories(${GPU_DIR}/Common ${GPUTRACKING_DIR}/qa ${GPUTRACKING_DIR}/SliceTracker ${GPUTRACKING_DIR}/DataCompression - ${GPUTRACKING_DIR}/TRDTracking) - -if(GPUCA_CONFIG_O2_EXTENSIONS) -include_directories(${GPUTRACKING_DIR}/TPCClusterFinder + ${GPUTRACKING_DIR}/TRDTracking + ${GPUTRACKING_DIR}/TPCClusterFinder ${GPUTRACKING_DIR}/ITS ${GPUTRACKING_DIR}/Interface ${O2_DIR}/Common/Field/include @@ -190,7 +185,6 @@ include_directories(${GPUTRACKING_DIR}/TPCClusterFinder ${O2_DIR}/Detectors/TRD/base/include ${O2_DIR}/Detectors/TRD/base/src ${O2_DIR}/Framework/Foundation/3rdparty/include) -endif() # Create main targets add_subdirectory(../../ GPU) @@ -203,33 +197,31 @@ target_compile_definitions(ca PUBLIC $) # Add all sources and dependencies to to support based on Config File -if(GPUCA_CONFIG_O2_EXTENSIONS) +target_sources(standalone_support PRIVATE + ${O2_DIR}/Common/Field/src/MagFieldFast.cxx + ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx + ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + 
${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx + ${O2_DIR}/Detectors/Base/src/Ray.cxx + ${O2_DIR}/Detectors/Base/src/Propagator.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) +if(CONFIG_O2_ITS_TRAITS) target_sources(standalone_support PRIVATE - ${O2_DIR}/Common/Field/src/MagFieldFast.cxx - ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx - ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx - ${O2_DIR}/Detectors/Base/src/Ray.cxx - ${O2_DIR}/Detectors/Base/src/Propagator.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) - if(CONFIG_O2_ITS_TRAITS) - target_sources(standalone_support PRIVATE - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/PrimaryVertexContext.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/TrackerTraitsCPU.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ROframe.cxx) - target_link_libraries(standalone_support PUBLIC Boost::boost) 
- endif() + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/PrimaryVertexContext.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/TrackerTraitsCPU.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ROframe.cxx) + target_link_libraries(standalone_support PUBLIC Boost::boost) endif() if(GPUCA_CONFIG_FMT) @@ -258,9 +250,7 @@ if(GPUCA_CONFIG_ROOT) ROOT::Gui ROOT::Tree) endif() -if(GPUCA_CONFIG_O2_EXTENSIONS) - target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) -endif() +target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) if(OpenMP_CXX_FOUND) target_link_libraries(ca PUBLIC OpenMP::OpenMP_CXX) diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index d8ba50bfee804..97091d833efd8 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -19,7 +19,6 @@ set(CONFIG_OPENMP 1) set(GPUCA_CONFIG_VC 1) set(GPUCA_CONFIG_FMT 1) set(GPUCA_CONFIG_ROOT 1) -set(GPUCA_CONFIG_O2_EXTENSIONS 1) set(GPUCA_BUILD_EVENT_DISPLAY 1) set(GPUCA_BUILD_EVENT_DISPLAY_FREETYPE 1) set(GPUCA_BUILD_EVENT_DISPLAY_VULKAN 1) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h index 44df676c0d73e..f7aab78c33bd1 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h @@ -41,13 +41,11 @@ class GPUTPCCFChargeMapFiller : public GPUKernelTemplate findFragmentStart, }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static 
GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h index 670eb3a8700c6..d6daa6803ca39 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h @@ -37,13 +37,11 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate tpccf::Charge charges[PadsPerCacheline][NumOfCachedTimebins]; }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h index c04a9e167529f..411c38c39459e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h @@ -43,13 +43,11 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE]; }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h index abd6deefd4c28..e476674e030f9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h @@ -47,13 +47,11 @@ class GPUTPCCFDecodeZS : public GPUKernelTemplate static GPUd() void decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t firstHBF); -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; 
GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { @@ -68,13 +66,11 @@ class GPUTPCCFDecodeZSLinkBase : public GPUKernelTemplate { public: -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h index e907728e089b9..78fcc8ba1785a 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h @@ -36,13 +36,11 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate uint8_t buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_COUNT_N]; }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h index a7b3b4938b1dd..210853237b86e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFGather.h @@ -26,13 +26,11 @@ class GPUTPCClusterFinder; class GPUTPCCFGather : public GPUKernelTemplate { public: -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h index 6bde9bf468eaa..6bdec7760527c 
100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFMCLabelFlattener.h @@ -38,13 +38,11 @@ class GPUTPCCFMCLabelFlattener : public GPUKernelTemplate flatten, }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h index a11fbeb7d852f..f5d8f533df651 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h @@ -40,13 +40,11 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_NOISE_N]; }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h index 5cb5b208c2fde..ec17d98322239 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h @@ -36,13 +36,11 @@ class GPUTPCCFPeakFinder : public GPUKernelTemplate PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_SEARCH_N]; }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h 
b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h index 9de0aab11e6f1..25d3588be6d17 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h @@ -38,13 +38,11 @@ class GPUTPCCFStreamCompaction : public GPUKernelTemplate struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { }; -#ifdef GPUCA_HAVE_O2HEADERS typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcClusterer; } -#endif GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h index 8564b0ea7371c..2af6fbf922ed4 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h @@ -17,8 +17,6 @@ #include "GPUCommonDef.h" -#if defined(GPUCA_HAVE_O2HEADERS) //&& defined(GPUCA_GPUCODE) - class TObjArray; #include "GPUDef.h" #include "TRDBase/GeometryFlat.h" @@ -83,75 +81,4 @@ class GPUTRDGeometry : private o2::trd::GeometryFlat } // namespace gpu } // namespace o2 -#else // below are dummy definitions to enable building the standalone version without O2 Headers - -#include "GPUDef.h" - -namespace o2 -{ -namespace gpu -{ - -class TGeoHMatrix -{ - public: - template - GPUd() void LocalToMaster(T*, T*) const - { - } -}; - -class GPUTRDpadPlane -{ - public: - GPUd() float GetTiltingAngle() const { return 0; } - GPUd() float GetRowSize(int32_t row) const { return 0; } - GPUd() float GetRowPos(int32_t row) const { return 0; } - GPUd() float GetRow0() const { return 0; } - GPUd() float GetRowEnd() const { return 0; } - GPUd() float GetCol0() const { return 0; } - GPUd() float GetColEnd() const { return 0; } - GPUd() float GetColPos(int32_t col) const { return 0; } - GPUd() float GetNrows() const { return 0; } - GPUd() int32_t GetPadRowNumber(double z) const { return 0; } -}; 
- -class GPUTRDGeometry -{ - public: - GPUd() static bool CheckGeometryAvailable() { return false; } - void clearInternalBufferUniquePtr() const {} - - // Make sub-functionality available directly in GPUTRDGeometry - GPUd() float GetPadPlaneWidthIPad(int32_t det) const { return 0; } - GPUd() float GetPadPlaneRowPos(int32_t layer, int32_t stack, int32_t row) const { return 0; } - GPUd() float GetPadPlaneRowSize(int32_t layer, int32_t stack, int32_t row) const { return 0; } - GPUd() int32_t GetGeomManagerVolUID(int32_t det, int32_t modId) const { return 0; } - - // Base functionality of Geometry - GPUd() float GetTime0(int32_t layer) const { return 0; } - GPUd() float GetCol0(int32_t layer) const { return 0; } - GPUd() float GetCdrHght() const { return 0; } - GPUd() int32_t GetLayer(int32_t det) const { return 0; } - GPUd() bool CreateClusterMatrixArray() const { return false; } - GPUd() float AnodePos() const { return 0; } - GPUd() const TGeoHMatrix* GetClusterMatrix(int32_t det) const { return nullptr; } - GPUd() int32_t GetDetector(int32_t layer, int32_t stack, int32_t sector) const { return 0; } - GPUd() const GPUTRDpadPlane* GetPadPlane(int32_t layer, int32_t stack) const { return nullptr; } - GPUd() const GPUTRDpadPlane* GetPadPlane(int32_t detector) const { return nullptr; } - GPUd() int32_t GetSector(int32_t det) const { return 0; } - GPUd() int32_t GetStack(int32_t det) const { return 0; } - GPUd() int32_t GetStack(float z, int32_t layer) const { return 0; } - GPUd() float GetAlpha() const { return 0; } - GPUd() bool IsHole(int32_t la, int32_t st, int32_t se) const { return false; } - GPUd() int32_t GetRowMax(int32_t layer, int32_t stack, int32_t /* sector */) const { return 0; } - GPUd() bool ChamberInGeometry(int32_t det) const { return false; } - - static constexpr const int32_t kNstack = 0; -}; -} // namespace gpu -} // namespace o2 - -#endif // !defined(GPUCA_HAVE_O2HEADERS) - #endif // GPUTRDGEOMETRY_H diff --git 
a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 16347aae5c535..7aed063f05ad9 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -34,8 +34,6 @@ class propagatorInterface; } // namespace gpu } // namespace o2 -#if defined(GPUCA_HAVE_O2HEADERS) // Interface for O2, build only with O2 - #include "DetectorsBase/Propagator.h" #include "GPUTRDInterfaceO2Track.h" @@ -92,15 +90,11 @@ class propagatorInterface } // namespace gpu } // namespace o2 -#endif // GPUCA_HAVE_O2HEADERS - #include "GPUTPCGMPropagator.h" #include "GPUParam.h" #include "GPUDef.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTPC/TrackTPC.h" #include "ReconstructionDataFormats/TrackTPCITS.h" -#endif namespace o2 { @@ -126,7 +120,6 @@ class trackInterface : public GPUTPCGMTrackParam }; GPUdDefault() trackInterface(const trackInterface& param) = default; GPUdDefault() trackInterface& operator=(const trackInterface& param) = default; -#if defined(GPUCA_HAVE_O2HEADERS) GPUd() trackInterface(const o2::dataformats::TrackTPCITS& param) : GPUTPCGMTrackParam(), mAlpha(param.getParamOut().getAlpha()) { SetX(param.getParamOut().getX()); @@ -151,7 +144,6 @@ class trackInterface : public GPUTPCGMTrackParam SetCov(i, param.getParamOut().getCov()[i]); } } -#endif GPUd() float getX() const { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index c14e61071e2d8..c44b4c09a3d7a 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -1112,11 +1112,7 @@ namespace o2 { namespace gpu { -// instantiate version for AliExternalTrackParam / o2::TrackParCov data types -#if defined(GPUCA_HAVE_O2HEADERS) template class GPUTRDTracker_t; -#endif -// always instantiate version for GPU Track Model template class GPUTRDTracker_t; } // namespace gpu } // namespace o2 diff --git 
a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx index 376194e4a586c..eb9eecfe6e846 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx @@ -41,7 +41,5 @@ GPUdii() void GPUTRDTrackerKernels::Thread(int32_t nBlocks, int32_t nThreads, in #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code. template GPUdni() void GPUTRDTrackerKernels::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUTRDTrackerGPU* externalInstance); -#ifdef GPUCA_HAVE_O2HEADERS template GPUdni() void GPUTRDTrackerKernels::Thread<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUTRDTracker* externalInstance); -#endif // GPUCA_HAVE_O2HEADERS #endif diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 8cff279076348..4604a8cdbdf70 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -20,28 +20,14 @@ #include "GPUCommonMath.h" #include "GPUParam.h" #include "GPUdEdxInfo.h" -#if defined(GPUCA_HAVE_O2HEADERS) #include "DataFormatsTPC/Defs.h" #include "CalibdEdxContainer.h" #include "GPUDebugStreamer.h" -#endif namespace o2 { namespace gpu { -#if !defined(GPUCA_HAVE_O2HEADERS) - -class GPUdEdx -{ - public: - GPUd() void clear() {} - GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t slice, float trackSnp, float trackTgl, const GPUParam& param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) {} - GPUd() void fillSubThreshold(int32_t padRow, const GPUParam& param) {} - GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param) {} -}; - -#else class GPUdEdx { @@ -212,7 +198,6 @@ GPUdi() void 
GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestri mNSubThresh++; } -#endif // !GPUCA_HAVE_O2HEADERS } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 3d15077c8cf10..b1685fc61fc2c 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -31,12 +31,10 @@ #include -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsITS/TrackITS.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/ConstMCTruthContainer.h" #include "GPUTrackParamConvert.h" -#endif #ifdef WITH_OPENMP #include @@ -304,14 +302,12 @@ GPUDisplay::vboList GPUDisplay::DrawTracks(const GPUTPCTracker& tracker, int32_t void GPUDisplay::DrawTrackITS(int32_t trackId, int32_t iSlice) { -#ifdef GPUCA_HAVE_O2HEADERS const auto& trk = mIOPtrs->itsTracks[trackId]; for (int32_t k = 0; k < trk.getNClusters(); k++) { int32_t cid = mIOPtrs->itsTrackClusIdx[trk.getFirstClusterEntry() + k]; mVertexBuffer[iSlice].emplace_back(mGlobalPosITS[cid].x, mGlobalPosITS[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosITS[cid].z); mGlobalPosITS[cid].w = tITSATTACHED; } -#endif } GPUDisplay::vboList GPUDisplay::DrawFinalITS() @@ -402,9 +398,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* if (std::is_same_v || (!mIOPtrs->tpcLinkTRD && mIOPtrs->trdTracksO2)) { if (mChain && ((int32_t)mConfig.showTPCTracksFromO2Format == (int32_t)mChain->GetProcessingSettings().trdTrackModelO2) && mTRDTrackIds[i] != -1 && mIOPtrs->nTRDTracklets) { if (mIOPtrs->trdTracksO2) { -#ifdef GPUCA_HAVE_O2HEADERS tmpDoTRDTracklets(mIOPtrs->trdTracksO2[mTRDTrackIds[i]]); -#endif } else { tmpDoTRDTracklets(mIOPtrs->trdTracks[mTRDTrackIds[i]]); } @@ -697,7 +691,6 @@ GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) // TODO: tilted pads ignored at the moment size_t startCount = mVertexBufferStart[sector].size(); size_t startCountInner = mVertexBuffer[sector].size(); -#ifdef GPUCA_HAVE_O2HEADERS auto* geo = trdGeometry(); if (geo) { int32_t trdsector = NSLICES / 2 - 1 - sector; @@ -756,7 +749,6 @@ GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) } } } -#endif insertVertexList(sector, startCountInner, mVertexBuffer[sector].size()); return (vboList(startCount, mVertexBufferStart[sector].size() - startCount, sector)); } @@ -908,12 +900,9 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { mThreadBuffers[numThread].clear(); for (int32_t iSet = 0; iSet < numThreads; iSet++) { -#ifdef GPUCA_HAVE_O2HEADERS if (mConfig.showTPCTracksFromO2Format) { DrawFinal(iSlice, iCol, &prop, mThreadTracks[iSet][iCol][iSlice], mThreadBuffers[numThread]); - } else -#endif - { + } else { DrawFinal(iSlice, iCol, &prop, mThreadTracks[iSet][iCol][iSlice], mThreadBuffers[numThread]); } } diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index 56ce3bef39082..aaa03b8a24d18 100644 --- 
a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -24,13 +24,11 @@ #include "GPUTRDTrackletWord.h" #include "GPUParam.inc" -#ifdef GPUCA_HAVE_O2HEADERS #include "DataFormatsTOF/Cluster.h" #include "DataFormatsITSMFT/ROFRecord.h" #include "DataFormatsTPC/TrackTPC.h" #include "TOFBase/Geo.h" #include "ITSBase/GeometryTGeo.h" -#endif #ifdef GPUCA_O2_LIB #include "ITSMFTBase/DPLAlpideParam.h" #endif @@ -95,9 +93,7 @@ void GPUDisplay::DrawGLScene_updateEventData() } }; if (mIOPtrs->trdTracksO2) { -#ifdef GPUCA_HAVE_O2HEADERS tmpDoTRDTracklets(mIOPtrs->trdTracksO2); -#endif } else { tmpDoTRDTracklets(mIOPtrs->trdTracks); } @@ -182,10 +178,8 @@ void GPUDisplay::DrawGLScene_updateEventData() for (int32_t i = 0; i < mCurrentSpacePointsTRD; i++) { while (mParam->par.continuousTracking && trdTriggerRecord < (int32_t)mIOPtrs->nTRDTriggerRecords - 1 && mIOPtrs->trdTrackletIdxFirst[trdTriggerRecord + 1] <= i) { trdTriggerRecord++; -#ifdef GPUCA_HAVE_O2HEADERS float trdTime = mIOPtrs->trdTriggerTimes[trdTriggerRecord] * 1e3 / o2::constants::lhc::LHCBunchSpacingNS / o2::tpc::constants::LHCBCPERTIMEBIN; trdZoffset = fabsf(mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(0, trdTime, mParam->continuousMaxTimeBin)); -#endif } const auto& sp = mIOPtrs->trdSpacePoints[i]; int32_t iSec = trdGeometry()->GetSector(mIOPtrs->trdTracklets[i].GetDetector()); @@ -213,7 +207,6 @@ void GPUDisplay::DrawGLScene_updateEventData() GPUCA_OPENMP(parallel for num_threads(getNumThreads()) reduction(max : mMaxClusterZ)) for (int32_t i = 0; i < mCurrentClustersTOF; i++) { -#ifdef GPUCA_HAVE_O2HEADERS float4* ptr = &mGlobalPosTOF[i]; mParam->Slice2Global(mIOPtrs->tofClusters[i].getSector(), mIOPtrs->tofClusters[i].getX() + mCfgH.xAdd, mIOPtrs->tofClusters[i].getY(), mIOPtrs->tofClusters[i].getZ(), &ptr->x, &ptr->y, &ptr->z); float ZOffset = 0; @@ -229,11 +222,9 @@ void GPUDisplay::DrawGLScene_updateEventData() 
ptr->y *= GL_SCALE_FACTOR; ptr->z *= GL_SCALE_FACTOR; ptr->w = tTOFCLUSTER; -#endif } if (mCurrentClustersITS) { -#ifdef GPUCA_HAVE_O2HEADERS float itsROFhalfLen = 0; #ifdef GPUCA_O2_LIB // Not available in standalone benchmark if (mParam->par.continuousTracking) { @@ -271,6 +262,5 @@ void GPUDisplay::DrawGLScene_updateEventData() i++; } } -#endif } } diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index c46419c439eb0..57f0cce4989f3 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -17,7 +17,6 @@ o2_gpu_kernel_file_list(TPCTRACKER ERRORS GPUTPCTrackParam.cxx GPUTPCTrack.cxx G o2_gpu_kernel_file_list(TPCTRACKLETCONS GPUTPCTrackletConstructor.cxx) o2_gpu_kernel_file_list(TPCSLICEDATA TPCTRACKER GPUTPCSliceData.cxx) o2_gpu_kernel_file_list(TPCOCCUPANCY GPUTPCClusterOccupancyMap.cxx) -if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR GPUCA_CONFIG_O2_EXTENSIONS) o2_gpu_kernel_file_list(TPCDEDX GPUdEdx.cxx) o2_gpu_kernel_file_list(MATLUT MatLayerCylSet.cxx MatLayerCyl.cxx Ray.cxx) o2_gpu_kernel_file_list(TPCMERGER ERRORS GPUTPCGMMerger.cxx GPUTPCGMSliceTrack.cxx GPUTPCGMTrackParam.cxx GPUTPCGMPhysicalTrackModel.cxx GPUTPCGMPropagator.cxx) @@ -27,11 +26,6 @@ o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) o2_gpu_kernel_file_list(TPCCLUSTERFINDER ERRORS ClusterAccumulator.cxx) o2_gpu_kernel_file_list(TRDTRACKER GPUTRDTrack.cxx GPUTRDTracker.cxx GPUTRDTrackletWord.cxx GeometryBase.cxx) o2_gpu_kernel_file_list(GLOBALREFIT TPCMERGER O2PROPAGATOR MATLUT GPUTrackingRefit.cxx) -else() -o2_gpu_kernel_file_list(TPCDEDX) -o2_gpu_kernel_file_list(MATLUT) -o2_gpu_kernel_file_list(TPCMERGER) -endif() o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB single) @@ -88,8 +82,6 @@ o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMM o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB 
simple) o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) - -if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR GPUCA_CONFIG_O2_EXTENSIONS) o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER" NO simple) o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB simple) @@ -132,4 +124,3 @@ o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFD o2_gpu_add_kernel("GPUTPCCFGather" "=" LB single o2::tpc::ClusterNative* dest) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB simple) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB simple) -endif() diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 34318a1bab613..2aa0611b33779 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -52,10 +52,8 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" #include "GPUROOTDump.h" -#ifdef GPUCA_HAVE_O2HEADERS #include "SimulationDataFormat/ConstMCTruthContainer.h" #include "SimulationDataFormat/MCCompLabel.h" -#endif #ifdef GPUCA_O2_LIB #include "DetectorsRaw/HBFUtils.h" #include "DataFormatsTPC/TrackTPC.h" diff --git a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index 32c22c5193603..133bf35281b55 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -60,8 +60,6 @@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") Vc::Vc ROOT::Core ROOT::Matrix ROOT::Tree ROOT::Gpad ROOT::Minuit ) - target_compile_definitions(${targetName} PRIVATE GPUCA_HAVE_O2HEADERS) - o2_target_root_dictionary(${MODULE} HEADERS ${HDRS_CINT_O2} LINKDEF TPCFastTransformationLinkDef_O2.h) diff --git a/GPU/Utils/CMakeLists.txt b/GPU/Utils/CMakeLists.txt index e4612e6a9490f..01ca3eb59c029 100644 --- 
a/GPU/Utils/CMakeLists.txt +++ b/GPU/Utils/CMakeLists.txt @@ -31,8 +31,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") HEADERS ${HDRS_CINT} LINKDEF GPUUtilsLinkDef.h) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB - GPUCA_TPC_GEOMETRY_O2 GPUCA_HAVE_O2HEADERS) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include/GPU) endif() From 1b5428f0b740542b4acc3b48a1cbc0e9fcf074c0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 21 Jan 2025 02:12:36 +0100 Subject: [PATCH 0042/1914] GPU: Fix some protections for VC and ROOT --- GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx | 2 ++ GPU/GPUTracking/Debug/GPUROOTDump.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx index 47a6e4cff72df..4e093da5d2cf6 100644 --- a/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx +++ b/GPU/GPUTracking/DataTypes/CalibdEdxTrackTopologyPol.cxx @@ -9,7 +9,9 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
+#ifndef GPUCA_NO_VC #include "Rtypes.h" +#endif #include "CalibdEdxTrackTopologyPol.h" #include diff --git a/GPU/GPUTracking/Debug/GPUROOTDump.h b/GPU/GPUTracking/Debug/GPUROOTDump.h index 407076c98b4be..f8f8950a06ae0 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDump.h +++ b/GPU/GPUTracking/Debug/GPUROOTDump.h @@ -20,11 +20,13 @@ #include "GPUROOTDumpCore.h" #include #include -#include -#include #else class TNtuple; #endif +#ifndef GPUCA_GPUCODE +#include +#include +#endif namespace o2 { From 9dcdaaec1599261414705acb69a323b81a9a59d9 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 3 Feb 2025 10:27:56 +0100 Subject: [PATCH 0043/1914] DPL Analysis: improve error message on wrong index dereference (#13920) --- Framework/Core/include/Framework/ASoA.h | 20 ++++++++++---------- Framework/Core/src/ASoA.cxx | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 8af872a64176d..be6329b48b7eb 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -46,7 +46,7 @@ std::string strToUpper(std::string&& str); namespace o2::soa { void accessingInvalidIndexFor(const char* getter); -void dereferenceWithWrongType(); +void dereferenceWithWrongType(const char* getter, const char* target); void missingFilterDeclaration(int hash, int ai); void notBoundTable(const char* tableName); } // namespace o2::soa @@ -2473,7 +2473,7 @@ consteval auto getIndexTargets() } \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, #_Table_); \ } \ if (O2_BUILTIN_UNLIKELY(!has_##_Getter_())) { \ return t->emptySlice(); \ @@ -2558,7 +2558,7 @@ consteval auto getIndexTargets() } \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, #_Table_); \ } \ 
return getIterators(); \ } \ @@ -2571,7 +2571,7 @@ consteval auto getIndexTargets() } \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, #_Table_); \ } \ return getFilteredIterators(); \ } \ @@ -2617,7 +2617,7 @@ consteval auto getIndexTargets() } \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, #_Table_); \ } \ return t->rawIteratorAt((*mColumnIterator)[0]); \ } \ @@ -2630,7 +2630,7 @@ consteval auto getIndexTargets() } \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, #_Table_); \ } \ return t->rawIteratorAt((*mColumnIterator).back()); \ } \ @@ -2715,7 +2715,7 @@ consteval auto getIndexTargets() } \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, #_Table_); \ } \ return t->rawIteratorAt(*mColumnIterator); \ } \ @@ -2793,7 +2793,7 @@ consteval auto getIndexTargets() } \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, "self"); \ } \ return t->rawIteratorAt(*mColumnIterator); \ } \ @@ -2851,7 +2851,7 @@ consteval auto getIndexTargets() { \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, "self"); \ } \ if (O2_BUILTIN_UNLIKELY(!has_##_Getter_())) { \ return t->emptySlice(); \ @@ -2912,7 +2912,7 @@ consteval auto getIndexTargets() { \ auto t = mBinding.get(); \ if (O2_BUILTIN_UNLIKELY(t == nullptr)) { \ - o2::soa::dereferenceWithWrongType(); \ + o2::soa::dereferenceWithWrongType(#_Getter_, "self"); \ } \ return 
getIterators(); \ } \ diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index a37d0f33891e7..8f509ea17d2ba 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -21,9 +21,9 @@ void accessingInvalidIndexFor(const char* getter) { throw o2::framework::runtime_error_f("Accessing invalid index for %s", getter); } -void dereferenceWithWrongType() +void dereferenceWithWrongType(const char* getter, const char* target) { - throw o2::framework::runtime_error_f("Trying to dereference index with a wrong type in _as<>. Note that if you have several compatible index targets in your process() signature, the last one will be the one actually bound to the getter."); + throw o2::framework::runtime_error_f("Trying to dereference index with a wrong type in %s_as for base target \"%s\". Note that if you have several compatible index targets in your process() signature, the last one will be the one actually bound.", getter, target); } void missingFilterDeclaration(int hash, int ai) { From ff8ba8164bc6a23c4fbf2ab3bdcc5608dd3a6e69 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 3 Feb 2025 15:59:44 +0100 Subject: [PATCH 0044/1914] DPL Analysis: move ownership of payloads to the fragment (#13931) This makes sure the FileFragment is the entity which owns the TTree / RNtuple, so that its caching and memory management have the correct life-cycle and we do not end up with memory churn or having to reconfigure the caches. 
--- .../AnalysisSupport/src/RNTuplePlugin.cxx | 60 +++--- Framework/AnalysisSupport/src/TTreePlugin.cxx | 172 ++++++++++-------- .../include/Framework/RootArrowFilesystem.h | 45 ++++- Framework/Core/src/Plugin.cxx | 26 ++- Framework/Core/src/RootArrowFilesystem.cxx | 71 +++++++- Framework/Core/test/test_Root2ArrowTable.cxx | 22 ++- 6 files changed, 265 insertions(+), 131 deletions(-) diff --git a/Framework/AnalysisSupport/src/RNTuplePlugin.cxx b/Framework/AnalysisSupport/src/RNTuplePlugin.cxx index f66723419c24e..51b585d0714bb 100644 --- a/Framework/AnalysisSupport/src/RNTuplePlugin.cxx +++ b/Framework/AnalysisSupport/src/RNTuplePlugin.cxx @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -51,10 +52,6 @@ class RNTupleFileSystem : public VirtualRootFileSystemBase public: ~RNTupleFileSystem() override; - std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source) override - { - return std::dynamic_pointer_cast(shared_from_this()); - }; virtual ROOT::Experimental::RNTuple* GetRNTuple(arrow::dataset::FileSource source) = 0; }; @@ -100,9 +97,28 @@ class RNTupleFileFragment : public arrow::dataset::FileFragment std::shared_ptr format, arrow::compute::Expression partition_expression, std::shared_ptr physical_schema) - : FileFragment(std::move(source), std::move(format), std::move(partition_expression), std::move(physical_schema)) + : FileFragment(source, format, partition_expression, physical_schema) { + auto fs = std::dynamic_pointer_cast(source.filesystem()); + if (!fs.get()) { + throw runtime_error_f("Do not know how to extract %s from %s", source.path().c_str(), fs->type_name().c_str()); + } + auto handler = fs->GetObjectHandler(source); + if (!handler->format->Equals(*format)) { + throw runtime_error_f("Format for %s does not match. 
Found %s, expected %s.", source.path().c_str(), + handler->format->type_name().c_str(), + format->type_name().c_str()); + } + mNTuple = handler->GetObjectAsOwner(); } + + ROOT::Experimental::RNTuple* GetRNTuple() + { + return mNTuple.get(); + } + + private: + std::unique_ptr mNTuple; }; class RNTupleFileFormat : public arrow::dataset::FileFormat @@ -133,11 +149,10 @@ class RNTupleFileFormat : public arrow::dataset::FileFormat arrow::Result IsSupported(const arrow::dataset::FileSource& source) const override { auto fs = std::dynamic_pointer_cast(source.filesystem()); - auto subFs = fs->GetSubFilesystem(source); - if (std::dynamic_pointer_cast(subFs)) { - return true; + if (!fs) { + return false; } - return false; + return fs->CheckSupport(source); } arrow::Result> Inspect(const arrow::dataset::FileSource& source) const override; @@ -493,11 +508,12 @@ arrow::Result> RNTupleFileFormat::Inspect(const a auto fs = std::dynamic_pointer_cast(source.filesystem()); // Actually get the TTree from the ROOT file. - auto ntupleFs = std::dynamic_pointer_cast(fs->GetSubFilesystem(source)); - if (!ntupleFs.get()) { - throw runtime_error_f("Unknown filesystem %s\n", source.filesystem()->type_name().c_str()); + auto objectHandler = fs->GetObjectHandler(source); + if (objectHandler->format->type_name() != this->type_name()) { + throw runtime_error_f("Unexpected kind of filesystem %s to handle payload %s.\n", source.filesystem()->type_name().c_str(), source.path().c_str()); } - ROOT::Experimental::RNTuple* rntuple = ntupleFs->GetRNTuple(source); + // We know this is a RNTuple, so we can continue with the inspection. 
+ auto rntuple = objectHandler->GetObjectAsOwner().release(); auto inspector = ROOT::Experimental::RNTupleInspector::Create(rntuple); @@ -526,11 +542,8 @@ arrow::Result RNTupleFileFormat::ScanBatchesAsync( std::vector> columns; std::vector> fields = dataset_schema->fields(); - auto containerFS = std::dynamic_pointer_cast(ntupleFragment->source().filesystem()); - auto fs = std::dynamic_pointer_cast(containerFS->GetSubFilesystem(ntupleFragment->source())); - int64_t rows = -1; - ROOT::Experimental::RNTuple* rntuple = fs->GetRNTuple(ntupleFragment->source()); + ROOT::Experimental::RNTuple* rntuple = ntupleFragment->GetRNTuple(); auto reader = ROOT::Experimental::RNTupleReader::Open(rntuple); auto& model = reader->GetModel(); for (auto& physicalField : fields) { @@ -670,7 +683,7 @@ arrow::Result RNTupleFileFormat::ScanBatchesAsync( if (!result.ok()) { throw runtime_error("Cannot allocate offset buffer"); } - arrowOffsetBuffer = std::move(result).ValueUnsafe(); + arrowOffsetBuffer = result.MoveValueUnsafe(); // Offset bulk auto offsetBulk = model.CreateBulk(physicalField->name()); @@ -692,7 +705,7 @@ arrow::Result RNTupleFileFormat::ScanBatchesAsync( if (!result.ok()) { throw runtime_error("Cannot allocate values buffer"); } - arrowValuesBuffer = std::move(result).ValueUnsafe(); + arrowValuesBuffer = result.MoveValueUnsafe(); ptr = (uint8_t*)(arrowValuesBuffer->mutable_data()); // Calculate the size of the buffer here. 
for (size_t i = 0; i < total; i++) { @@ -811,9 +824,9 @@ arrow::Result> RNTupleFileFormat:: { std::shared_ptr format = std::make_shared(mTotCompressedSize, mTotUncompressedSize); - auto fragment = std::make_shared(std::move(source), std::move(format), - std::move(partition_expression), - std::move(physical_schema)); + auto fragment = std::make_shared(source, format, + partition_expression, + physical_schema); return std::dynamic_pointer_cast(fragment); } @@ -839,9 +852,6 @@ struct RNTupleObjectReadingImplementation : public RootArrowFactoryPlugin { return new RootArrowFactory{ .options = [context]() { return context->format->DefaultWriteOptions(); }, .format = [context]() { return context->format; }, - .getSubFilesystem = [](void* handle) { - auto rntuple = (ROOT::Experimental::RNTuple*)handle; - return std::shared_ptr(new SingleRNTupleFileSystem(rntuple)); }, }; } }; diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index abc08526815cc..4b130a2144253 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -15,6 +15,7 @@ #include "Framework/Endian.h" #include #include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include O2_DECLARE_DYNAMIC_LOG(root_arrow_fs); @@ -48,11 +48,6 @@ class TTreeFileSystem : public VirtualRootFileSystemBase public: ~TTreeFileSystem() override; - std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source) override - { - return std::dynamic_pointer_cast(shared_from_this()); - }; - arrow::Result> OpenOutputStream( const std::string& path, const std::shared_ptr& metadata) override; @@ -60,6 +55,55 @@ class TTreeFileSystem : public VirtualRootFileSystemBase virtual std::unique_ptr& GetTree(arrow::dataset::FileSource source) = 0; }; +class TTreeFileFormat : public arrow::dataset::FileFormat +{ + size_t& mTotCompressedSize; + size_t& mTotUncompressedSize; + + public: + 
TTreeFileFormat(size_t& totalCompressedSize, size_t& totalUncompressedSize) + : FileFormat({}), + mTotCompressedSize(totalCompressedSize), + mTotUncompressedSize(totalUncompressedSize) + { + } + + ~TTreeFileFormat() override = default; + + std::string type_name() const override + { + return "ttree"; + } + + bool Equals(const FileFormat& other) const override + { + return other.type_name() == this->type_name(); + } + + arrow::Result IsSupported(const arrow::dataset::FileSource& source) const override + { + auto fs = std::dynamic_pointer_cast(source.filesystem()); + if (!fs) { + return false; + } + return fs->CheckSupport(source); + } + + arrow::Result> Inspect(const arrow::dataset::FileSource& source) const override; + /// \brief Create a FileFragment for a FileSource. + arrow::Result> MakeFragment( + arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, + std::shared_ptr physical_schema) override; + + arrow::Result> MakeWriter(std::shared_ptr destination, std::shared_ptr schema, std::shared_ptr options, arrow::fs::FileLocator destination_locator) const override; + + std::shared_ptr DefaultWriteOptions() override; + + arrow::Result ScanBatchesAsync( + const std::shared_ptr& options, + const std::shared_ptr& fragment) const override; +}; + class SingleTreeFileSystem : public TTreeFileSystem { public: @@ -76,6 +120,11 @@ class SingleTreeFileSystem : public TTreeFileSystem return "ttree"; } + std::shared_ptr GetObjectHandler(arrow::dataset::FileSource source) override + { + return std::make_shared((void*)mTree.get(), std::make_shared(mTotCompressedSize, mTotUncompressedSize)); + } + std::unique_ptr& GetTree(arrow::dataset::FileSource) override { // Simply return the only TTree we have @@ -83,6 +132,8 @@ class SingleTreeFileSystem : public TTreeFileSystem } private: + size_t mTotUncompressedSize; + size_t mTotCompressedSize; std::unique_ptr mTree; }; @@ -103,66 +154,28 @@ class TTreeFileFragment : public arrow::dataset::FileFragment 
std::shared_ptr format, arrow::compute::Expression partition_expression, std::shared_ptr physical_schema) - : FileFragment(std::move(source), std::move(format), std::move(partition_expression), std::move(physical_schema)) - { - } - - std::unique_ptr& GetTree() + : FileFragment(source, format, std::move(partition_expression), physical_schema) { - auto topFs = std::dynamic_pointer_cast(source().filesystem()); - auto treeFs = std::dynamic_pointer_cast(topFs->GetSubFilesystem(source())); - return treeFs->GetTree(source()); - } -}; - -class TTreeFileFormat : public arrow::dataset::FileFormat -{ - size_t& mTotCompressedSize; - size_t& mTotUncompressedSize; - - public: - TTreeFileFormat(size_t& totalCompressedSize, size_t& totalUncompressedSize) - : FileFormat({}), - mTotCompressedSize(totalCompressedSize), - mTotUncompressedSize(totalUncompressedSize) - { - } - - ~TTreeFileFormat() override = default; - - std::string type_name() const override - { - return "ttree"; - } - - bool Equals(const FileFormat& other) const override - { - return other.type_name() == this->type_name(); + auto rootFS = std::dynamic_pointer_cast(this->source().filesystem()); + if (rootFS.get() == nullptr) { + throw runtime_error_f("Unknown filesystem %s when reading %s.", + source.filesystem()->type_name().c_str(), source.path().c_str()); + } + auto objectHandler = rootFS->GetObjectHandler(source); + if (!objectHandler->format->Equals(*format)) { + throw runtime_error_f("Cannot read source %s with format %s to pupulate a TTreeFileFragment.", + source.path().c_str(), objectHandler->format->type_name().c_str()); + }; + mTree = objectHandler->GetObjectAsOwner(); } - arrow::Result IsSupported(const arrow::dataset::FileSource& source) const override + TTree* GetTree() { - auto fs = std::dynamic_pointer_cast(source.filesystem()); - auto subFs = fs->GetSubFilesystem(source); - if (std::dynamic_pointer_cast(subFs)) { - return true; - } - return false; + return mTree.get(); } - arrow::Result> Inspect(const 
arrow::dataset::FileSource& source) const override; - /// \brief Create a FileFragment for a FileSource. - arrow::Result> MakeFragment( - arrow::dataset::FileSource source, arrow::compute::Expression partition_expression, - std::shared_ptr physical_schema) override; - - arrow::Result> MakeWriter(std::shared_ptr destination, std::shared_ptr schema, std::shared_ptr options, arrow::fs::FileLocator destination_locator) const override; - - std::shared_ptr DefaultWriteOptions() override; - - arrow::Result ScanBatchesAsync( - const std::shared_ptr& options, - const std::shared_ptr& fragment) const override; + private: + std::unique_ptr mTree; }; // An arrow outputstream which allows to write to a TTree. Eventually @@ -250,9 +263,6 @@ struct TTreeObjectReadingImplementation : public RootArrowFactoryPlugin { return new RootArrowFactory{ .options = [context]() { return context->format->DefaultWriteOptions(); }, .format = [context]() { return context->format; }, - .getSubFilesystem = [](void* handle) { - auto tree = (TTree*)handle; - return std::shared_ptr(new SingleTreeFileSystem(tree)); }, }; } }; @@ -269,16 +279,16 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( { // This is the schema we want to read auto dataset_schema = options->dataset_schema; + auto treeFragment = std::dynamic_pointer_cast(fragment); + if (treeFragment.get() == nullptr) { + return {arrow::Status::NotImplemented("Not a ttree fragment")}; + } - auto generator = [pool = options->pool, fragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, + auto generator = [pool = options->pool, treeFragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, &totalUncompressedSize = mTotUncompressedSize]() -> arrow::Future> { std::vector> columns; std::vector> fields = dataset_schema->fields(); - auto physical_schema = *fragment->ReadPhysicalSchema(); - - auto fs = std::dynamic_pointer_cast(fragment->source().filesystem()); - // Actually get the TTree from the ROOT file. 
- auto treeFs = std::dynamic_pointer_cast(fs->GetSubFilesystem(fragment->source())); + auto physical_schema = *treeFragment->ReadPhysicalSchema(); if (dataset_schema->num_fields() > physical_schema->num_fields()) { throw runtime_error_f("One TTree must have all the fields requested in a table"); @@ -301,7 +311,7 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( } } - auto& tree = treeFs->GetTree(fragment->source()); + auto* tree = treeFragment->GetTree(); tree->SetCacheSize(25000000); auto branches = tree->GetListOfBranches(); for (auto& mapping : mappings) { @@ -586,12 +596,19 @@ struct RootTransientIndexType : arrow::ExtensionType { arrow::Result> TTreeFileFormat::Inspect(const arrow::dataset::FileSource& source) const { auto fs = std::dynamic_pointer_cast(source.filesystem()); - // Actually get the TTree from the ROOT file. - auto treeFs = std::dynamic_pointer_cast(fs->GetSubFilesystem(source)); - if (!treeFs.get()) { + + if (!fs.get()) { + throw runtime_error_f("Unknown filesystem %s\n", source.filesystem()->type_name().c_str()); + } + auto objectHandler = fs->GetObjectHandler(source); + + if (!objectHandler->format->Equals(*this)) { throw runtime_error_f("Unknown filesystem %s\n", source.filesystem()->type_name().c_str()); } - auto& tree = treeFs->GetTree(source); + + // Notice that we abuse the API here: the TTree is detached from its owning + // pointer via release() instead of being deleted, so that it's still managed by ROOT.
+ auto tree = objectHandler->GetObjectAsOwner().release(); auto branches = tree->GetListOfBranches(); auto n = branches->GetEntries(); @@ -636,10 +653,9 @@ arrow::Result> TTreeFileFormat::Ma std::shared_ptr physical_schema) { - auto fragment = std::make_shared(std::move(source), std::dynamic_pointer_cast(shared_from_this()), - std::move(partition_expression), - std::move(physical_schema)); - return std::dynamic_pointer_cast(fragment); + return std::make_shared(source, std::dynamic_pointer_cast(shared_from_this()), + std::move(partition_expression), + physical_schema); } class TTreeFileWriter : public arrow::dataset::FileWriter diff --git a/Framework/Core/include/Framework/RootArrowFilesystem.h b/Framework/Core/include/Framework/RootArrowFilesystem.h index feab713b445fe..441b43aeca331 100644 --- a/Framework/Core/include/Framework/RootArrowFilesystem.h +++ b/Framework/Core/include/Framework/RootArrowFilesystem.h @@ -12,11 +12,13 @@ #define O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_ #include +#include #include #include #include #include #include +#include class TFile; class TBufferFile; @@ -25,6 +27,27 @@ class TDirectoryFile; namespace o2::framework { +struct RootObjectHandler { + RootObjectHandler(void* p, std::shared_ptr f) + : payload(p), format(std::move(f)) + { + } + + ~RootObjectHandler() noexcept(false); + + template + std::unique_ptr GetObjectAsOwner() + { + auto* p = payload; + payload = nullptr; + return std::unique_ptr((T*)p); + } + std::shared_ptr format; + + private: + void* payload = nullptr; +}; + // This is to avoid having to implement a bunch of unimplemented methods // for all the possible virtual filesystem we can invent on top of ROOT // data structures. 
@@ -40,7 +63,8 @@ class VirtualRootFileSystemBase : public arrow::fs::FileSystem return this->type_name() == other.type_name(); } - virtual std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source) = 0; + virtual std::shared_ptr GetObjectHandler(arrow::dataset::FileSource source) = 0; + virtual bool CheckSupport(arrow::dataset::FileSource source) = 0; arrow::Status CreateDir(const std::string& path, bool recursive) override; @@ -72,7 +96,6 @@ class VirtualRootFileSystemBase : public arrow::fs::FileSystem struct RootArrowFactory final { std::function()> options = nullptr; std::function()> format = nullptr; - std::function(void*)> getSubFilesystem = nullptr; }; struct RootArrowFactoryPlugin { @@ -92,9 +115,10 @@ struct RootObjectReadingCapability { // Use a void * in order not to expose the kind of object to the // generic reading code. This is also where we load the plugin // which will be used for the actual creation. - std::function getHandle; - // Same as the above, but uses a TBufferFile as storage - std::function getBufferHandle; + std::function fs, std::string const& path)> getHandle; + // Wether or not this actually supports reading an object of the following class + std::function checkSupport; + // This must be implemented to load the actual RootArrowFactory plugin which // implements this capability. 
This way the detection of the file format // (via get handle) does not need to know about the actual code which performs @@ -125,7 +149,9 @@ class TFileFileSystem : public VirtualRootFileSystemBase return "TDirectoryFile"; } - std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source) override; + std::shared_ptr GetObjectHandler(arrow::dataset::FileSource source) override; + bool CheckSupport(arrow::dataset::FileSource source) override; + virtual std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source); arrow::Result> OpenOutputStream( const std::string& path, @@ -153,7 +179,12 @@ class TBufferFileFS : public VirtualRootFileSystemBase return "tbufferfile"; } - std::shared_ptr GetSubFilesystem(arrow::dataset::FileSource source) override; + bool CheckSupport(arrow::dataset::FileSource source) override; + std::shared_ptr GetObjectHandler(arrow::dataset::FileSource source) override; + TBufferFile* GetBuffer() + { + return mBuffer; + } private: TBufferFile* mBuffer; diff --git a/Framework/Core/src/Plugin.cxx b/Framework/Core/src/Plugin.cxx index 568908426c143..13b67e2a781ba 100644 --- a/Framework/Core/src/Plugin.cxx +++ b/Framework/Core/src/Plugin.cxx @@ -17,10 +17,14 @@ #include "Framework/Signpost.h" #include "Framework/VariantJSONHelpers.h" #include "Framework/PluginManager.h" +#include #include #include #include +#include +#include #include +#include #include O2_DECLARE_DYNAMIC_LOG(capabilities); @@ -177,14 +181,24 @@ struct ImplementationContext { std::vector implementations; }; -std::function getHandleByClass(char const* classname) +std::function, std::string const&)> getHandleByClass(char const* classname) { - return [c = TClass::GetClass(classname)](TDirectoryFile* file, std::string const& path) { return file->GetObjectChecked(path.c_str(), c); }; + return [c = TClass::GetClass(classname)](std::shared_ptr fs, std::string const& path) -> void* { + if (auto tfileFS = std::dynamic_pointer_cast(fs)) { + return 
tfileFS->GetFile()->GetObjectChecked(path.c_str(), c); + } else if (auto tbufferFS = std::dynamic_pointer_cast(fs)) { + tbufferFS->GetBuffer()->Reset(); + return tbufferFS->GetBuffer()->ReadObjectAny(c); + } + return nullptr; + }; } -std::function getBufferHandleByClass(char const* classname) +std::function matchClassByName(std::string_view classname) { - return [c = TClass::GetClass(classname)](TBufferFile* buffer, std::string const& path) { buffer->Reset(); return buffer->ReadObjectAny(c); }; + return [c = classname](char const* attempt) -> bool { + return c == attempt; + }; } void lazyLoadFactory(std::vector& implementations, char const* specs) @@ -218,7 +232,7 @@ struct RNTupleObjectReadingCapability : o2::framework::RootObjectReadingCapabili return "/" + s; } }, .getHandle = getHandleByClass("ROOT::Experimental::RNTuple"), - .getBufferHandle = getBufferHandleByClass("ROOT::Experimental::RNTuple"), + .checkSupport = matchClassByName("ROOT::Experimental::RNTuple"), .factory = [context]() -> RootArrowFactory& { lazyLoadFactory(context->implementations, "O2FrameworkAnalysisRNTupleSupport:RNTupleObjectReadingImplementation"); return context->implementations.back(); @@ -235,7 +249,7 @@ struct TTreeObjectReadingCapability : o2::framework::RootObjectReadingCapability .name = "ttree", .lfn2objectPath = [](std::string s) { return s; }, .getHandle = getHandleByClass("TTree"), - .getBufferHandle = getBufferHandleByClass("TTree"), + .checkSupport = matchClassByName("TTree"), .factory = [context]() -> RootArrowFactory& { lazyLoadFactory(context->implementations, "O2FrameworkAnalysisTTreeSupport:TTreeObjectReadingImplementation"); return context->implementations.back(); diff --git a/Framework/Core/src/RootArrowFilesystem.cxx b/Framework/Core/src/RootArrowFilesystem.cxx index 4a1286515508c..c563866e802bb 100644 --- a/Framework/Core/src/RootArrowFilesystem.cxx +++ b/Framework/Core/src/RootArrowFilesystem.cxx @@ -25,6 +25,7 @@ #include #include #include +#include template class 
std::shared_ptr; @@ -41,22 +42,40 @@ TFileFileSystem::TFileFileSystem(TDirectoryFile* f, size_t readahead, RootObject ((TFile*)mFile)->SetReadaheadSize(50 * 1024 * 1024); } -std::shared_ptr TFileFileSystem::GetSubFilesystem(arrow::dataset::FileSource source) +std::shared_ptr TFileFileSystem::GetObjectHandler(arrow::dataset::FileSource source) { // We use a plugin to create the actual objects inside the // file, so that we can support TTree and RNTuple at the same time // without having to depend on both. for (auto& capability : mObjectFactory.capabilities) { auto objectPath = capability.lfn2objectPath(source.path()); - void* handle = capability.getHandle(mFile, objectPath); + void* handle = capability.getHandle(shared_from_this(), objectPath); if (!handle) { continue; } + return std::make_shared(handle, capability.factory().format()); + } + throw runtime_error_f("Unable to get handler for object %s", source.path().c_str()); +} + +bool TFileFileSystem::CheckSupport(arrow::dataset::FileSource source) +{ + // We use a plugin to create the actual objects inside the + // file, so that we can support TTree and RNTuple at the same time + // without having to depend on both. 
+ for (auto& capability : mObjectFactory.capabilities) { + auto objectPath = capability.lfn2objectPath(source.path()); + + void* handle = capability.getHandle(shared_from_this(), objectPath); if (handle) { - return capability.factory().getSubFilesystem(handle); + return true; } } + return false; +} +std::shared_ptr TFileFileSystem::GetSubFilesystem(arrow::dataset::FileSource source) +{ auto directory = (TDirectoryFile*)mFile->GetObjectChecked(source.path().c_str(), TClass::GetClass()); if (directory) { return std::shared_ptr(new TFileFileSystem(directory, 50 * 1024 * 1024, mObjectFactory)); @@ -233,19 +252,53 @@ arrow::Result TBufferFileFS::GetFileInfo(const std::string& return result; } -std::shared_ptr TBufferFileFS::GetSubFilesystem(arrow::dataset::FileSource source) +bool TBufferFileFS::CheckSupport(arrow::dataset::FileSource source) { // We use a plugin to create the actual objects inside the // file, so that we can support TTree and RNTuple at the same time // without having to depend on both. for (auto& capability : mObjectFactory.capabilities) { + auto objectPath = capability.lfn2objectPath(source.path()); - void* handle = capability.getBufferHandle(mBuffer, source.path()); - if (handle) { - mFilesystem = capability.factory().getSubFilesystem(handle); - break; + mBuffer->SetBufferOffset(0); + mBuffer->InitMap(); + TClass* serializedClass = mBuffer->ReadClass(); + mBuffer->SetBufferOffset(0); + mBuffer->ResetMap(); + mBuffer->Reset(); + if (!serializedClass) { + continue; + } + + bool supports = capability.checkSupport(serializedClass->GetName()); + if (supports) { + return true; + } + } + return false; +} + +std::shared_ptr TBufferFileFS::GetObjectHandler(arrow::dataset::FileSource source) +{ + // We use a plugin to create the actual objects inside the + // file, so that we can support TTree and RNTuple at the same time + // without having to depend on both. 
+ for (auto& capability : mObjectFactory.capabilities) { + auto objectPath = capability.lfn2objectPath(source.path()); + void* handle = capability.getHandle(shared_from_this(), objectPath); + if (!handle) { + continue; } + return std::make_shared(handle, capability.factory().format()); } - return mFilesystem; + throw runtime_error_f("Unable to get handler for object %s", source.path().c_str()); } + +RootObjectHandler::~RootObjectHandler() noexcept(false) +{ + if (payload) { + throw runtime_error_f("Payload not owned"); + } +} + } // namespace o2::framework diff --git a/Framework/Core/test/test_Root2ArrowTable.cxx b/Framework/Core/test/test_Root2ArrowTable.cxx index ebc854d1d6dc0..438f388ec86b5 100644 --- a/Framework/Core/test/test_Root2ArrowTable.cxx +++ b/Framework/Core/test/test_Root2ArrowTable.cxx @@ -565,12 +565,23 @@ TEST_CASE("RootTree2Dataset") { REQUIRE(success.ok()); // Let's read it back... + auto tfileFs = std::dynamic_pointer_cast(outFs); + REQUIRE(tfileFs.get()); + REQUIRE(tfileFs->GetFile()); + REQUIRE(tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree"))); arrow::dataset::FileSource source2("/DF_3", outFs); - auto newTreeFS = outFs->GetSubFilesystem(source2); - REQUIRE(format->IsSupported(source) == true); - - auto schemaOptWritten = format->Inspect(source); + REQUIRE(format->IsSupported(source2) == true); + tfileFs = std::dynamic_pointer_cast(source2.filesystem()); + REQUIRE(tfileFs.get()); + REQUIRE(tfileFs->GetFile()); + REQUIRE(tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree"))); + + auto schemaOptWritten = format->Inspect(source2); + tfileFs = std::dynamic_pointer_cast(source2.filesystem()); + REQUIRE(tfileFs.get()); + REQUIRE(tfileFs->GetFile()); + REQUIRE(tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree"))); REQUIRE(schemaOptWritten.ok()); auto schemaWritten = *schemaOptWritten; @@ -585,7 +596,7 @@ TEST_CASE("RootTree2Dataset") std::shared_ptr schema = std::make_shared(fields); 
REQUIRE(validateSchema(schema)); - auto fragmentWritten = format->MakeFragment(source, {}, *physicalSchema); + auto fragmentWritten = format->MakeFragment(source2, {}, *physicalSchema); REQUIRE(fragmentWritten.ok()); auto optionsWritten = std::make_shared(); options->dataset_schema = schema; @@ -610,7 +621,6 @@ TEST_CASE("RootTree2Dataset") // And now we can read back the RNTuple into a RecordBatch arrow::dataset::FileSource writtenRntupleSource("/rntuple", outFs); - auto newRNTupleFS = outFs->GetSubFilesystem(writtenRntupleSource); REQUIRE(rNtupleFormat->IsSupported(writtenRntupleSource) == true); From 94abc6d3f36e23d2c3fb3461689dedd0c66a5913 Mon Sep 17 00:00:00 2001 From: shahoian Date: Sun, 2 Feb 2025 21:03:44 +0100 Subject: [PATCH 0045/1914] Enhance DCAFitterN::print, allow resetting log throttlers --- Common/DCAFitter/include/DCAFitter/DCAFitterN.h | 17 +++++++++++++++-- Common/DCAFitter/test/testDCAFitterN.cxx | 6 +++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h index 6bd143eae44d6..97ea6d206247b 100644 --- a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h +++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h @@ -324,6 +324,13 @@ class DCAFitterN pnt[2] = tr.getZ(); } + GPUdi() void clearLogThrottlers() + { + mLoggerBadCov.clear(); + mLoggerBadInv.clear(); + mLoggerBadProp.clear(); + } + void setBadCovPolicy(BadCovPolicy v) { mBadCovPolicy = v; } BadCovPolicy getBadCovPolicy() const { return mBadCovPolicy; } @@ -1084,10 +1091,16 @@ template GPUd() void DCAFitterN::print() const { #ifndef GPUCA_GPUCODE_DEVICE - LOG(info) << N << "-prong vertex fitter in " << (mUseAbsDCA ? "abs." : "weighted") << " distance minimization mode"; - LOG(info) << "Bz: " << mBz << " MaxIter: " << mMaxIter << " MaxChi2: " << mMaxChi2; + LOG(info) << N << "-prong vertex fitter in " << (mUseAbsDCA ? "abs." 
: "weighted") << " distance minimization mode, collinear tracks mode: " << (mIsCollinear ? "ON" : "OFF"); + LOG(info) << "Bz: " << mBz << " MaxIter: " << mMaxIter << " MaxChi2: " << mMaxChi2 << " MatCorrType: " << int(mMatCorr); LOG(info) << "Stopping condition: Max.param change < " << mMinParamChange << " Rel.Chi2 change > " << mMinRelChi2Change; LOG(info) << "Discard candidates for : Rvtx > " << getMaxR() << " DZ between tracks > " << mMaxDZIni; + LOG(info) << "PropagateToPCA:" << mPropagateToPCA << " WeightedFinalPCA:" << mWeightedFinalPCA << " UsePropagator:" << mUsePropagator << " RefitWithMatCorr:" << mRefitWithMatCorr; + std::string rep{}; + for (int i = 0; i < mCrossings.nDCA; i++) { + rep += fmt::format("seed{}:{}/{} ", i, mTrPropDone[i], mPropFailed[i]); + } + LOG(info) << "Last call: NCand:" << mCurHyp << " from " << mCrossings.nDCA << " seeds, prop.done/failed: " << rep; #else if (mUseAbsDCA) { printf("%d-prong vertex fitter in abs. distance minimization mode\n", N); diff --git a/Common/DCAFitter/test/testDCAFitterN.cxx b/Common/DCAFitter/test/testDCAFitterN.cxx index 2f9c4d455376e..a102a0a4253e3 100644 --- a/Common/DCAFitter/test/testDCAFitterN.cxx +++ b/Common/DCAFitter/test/testDCAFitterN.cxx @@ -238,6 +238,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) BOOST_CHECK(meanDA < 0.1); BOOST_CHECK(meanDAW < 0.1); BOOST_CHECK(meanDW < 0.1); + ft.print(); } // 2 prongs vertices with collinear tracks (gamma conversion) @@ -316,6 +317,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) BOOST_CHECK(meanDA < 2.1); BOOST_CHECK(meanDAW < 2.1); BOOST_CHECK(meanDW < 2.1); + ft.print(); } // 2 prongs vertices with one of charges set to 0: Helix : Line @@ -394,6 +396,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) BOOST_CHECK(meanDA < 0.1); BOOST_CHECK(meanDAW < 0.1); BOOST_CHECK(meanDW < 0.1); + ft.print(); } // 2 prongs vertices with both of charges set to 0: Line : Line @@ -471,6 +474,7 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) BOOST_CHECK(meanDA < 0.1); BOOST_CHECK(meanDAW < 
0.1); BOOST_CHECK(meanDW < 0.1); + ft.print(); } // 3 prongs vertices @@ -547,8 +551,8 @@ BOOST_AUTO_TEST_CASE(DCAFitterNProngs) BOOST_CHECK(meanDA < 0.1); BOOST_CHECK(meanDAW < 0.1); BOOST_CHECK(meanDW < 0.1); + ft.print(); } - outStream.Close(); } From 2b593a24c48ecadacaf4376cdd1c411e0ebc5ecd Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Mon, 3 Feb 2025 08:31:24 +0100 Subject: [PATCH 0046/1914] [MCH] improved formatting of MCH mapping code The code formatting is uniformized among the different source files. This will also make future commits to the mapping code more readable. --- ...nCathodeSegmentationCreatorForSegType0.cxx | 26 +- ...nCathodeSegmentationCreatorForSegType1.cxx | 644 +++++++++++++++--- ...CathodeSegmentationCreatorForSegType10.cxx | 71 +- ...CathodeSegmentationCreatorForSegType11.cxx | 79 ++- ...CathodeSegmentationCreatorForSegType12.cxx | 86 ++- ...CathodeSegmentationCreatorForSegType13.cxx | 59 +- ...CathodeSegmentationCreatorForSegType14.cxx | 49 +- ...CathodeSegmentationCreatorForSegType15.cxx | 3 +- ...CathodeSegmentationCreatorForSegType16.cxx | 3 +- ...CathodeSegmentationCreatorForSegType17.cxx | 91 ++- ...CathodeSegmentationCreatorForSegType18.cxx | 64 +- ...CathodeSegmentationCreatorForSegType19.cxx | 54 +- ...nCathodeSegmentationCreatorForSegType2.cxx | 68 +- ...CathodeSegmentationCreatorForSegType20.cxx | 24 +- ...nCathodeSegmentationCreatorForSegType3.cxx | 78 ++- ...nCathodeSegmentationCreatorForSegType4.cxx | 68 +- ...nCathodeSegmentationCreatorForSegType5.cxx | 43 +- ...nCathodeSegmentationCreatorForSegType6.cxx | 33 +- ...nCathodeSegmentationCreatorForSegType7.cxx | 69 +- ...nCathodeSegmentationCreatorForSegType8.cxx | 79 ++- ...nCathodeSegmentationCreatorForSegType9.cxx | 69 +- 21 files changed, 1424 insertions(+), 336 deletions(-) diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx index 
58e2eaf5477b6..0e4f0bc6c5db1 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx @@ -272,12 +272,12 @@ CathodeSegmentation* createSegType0(bool isBendingPlane) /* 1BG */ {3, 16, {61, 9, 63, 14, 7, 8, 58, 62, 21, 54, 19, 60, 5, 12, 56, 2, 6, 55, 44, 51, 0, 11, 4, 46, 35, 36, 42, 17, 33, 15, 23, 32, 20, 24, 43, 22, 49, 25, 45, 27, 28, 47, 53, 41, 29, 30, 31, 40}}, /* 1BH */ {3, 16, {7, 9, 63, 61, 62, 60, 19, 21, 16, 58, 56, 57, 12, 6, 55, 54, 52, 0, 51, 48, 46, 2, 4, 42, 44, 13, 35, 11, 34, 33, 17, 32, 20, 23, 22, 43, 25, 47, 45, 49, 50, 27, 31, 29, -1, 40, 30, -1}}, /* 1BI */ {2, 16, {7, 9, 61, 62, 19, 21, 58, 56, 12, 6, 54, 52, 51, 48, 2, 4, 44, 13, 11, 34, 17, 32, 23, 22, 25, 47, 49, 50, 31, 29, 40, 30}}, - /* 1BG */ - {1, - 16, - {59, 16, 57, 10, 52, 1, 48, 3, 13, 34, 18, 37, 38, 26, 50, 39}}}, + /* 1BG */ {1, 16, {59, 16, 57, 10, 52, 1, 48, 3, 13, 34, 18, 37, 38, 26, 50, 39}}}, /* PS */ - {{0.63, 0.42}, {0.84, 0.42}, {1.26, 0.42}, {2.52, 0.42}}}; + {{0.63, 0.42}, + {0.84, 0.42}, + {1.26, 0.42}, + {2.52, 0.42}}}; } else { return new CathodeSegmentation{ 0, @@ -515,8 +515,7 @@ CathodeSegmentation* createSegType0(bool isBendingPlane) {1267, 14, 3, -0.3149999976, 85.88999939}}, /* PGT */ {/* 1NA */ {4, 16, {59, 63, 9, 61, 16, 8, 7, 14, 57, 21, 62, 58, 10, 60, 19, 54, 0, 56, 12, 5, 52, 55, 6, 51, 3, 1, 2, 11, 48, 4, 46, 44, 42, 13, 15, 17, 18, 36, 20, 35, 34, 33, 23, 32, 22, 37, 25, 38, 24, 26, 43, 28, 45, 27, 49, 47, 29, 50, 31, 30, 41, 40, 39, 53}}, - /* 1NB */ - {13, 6, {-1, -1, -1, -1, -1, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, 15, 4, 51, 42, 48, 3, 10, 56, 60, 8, 62, -1, 18, 34, 20, 22, 33, 44, 13, 52, 55, 16, 21, 63, 26, 38, 40, 41, 53, 45, 37, 36, 46, 0, 57, 59, 9, 29, 50, 30, 39, 31, 47, 24, 35, 11, 2, 12, 19, 7, 49, 27, 28, 25, 43, 23, 32, 17, 5, 54, 14, 58, 61}}, + /* 1NB */ {13, 6, {-1, -1, -1, -1, -1, 1, 6, -1, -1, -1, -1, -1, 
-1, -1, -1, 15, 4, 51, 42, 48, 3, 10, 56, 60, 8, 62, -1, 18, 34, 20, 22, 33, 44, 13, 52, 55, 16, 21, 63, 26, 38, 40, 41, 53, 45, 37, 36, 46, 0, 57, 59, 9, 29, 50, 30, 39, 31, 47, 24, 35, 11, 2, 12, 19, 7, 49, 27, 28, 25, 43, 23, 32, 17, 5, 54, 14, 58, 61}}, /* 1NC */ {10, 14, {-1, -1, -1, -1, -1, -1, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1, 63, 7, -1, -1, -1, -1, -1, -1, -1, 8, 62, 60, -1, -1, -1, -1, -1, -1, 21, 61, 59, 19, -1, -1, -1, -1, -1, 16, 56, 14, 58, 12, -1, -1, -1, -1, 57, 55, 6, 54, 1, 0, -1, -1, 10, 5, 3, 52, 2, 51, 4, 48, -1, 11, 46, 13, 44, 36, 18, 17, 42, 15, -1, 20, 23, 22, 24, 37, 38, 33, 34, 35, -1, -1, -1, -1, -1, -1, -1, 26, 25, 32, -1, -1, -1, -1, -1, -1, -1, 45, 27, 43, -1, -1, -1, -1, -1, -1, -1, 29, 47, 28, -1, -1, -1, -1, -1, -1, -1, 49, 30, 50, 31, -1, -1, -1, -1, -1, -1, 53, 41, 40, 39}}, /* 1ND */ {9, 21, {-1, -1, -1, -1, -1, -1, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, -1, -1, -1, -1, 63, 62, -1, -1, -1, -1, -1, -1, -1, 8, 61, -1, -1, -1, -1, -1, -1, -1, 21, 19, -1, -1, -1, -1, -1, -1, 60, 14, 58, -1, -1, -1, -1, -1, -1, 10, 6, 54, -1, -1, -1, -1, -1, 16, 1, 2, 51, -1, -1, -1, -1, -1, 12, 48, 13, -1, -1, -1, -1, -1, 59, 5, 11, 18, -1, -1, -1, -1, -1, 55, 46, 36, 20, -1, -1, -1, -1, 56, 52, 44, 22, 24, -1, -1, -1, -1, 0, 15, 33, 26, 43, -1, -1, -1, 57, 17, 23, 45, 28, 30, -1, -1, -1, 34, 37, 27, 49, 39, 40, -1, -1, 4, 35, 38, 29, -1, -1, -1, -1, 3, 42, 32, 25, 31, -1, -1, -1, -1, -1, -1, -1, -1, 53, -1, -1, -1, -1, -1, -1, -1, -1, 41, -1, -1, -1, -1, -1, -1, -1, -1, 50, -1, -1, -1, -1, -1, -1, -1, -1, 47, -1, -1, -1, -1}}, /* 1NE */ {8, 8, {8, 62, 63, 9, 7, 21, 19, 14, 56, 16, 59, 60, 61, 58, 57, 12, 3, 1, 55, 10, 6, 54, 0, 5, 13, 46, 48, 4, 2, 52, 51, 11, 20, 18, 36, 42, 44, 15, 17, 35, 37, 22, 33, 34, 23, 32, 24, 25, 38, 26, 45, 27, 43, 28, 47, 49, 29, 30, 50, 41, 31, 53, 39, 40}}, @@ -529,14 +528,13 @@ CathodeSegmentation* createSegType0(bool isBendingPlane) /* 1NL */ {5, 18, {-1, -1, 9, 62, 7, -1, -1, 63, 60, 61, 
-1, -1, 8, 21, 19, -1, -1, 16, 59, 14, -1, -1, 56, 58, 57, -1, -1, 10, 12, 6, -1, -1, 55, 54, 5, -1, -1, 1, 2, 0, -1, -1, 3, 51, 52, -1, 4, 48, 46, 11, -1, 13, 42, 15, 44, -1, 18, 34, 20, 17, -1, 36, 33, 32, 35, -1, 22, 38, 25, 23, -1, 26, 29, 27, 24, 37, 47, 30, 49, 43, 45, 50, 41, 39, 28, -1, -1, 53, 40, 31}}, /* 1NM */ {5, 15, {-1, 8, -1, -1, -1, -1, 60, 63, 9, 7, -1, 56, 16, 62, 61, -1, 10, 14, 19, 21, -1, 6, 57, 58, 59, -1, 1, 55, 54, 12, -1, 3, 52, 0, 5, -1, 4, 48, 51, 2, 46, 13, 15, 44, 11, 42, 18, 34, 36, 17, 37, 24, 22, 20, 35, 45, 27, 26, 32, 23, 30, 49, 29, 38, 33, 53, 31, 50, 28, 25, 41, 39, 40, 47, 43}}, /* 1NN */ {5, 14, {8, 62, 63, 9, -1, 60, 21, 61, 7, -1, 58, 16, 59, 19, -1, 14, 57, 56, 12, -1, 10, 54, 6, 0, 5, 55, 1, 52, 2, 51, 3, 4, 48, 11, 46, 13, 15, 42, 44, 17, 36, 18, 20, 35, 23, 34, 33, 22, 32, 24, 37, 38, 26, 25, 43, 27, 45, 47, 28, 49, -1, 29, 30, 31, 53, -1, 50, 41, 40, 39}}, - /* 1NG */ - {16, - 1, - {41, 50, 38, 26, 45, 47, 29, 30, 53, 27, 43, 28, 49, 31, 39, 40}}, - /* 1NH */ - {12, 1, {41, 50, 38, 26, 45, 47, 29, 30, 53, 27, 43, 28}}}, + /* 1NG */ {16, 1, {41, 50, 38, 26, 45, 47, 29, 30, 53, 27, 43, 28, 49, 31, 39, 40}}, + /* 1NH */ {12, 1, {41, 50, 38, 26, 45, 47, 29, 30, 53, 27, 43, 28}}}, /* PS */ - {{0.63, 0.42}, {0.63, 0.84}, {0.63, 1.68}, {0.63, 3.36}}}; + {{0.63, 0.42}, + {0.63, 0.84}, + {0.63, 1.68}, + {0.63, 3.36}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType0 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx index 30d597e0db552..48e7cf98469fa 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx @@ -9,7 +9,8 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
// -// This file has been generated. Do not modify it by hand or your changes might be lost. +// This file has been generated. Do not modify it by hand or your changes might +// be lost. // #include "CathodeSegmentationCreator.h" @@ -24,107 +25,558 @@ namespace impl4 CathodeSegmentation* createSegType1(bool isBendingPlane) { if (isBendingPlane) { - return new CathodeSegmentation{1, true, - /* PG */ - {{1, 8, 2, 107, -0.75}, {2, 6, 2, 95, -0.75}, {3, 4, 1, 89, -0.75}, {4, 4, 1, 83, -0.75}, {5, 4, 1, 77, -0.75}, {6, 4, 1, 71, -0.75}, {7, 4, 1, 65, -0.75}, {8, 4, 1, 59, -0.75}, {9, 4, 1, 53, -0.75}, {10, 0, 0, 50, -0.75}, {11, 0, 0, 47, -0.75}, {12, 0, 0, 44, -0.75}, {13, 0, 0, 41, -0.75}, {14, 0, 0, 38, -0.75}, {15, 0, 0, 35, -0.75}, {16, 0, 0, 32, -0.75}, {17, 0, 0, 29, -0.75}, {18, 0, 0, 26, -0.75}, {19, 36, 0, 22.25, -0.75}, {27, 9, 2, 107, 7.25}, {28, 7, 2, 95, 7.25}, {29, 5, 1, 89, 7.25}, {30, 5, 1, 83, 7.25}, {31, 5, 1, 77, 7.25}, {32, 5, 1, 71, 7.25}, {33, 5, 1, 65, 7.25}, {34, 5, 1, 59, 7.25}, {35, 5, 1, 53, 7.25}, {36, 1, 0, 50, 7.25}, {37, 1, 0, 47, 7.25}, {38, 1, 0, 44, 7.25}, {39, 1, 0, 41, 7.25}, {40, 1, 0, 38, 7.25}, {41, 1, 0, 35, 7.25}, {42, 1, 0, 32, 7.25}, {43, 1, 0, 29, 7.25}, {44, 1, 0, 26, 7.25}, {45, 1, 0, 23, 7.25}, {46, 34, 0, 17.75, 7.25}, {53, 9, 2, 107, 15.25}, {54, 7, 2, 95, 15.25}, {55, 5, 1, 89, 15.25}, {56, 5, 1, 83, 15.25}, {57, 5, 1, 77, 15.25}, {58, 5, 1, 71, 15.25}, {59, 5, 1, 65, 15.25}, {60, 5, 1, 59, 15.25}, {61, 5, 1, 53, 15.25}, {62, 1, 0, 50, 15.25}, {63, 1, 0, 47, 15.25}, {64, 1, 0, 44, 15.25}, {65, 1, 0, 41, 15.25}, {66, 1, 0, 38, 15.25}, {67, 1, 0, 35, 15.25}, {68, 1, 0, 32, 15.25}, {69, 1, 0, 29, 15.25}, {70, 1, 0, 26, 15.25}, {71, 1, 0, 23, 15.25}, {72, 1, 0, 20, 15.25}, {73, 1, 0, 17, 15.25}, {74, 35, 0, 13.25, 15.75}, {75, 37, 0, 1.25, 19.25}, {79, 25, 2, 107, 23.25}, {80, 7, 2, 95, 23.25}, {81, 3, 2, 83, 23.25}, {82, 5, 1, 77, 23.25}, {83, 5, 1, 71, 23.25}, {84, 5, 1, 65, 23.25}, {85, 5, 1, 59, 23.25}, {86, 5, 1, 53, 
23.25}, {87, 5, 1, 47, 23.25}, {88, 1, 0, 44, 23.25}, {89, 1, 0, 41, 23.25}, {90, 1, 0, 38, 23.25}, {91, 1, 0, 35, 23.25}, {92, 1, 0, 32, 23.25}, {93, 1, 0, 29, 23.25}, {94, 1, 0, 26, 23.25}, {95, 1, 0, 23, 23.25}, {96, 1, 0, 20, 23.25}, {97, 1, 0, 17, 23.25}, {98, 38, 0, 14, 23.75}, {99, 39, 0, 11, 23.25}, {100, 40, 0, 8, 23.25}, {101, 41, 0, 5, 23.25}, {102, 42, 0, 2, 23.25}, {103, 43, 0, -1, 23.25}, {105, 7, 2, 95, 31.25}, {106, 3, 2, 83, 31.25}, {107, 5, 1, 77, 31.25}, {108, 5, 1, 71, 31.25}, {109, 5, 1, 65, 31.25}, {110, 5, 1, 59, 31.25}, {111, 5, 1, 53, 31.25}, {112, 5, 1, 47, 31.25}, {113, 5, 1, 41, 31.25}, {114, 1, 0, 38, 31.25}, {115, 1, 0, 35, 31.25}, {116, 1, 0, 32, 31.25}, {117, 1, 0, 29, 31.25}, {118, 1, 0, 26, 31.25}, {119, 1, 0, 23, 31.25}, {120, 1, 0, 20, 31.25}, {121, 1, 0, 17, 31.25}, {122, 1, 0, 14, 31.25}, {123, 1, 0, 11, 31.25}, {124, 1, 0, 8, 31.25}, {125, 1, 0, 5, 31.25}, {126, 1, 0, 2, 31.25}, {127, 1, 0, -1, 31.25}, {131, 7, 2, 95, 39.25}, {132, 3, 2, 83, 39.25}, {133, 5, 1, 77, 39.25}, {134, 5, 1, 71, 39.25}, {135, 5, 1, 65, 39.25}, {136, 5, 1, 59, 39.25}, {137, 5, 1, 53, 39.25}, {138, 5, 1, 47, 39.25}, {139, 5, 1, 41, 39.25}, {140, 5, 1, 35, 39.25}, {141, 1, 0, 32, 39.25}, {142, 1, 0, 29, 39.25}, {143, 1, 0, 26, 39.25}, {144, 1, 0, 23, 39.25}, {145, 1, 0, 20, 39.25}, {146, 1, 0, 17, 39.25}, {147, 1, 0, 14, 39.25}, {148, 1, 0, 11, 39.25}, {149, 1, 0, 8, 39.25}, {150, 1, 0, 5, 39.25}, {151, 1, 0, 2, 39.25}, {152, 1, 0, -1, 39.25}, {157, 26, 2, 95, 47.25}, {158, 3, 2, 83, 47.25}, {159, 5, 1, 77, 47.25}, {160, 5, 1, 71, 47.25}, {161, 5, 1, 65, 47.25}, {162, 5, 1, 59, 47.25}, {163, 5, 1, 53, 47.25}, {164, 5, 1, 47, 47.25}, {165, 5, 1, 41, 47.25}, {166, 5, 1, 35, 47.25}, {167, 5, 1, 29, 47.25}, {168, 5, 1, 23, 47.25}, {169, 1, 0, 20, 47.25}, {170, 1, 0, 17, 47.25}, {171, 1, 0, 14, 47.25}, {172, 1, 0, 11, 47.25}, {173, 1, 0, 8, 47.25}, {174, 1, 0, 5, 47.25}, {175, 1, 0, 2, 47.25}, {176, 1, 0, -1, 47.25}, {183, 27, 2, 95, 55.25}, {184, 10, 2, 83, 
55.25}, {185, 3, 2, 71, 55.25}, {186, 5, 1, 65, 55.25}, {187, 5, 1, 59, 55.25}, {188, 5, 1, 53, 55.25}, {189, 5, 1, 47, 55.25}, {190, 5, 1, 41, 55.25}, {191, 5, 1, 35, 55.25}, {192, 5, 1, 29, 55.25}, {193, 5, 1, 23, 55.25}, {194, 5, 1, 17, 55.25}, {195, 5, 1, 11, 55.25}, {196, 5, 1, 5, 55.25}, {197, 12, 1, -1, 55.25}, {201, 10, 2, 83, 63.25}, {202, 3, 2, 71, 63.25}, {203, 3, 2, 59, 63.25}, {204, 5, 1, 53, 63.25}, {205, 5, 1, 47, 63.25}, {206, 5, 1, 41, 63.25}, {207, 5, 1, 35, 63.25}, {208, 5, 1, 29, 63.25}, {209, 5, 1, 23, 63.25}, {210, 5, 1, 17, 63.25}, {211, 5, 1, 11, 63.25}, {212, 5, 1, 5, 63.25}, {213, 12, 1, -1, 63.25}, {214, 28, 2, 83, 71.25}, {215, 3, 2, 71, 71.25}, {216, 3, 2, 59, 71.25}, {217, 3, 2, 47, 71.25}, {218, 5, 1, 41, 71.25}, {219, 5, 1, 35, 71.25}, {220, 5, 1, 29, 71.25}, {221, 5, 1, 23, 71.25}, {222, 5, 1, 17, 71.25}, {223, 5, 1, 11, 71.25}, {224, 5, 1, 5, 71.25}, {225, 12, 1, -1, 71.25}, {226, 30, 2, 71, 79.25}, {227, 24, 2, 65, 79.25}, {228, 23, 2, 59, 79.25}, {229, 33, 2, 50, 95.25}, {230, 22, 2, 53, 79.25}, {231, 21, 2, 47, 79.25}, {232, 32, 2, 41, 95.25}, {233, 20, 2, 41, 79.25}, {234, 11, 2, 35, 79.25}, {235, 11, 2, 29, 79.25}, {236, 2, 1, 26, 79.25}, {237, 2, 1, 23, 79.25}, {238, 2, 1, 20, 79.25}, {239, 2, 1, 17, 79.25}, {240, 2, 1, 14, 79.25}, {241, 2, 1, 11, 79.25}, {242, 2, 1, 8, 79.25}, {243, 2, 1, 5, 79.25}, {244, 2, 1, 2, 79.25}, {245, 2, 1, -1, 79.25}, {246, 31, 2, 35, 95.25}, {247, 18, 2, 29, 95.25}, {248, 17, 2, 23, 95.25}, {249, 16, 2, 17, 95.25}, {250, 29, 2, 11, 111.25}, {251, 15, 2, 11, 95.25}, {252, 14, 2, 5, 95.25}, {253, 19, 2, -1, 111.25}, {254, 13, 2, -1, 95.25}}, - /* PGT */ - {/* 2BA */ {4, 16, {50, 29, 39, 27, 49, 40, 28, 26, 45, 53, 30, 47, 20, 25, 31, 22, 32, 43, 41, 38, 34, 18, 24, 37, 35, 33, 23, 15, 42, 13, 17, 11, 44, 3, 36, 4, 0, 1, 2, 46, 52, 6, 48, 5, 12, 10, 55, 51, 16, 56, 57, 54, 19, 60, 59, 58, 21, 7, 62, 8, 61, 9, 63, 14}}, - /* 2BB */ {4, 16, {53, 40, 30, 28, 25, 29, 39, 27, 43, 50, 31, 24, 38, 49, 41, 
22, 32, 45, 26, 47, 17, 20, 23, 18, 13, 33, 37, 15, 34, 35, 36, 11, 2, 3, 4, 42, 44, 1, 5, 46, 48, 52, 55, 51, 0, 12, 58, 14, 6, 16, 8, 57, 10, 21, 63, 54, 56, 61, 7, 60, 19, 9, 62, 59}}, - /* 2BC */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, - /* 2BD */ {4, 16, {29, 40, 39, 31, 50, 53, 30, 41, 25, 49, 28, 26, 43, 45, 27, 24, 20, 38, 47, 22, 33, 32, 23, 18, 13, 17, 37, 15, 34, 35, 36, 11, 2, 3, 4, 42, 44, 48, 46, 5, 1, 0, 55, 51, 52, 6, 57, 54, 10, 12, 59, 14, 56, 16, 60, 58, 19, 21, 63, 8, 61, 9, 7, 62}}, - /* 2BE */ {4, 16, {40, 29, 28, 49, 53, 30, 50, 47, 25, 39, 27, 22, 45, 31, 24, 18, 20, 41, 43, 17, 33, 26, 38, 15, 13, 23, 32, 11, 35, 37, 34, 2, 3, 36, 42, 46, 44, 4, 48, 0, 1, 5, 51, 6, 52, 55, 54, 12, 10, 58, 57, 14, 56, 8, 59, 16, 21, 63, 62, 19, 61, 9, 7, 60}}, - /* 2BF */ {4, 16, {40, 29, 30, 50, 53, 39, 28, 49, 25, 31, 27, 47, 45, 41, 24, 22, 20, 26, 43, 18, 33, 23, 38, 32, 13, 37, 17, 15, 35, 36, 11, 34, 3, 4, 42, 2, 44, 5, 48, 46, 1, 55, 6, 0, 52, 58, 14, 51, 10, 8, 16, 54, 56, 7, 19, 12, 21, 63, 60, 57, 61, 9, 62, 59}}, - /* 2BG */ {4, 16, {40, 29, 30, 49, 53, 31, 41, 43, 25, 39, 50, 38, 45, 28, 26, 18, 20, 27, 47, 17, 33, 24, 23, 15, 13, 22, 32, 36, 35, 37, 34, 2, 3, 11, 42, 48, 44, 4, 46, 51, 1, 0, 5, 6, 52, 55, 54, 12, 10, 57, 14, 16, 56, 19, 59, 58, 21, 62, 61, 8, 9, 63, 7, 60}}, - /* 2BH */ {4, 16, {40, 29, 28, 39, 53, 31, 27, 41, 25, 30, 24, 26, 45, 50, 43, 47, 20, 49, 38, 23, 33, 22, 32, 18, 13, 37, 15, 17, 35, 34, 36, 11, 3, 42, 2, 4, 44, 0, 48, 46, 1, 55, 51, 5, 52, 57, 54, 6, 12, 19, 14, 10, 56, 62, 58, 16, 21, 7, 60, 59, 9, 63, 61, 8}}, - /* 2BI */ {3, 16, {29, 30, 41, 31, 50, 26, 39, 49, 47, 28, 43, 23, 27, 38, 18, 24, 32, 17, 22, 15, 11, 37, 36, 4, 34, 2, 46, 42, 48, 5, 0, 51, 6, 55, 54, 12, 57, 14, 16, 19, 59, 58, 62, 61, 8, 
63, 7, 60}}, - /* 2BJ */ {3, 16, {29, 28, 39, 31, 27, 41, 30, 24, 26, 50, 43, 47, 49, 38, 23, 22, 32, 18, 37, 15, 17, 34, 36, 11, 42, 2, 4, 0, 48, 46, 55, 51, 5, 57, 54, 6, 19, 14, 12, 62, 58, 16, 7, 60, 59, 63, 61, 8}}, - /* 2BK */ {4, 16, {40, 29, 28, 39, 53, 31, 27, 41, 25, 30, 24, 26, 45, 50, 43, 47, 20, 49, 38, 23, 33, 22, 32, 18, 13, 37, 15, 17, 35, 34, 36, 11, 3, 42, 2, 4, 44, 0, 48, 46, 1, 55, 51, 5, 52, 57, 54, 6, 12, 19, 14, 10, 56, 62, 58, 16, 21, 7, 60, 59, 9, 63, 61, 8}}, - /* 2BL */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, - /* 2BM */ {4, 16, {29, 40, 39, 31, 50, 53, 30, 41, 25, 49, 28, 26, 43, 45, 27, 24, 20, 38, 47, 22, 33, 32, 23, 18, 13, 17, 37, 15, 34, 35, 36, 11, 2, 3, 4, 42, 44, 48, 46, 5, 1, 0, 55, 51, 52, 6, 57, 54, 10, 12, 59, 14, 56, 16, 60, 58, 19, 21, 63, 8, 61, 9, 7, 62}}, - /* 2BN */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, - /* 2BO */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, - /* 2BP */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, - /* 2BQ */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 
44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, - /* 2BR */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, - /* 2BS */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, -1, 9, -1}}, - /* 2BT */ {4, 12, {24, 45, 47, 49, 43, 20, 23, 22, 18, 32, 37, 38, 33, 13, 15, 17, 11, 35, 36, 34, 3, 42, 2, 4, 44, 1, 48, 46, 0, 51, 6, 5, 52, 10, 54, 55, 12, 57, 16, 14, 56, 19, 8, 58, 59, 21, 62, 60}}, - /* 2BU */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, - /* 2BV */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, - /* 2BW */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, - /* 2BX */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, - /* 2BY */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 
32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, -1, 9, -1}}, - /* 2Ba1 */ {3, 34, {31, 39, 28, 30, 41, 26, 27, 49, 47, 24, 22, 18, 23, 38, 34, 37, 17, 4, 15, 11, 46, 36, 2, 5, 42, 51, -1, 48, 55, -1, 6, 14, -1, 54, 57, -1, 16, 58, -1, 59, 8, -1, 62, 60, -1, 63, 7, -1, 19, 9, -1, 12, 61, -1, 52, 21, -1, 1, 56, -1, 44, -1, -1, 3, -1, -1, 35, -1, -1, 13, -1, -1, 33, -1, -1, 32, -1, -1, 20, -1, -1, 43, -1, -1, 45, -1, -1, 25, -1, -1, 50, -1, -1, 53, -1, -1, 29, -1, -1, 40, -1, -1}}, - /* 2Bb1 */ {4, 16, {40, 39, 28, 41, 53, 31, 27, -1, 25, 30, 24, -1, 45, 50, 22, -1, 20, 49, 38, -1, 33, 23, 17, -1, 13, 37, 15, -1, 35, 34, 36, -1, 3, 4, 2, -1, 44, 5, 46, -1, 1, 55, 51, -1, 52, 57, 54, -1, 12, 59, 14, -1, 56, 62, 58, -1, 21, 7, 8, -1, 9, 63, 60, -1}}, - /* 2Bc1 */ {2, 25, {31, 39, 30, 28, 27, 26, 24, 47, 23, 38, 37, 18, 15, 17, 36, 34, 4, 2, 48, 46, 6, 5, 54, 55, 16, 14, 59, 58, 60, -1, 63, -1, 9, -1, 61, -1, 21, -1, 19, -1, 56, -1, 12, -1, 10, -1, 52, -1, 0, -1}}, - /* 2Bd1 */ {3, 16, {29, 28, 39, 31, 27, 41, 30, 24, 26, 50, 43, 47, 49, 38, 23, 22, 17, -1, 37, 15, -1, 34, 36, -1, 2, 46, -1, 5, 48, -1, 55, 6, -1, 57, -1, -1, 59, -1, -1, 8, -1, -1, 62, -1, -1, 63, -1, -1}}, - /* 2Be1 */ {6, 12, {24, 45, 41, 28, 30, 47, 43, 20, 49, 26, 27, 17, 18, 32, 38, 23, 37, 34, 33, 13, 36, 15, 4, -1, 11, 35, 48, 2, 5, -1, 3, 42, 6, 55, -1, -1, 44, 1, 14, 16, -1, -1, 0, 51, 8, 58, -1, -1, 52, 10, 62, -1, -1, -1, 12, 57, 63, -1, -1, -1, 56, 19, -1, -1, -1, -1, 59, 21, -1, -1, -1, -1}}, - /* 2Bf1 */ {5, 24, {40, 31, 39, 28, 26, 29, 30, 41, 47, -1, 53, 27, 49, 38, -1, 50, 24, 22, 17, -1, 25, 23, 37, 34, -1, 45, 18, 15, 2, -1, 43, 11, 36, 48, -1, 20, 4, 42, -1, -1, 32, 46, 51, -1, -1, 33, 5, 6, -1, -1, 13, 54, 55, -1, -1, 35, 58, 14, -1, -1, 3, 16, 57, -1, -1, 44, 59, -1, -1, -1, 1, 8, -1, -1, -1, 0, 60, -1, -1, -1, 52, 62, -1, -1, -1, 10, 7, -1, -1, 
-1, 12, 63, -1, -1, -1, 56, -1, -1, -1, -1, 19, -1, -1, -1, -1, 21, -1, -1, -1, -1, 61, -1, -1, -1, -1, 9, -1, -1, -1, -1}}, - /* 2Bg1 */ {3, 28, {40, 31, -1, 29, 39, -1, 53, 30, -1, 27, 28, -1, 50, 41, -1, 25, 49, -1, 24, 26, -1, 45, 47, -1, 43, 22, -1, 20, 23, -1, 18, 38, -1, 32, 37, 17, 33, 15, 34, 13, 36, 4, 11, 42, 2, 35, 46, 48, 3, 51, 5, 44, 6, 55, 1, 54, 14, 0, 57, 16, 52, 58, 59, 10, 8, 60, 12, 62, -1, 56, 7, -1, 19, 63, -1, 21, -1, -1, 61, -1, -1, 9, -1, -1}}, - /* 2Bh1 */ {4, 20, {29, 53, 30, -1, 50, 25, 41, -1, 24, 45, 26, -1, 43, 20, 49, -1, 18, 32, 47, -1, 33, 13, 22, -1, 11, 35, 38, 23, 3, 42, 17, 37, 44, 1, 34, 15, 0, 51, 4, 36, 52, 10, 46, 2, -1, 12, 5, 48, -1, 57, 55, 6, -1, 56, 14, 54, -1, 19, 58, 16, -1, 59, 60, -1, -1, 21, 62, -1, -1, 61, -1, -1, -1, 9, -1, -1, -1, 7, -1, -1}}, - /* 2Bi1 */ {6, 12, {53, 50, 25, 55, 2, 46, 24, 45, 43, 54, 48, 5, 20, 18, 32, 14, 6, -1, 33, 13, 11, 58, -1, -1, 35, 3, 42, 8, -1, -1, 44, 1, 0, 63, -1, -1, -1, 51, 52, 62, -1, -1, -1, 10, 12, -1, -1, -1, -1, 57, 56, -1, -1, -1, -1, 19, 21, -1, -1, -1, -1, 59, -1, -1, -1, -1, -1, 61, -1, -1, -1, -1}}, - /* 2Bm1 */ {7, 16, {-1, -1, -1, -1, -1, 40, 27, -1, -1, -1, -1, -1, 28, 49, -1, -1, -1, -1, -1, 39, 24, -1, -1, -1, -1, 29, 30, 47, -1, -1, -1, -1, 53, 31, 22, -1, -1, -1, -1, 50, 41, 38, -1, -1, -1, -1, 25, 26, 18, -1, -1, -1, 45, 43, 23, 17, -1, -1, -1, 20, 32, 37, 15, -1, -1, -1, 33, 13, 36, 34, -1, -1, -1, 35, 3, 4, 11, -1, -1, 44, 48, 1, 5, 42, -1, -1, 0, 51, 52, 55, 2, -1, 6, 10, 54, 12, 58, 46, 59, 57, 56, 16, 19, 8, 14, 21, 60, 61, 7, 9, 63, 62}}, - /* 2Bn1 */ {5, 16, {-1, -1, -1, -1, 27, -1, -1, -1, 31, 39, -1, -1, -1, 53, 28, -1, -1, 50, 40, 38, -1, 32, 25, 29, 26, -1, 33, 45, 30, 24, 52, 13, 43, 41, 22, 12, 44, 20, 47, 37, 19, 48, 18, 23, 17, -1, 54, 35, 15, 34, -1, 10, 3, 36, 11, -1, 21, 1, 46, 6, -1, 59, 0, 5, 55, -1, -1, 57, 14, 16, -1, 60, 56, 58, 8, -1, 9, 61, 62, 7}}, - /* 2Bp1 */ {5, 16, {-1, -1, -1, 28, 27, -1, -1, 40, 30, 26, -1, 53, 29, 39, 47, -1, 
49, 50, 31, 22, -1, 45, 25, 41, 38, -1, 20, 43, 24, 37, -1, 32, 18, 23, 15, -1, 34, 33, 17, 11, -1, 13, 35, 36, 4, -1, 3, 42, 2, 46, -1, 44, 1, 48, 5, -1, 0, 6, 55, 51, -1, 52, 12, 57, 54, 10, 16, 19, 8, 14, 56, 60, 21, 62, 58, 7, 61, 9, 63, 59}}, - /* 2Bt1 */ {18, 8, {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45, 53, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 32, 22, 27, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 42, 11, 17, 43, 50, 40, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 51, 1, 35, 33, 38, 49, 39, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 0, 3, 34, 18, 24, 28, -1, -1, -1, -1, -1, -1, -1, -1, 10, 12, 61, 56, 52, 44, 13, 20, 25, 29, -1, -1, -1, -1, -1, -1, -1, 8, 63, 7, 62, 58, 55, 46, 15, 23, 26, 30, 31, 16, 19, 21, 9, 60, 59, 57, 14, 5, 54, 48, 2, 4, 36, 37, 47, 41, -1}}, - /* 2Bu1 */ {4, 15, {20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}, - /* 2Bv1 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv2 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv3 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv4 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 
13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv5 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}}, - /* PS */ - {{0.75, 0.5}, {1.5, 0.5}, {3, 0.5}}}; + return new CathodeSegmentation{ + 1, + true, + /* PG */ + {{1, 8, 2, 107, -0.75}, + {2, 6, 2, 95, -0.75}, + {3, 4, 1, 89, -0.75}, + {4, 4, 1, 83, -0.75}, + {5, 4, 1, 77, -0.75}, + {6, 4, 1, 71, -0.75}, + {7, 4, 1, 65, -0.75}, + {8, 4, 1, 59, -0.75}, + {9, 4, 1, 53, -0.75}, + {10, 0, 0, 50, -0.75}, + {11, 0, 0, 47, -0.75}, + {12, 0, 0, 44, -0.75}, + {13, 0, 0, 41, -0.75}, + {14, 0, 0, 38, -0.75}, + {15, 0, 0, 35, -0.75}, + {16, 0, 0, 32, -0.75}, + {17, 0, 0, 29, -0.75}, + {18, 0, 0, 26, -0.75}, + {19, 36, 0, 22.25, -0.75}, + {27, 9, 2, 107, 7.25}, + {28, 7, 2, 95, 7.25}, + {29, 5, 1, 89, 7.25}, + {30, 5, 1, 83, 7.25}, + {31, 5, 1, 77, 7.25}, + {32, 5, 1, 71, 7.25}, + {33, 5, 1, 65, 7.25}, + {34, 5, 1, 59, 7.25}, + {35, 5, 1, 53, 7.25}, + {36, 1, 0, 50, 7.25}, + {37, 1, 0, 47, 7.25}, + {38, 1, 0, 44, 7.25}, + {39, 1, 0, 41, 7.25}, + {40, 1, 0, 38, 7.25}, + {41, 1, 0, 35, 7.25}, + {42, 1, 0, 32, 7.25}, + {43, 1, 0, 29, 7.25}, + {44, 1, 0, 26, 7.25}, + {45, 1, 0, 23, 7.25}, + {46, 34, 0, 17.75, 7.25}, + {53, 9, 2, 107, 15.25}, + {54, 7, 2, 95, 15.25}, + {55, 5, 1, 89, 15.25}, + {56, 5, 1, 83, 15.25}, + {57, 5, 1, 77, 15.25}, + {58, 5, 1, 71, 15.25}, + {59, 5, 1, 65, 15.25}, + {60, 5, 1, 59, 15.25}, + {61, 5, 1, 53, 15.25}, + {62, 1, 0, 50, 15.25}, + {63, 1, 0, 47, 15.25}, + {64, 1, 0, 44, 15.25}, + {65, 1, 0, 41, 15.25}, + {66, 1, 0, 38, 15.25}, + {67, 1, 0, 35, 15.25}, + {68, 1, 0, 32, 15.25}, + {69, 1, 0, 29, 15.25}, + {70, 1, 0, 26, 15.25}, + {71, 1, 0, 23, 15.25}, + {72, 1, 0, 
20, 15.25}, + {73, 1, 0, 17, 15.25}, + {74, 35, 0, 13.25, 15.75}, + {75, 37, 0, 1.25, 19.25}, + {79, 25, 2, 107, 23.25}, + {80, 7, 2, 95, 23.25}, + {81, 3, 2, 83, 23.25}, + {82, 5, 1, 77, 23.25}, + {83, 5, 1, 71, 23.25}, + {84, 5, 1, 65, 23.25}, + {85, 5, 1, 59, 23.25}, + {86, 5, 1, 53, 23.25}, + {87, 5, 1, 47, 23.25}, + {88, 1, 0, 44, 23.25}, + {89, 1, 0, 41, 23.25}, + {90, 1, 0, 38, 23.25}, + {91, 1, 0, 35, 23.25}, + {92, 1, 0, 32, 23.25}, + {93, 1, 0, 29, 23.25}, + {94, 1, 0, 26, 23.25}, + {95, 1, 0, 23, 23.25}, + {96, 1, 0, 20, 23.25}, + {97, 1, 0, 17, 23.25}, + {98, 38, 0, 14, 23.75}, + {99, 39, 0, 11, 23.25}, + {100, 40, 0, 8, 23.25}, + {101, 41, 0, 5, 23.25}, + {102, 42, 0, 2, 23.25}, + {103, 43, 0, -1, 23.25}, + {105, 7, 2, 95, 31.25}, + {106, 3, 2, 83, 31.25}, + {107, 5, 1, 77, 31.25}, + {108, 5, 1, 71, 31.25}, + {109, 5, 1, 65, 31.25}, + {110, 5, 1, 59, 31.25}, + {111, 5, 1, 53, 31.25}, + {112, 5, 1, 47, 31.25}, + {113, 5, 1, 41, 31.25}, + {114, 1, 0, 38, 31.25}, + {115, 1, 0, 35, 31.25}, + {116, 1, 0, 32, 31.25}, + {117, 1, 0, 29, 31.25}, + {118, 1, 0, 26, 31.25}, + {119, 1, 0, 23, 31.25}, + {120, 1, 0, 20, 31.25}, + {121, 1, 0, 17, 31.25}, + {122, 1, 0, 14, 31.25}, + {123, 1, 0, 11, 31.25}, + {124, 1, 0, 8, 31.25}, + {125, 1, 0, 5, 31.25}, + {126, 1, 0, 2, 31.25}, + {127, 1, 0, -1, 31.25}, + {131, 7, 2, 95, 39.25}, + {132, 3, 2, 83, 39.25}, + {133, 5, 1, 77, 39.25}, + {134, 5, 1, 71, 39.25}, + {135, 5, 1, 65, 39.25}, + {136, 5, 1, 59, 39.25}, + {137, 5, 1, 53, 39.25}, + {138, 5, 1, 47, 39.25}, + {139, 5, 1, 41, 39.25}, + {140, 5, 1, 35, 39.25}, + {141, 1, 0, 32, 39.25}, + {142, 1, 0, 29, 39.25}, + {143, 1, 0, 26, 39.25}, + {144, 1, 0, 23, 39.25}, + {145, 1, 0, 20, 39.25}, + {146, 1, 0, 17, 39.25}, + {147, 1, 0, 14, 39.25}, + {148, 1, 0, 11, 39.25}, + {149, 1, 0, 8, 39.25}, + {150, 1, 0, 5, 39.25}, + {151, 1, 0, 2, 39.25}, + {152, 1, 0, -1, 39.25}, + {157, 26, 2, 95, 47.25}, + {158, 3, 2, 83, 47.25}, + {159, 5, 1, 77, 47.25}, + {160, 5, 1, 71, 47.25}, + 
{161, 5, 1, 65, 47.25}, + {162, 5, 1, 59, 47.25}, + {163, 5, 1, 53, 47.25}, + {164, 5, 1, 47, 47.25}, + {165, 5, 1, 41, 47.25}, + {166, 5, 1, 35, 47.25}, + {167, 5, 1, 29, 47.25}, + {168, 5, 1, 23, 47.25}, + {169, 1, 0, 20, 47.25}, + {170, 1, 0, 17, 47.25}, + {171, 1, 0, 14, 47.25}, + {172, 1, 0, 11, 47.25}, + {173, 1, 0, 8, 47.25}, + {174, 1, 0, 5, 47.25}, + {175, 1, 0, 2, 47.25}, + {176, 1, 0, -1, 47.25}, + {183, 27, 2, 95, 55.25}, + {184, 10, 2, 83, 55.25}, + {185, 3, 2, 71, 55.25}, + {186, 5, 1, 65, 55.25}, + {187, 5, 1, 59, 55.25}, + {188, 5, 1, 53, 55.25}, + {189, 5, 1, 47, 55.25}, + {190, 5, 1, 41, 55.25}, + {191, 5, 1, 35, 55.25}, + {192, 5, 1, 29, 55.25}, + {193, 5, 1, 23, 55.25}, + {194, 5, 1, 17, 55.25}, + {195, 5, 1, 11, 55.25}, + {196, 5, 1, 5, 55.25}, + {197, 12, 1, -1, 55.25}, + {201, 10, 2, 83, 63.25}, + {202, 3, 2, 71, 63.25}, + {203, 3, 2, 59, 63.25}, + {204, 5, 1, 53, 63.25}, + {205, 5, 1, 47, 63.25}, + {206, 5, 1, 41, 63.25}, + {207, 5, 1, 35, 63.25}, + {208, 5, 1, 29, 63.25}, + {209, 5, 1, 23, 63.25}, + {210, 5, 1, 17, 63.25}, + {211, 5, 1, 11, 63.25}, + {212, 5, 1, 5, 63.25}, + {213, 12, 1, -1, 63.25}, + {214, 28, 2, 83, 71.25}, + {215, 3, 2, 71, 71.25}, + {216, 3, 2, 59, 71.25}, + {217, 3, 2, 47, 71.25}, + {218, 5, 1, 41, 71.25}, + {219, 5, 1, 35, 71.25}, + {220, 5, 1, 29, 71.25}, + {221, 5, 1, 23, 71.25}, + {222, 5, 1, 17, 71.25}, + {223, 5, 1, 11, 71.25}, + {224, 5, 1, 5, 71.25}, + {225, 12, 1, -1, 71.25}, + {226, 30, 2, 71, 79.25}, + {227, 24, 2, 65, 79.25}, + {228, 23, 2, 59, 79.25}, + {229, 33, 2, 50, 95.25}, + {230, 22, 2, 53, 79.25}, + {231, 21, 2, 47, 79.25}, + {232, 32, 2, 41, 95.25}, + {233, 20, 2, 41, 79.25}, + {234, 11, 2, 35, 79.25}, + {235, 11, 2, 29, 79.25}, + {236, 2, 1, 26, 79.25}, + {237, 2, 1, 23, 79.25}, + {238, 2, 1, 20, 79.25}, + {239, 2, 1, 17, 79.25}, + {240, 2, 1, 14, 79.25}, + {241, 2, 1, 11, 79.25}, + {242, 2, 1, 8, 79.25}, + {243, 2, 1, 5, 79.25}, + {244, 2, 1, 2, 79.25}, + {245, 2, 1, -1, 79.25}, + {246, 31, 2, 
35, 95.25}, + {247, 18, 2, 29, 95.25}, + {248, 17, 2, 23, 95.25}, + {249, 16, 2, 17, 95.25}, + {250, 29, 2, 11, 111.25}, + {251, 15, 2, 11, 95.25}, + {252, 14, 2, 5, 95.25}, + {253, 19, 2, -1, 111.25}, + {254, 13, 2, -1, 95.25}}, + /* PGT */ + {/* 2BA */ {4, 16, {50, 29, 39, 27, 49, 40, 28, 26, 45, 53, 30, 47, 20, 25, 31, 22, 32, 43, 41, 38, 34, 18, 24, 37, 35, 33, 23, 15, 42, 13, 17, 11, 44, 3, 36, 4, 0, 1, 2, 46, 52, 6, 48, 5, 12, 10, 55, 51, 16, 56, 57, 54, 19, 60, 59, 58, 21, 7, 62, 8, 61, 9, 63, 14}}, + /* 2BB */ {4, 16, {53, 40, 30, 28, 25, 29, 39, 27, 43, 50, 31, 24, 38, 49, 41, 22, 32, 45, 26, 47, 17, 20, 23, 18, 13, 33, 37, 15, 34, 35, 36, 11, 2, 3, 4, 42, 44, 1, 5, 46, 48, 52, 55, 51, 0, 12, 58, 14, 6, 16, 8, 57, 10, 21, 63, 54, 56, 61, 7, 60, 19, 9, 62, 59}}, + /* 2BC */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, + /* 2BD */ {4, 16, {29, 40, 39, 31, 50, 53, 30, 41, 25, 49, 28, 26, 43, 45, 27, 24, 20, 38, 47, 22, 33, 32, 23, 18, 13, 17, 37, 15, 34, 35, 36, 11, 2, 3, 4, 42, 44, 48, 46, 5, 1, 0, 55, 51, 52, 6, 57, 54, 10, 12, 59, 14, 56, 16, 60, 58, 19, 21, 63, 8, 61, 9, 7, 62}}, + /* 2BE */ {4, 16, {40, 29, 28, 49, 53, 30, 50, 47, 25, 39, 27, 22, 45, 31, 24, 18, 20, 41, 43, 17, 33, 26, 38, 15, 13, 23, 32, 11, 35, 37, 34, 2, 3, 36, 42, 46, 44, 4, 48, 0, 1, 5, 51, 6, 52, 55, 54, 12, 10, 58, 57, 14, 56, 8, 59, 16, 21, 63, 62, 19, 61, 9, 7, 60}}, + /* 2BF */ {4, 16, {40, 29, 30, 50, 53, 39, 28, 49, 25, 31, 27, 47, 45, 41, 24, 22, 20, 26, 43, 18, 33, 23, 38, 32, 13, 37, 17, 15, 35, 36, 11, 34, 3, 4, 42, 2, 44, 5, 48, 46, 1, 55, 6, 0, 52, 58, 14, 51, 10, 8, 16, 54, 56, 7, 19, 12, 21, 63, 60, 57, 61, 9, 62, 59}}, + /* 2BG */ {4, 16, {40, 29, 30, 49, 53, 31, 41, 43, 25, 39, 50, 38, 45, 28, 26, 18, 20, 27, 47, 17, 33, 24, 23, 15, 13, 22, 32, 36, 35, 37, 
34, 2, 3, 11, 42, 48, 44, 4, 46, 51, 1, 0, 5, 6, 52, 55, 54, 12, 10, 57, 14, 16, 56, 19, 59, 58, 21, 62, 61, 8, 9, 63, 7, 60}}, + /* 2BH */ {4, 16, {40, 29, 28, 39, 53, 31, 27, 41, 25, 30, 24, 26, 45, 50, 43, 47, 20, 49, 38, 23, 33, 22, 32, 18, 13, 37, 15, 17, 35, 34, 36, 11, 3, 42, 2, 4, 44, 0, 48, 46, 1, 55, 51, 5, 52, 57, 54, 6, 12, 19, 14, 10, 56, 62, 58, 16, 21, 7, 60, 59, 9, 63, 61, 8}}, + /* 2BI */ {3, 16, {29, 30, 41, 31, 50, 26, 39, 49, 47, 28, 43, 23, 27, 38, 18, 24, 32, 17, 22, 15, 11, 37, 36, 4, 34, 2, 46, 42, 48, 5, 0, 51, 6, 55, 54, 12, 57, 14, 16, 19, 59, 58, 62, 61, 8, 63, 7, 60}}, + /* 2BJ */ {3, 16, {29, 28, 39, 31, 27, 41, 30, 24, 26, 50, 43, 47, 49, 38, 23, 22, 32, 18, 37, 15, 17, 34, 36, 11, 42, 2, 4, 0, 48, 46, 55, 51, 5, 57, 54, 6, 19, 14, 12, 62, 58, 16, 7, 60, 59, 63, 61, 8}}, + /* 2BK */ {4, 16, {40, 29, 28, 39, 53, 31, 27, 41, 25, 30, 24, 26, 45, 50, 43, 47, 20, 49, 38, 23, 33, 22, 32, 18, 13, 37, 15, 17, 35, 34, 36, 11, 3, 42, 2, 4, 44, 0, 48, 46, 1, 55, 51, 5, 52, 57, 54, 6, 12, 19, 14, 10, 56, 62, 58, 16, 21, 7, 60, 59, 9, 63, 61, 8}}, + /* 2BL */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, + /* 2BM */ {4, 16, {29, 40, 39, 31, 50, 53, 30, 41, 25, 49, 28, 26, 43, 45, 27, 24, 20, 38, 47, 22, 33, 32, 23, 18, 13, 17, 37, 15, 34, 35, 36, 11, 2, 3, 4, 42, 44, 48, 46, 5, 1, 0, 55, 51, 52, 6, 57, 54, 10, 12, 59, 14, 56, 16, 60, 58, 19, 21, 63, 8, 61, 9, 7, 62}}, + /* 2BN */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, + /* 2BO */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 
32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, + /* 2BP */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, + /* 2BQ */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, + /* 2BR */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, + /* 2BS */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, -1, 9, -1}}, + /* 2BT */ {4, 12, {24, 45, 47, 49, 43, 20, 23, 22, 18, 32, 37, 38, 33, 13, 15, 17, 11, 35, 36, 34, 3, 42, 2, 4, 44, 1, 48, 46, 0, 51, 6, 5, 52, 10, 54, 55, 12, 57, 16, 14, 56, 19, 8, 58, 59, 21, 62, 60}}, + /* 2BU */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, + /* 2BV */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, + /* 2BW */ {2, 
32, {40, 39, 29, 31, 53, 30, 27, 28, 11, 41, 33, 26, 32, 49, 20, 47, 43, 22, 45, 23, 25, 38, 50, 37, 24, 17, 18, 15, 13, 34, 35, 36, 3, 4, 44, 2, 51, 46, 57, 48, 19, 5, 56, 6, 12, 55, 10, 54, 52, 14, 0, 16, 1, 58, 42, 8, 59, 60, 21, 62, 61, 63, 9, 7}}, + /* 2BX */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 50, 34, 25, 17, 24, 37, 45, 23, 43, 47, 20, 49, 18, 28, 32, 26, 33, 22, 13, 38, 11, 15, 35, 36, 3, 4, 42, 46, 44, 6, 1, 54, 0, 58, 51, 60, 52, 16, 10, 14, 12, 55, 57, 5, 56, 48, 19, 2, 59, 8, 21, 62, 61, 63, 9, 7}}, + /* 2BY */ {2, 32, {40, 39, 29, 31, 53, 30, 27, 41, 11, 34, 33, 17, 32, 37, 20, 23, 43, 47, 45, 49, 25, 28, 50, 26, 24, 22, 18, 38, 13, 15, 35, 36, 3, 4, 44, 46, 51, 6, 57, 54, 19, 58, 56, 60, 12, 16, 10, 14, 52, 55, 0, 5, 1, 48, 42, 2, 59, 8, 21, 62, 61, -1, 9, -1}}, + /* 2Ba1 */ {3, 34, {31, 39, 28, 30, 41, 26, 27, 49, 47, 24, 22, 18, 23, 38, 34, 37, 17, 4, 15, 11, 46, 36, 2, 5, 42, 51, -1, 48, 55, -1, 6, 14, -1, 54, 57, -1, 16, 58, -1, 59, 8, -1, 62, 60, -1, 63, 7, -1, 19, 9, -1, 12, 61, -1, 52, 21, -1, 1, 56, -1, 44, -1, -1, 3, -1, -1, 35, -1, -1, 13, -1, -1, 33, -1, -1, 32, -1, -1, 20, -1, -1, 43, -1, -1, 45, -1, -1, 25, -1, -1, 50, -1, -1, 53, -1, -1, 29, -1, -1, 40, -1, -1}}, + /* 2Bb1 */ {4, 16, {40, 39, 28, 41, 53, 31, 27, -1, 25, 30, 24, -1, 45, 50, 22, -1, 20, 49, 38, -1, 33, 23, 17, -1, 13, 37, 15, -1, 35, 34, 36, -1, 3, 4, 2, -1, 44, 5, 46, -1, 1, 55, 51, -1, 52, 57, 54, -1, 12, 59, 14, -1, 56, 62, 58, -1, 21, 7, 8, -1, 9, 63, 60, -1}}, + /* 2Bc1 */ {2, 25, {31, 39, 30, 28, 27, 26, 24, 47, 23, 38, 37, 18, 15, 17, 36, 34, 4, 2, 48, 46, 6, 5, 54, 55, 16, 14, 59, 58, 60, -1, 63, -1, 9, -1, 61, -1, 21, -1, 19, -1, 56, -1, 12, -1, 10, -1, 52, -1, 0, -1}}, + /* 2Bd1 */ {3, 16, {29, 28, 39, 31, 27, 41, 30, 24, 26, 50, 43, 47, 49, 38, 23, 22, 17, -1, 37, 15, -1, 34, 36, -1, 2, 46, -1, 5, 48, -1, 55, 6, -1, 57, -1, -1, 59, -1, -1, 8, -1, -1, 62, -1, -1, 63, -1, -1}}, + /* 2Be1 */ {6, 12, {24, 45, 41, 28, 30, 47, 43, 20, 49, 26, 27, 17, 18, 32, 38, 
23, 37, 34, 33, 13, 36, 15, 4, -1, 11, 35, 48, 2, 5, -1, 3, 42, 6, 55, -1, -1, 44, 1, 14, 16, -1, -1, 0, 51, 8, 58, -1, -1, 52, 10, 62, -1, -1, -1, 12, 57, 63, -1, -1, -1, 56, 19, -1, -1, -1, -1, 59, 21, -1, -1, -1, -1}}, + /* 2Bf1 */ {5, 24, {40, 31, 39, 28, 26, 29, 30, 41, 47, -1, 53, 27, 49, 38, -1, 50, 24, 22, 17, -1, 25, 23, 37, 34, -1, 45, 18, 15, 2, -1, 43, 11, 36, 48, -1, 20, 4, 42, -1, -1, 32, 46, 51, -1, -1, 33, 5, 6, -1, -1, 13, 54, 55, -1, -1, 35, 58, 14, -1, -1, 3, 16, 57, -1, -1, 44, 59, -1, -1, -1, 1, 8, -1, -1, -1, 0, 60, -1, -1, -1, 52, 62, -1, -1, -1, 10, 7, -1, -1, -1, 12, 63, -1, -1, -1, 56, -1, -1, -1, -1, 19, -1, -1, -1, -1, 21, -1, -1, -1, -1, 61, -1, -1, -1, -1, 9, -1, -1, -1, -1}}, + /* 2Bg1 */ {3, 28, {40, 31, -1, 29, 39, -1, 53, 30, -1, 27, 28, -1, 50, 41, -1, 25, 49, -1, 24, 26, -1, 45, 47, -1, 43, 22, -1, 20, 23, -1, 18, 38, -1, 32, 37, 17, 33, 15, 34, 13, 36, 4, 11, 42, 2, 35, 46, 48, 3, 51, 5, 44, 6, 55, 1, 54, 14, 0, 57, 16, 52, 58, 59, 10, 8, 60, 12, 62, -1, 56, 7, -1, 19, 63, -1, 21, -1, -1, 61, -1, -1, 9, -1, -1}}, + /* 2Bh1 */ {4, 20, {29, 53, 30, -1, 50, 25, 41, -1, 24, 45, 26, -1, 43, 20, 49, -1, 18, 32, 47, -1, 33, 13, 22, -1, 11, 35, 38, 23, 3, 42, 17, 37, 44, 1, 34, 15, 0, 51, 4, 36, 52, 10, 46, 2, -1, 12, 5, 48, -1, 57, 55, 6, -1, 56, 14, 54, -1, 19, 58, 16, -1, 59, 60, -1, -1, 21, 62, -1, -1, 61, -1, -1, -1, 9, -1, -1, -1, 7, -1, -1}}, + /* 2Bi1 */ {6, 12, {53, 50, 25, 55, 2, 46, 24, 45, 43, 54, 48, 5, 20, 18, 32, 14, 6, -1, 33, 13, 11, 58, -1, -1, 35, 3, 42, 8, -1, -1, 44, 1, 0, 63, -1, -1, -1, 51, 52, 62, -1, -1, -1, 10, 12, -1, -1, -1, -1, 57, 56, -1, -1, -1, -1, 19, 21, -1, -1, -1, -1, 59, -1, -1, -1, -1, -1, 61, -1, -1, -1, -1}}, + /* 2Bm1 */ {7, 16, {-1, -1, -1, -1, -1, 40, 27, -1, -1, -1, -1, -1, 28, 49, -1, -1, -1, -1, -1, 39, 24, -1, -1, -1, -1, 29, 30, 47, -1, -1, -1, -1, 53, 31, 22, -1, -1, -1, -1, 50, 41, 38, -1, -1, -1, -1, 25, 26, 18, -1, -1, -1, 45, 43, 23, 17, -1, -1, -1, 20, 32, 37, 15, -1, -1, -1, 33, 13, 
36, 34, -1, -1, -1, 35, 3, 4, 11, -1, -1, 44, 48, 1, 5, 42, -1, -1, 0, 51, 52, 55, 2, -1, 6, 10, 54, 12, 58, 46, 59, 57, 56, 16, 19, 8, 14, 21, 60, 61, 7, 9, 63, 62}}, + /* 2Bn1 */ {5, 16, {-1, -1, -1, -1, 27, -1, -1, -1, 31, 39, -1, -1, -1, 53, 28, -1, -1, 50, 40, 38, -1, 32, 25, 29, 26, -1, 33, 45, 30, 24, 52, 13, 43, 41, 22, 12, 44, 20, 47, 37, 19, 48, 18, 23, 17, -1, 54, 35, 15, 34, -1, 10, 3, 36, 11, -1, 21, 1, 46, 6, -1, 59, 0, 5, 55, -1, -1, 57, 14, 16, -1, 60, 56, 58, 8, -1, 9, 61, 62, 7}}, + /* 2Bp1 */ {5, 16, {-1, -1, -1, 28, 27, -1, -1, 40, 30, 26, -1, 53, 29, 39, 47, -1, 49, 50, 31, 22, -1, 45, 25, 41, 38, -1, 20, 43, 24, 37, -1, 32, 18, 23, 15, -1, 34, 33, 17, 11, -1, 13, 35, 36, 4, -1, 3, 42, 2, 46, -1, 44, 1, 48, 5, -1, 0, 6, 55, 51, -1, 52, 12, 57, 54, 10, 16, 19, 8, 14, 56, 60, 21, 62, 58, 7, 61, 9, 63, 59}}, + /* 2Bt1 */ {18, 8, {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45, 53, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 32, 22, 27, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 42, 11, 17, 43, 50, 40, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 51, 1, 35, 33, 38, 49, 39, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 0, 3, 34, 18, 24, 28, -1, -1, -1, -1, -1, -1, -1, -1, 10, 12, 61, 56, 52, 44, 13, 20, 25, 29, -1, -1, -1, -1, -1, -1, -1, 8, 63, 7, 62, 58, 55, 46, 15, 23, 26, 30, 31, 16, 19, 21, 9, 60, 59, 57, 14, 5, 54, 48, 2, 4, 36, 37, 47, 41, -1}}, + /* 2Bu1 */ {4, 15, {20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}, + /* 2Bv1 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, + /* 2Bv2 */ {4, 16, {26, 27, 29, 43, 47, 28, 
40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, + /* 2Bv3 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, + /* 2Bv4 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, + /* 2Bv5 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}}, + /* PS */ + {{0.75, 0.5}, + {1.5, 0.5}, + {3, 0.5}}}; } else { - return new CathodeSegmentation{1, false, - /* PG */ - {{1025, 10, 2, 105.125, -0.5}, {1026, 9, 2, 95.375, -0.5}, {1027, 3, 1, 89.375, -0.5}, {1028, 3, 1, 83.375, -0.5}, {1029, 3, 1, 77.375, -0.5}, {1030, 3, 1, 71.375, -0.5}, {1031, 3, 1, 65.375, -0.5}, {1032, 3, 1, 59.375, -0.5}, {1033, 3, 1, 53.375, -0.5}, {1034, 0, 0, 50.375, -0.5}, {1035, 0, 0, 47.375, -0.5}, {1036, 0, 0, 44.375, -0.5}, {1037, 0, 0, 41.375, -0.5}, {1038, 0, 0, 38.375, -0.5}, {1039, 0, 0, 35.375, -0.5}, {1040, 0, 0, 32.375, -0.5}, {1041, 0, 0, 29.375, -0.5}, {1042, 0, 0, 26.375, -0.5}, {1043, 26, 0, 22.625, -0.5}, {1051, 10, 2, 105.125, 7.5}, {1052, 9, 2, 95.375, 7.5}, {1053, 4, 1, 89.375, 7.5}, {1054, 4, 1, 83.375, 7.5}, {1055, 4, 1, 77.375, 7.5}, {1056, 4, 1, 71.375, 7.5}, {1057, 4, 1, 65.375, 7.5}, {1058, 4, 1, 59.375, 7.5}, {1059, 4, 1, 53.375, 7.5}, {1060, 1, 0, 50.375, 7.5}, {1061, 1, 0, 47.375, 7.5}, {1062, 1, 
0, 44.375, 7.5}, {1063, 1, 0, 41.375, 7.5}, {1064, 1, 0, 38.375, 7.5}, {1065, 1, 0, 35.375, 7.5}, {1066, 1, 0, 32.375, 7.5}, {1067, 1, 0, 29.375, 7.5}, {1068, 1, 0, 26.375, 7.5}, {1069, 1, 0, 23.375, 7.5}, {1070, 23, 0, 18.125, 7.5}, {1077, 10, 2, 105.125, 15.5}, {1078, 9, 2, 95.375, 15.5}, {1079, 4, 1, 89.375, 15.5}, {1080, 4, 1, 83.375, 15.5}, {1081, 4, 1, 77.375, 15.5}, {1082, 4, 1, 71.375, 15.5}, {1083, 4, 1, 65.375, 15.5}, {1084, 4, 1, 59.375, 15.5}, {1085, 4, 1, 53.375, 15.5}, {1086, 1, 0, 50.375, 15.5}, {1087, 1, 0, 47.375, 15.5}, {1088, 1, 0, 44.375, 15.5}, {1089, 1, 0, 41.375, 15.5}, {1090, 1, 0, 38.375, 15.5}, {1091, 1, 0, 35.375, 15.5}, {1092, 1, 0, 32.375, 15.5}, {1093, 1, 0, 29.375, 15.5}, {1094, 1, 0, 26.375, 15.5}, {1095, 1, 0, 23.375, 15.5}, {1096, 1, 0, 20.375, 15.5}, {1097, 1, 0, 17.375, 15.5}, {1098, 24, 0, 13.625, 16}, {1099, 30, 0, 1.625, 19.5}, {1103, 13, 2, 107.375, 23.5}, {1104, 8, 2, 95.375, 23.5}, {1105, 6, 2, 83.375, 23.5}, {1106, 4, 1, 77.375, 23.5}, {1107, 4, 1, 71.375, 23.5}, {1108, 4, 1, 65.375, 23.5}, {1109, 4, 1, 59.375, 23.5}, {1110, 4, 1, 53.375, 23.5}, {1111, 4, 1, 47.375, 23.5}, {1112, 1, 0, 44.375, 23.5}, {1113, 1, 0, 41.375, 23.5}, {1114, 1, 0, 38.375, 23.5}, {1115, 1, 0, 35.375, 23.5}, {1116, 1, 0, 32.375, 23.5}, {1117, 1, 0, 29.375, 23.5}, {1118, 1, 0, 26.375, 23.5}, {1119, 1, 0, 23.375, 23.5}, {1120, 1, 0, 20.375, 23.5}, {1121, 1, 0, 17.375, 23.5}, {1122, 31, 0, 14.375, 24}, {1123, 32, 0, 11.375, 23.5}, {1124, 33, 0, 8.375, 23.5}, {1125, 34, 0, 5.375, 23.5}, {1126, 35, 0, 2.375, 23.5}, {1127, 36, 0, -0.625, 23.5}, {1129, 8, 2, 95.375, 31.5}, {1130, 6, 2, 83.375, 31.5}, {1131, 4, 1, 77.375, 31.5}, {1132, 4, 1, 71.375, 31.5}, {1133, 4, 1, 65.375, 31.5}, {1134, 4, 1, 59.375, 31.5}, {1135, 4, 1, 53.375, 31.5}, {1136, 4, 1, 47.375, 31.5}, {1137, 4, 1, 41.375, 31.5}, {1138, 1, 0, 38.375, 31.5}, {1139, 1, 0, 35.375, 31.5}, {1140, 1, 0, 32.375, 31.5}, {1141, 1, 0, 29.375, 31.5}, {1142, 1, 0, 26.375, 31.5}, {1143, 1, 0, 23.375, 
31.5}, {1144, 1, 0, 20.375, 31.5}, {1145, 1, 0, 17.375, 31.5}, {1146, 1, 0, 14.375, 31.5}, {1147, 1, 0, 11.375, 31.5}, {1148, 1, 0, 8.375, 31.5}, {1149, 1, 0, 5.375, 31.5}, {1150, 1, 0, 2.375, 31.5}, {1151, 1, 0, -0.625, 31.5}, {1155, 14, 2, 95.375, 39.5}, {1156, 6, 2, 83.375, 39.5}, {1157, 4, 1, 77.375, 39.5}, {1158, 4, 1, 71.375, 39.5}, {1159, 4, 1, 65.375, 39.5}, {1160, 4, 1, 59.375, 39.5}, {1161, 4, 1, 53.375, 39.5}, {1162, 4, 1, 47.375, 39.5}, {1163, 4, 1, 41.375, 39.5}, {1164, 4, 1, 35.375, 39.5}, {1165, 1, 0, 32.375, 39.5}, {1166, 1, 0, 29.375, 39.5}, {1167, 1, 0, 26.375, 39.5}, {1168, 1, 0, 23.375, 39.5}, {1169, 1, 0, 20.375, 39.5}, {1170, 1, 0, 17.375, 39.5}, {1171, 1, 0, 14.375, 39.5}, {1172, 1, 0, 11.375, 39.5}, {1173, 1, 0, 8.375, 39.5}, {1174, 1, 0, 5.375, 39.5}, {1175, 1, 0, 2.375, 39.5}, {1176, 1, 0, -0.625, 39.5}, {1181, 15, 2, 95.375, 43.5}, {1182, 6, 2, 83.375, 47.5}, {1183, 4, 1, 77.375, 47.5}, {1184, 4, 1, 71.375, 47.5}, {1185, 4, 1, 65.375, 47.5}, {1186, 4, 1, 59.375, 47.5}, {1187, 4, 1, 53.375, 47.5}, {1188, 4, 1, 47.375, 47.5}, {1189, 4, 1, 41.375, 47.5}, {1190, 4, 1, 35.375, 47.5}, {1191, 4, 1, 29.375, 47.5}, {1192, 4, 1, 23.375, 47.5}, {1193, 1, 0, 20.375, 47.5}, {1194, 1, 0, 17.375, 47.5}, {1195, 1, 0, 14.375, 47.5}, {1196, 1, 0, 11.375, 47.5}, {1197, 1, 0, 8.375, 47.5}, {1198, 1, 0, 5.375, 47.5}, {1199, 1, 0, 2.375, 47.5}, {1200, 1, 0, -0.625, 47.5}, {1207, 16, 2, 94.625, 55.5}, {1208, 12, 2, 83.375, 55.5}, {1209, 7, 2, 71.375, 55.5}, {1210, 4, 1, 65.375, 55.5}, {1211, 4, 1, 59.375, 55.5}, {1212, 4, 1, 53.375, 55.5}, {1213, 4, 1, 47.375, 55.5}, {1214, 4, 1, 41.375, 55.5}, {1215, 4, 1, 35.375, 55.5}, {1216, 4, 1, 29.375, 55.5}, {1217, 4, 1, 23.375, 55.5}, {1218, 4, 1, 17.375, 55.5}, {1219, 4, 1, 11.375, 55.5}, {1220, 4, 1, 5.375, 55.5}, {1221, 4, 1, -0.625, 55.5}, {1225, 17, 2, 83.375, 63.5}, {1226, 7, 2, 71.375, 63.5}, {1227, 7, 2, 59.375, 63.5}, {1228, 4, 1, 53.375, 63.5}, {1229, 4, 1, 47.375, 63.5}, {1230, 4, 1, 41.375, 63.5}, {1231, 4, 
1, 35.375, 63.5}, {1232, 4, 1, 29.375, 63.5}, {1233, 4, 1, 23.375, 63.5}, {1234, 4, 1, 17.375, 63.5}, {1235, 4, 1, 11.375, 63.5}, {1236, 4, 1, 5.375, 63.5}, {1237, 4, 1, -0.625, 63.5}, {1238, 18, 2, 83.375, 71.5}, {1239, 7, 2, 71.375, 71.5}, {1240, 7, 2, 59.375, 71.5}, {1241, 4, 1, 53.375, 71.5}, {1242, 4, 1, 47.375, 71.5}, {1243, 4, 1, 41.375, 71.5}, {1244, 4, 1, 35.375, 71.5}, {1245, 4, 1, 29.375, 71.5}, {1246, 4, 1, 23.375, 71.5}, {1247, 4, 1, 17.375, 71.5}, {1248, 4, 1, 11.375, 71.5}, {1249, 4, 1, 5.375, 71.5}, {1250, 4, 1, -0.625, 71.5}, {1251, 19, 2, 71.375, 79.5}, {1252, 37, 2, 65.375, 79.5}, {1253, 5, 2, 59.375, 79.5}, {1254, 5, 2, 53.375, 79.5}, {1255, 5, 2, 47.375, 79.5}, {1256, 5, 2, 41.375, 79.5}, {1257, 5, 2, 35.375, 79.5}, {1258, 5, 2, 29.375, 79.5}, {1259, 2, 1, 26.375, 79.5}, {1260, 2, 1, 23.375, 79.5}, {1261, 2, 1, 20.375, 79.5}, {1262, 2, 1, 17.375, 79.5}, {1263, 2, 1, 14.375, 79.5}, {1264, 2, 1, 11.375, 79.5}, {1265, 2, 1, 8.375, 79.5}, {1266, 2, 1, 5.375, 79.5}, {1267, 2, 1, 2.375, 79.5}, {1268, 2, 1, -0.625, 79.5}, {1270, 20, 2, 35.375, 95.5}, {1271, 25, 2, 32.375, 95.5}, {1272, 27, 2, 27.875, 95.5}, {1273, 28, 2, 23.375, 95.5}, {1274, 29, 2, 18.875, 95.5}, {1275, 11, 2, 15.125, 95.5}, {1276, 11, 2, 11.375, 95.5}, {1277, 11, 2, 7.625, 95.5}, {1278, 21, 2, 3.125, 95.5}, {1279, 22, 2, -0.625, 95.5}}, - /* PGT */ - {/* 2NA */ {4, 16, {19, 61, 7, 59, 16, 9, 60, 58, 12, 21, 62, 14, 52, 56, 63, 54, 0, 10, 8, 6, 2, 51, 57, 5, 3, 1, 55, 46, 11, 44, 48, 42, 13, 35, 4, 36, 32, 33, 34, 15, 20, 38, 17, 37, 45, 43, 23, 18, 49, 25, 24, 22, 50, 28, 27, 26, 53, 39, 30, 41, 29, 40, 31, 47}}, - /* 2NB */ {4, 16, {21, 9, 62, 60, 56, 61, 7, 59, 10, 19, 63, 57, 6, 16, 8, 54, 0, 12, 58, 14, 48, 52, 55, 51, 44, 1, 5, 46, 2, 3, 4, 42, 34, 35, 36, 11, 13, 33, 37, 15, 17, 20, 23, 18, 32, 45, 26, 47, 38, 49, 41, 24, 43, 53, 31, 22, 25, 29, 39, 28, 50, 40, 30, 27}}, - /* 2NC */ {4, 16, {56, 16, 58, 57, 12, 21, 8, 14, 52, 61, 60, 55, 51, 7, 62, 6, 0, 9, 63, 5, 1, 19, 59, 
48, 42, 10, 54, 46, 3, 44, 2, 4, 35, 13, 34, 36, 11, 43, 22, 15, 33, 50, 27, 17, 32, 40, 31, 37, 18, 39, 30, 38, 20, 29, 28, 23, 45, 53, 41, 47, 25, 49, 26, 24}}, - /* 2NE */ {8, 8, {12, 21, 9, 63, 7, 8, 59, 14, 52, 19, 61, 62, 60, 16, 54, 6, 44, 10, 56, 58, 57, 55, 51, 46, 3, 1, 0, 5, 48, 2, 42, 4, 35, 33, 13, 36, 11, 34, 15, 17, 32, 20, 43, 23, 18, 22, 37, 38, 45, 25, 50, 26, 49, 24, 47, 27, 53, 29, 40, 31, 39, 30, 28, 41}}, - /* 2NF */ {8, 8, {19, 61, 9, 63, 7, 8, 59, 16, 12, 56, 21, 62, 60, 58, 57, 55, 0, 52, 10, 14, 54, 6, 5, 51, 1, 3, 44, 46, 48, 2, 42, 4, 33, 35, 13, 15, 17, 34, 11, 36, 32, 20, 43, 47, 22, 38, 37, 18, 45, 50, 53, 30, 28, 26, 24, 23, 25, 29, 40, 31, 39, 41, 27, 49}}, - /* 2NG */ {8, 8, {10, 56, 61, 7, 62, 60, 57, 55, 52, 12, 19, 9, 63, 59, 14, 51, 0, 6, 16, 21, 8, 58, 54, 5, 3, 1, 2, 44, 42, 4, 46, 48, 35, 33, 34, 13, 11, 36, 15, 17, 32, 38, 49, 53, 41, 26, 22, 37, 20, 45, 50, 40, 31, 27, 47, 18, 43, 25, 29, 39, 30, 28, 24, 23}}, - /* 2NH */ {16, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 14, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 4, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 36, 45, 24, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26, 47}}, - /* 2NI */ {16, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 14, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 4, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 36, 45, 24, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26, 47}}, - /* 2NJ */ {16, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, 42, 4, 35, 13, 32, 20, 23, 43, 22, 38, 18, 37, 33, 17, 15, 34, 11, 36, 45, 50, 53, 40, 30, 29, 31, 39, 28, 41, 27, 25, 26, 49, 24, 47}}, - /* 2NK */ {13, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 35, 13, 32, 20, 23, 43, 22, 38, 18, 37, 33, 17, 15, 45, 50, 53, 29, 30, 40, 31, 39, 28, 41, 27, 25, 26}}, - 
/* 2NL */ {14, 4, {21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, 42, 4, 32, 20, 23, 43, 22, 38, 18, 37, 33, 17, 15, 34, 11, 36, 53, 40, 30, 29, 31, 39, 28, 41, 27, 25, 26, 49, 24, 47}}, - /* 2NM */ {5, 11, {56, 7, 62, 58, 6, 12, 9, 63, 16, 5, 52, 19, 60, 14, 48, 10, 61, 8, 54, 46, 57, 21, 59, 55, 2, 35, 3, 36, 42, 4, 45, 25, 26, 24, 34, 49, 40, 31, 47, 15, 50, 29, 39, 27, 17, 43, 28, 30, 23, 18, 22, 53, 41, 38, 37}}, - /* 2NN */ {15, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 45, 24, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26}}, - /* 2Na1 */ {11, 8, {21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 19, 12, 55, 10, 54, 6, 51, 14, 57, 16, -1, 35, 20, 23, 43, 38, 37, 34, 36, 46, 5, -1, 32, 29, 30, 41, 24, 18, 15, 11, -1, -1, -1, 44, 50, 31, 27, 49, 33, 17, -1, -1, -1, -1, 2, 13, 40, 28, 47, 22, -1, -1, -1, -1, -1, 48, 42, 45, 39, 26, -1, -1, -1, -1, -1, -1, 0, 1, 3, 53, 25, -1, -1, -1, -1, -1, -1}}, - /* 2Nb1 */ {19, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 57, 54, 46, 36, 15, 33, 3, 44, 0, 52, 55, 10, 6, 51, 5, 48, 2, 4, 11, 34, 17, 37, 18, 38, -1, -1, -1, 35, 13, 32, 20, 23, 43, 22, 47, 24, 49, 26, 25, 27, 41, 28, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45, 50, 53, 40, 30, 29, 31, 39, -1, -1, -1, -1}}, - /* 2Nc1 */ {14, 6, {60, 58, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, 59, 57, 6, 48, 2, 36, -1, -1, -1, -1, -1, -1, -1, 12, 19, 21, 9, 62, 61, 7, 8, 56, 16, 14, 5, 42, 15, 3, 44, 0, 52, 55, 10, 54, 51, 1, 46, 4, 11, 17, -1, 35, 13, 32, 20, 23, 47, 24, 49, 43, 22, 38, 18, -1, -1, 45, 50, 53, 40, 30, 29, 31, 28, 41, 27, 25, -1, -1, -1}}, - /* 2Nd1 */ {11, 6, {63, 7, 62, 60, 8, 59, 56, 58, 16, 57, 14, 10, 54, 55, 6, 51, 5, 1, 48, 46, 2, -1, 43, 22, 23, 38, 18, 33, 34, 4, 42, -1, -1, 31, 39, 41, 26, 47, 17, 11, 36, -1, -1, -1, -1, 30, 27, 49, 37, 15, -1, -1, -1, -1, -1, -1, 
28, 25, 24, -1, -1, -1, -1, -1, -1, -1}}, - /* 2Ne1 */ {17, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, -1, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, 42, 4, -1, 35, 13, 32, 20, 23, 43, 47, 22, 38, 18, 37, 33, 17, 15, 34, 11, 36, 45, 50, 53, 40, 30, 29, 31, 39, 28, 41, 27, 25, 26, 49, 24, -1, -1}}, - /* 2Nf1 */ {12, 4, {61, 9, 63, 7, 62, 60, 8, 59, 56, 58, 16, 57, 19, 12, 10, 54, 6, 5, 1, 48, 46, -1, -1, -1, 25, 45, 47, 22, 23, 38, -1, -1, -1, -1, -1, -1, 29, 40, 31, 30, -1, -1, -1, -1, -1, -1, -1, -1}}, - /* 2Ng1 */ {16, 6, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, -1, -1, 35, 13, 32, 20, 23, 43, 38, 37, 17, 15, 34, -1, -1, -1, -1, -1, 45, 50, 53, 40, 30, 27, 24, 22, 18, -1, -1, -1, -1, -1, -1, -1, 29, 31, 39, 28, 25, 49, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 41, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - /* 2Nj1 */ {16, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 14, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 4, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 36, -1, 45, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26, 47}}, - /* 2Nk1 */ {6, 11, {-1, 56, 7, 62, 58, 6, -1, 12, 9, 63, 16, 5, -1, 52, 19, 60, 14, 48, -1, 10, 61, 8, 54, 46, -1, 57, 21, 59, 55, 42, 13, 35, 3, 4, 36, 11, 20, 45, 25, 26, 24, 34, -1, 50, 40, 31, 47, 15, -1, 49, 29, 39, 27, 17, -1, 43, 28, 30, 23, 18, -1, 22, 53, 41, 38, 37}}, - /* 2Nl1 */ {6, 11, {51, 56, 7, 62, 58, 6, 0, 12, 9, 63, 16, 5, 1, 52, 19, 60, 14, 48, 2, 10, 61, 8, 54, 46, 44, 57, 21, 59, 55, 42, 11, 35, 3, 4, 36, -1, 13, 45, 25, 26, 24, -1, 33, 50, 40, 31, 47, 34, 18, 49, 29, 39, 27, 15, 32, 43, 28, 30, 23, 17, 20, 22, 53, 41, 38, 37}}, - /* 2Nm1 */ {7, 16, {-1, -1, -1, -1, -1, 9, 59, -1, -1, -1, -1, -1, 60, 16, -1, -1, -1, -1, -1, 7, 57, -1, -1, -1, -1, 61, 62, 14, -1, -1, -1, -1, 21, 63, 54, -1, -1, -1, -1, 19, 8, 6, -1, -1, -1, -1, 56, 58, 51, -1, -1, -1, 12, 10, 
55, 48, -1, -1, -1, 52, 0, 5, 46, -1, -1, -1, 1, 44, 4, 2, -1, -1, -1, 3, 35, 36, 42, -1, -1, 13, 17, 33, 37, 11, -1, -1, 32, 18, 20, 23, 34, -1, 38, 43, 22, 45, 26, 15, 27, 24, 25, 49, 50, 41, 47, 53, 28, 29, 39, 40, 31, 30}}, - /* 2Nn1 */ {5, 16, {-1, -1, -1, -1, 59, -1, -1, -1, 63, 7, -1, -1, -1, 21, 60, -1, -1, 19, 9, 6, -1, 0, 56, 61, 58, -1, 1, 12, 62, 57, 20, 44, 10, 8, 54, 45, 13, 52, 14, 5, 50, 17, 51, 55, 48, -1, 22, 3, 46, 2, -1, 43, 35, 4, 42, -1, 53, 33, 15, 38, -1, 27, 32, 37, 23, -1, -1, 24, 47, 49, -1, 28, 25, 26, 41, -1, 40, 29, 30, 39}}, - /* 2No1 */ {19, 7, {35, 44, 12, 61, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 42, 10, 21, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 3, 52, 19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 32, 34, 0, 56, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 38, 18, 13, 11, 1, 58, 16, 14, 57, 54, 55, 6, 5, 51, 59, 8, 60, 62, 7, 27, 49, 24, 20, 22, 36, 37, 23, 47, 26, 15, 4, -1, -1, -1, -1, -1, -1, -1, 53, 50, 25, 43, 45, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, - /* 2Np1 */ {5, 16, {-1, -1, -1, 60, 59, -1, -1, 9, 62, 58, -1, 21, 61, 7, 14, -1, 16, 19, 63, 54, -1, 12, 56, 8, 6, -1, 52, 10, 57, 5, -1, 0, 51, 55, 46, -1, 2, 1, 48, 42, -1, 44, 3, 4, 36, -1, 35, 11, 34, 15, -1, 13, 33, 17, 37, -1, 32, 38, 23, 18, -1, 20, 45, 24, 22, 43, 49, 50, 41, 47, 25, 28, 53, 30, 26, 39, 29, 40, 31, 27}}, - /* 2Nq1 */ {6, 8, {51, 56, 7, 62, 58, 6, 0, 12, 9, 63, 16, 5, 1, 52, 19, 60, 14, 48, 44, 10, 61, 8, 54, 46, 13, 57, 21, 59, 55, 2, 33, 35, 3, 36, 42, 4, 49, 45, 25, 26, 47, 23, 50, 53, 40, 31, 30, 41}}, - /* 2Nr1 */ {6, 9, {0, 56, 7, 62, 58, 6, 1, 12, 9, 63, 16, 5, 44, 52, 19, 60, 14, 48, 35, 10, 61, 8, 54, 46, 13, 57, 21, 59, 55, 2, 32, 33, 3, 36, 42, 4, 20, 45, 25, 47, 37, 15, 43, 49, 40, 30, 27, 23, 50, 53, 29, 31, 41, 26}}, - /* 2Ns1 */ {6, 10, {0, 56, 7, 62, 58, 6, 51, 12, 9, 63, 16, 5, 1, 52, 19, 60, 14, 48, 11, 10, 61, 8, 54, 46, 44, 57, 21, 
59, 55, 2, 13, 35, 3, 36, 42, 4, 33, 45, 25, 26, 24, 34, 32, 49, 40, 31, 47, 15, 20, 50, 29, 39, 27, 17, 43, 53, 28, 30, 23, 37}}, - /* 2Nt1 */ {18, 8, {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 21, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 54, 59, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 11, 42, 48, 10, 19, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 33, 3, 1, 6, 16, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 38, 32, 35, 2, 51, 57, 60, -1, -1, -1, -1, -1, -1, -1, -1, 43, 45, 29, 25, 20, 13, 44, 52, 56, 61, -1, -1, -1, -1, -1, -1, -1, 41, 31, 39, 30, 26, 23, 15, 46, 55, 58, 62, 63, 49, 50, 53, 40, 28, 27, 24, 47, 37, 22, 17, 34, 36, 4, 5, 14, 8, -1}}, - /* 2Nu1 */ {4, 15, {52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, - /* 2Nv1 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, - /* 2Nv2 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, - /* 2Nv3 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, - /* 2Nv4 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 
23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, - /* 2Nv5 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, - /* 2Nw1 */ {10, 8, {10, 56, 61, 7, 62, 60, 57, 55, -1, -1, 52, 12, 19, 9, 63, 59, 14, 51, -1, -1, 0, 6, 16, 21, 8, 58, 54, 5, -1, -1, 3, 1, 2, 44, 42, 4, 46, 48, -1, -1, 35, 33, 34, 13, 11, 36, 15, 17, -1, -1, 32, 38, 49, 53, 41, 26, 47, 23, 18, 37, 20, 45, 50, 40, 31, 27, 24, 22, -1, -1, 43, 25, 29, 39, 30, 28, -1, -1, -1, -1}}}, - /* PS */ - {{0.75, 0.5}, {0.75, 1}, {0.75, 2}}}; + return new CathodeSegmentation{ + 1, + false, + /* PG */ + {{1025, 10, 2, 105.125, -0.5}, + {1026, 9, 2, 95.375, -0.5}, + {1027, 3, 1, 89.375, -0.5}, + {1028, 3, 1, 83.375, -0.5}, + {1029, 3, 1, 77.375, -0.5}, + {1030, 3, 1, 71.375, -0.5}, + {1031, 3, 1, 65.375, -0.5}, + {1032, 3, 1, 59.375, -0.5}, + {1033, 3, 1, 53.375, -0.5}, + {1034, 0, 0, 50.375, -0.5}, + {1035, 0, 0, 47.375, -0.5}, + {1036, 0, 0, 44.375, -0.5}, + {1037, 0, 0, 41.375, -0.5}, + {1038, 0, 0, 38.375, -0.5}, + {1039, 0, 0, 35.375, -0.5}, + {1040, 0, 0, 32.375, -0.5}, + {1041, 0, 0, 29.375, -0.5}, + {1042, 0, 0, 26.375, -0.5}, + {1043, 26, 0, 22.625, -0.5}, + {1051, 10, 2, 105.125, 7.5}, + {1052, 9, 2, 95.375, 7.5}, + {1053, 4, 1, 89.375, 7.5}, + {1054, 4, 1, 83.375, 7.5}, + {1055, 4, 1, 77.375, 7.5}, + {1056, 4, 1, 71.375, 7.5}, + {1057, 4, 1, 65.375, 7.5}, + {1058, 4, 1, 59.375, 7.5}, + {1059, 4, 1, 53.375, 7.5}, + {1060, 1, 0, 50.375, 7.5}, + {1061, 1, 0, 47.375, 7.5}, + {1062, 1, 0, 44.375, 7.5}, + {1063, 1, 0, 41.375, 7.5}, + {1064, 1, 0, 38.375, 7.5}, + {1065, 1, 0, 35.375, 7.5}, + {1066, 1, 0, 32.375, 7.5}, + {1067, 1, 0, 29.375, 7.5}, + {1068, 1, 0, 26.375, 7.5}, + {1069, 1, 0, 23.375, 7.5}, + {1070, 23, 0, 18.125, 7.5}, + {1077, 10, 2, 105.125, 15.5}, + {1078, 9, 2, 95.375, 15.5}, + 
{1079, 4, 1, 89.375, 15.5}, + {1080, 4, 1, 83.375, 15.5}, + {1081, 4, 1, 77.375, 15.5}, + {1082, 4, 1, 71.375, 15.5}, + {1083, 4, 1, 65.375, 15.5}, + {1084, 4, 1, 59.375, 15.5}, + {1085, 4, 1, 53.375, 15.5}, + {1086, 1, 0, 50.375, 15.5}, + {1087, 1, 0, 47.375, 15.5}, + {1088, 1, 0, 44.375, 15.5}, + {1089, 1, 0, 41.375, 15.5}, + {1090, 1, 0, 38.375, 15.5}, + {1091, 1, 0, 35.375, 15.5}, + {1092, 1, 0, 32.375, 15.5}, + {1093, 1, 0, 29.375, 15.5}, + {1094, 1, 0, 26.375, 15.5}, + {1095, 1, 0, 23.375, 15.5}, + {1096, 1, 0, 20.375, 15.5}, + {1097, 1, 0, 17.375, 15.5}, + {1098, 24, 0, 13.625, 16}, + {1099, 30, 0, 1.625, 19.5}, + {1103, 13, 2, 107.375, 23.5}, + {1104, 8, 2, 95.375, 23.5}, + {1105, 6, 2, 83.375, 23.5}, + {1106, 4, 1, 77.375, 23.5}, + {1107, 4, 1, 71.375, 23.5}, + {1108, 4, 1, 65.375, 23.5}, + {1109, 4, 1, 59.375, 23.5}, + {1110, 4, 1, 53.375, 23.5}, + {1111, 4, 1, 47.375, 23.5}, + {1112, 1, 0, 44.375, 23.5}, + {1113, 1, 0, 41.375, 23.5}, + {1114, 1, 0, 38.375, 23.5}, + {1115, 1, 0, 35.375, 23.5}, + {1116, 1, 0, 32.375, 23.5}, + {1117, 1, 0, 29.375, 23.5}, + {1118, 1, 0, 26.375, 23.5}, + {1119, 1, 0, 23.375, 23.5}, + {1120, 1, 0, 20.375, 23.5}, + {1121, 1, 0, 17.375, 23.5}, + {1122, 31, 0, 14.375, 24}, + {1123, 32, 0, 11.375, 23.5}, + {1124, 33, 0, 8.375, 23.5}, + {1125, 34, 0, 5.375, 23.5}, + {1126, 35, 0, 2.375, 23.5}, + {1127, 36, 0, -0.625, 23.5}, + {1129, 8, 2, 95.375, 31.5}, + {1130, 6, 2, 83.375, 31.5}, + {1131, 4, 1, 77.375, 31.5}, + {1132, 4, 1, 71.375, 31.5}, + {1133, 4, 1, 65.375, 31.5}, + {1134, 4, 1, 59.375, 31.5}, + {1135, 4, 1, 53.375, 31.5}, + {1136, 4, 1, 47.375, 31.5}, + {1137, 4, 1, 41.375, 31.5}, + {1138, 1, 0, 38.375, 31.5}, + {1139, 1, 0, 35.375, 31.5}, + {1140, 1, 0, 32.375, 31.5}, + {1141, 1, 0, 29.375, 31.5}, + {1142, 1, 0, 26.375, 31.5}, + {1143, 1, 0, 23.375, 31.5}, + {1144, 1, 0, 20.375, 31.5}, + {1145, 1, 0, 17.375, 31.5}, + {1146, 1, 0, 14.375, 31.5}, + {1147, 1, 0, 11.375, 31.5}, + {1148, 1, 0, 8.375, 31.5}, + {1149, 1, 0, 
5.375, 31.5}, + {1150, 1, 0, 2.375, 31.5}, + {1151, 1, 0, -0.625, 31.5}, + {1155, 14, 2, 95.375, 39.5}, + {1156, 6, 2, 83.375, 39.5}, + {1157, 4, 1, 77.375, 39.5}, + {1158, 4, 1, 71.375, 39.5}, + {1159, 4, 1, 65.375, 39.5}, + {1160, 4, 1, 59.375, 39.5}, + {1161, 4, 1, 53.375, 39.5}, + {1162, 4, 1, 47.375, 39.5}, + {1163, 4, 1, 41.375, 39.5}, + {1164, 4, 1, 35.375, 39.5}, + {1165, 1, 0, 32.375, 39.5}, + {1166, 1, 0, 29.375, 39.5}, + {1167, 1, 0, 26.375, 39.5}, + {1168, 1, 0, 23.375, 39.5}, + {1169, 1, 0, 20.375, 39.5}, + {1170, 1, 0, 17.375, 39.5}, + {1171, 1, 0, 14.375, 39.5}, + {1172, 1, 0, 11.375, 39.5}, + {1173, 1, 0, 8.375, 39.5}, + {1174, 1, 0, 5.375, 39.5}, + {1175, 1, 0, 2.375, 39.5}, + {1176, 1, 0, -0.625, 39.5}, + {1181, 15, 2, 95.375, 43.5}, + {1182, 6, 2, 83.375, 47.5}, + {1183, 4, 1, 77.375, 47.5}, + {1184, 4, 1, 71.375, 47.5}, + {1185, 4, 1, 65.375, 47.5}, + {1186, 4, 1, 59.375, 47.5}, + {1187, 4, 1, 53.375, 47.5}, + {1188, 4, 1, 47.375, 47.5}, + {1189, 4, 1, 41.375, 47.5}, + {1190, 4, 1, 35.375, 47.5}, + {1191, 4, 1, 29.375, 47.5}, + {1192, 4, 1, 23.375, 47.5}, + {1193, 1, 0, 20.375, 47.5}, + {1194, 1, 0, 17.375, 47.5}, + {1195, 1, 0, 14.375, 47.5}, + {1196, 1, 0, 11.375, 47.5}, + {1197, 1, 0, 8.375, 47.5}, + {1198, 1, 0, 5.375, 47.5}, + {1199, 1, 0, 2.375, 47.5}, + {1200, 1, 0, -0.625, 47.5}, + {1207, 16, 2, 94.625, 55.5}, + {1208, 12, 2, 83.375, 55.5}, + {1209, 7, 2, 71.375, 55.5}, + {1210, 4, 1, 65.375, 55.5}, + {1211, 4, 1, 59.375, 55.5}, + {1212, 4, 1, 53.375, 55.5}, + {1213, 4, 1, 47.375, 55.5}, + {1214, 4, 1, 41.375, 55.5}, + {1215, 4, 1, 35.375, 55.5}, + {1216, 4, 1, 29.375, 55.5}, + {1217, 4, 1, 23.375, 55.5}, + {1218, 4, 1, 17.375, 55.5}, + {1219, 4, 1, 11.375, 55.5}, + {1220, 4, 1, 5.375, 55.5}, + {1221, 4, 1, -0.625, 55.5}, + {1225, 17, 2, 83.375, 63.5}, + {1226, 7, 2, 71.375, 63.5}, + {1227, 7, 2, 59.375, 63.5}, + {1228, 4, 1, 53.375, 63.5}, + {1229, 4, 1, 47.375, 63.5}, + {1230, 4, 1, 41.375, 63.5}, + {1231, 4, 1, 35.375, 63.5}, + {1232, 
4, 1, 29.375, 63.5}, + {1233, 4, 1, 23.375, 63.5}, + {1234, 4, 1, 17.375, 63.5}, + {1235, 4, 1, 11.375, 63.5}, + {1236, 4, 1, 5.375, 63.5}, + {1237, 4, 1, -0.625, 63.5}, + {1238, 18, 2, 83.375, 71.5}, + {1239, 7, 2, 71.375, 71.5}, + {1240, 7, 2, 59.375, 71.5}, + {1241, 4, 1, 53.375, 71.5}, + {1242, 4, 1, 47.375, 71.5}, + {1243, 4, 1, 41.375, 71.5}, + {1244, 4, 1, 35.375, 71.5}, + {1245, 4, 1, 29.375, 71.5}, + {1246, 4, 1, 23.375, 71.5}, + {1247, 4, 1, 17.375, 71.5}, + {1248, 4, 1, 11.375, 71.5}, + {1249, 4, 1, 5.375, 71.5}, + {1250, 4, 1, -0.625, 71.5}, + {1251, 19, 2, 71.375, 79.5}, + {1252, 37, 2, 65.375, 79.5}, + {1253, 5, 2, 59.375, 79.5}, + {1254, 5, 2, 53.375, 79.5}, + {1255, 5, 2, 47.375, 79.5}, + {1256, 5, 2, 41.375, 79.5}, + {1257, 5, 2, 35.375, 79.5}, + {1258, 5, 2, 29.375, 79.5}, + {1259, 2, 1, 26.375, 79.5}, + {1260, 2, 1, 23.375, 79.5}, + {1261, 2, 1, 20.375, 79.5}, + {1262, 2, 1, 17.375, 79.5}, + {1263, 2, 1, 14.375, 79.5}, + {1264, 2, 1, 11.375, 79.5}, + {1265, 2, 1, 8.375, 79.5}, + {1266, 2, 1, 5.375, 79.5}, + {1267, 2, 1, 2.375, 79.5}, + {1268, 2, 1, -0.625, 79.5}, + {1270, 20, 2, 35.375, 95.5}, + {1271, 25, 2, 32.375, 95.5}, + {1272, 27, 2, 27.875, 95.5}, + {1273, 28, 2, 23.375, 95.5}, + {1274, 29, 2, 18.875, 95.5}, + {1275, 11, 2, 15.125, 95.5}, + {1276, 11, 2, 11.375, 95.5}, + {1277, 11, 2, 7.625, 95.5}, + {1278, 21, 2, 3.125, 95.5}, + {1279, 22, 2, -0.625, 95.5}}, + /* PGT */ + {/* 2NA */ {4, 16, {19, 61, 7, 59, 16, 9, 60, 58, 12, 21, 62, 14, 52, 56, 63, 54, 0, 10, 8, 6, 2, 51, 57, 5, 3, 1, 55, 46, 11, 44, 48, 42, 13, 35, 4, 36, 32, 33, 34, 15, 20, 38, 17, 37, 45, 43, 23, 18, 49, 25, 24, 22, 50, 28, 27, 26, 53, 39, 30, 41, 29, 40, 31, 47}}, + /* 2NB */ {4, 16, {21, 9, 62, 60, 56, 61, 7, 59, 10, 19, 63, 57, 6, 16, 8, 54, 0, 12, 58, 14, 48, 52, 55, 51, 44, 1, 5, 46, 2, 3, 4, 42, 34, 35, 36, 11, 13, 33, 37, 15, 17, 20, 23, 18, 32, 45, 26, 47, 38, 49, 41, 24, 43, 53, 31, 22, 25, 29, 39, 28, 50, 40, 30, 27}}, + /* 2NC */ {4, 16, {56, 16, 58, 57, 12, 
21, 8, 14, 52, 61, 60, 55, 51, 7, 62, 6, 0, 9, 63, 5, 1, 19, 59, 48, 42, 10, 54, 46, 3, 44, 2, 4, 35, 13, 34, 36, 11, 43, 22, 15, 33, 50, 27, 17, 32, 40, 31, 37, 18, 39, 30, 38, 20, 29, 28, 23, 45, 53, 41, 47, 25, 49, 26, 24}}, + /* 2NE */ {8, 8, {12, 21, 9, 63, 7, 8, 59, 14, 52, 19, 61, 62, 60, 16, 54, 6, 44, 10, 56, 58, 57, 55, 51, 46, 3, 1, 0, 5, 48, 2, 42, 4, 35, 33, 13, 36, 11, 34, 15, 17, 32, 20, 43, 23, 18, 22, 37, 38, 45, 25, 50, 26, 49, 24, 47, 27, 53, 29, 40, 31, 39, 30, 28, 41}}, + /* 2NF */ {8, 8, {19, 61, 9, 63, 7, 8, 59, 16, 12, 56, 21, 62, 60, 58, 57, 55, 0, 52, 10, 14, 54, 6, 5, 51, 1, 3, 44, 46, 48, 2, 42, 4, 33, 35, 13, 15, 17, 34, 11, 36, 32, 20, 43, 47, 22, 38, 37, 18, 45, 50, 53, 30, 28, 26, 24, 23, 25, 29, 40, 31, 39, 41, 27, 49}}, + /* 2NG */ {8, 8, {10, 56, 61, 7, 62, 60, 57, 55, 52, 12, 19, 9, 63, 59, 14, 51, 0, 6, 16, 21, 8, 58, 54, 5, 3, 1, 2, 44, 42, 4, 46, 48, 35, 33, 34, 13, 11, 36, 15, 17, 32, 38, 49, 53, 41, 26, 22, 37, 20, 45, 50, 40, 31, 27, 47, 18, 43, 25, 29, 39, 30, 28, 24, 23}}, + /* 2NH */ {16, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 14, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 4, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 36, 45, 24, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26, 47}}, + /* 2NI */ {16, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 14, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 4, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 36, 45, 24, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26, 47}}, + /* 2NJ */ {16, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, 42, 4, 35, 13, 32, 20, 23, 43, 22, 38, 18, 37, 33, 17, 15, 34, 11, 36, 45, 50, 53, 40, 30, 29, 31, 39, 28, 41, 27, 25, 26, 49, 24, 47}}, + /* 2NK */ {13, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 35, 13, 32, 20, 23, 43, 22, 38, 18, 37, 33, 
17, 15, 45, 50, 53, 29, 30, 40, 31, 39, 28, 41, 27, 25, 26}}, + /* 2NL */ {14, 4, {21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, 42, 4, 32, 20, 23, 43, 22, 38, 18, 37, 33, 17, 15, 34, 11, 36, 53, 40, 30, 29, 31, 39, 28, 41, 27, 25, 26, 49, 24, 47}}, + /* 2NM */ {5, 11, {56, 7, 62, 58, 6, 12, 9, 63, 16, 5, 52, 19, 60, 14, 48, 10, 61, 8, 54, 46, 57, 21, 59, 55, 2, 35, 3, 36, 42, 4, 45, 25, 26, 24, 34, 49, 40, 31, 47, 15, 50, 29, 39, 27, 17, 43, 28, 30, 23, 18, 22, 53, 41, 38, 37}}, + /* 2NN */ {15, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 45, 24, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26}}, + /* 2Na1 */ {11, 8, {21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 19, 12, 55, 10, 54, 6, 51, 14, 57, 16, -1, 35, 20, 23, 43, 38, 37, 34, 36, 46, 5, -1, 32, 29, 30, 41, 24, 18, 15, 11, -1, -1, -1, 44, 50, 31, 27, 49, 33, 17, -1, -1, -1, -1, 2, 13, 40, 28, 47, 22, -1, -1, -1, -1, -1, 48, 42, 45, 39, 26, -1, -1, -1, -1, -1, -1, 0, 1, 3, 53, 25, -1, -1, -1, -1, -1, -1}}, + /* 2Nb1 */ {19, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 57, 54, 46, 36, 15, 33, 3, 44, 0, 52, 55, 10, 6, 51, 5, 48, 2, 4, 11, 34, 17, 37, 18, 38, -1, -1, -1, 35, 13, 32, 20, 23, 43, 22, 47, 24, 49, 26, 25, 27, 41, 28, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45, 50, 53, 40, 30, 29, 31, 39, -1, -1, -1, -1}}, + /* 2Nc1 */ {14, 6, {60, 58, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, 59, 57, 6, 48, 2, 36, -1, -1, -1, -1, -1, -1, -1, 12, 19, 21, 9, 62, 61, 7, 8, 56, 16, 14, 5, 42, 15, 3, 44, 0, 52, 55, 10, 54, 51, 1, 46, 4, 11, 17, -1, 35, 13, 32, 20, 23, 47, 24, 49, 43, 22, 38, 18, -1, -1, 45, 50, 53, 40, 30, 29, 31, 28, 41, 27, 25, -1, -1, -1}}, + /* 2Nd1 */ {11, 6, {63, 7, 62, 60, 8, 59, 56, 58, 16, 57, 14, 10, 54, 55, 6, 51, 5, 1, 48, 46, 2, -1, 43, 22, 23, 38, 18, 33, 34, 4, 42, -1, -1, 31, 39, 41, 26, 47, 17, 11, 
36, -1, -1, -1, -1, 30, 27, 49, 37, 15, -1, -1, -1, -1, -1, -1, 28, 25, 24, -1, -1, -1, -1, -1, -1, -1}}, + /* 2Ne1 */ {17, 4, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, -1, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, 42, 4, -1, 35, 13, 32, 20, 23, 43, 47, 22, 38, 18, 37, 33, 17, 15, 34, 11, 36, 45, 50, 53, 40, 30, 29, 31, 39, 28, 41, 27, 25, 26, 49, 24, -1, -1}}, + /* 2Nf1 */ {12, 4, {61, 9, 63, 7, 62, 60, 8, 59, 56, 58, 16, 57, 19, 12, 10, 54, 6, 5, 1, 48, 46, -1, -1, -1, 25, 45, 47, 22, 23, 38, -1, -1, -1, -1, -1, -1, 29, 40, 31, 30, -1, -1, -1, -1, -1, -1, -1, -1}}, + /* 2Ng1 */ {16, 6, {12, 19, 21, 9, 62, 61, 63, 7, 60, 8, 59, 56, 58, 16, 57, 14, 3, 44, 0, 52, 55, 10, 54, 6, 51, 5, 1, 48, 46, 2, -1, -1, 35, 13, 32, 20, 23, 43, 38, 37, 17, 15, 34, -1, -1, -1, -1, -1, 45, 50, 53, 40, 30, 27, 24, 22, 18, -1, -1, -1, -1, -1, -1, -1, 29, 31, 39, 28, 25, 49, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 41, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + /* 2Nj1 */ {16, 4, {12, 57, 56, 16, 19, 59, 21, 60, 61, 7, 9, 63, 62, 8, 58, 14, 3, 42, 44, 2, 1, 48, 0, 51, 52, 6, 10, 54, 55, 5, 46, 4, 35, 11, 13, 34, 33, 17, 32, 18, 20, 38, 43, 22, 23, 37, 15, 36, -1, 45, 25, 49, 50, 27, 53, 28, 29, 39, 40, 31, 30, 41, 26, 47}}, + /* 2Nk1 */ {6, 11, {-1, 56, 7, 62, 58, 6, -1, 12, 9, 63, 16, 5, -1, 52, 19, 60, 14, 48, -1, 10, 61, 8, 54, 46, -1, 57, 21, 59, 55, 42, 13, 35, 3, 4, 36, 11, 20, 45, 25, 26, 24, 34, -1, 50, 40, 31, 47, 15, -1, 49, 29, 39, 27, 17, -1, 43, 28, 30, 23, 18, -1, 22, 53, 41, 38, 37}}, + /* 2Nl1 */ {6, 11, {51, 56, 7, 62, 58, 6, 0, 12, 9, 63, 16, 5, 1, 52, 19, 60, 14, 48, 2, 10, 61, 8, 54, 46, 44, 57, 21, 59, 55, 42, 11, 35, 3, 4, 36, -1, 13, 45, 25, 26, 24, -1, 33, 50, 40, 31, 47, 34, 18, 49, 29, 39, 27, 15, 32, 43, 28, 30, 23, 17, 20, 22, 53, 41, 38, 37}}, + /* 2Nm1 */ {7, 16, {-1, -1, -1, -1, -1, 9, 59, -1, -1, -1, -1, -1, 60, 16, -1, -1, -1, -1, -1, 7, 57, -1, -1, -1, -1, 61, 62, 14, -1, -1, -1, -1, 21, 63, 54, -1, -1, 
-1, -1, 19, 8, 6, -1, -1, -1, -1, 56, 58, 51, -1, -1, -1, 12, 10, 55, 48, -1, -1, -1, 52, 0, 5, 46, -1, -1, -1, 1, 44, 4, 2, -1, -1, -1, 3, 35, 36, 42, -1, -1, 13, 17, 33, 37, 11, -1, -1, 32, 18, 20, 23, 34, -1, 38, 43, 22, 45, 26, 15, 27, 24, 25, 49, 50, 41, 47, 53, 28, 29, 39, 40, 31, 30}}, + /* 2Nn1 */ {5, 16, {-1, -1, -1, -1, 59, -1, -1, -1, 63, 7, -1, -1, -1, 21, 60, -1, -1, 19, 9, 6, -1, 0, 56, 61, 58, -1, 1, 12, 62, 57, 20, 44, 10, 8, 54, 45, 13, 52, 14, 5, 50, 17, 51, 55, 48, -1, 22, 3, 46, 2, -1, 43, 35, 4, 42, -1, 53, 33, 15, 38, -1, 27, 32, 37, 23, -1, -1, 24, 47, 49, -1, 28, 25, 26, 41, -1, 40, 29, 30, 39}}, + /* 2No1 */ {19, 7, {35, 44, 12, 61, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 42, 10, 21, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 3, 52, 19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 32, 34, 0, 56, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 38, 18, 13, 11, 1, 58, 16, 14, 57, 54, 55, 6, 5, 51, 59, 8, 60, 62, 7, 27, 49, 24, 20, 22, 36, 37, 23, 47, 26, 15, 4, -1, -1, -1, -1, -1, -1, -1, 53, 50, 25, 43, 45, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + /* 2Np1 */ {5, 16, {-1, -1, -1, 60, 59, -1, -1, 9, 62, 58, -1, 21, 61, 7, 14, -1, 16, 19, 63, 54, -1, 12, 56, 8, 6, -1, 52, 10, 57, 5, -1, 0, 51, 55, 46, -1, 2, 1, 48, 42, -1, 44, 3, 4, 36, -1, 35, 11, 34, 15, -1, 13, 33, 17, 37, -1, 32, 38, 23, 18, -1, 20, 45, 24, 22, 43, 49, 50, 41, 47, 25, 28, 53, 30, 26, 39, 29, 40, 31, 27}}, + /* 2Nq1 */ {6, 8, {51, 56, 7, 62, 58, 6, 0, 12, 9, 63, 16, 5, 1, 52, 19, 60, 14, 48, 44, 10, 61, 8, 54, 46, 13, 57, 21, 59, 55, 2, 33, 35, 3, 36, 42, 4, 49, 45, 25, 26, 47, 23, 50, 53, 40, 31, 30, 41}}, + /* 2Nr1 */ {6, 9, {0, 56, 7, 62, 58, 6, 1, 12, 9, 63, 16, 5, 44, 52, 19, 60, 14, 48, 35, 10, 61, 8, 54, 46, 13, 57, 21, 59, 55, 2, 32, 33, 3, 36, 42, 4, 20, 45, 25, 47, 37, 15, 43, 49, 40, 30, 27, 23, 50, 53, 29, 31, 41, 26}}, + /* 2Ns1 */ {6, 10, {0, 56, 7, 62, 58, 6, 51, 12, 9, 
63, 16, 5, 1, 52, 19, 60, 14, 48, 11, 10, 61, 8, 54, 46, 44, 57, 21, 59, 55, 2, 13, 35, 3, 36, 42, 4, 33, 45, 25, 26, 24, 34, 32, 49, 40, 31, 47, 15, 20, 50, 29, 39, 27, 17, 43, 53, 28, 30, 23, 37}}, + /* 2Nt1 */ {18, 8, {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 21, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 54, 59, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 11, 42, 48, 10, 19, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 33, 3, 1, 6, 16, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 38, 32, 35, 2, 51, 57, 60, -1, -1, -1, -1, -1, -1, -1, -1, 43, 45, 29, 25, 20, 13, 44, 52, 56, 61, -1, -1, -1, -1, -1, -1, -1, 41, 31, 39, 30, 26, 23, 15, 46, 55, 58, 62, 63, 49, 50, 53, 40, 28, 27, 24, 47, 37, 22, 17, 34, 36, 4, 5, 14, 8, -1}}, + /* 2Nu1 */ {4, 15, {52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, + /* 2Nv1 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, + /* 2Nv2 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, + /* 2Nv3 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, + /* 2Nv4 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 
2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, + /* 2Nv5 */ {4, 16, {10, 61, 59, 58, 52, 9, 60, 14, 0, 19, 62, 54, 1, 21, 7, 46, 44, 56, 63, 6, 3, 16, 8, 5, 32, 12, 57, 36, 11, 51, 55, 42, 33, 2, 48, 15, 38, 35, 4, 37, 43, 13, 34, 18, 49, 20, 17, 22, 50, 45, 23, 47, 28, 25, 24, 26, 29, 53, 41, 27, 40, 31, 30, 39}}, + /* 2Nw1 */ {10, 8, {10, 56, 61, 7, 62, 60, 57, 55, -1, -1, 52, 12, 19, 9, 63, 59, 14, 51, -1, -1, 0, 6, 16, 21, 8, 58, 54, 5, -1, -1, 3, 1, 2, 44, 42, 4, 46, 48, -1, -1, 35, 33, 34, 13, 11, 36, 15, 17, -1, -1, 32, 38, 49, 53, 41, 26, 47, 23, 18, 37, 20, 45, 50, 40, 31, 27, 24, 22, -1, -1, 43, 25, 29, 39, 30, 28, -1, -1, -1, -1}}}, + /* PS */ + {{0.75, 0.5}, + {0.75, 1}, + {0.75, 2}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType1 { public: - CathodeSegmentationCreatorRegisterCreateSegType1() { registerCathodeSegmentationCreator(1, createSegType1); } + CathodeSegmentationCreatorRegisterCreateSegType1() + { + registerCathodeSegmentationCreator(1, createSegType1); + } } aCathodeSegmentationCreatorRegisterCreateSegType1; } // namespace impl4 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType10.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType10.cxx index 7aa9994c436f4..b616749501279 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType10.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType10.cxx @@ -29,7 +29,56 @@ CathodeSegmentation* createSegType10(bool isBendingPlane) 10, true, /* PG */ - {{1, 3, 0, -100, -20}, {2, 12, 0, -97.5, -20}, {3, 6, 0, -92.5, -20}, {4, 13, 0, -90, -20}, {5, 4, 0, -85, -20}, {6, 3, 0, -80, -20}, {7, 12, 0, -77.5, -20}, {8, 6, 0, -72.5, -20}, {9, 13, 0, -70, -20}, {10, 4, 0, -65, -20}, {18, 3, 1, -60, -20}, {19, 12, 1, -55, -20}, {20, 6, 1, -45, -20}, {21, 13, 1, -40, -20}, {22, 4, 1, -30, -20}, {103, 
8, 2, 60, -20}, {104, 8, 2, 80, -20}, {107, 8, 2, 20, -20}, {108, 8, 2, 40, -20}, {112, 3, 1, -20, -20}, {113, 12, 1, -15, -20}, {114, 6, 1, -5, -20}, {115, 13, 1, 0, -20}, {116, 4, 1, 10, -20}, {201, 0, 2, 80, -4}, {202, 7, 2, 70, 4}, {203, 5, 2, 60, -4}, {206, 0, 2, 40, -4}, {207, 7, 2, 30, 4}, {208, 5, 2, 20, -4}, {211, 2, 1, 10, 0}, {212, 11, 1, 0, 0}, {213, 9, 1, -5, 4}, {214, 10, 1, -15, 0}, {215, 1, 1, -20, 0}, {308, 2, 0, -65, 0}, {309, 11, 0, -70, 0}, {310, 9, 0, -72.5, 4}, {311, 10, 0, -77.5, 0}, {312, 1, 0, -80, 0}, {313, 2, 0, -85, 0}, {314, 11, 0, -90, 0}, {315, 9, 0, -92.5, 4}, {316, 10, 0, -97.5, 0}, {317, 1, 0, -100, 0}, {321, 2, 1, -30, 0}, {322, 11, 1, -40, 0}, {323, 9, 1, -45, 4}, {324, 10, 1, -55, 0}, {325, 1, 1, -60, 0}}, + {{1, 3, 0, -100, -20}, + {2, 12, 0, -97.5, -20}, + {3, 6, 0, -92.5, -20}, + {4, 13, 0, -90, -20}, + {5, 4, 0, -85, -20}, + {6, 3, 0, -80, -20}, + {7, 12, 0, -77.5, -20}, + {8, 6, 0, -72.5, -20}, + {9, 13, 0, -70, -20}, + {10, 4, 0, -65, -20}, + {18, 3, 1, -60, -20}, + {19, 12, 1, -55, -20}, + {20, 6, 1, -45, -20}, + {21, 13, 1, -40, -20}, + {22, 4, 1, -30, -20}, + {103, 8, 2, 60, -20}, + {104, 8, 2, 80, -20}, + {107, 8, 2, 20, -20}, + {108, 8, 2, 40, -20}, + {112, 3, 1, -20, -20}, + {113, 12, 1, -15, -20}, + {114, 6, 1, -5, -20}, + {115, 13, 1, 0, -20}, + {116, 4, 1, 10, -20}, + {201, 0, 2, 80, -4}, + {202, 7, 2, 70, 4}, + {203, 5, 2, 60, -4}, + {206, 0, 2, 40, -4}, + {207, 7, 2, 30, 4}, + {208, 5, 2, 20, -4}, + {211, 2, 1, 10, 0}, + {212, 11, 1, 0, 0}, + {213, 9, 1, -5, 4}, + {214, 10, 1, -15, 0}, + {215, 1, 1, -20, 0}, + {308, 2, 0, -65, 0}, + {309, 11, 0, -70, 0}, + {310, 9, 0, -72.5, 4}, + {311, 10, 0, -77.5, 0}, + {312, 1, 0, -80, 0}, + {313, 2, 0, -85, 0}, + {314, 11, 0, -90, 0}, + {315, 9, 0, -92.5, 4}, + {316, 10, 0, -97.5, 0}, + {317, 1, 0, -100, 0}, + {321, 2, 1, -30, 0}, + {322, 11, 1, -40, 0}, + {323, 9, 1, -45, 4}, + {324, 10, 1, -55, 0}, + {325, 1, 1, -60, 0}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 
34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, @@ -44,19 +93,11 @@ CathodeSegmentation* createSegType10(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, 
-1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}, {10, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 10, @@ -120,7 +161,9 @@ CathodeSegmentation* createSegType10(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 
44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType10 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType11.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType11.cxx index 376447f9c78a2..2d268592f8574 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType11.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType11.cxx @@ -29,10 +29,75 @@ CathodeSegmentation* createSegType11(bool isBendingPlane) 11, true, /* PG */ - {{1, 10, 0, 80, -20}, {9, 17, 0, 40, -20}, {10, 25, 0, 42.5, -20}, {11, 19, 0, 47.5, -20}, {12, 26, 0, 50, -20}, {13, 18, 0, 55, -20}, {14, 17, 0, 60, -20}, {15, 25, 0, 62.5, -20}, {16, 19, 0, 67.5, -20}, {17, 26, 0, 70, -20}, {18, 18, 0, 75, -20}, {22, 17, 1, 0, -20}, {23, 25, 1, 5, -20}, {24, 19, 1, 15, -20}, {25, 26, 1, 20, -20}, {26, 18, 1, 30, -20}, {101, 13, 2, -120, -20}, {102, 21, 2, -110, -20}, {103, 14, 2, -100, -20}, {106, 13, 2, -80, -20}, {107, 21, 2, -70, -20}, {108, 14, 2, -60, -20}, {111, 17, 1, -40, -20}, {112, 25, 1, -35, -20}, {113, 19, 1, -25, -20}, {114, 26, 1, -20, -20}, {115, 18, 1, -10, -20}, {203, 20, 2, -100, 4}, {204, 20, 2, -120, 4}, {207, 20, 2, -60, 4}, {208, 20, 2, -80, 4}, {212, 16, 1, -10, 0}, {213, 24, 1, -20, 0}, {214, 22, 1, -25, 4}, {215, 23, 1, -35, 0}, {216, 15, 1, -40, 0}, {311, 16, 0, 75, 0}, {312, 24, 0, 70, 0}, {313, 22, 0, 67.5, 4}, {314, 23, 0, 62.5, 0}, {315, 15, 0, 60, 0}, {316, 16, 0, 55, 0}, {317, 24, 0, 50, 0}, {318, 22, 0, 47.5, 4}, {319, 23, 0, 42.5, 0}, {320, 15, 0, 40, 0}, {328, 16, 1, 30, 0}, {329, 24, 1, 20, 0}, {330, 22, 1, 15, 4}, {331, 23, 1, 5, 0}, {332, 15, 1, 0, 0}, {401, 11, 0, 112.5, 0.5}, {402, 12, 0, 
110, 0}, {403, 0, 0, 105, -1.5}, {404, 1, 0, 102.5, -2.5}, {405, 2, 0, 100, -3.5}, {406, 3, 0, 95, -6.5}, {407, 4, 0, 95, -5}, {407, 27, 0, 95, -8.5}, {407, 28, 0, 92.5, -9.5}, {408, 5, 0, 92.5, -8.5}, {408, 29, 0, 90, -11}, {409, 6, 0, 90, -10}, {410, 7, 0, 87.5, -12}, {411, 7, 0, 85, -12}, {412, 8, 0, 82.5, -11.5}, {413, 9, 0, 80, -11.5}}, + {{1, 10, 0, 80, -20}, + {9, 17, 0, 40, -20}, + {10, 25, 0, 42.5, -20}, + {11, 19, 0, 47.5, -20}, + {12, 26, 0, 50, -20}, + {13, 18, 0, 55, -20}, + {14, 17, 0, 60, -20}, + {15, 25, 0, 62.5, -20}, + {16, 19, 0, 67.5, -20}, + {17, 26, 0, 70, -20}, + {18, 18, 0, 75, -20}, + {22, 17, 1, 0, -20}, + {23, 25, 1, 5, -20}, + {24, 19, 1, 15, -20}, + {25, 26, 1, 20, -20}, + {26, 18, 1, 30, -20}, + {101, 13, 2, -120, -20}, + {102, 21, 2, -110, -20}, + {103, 14, 2, -100, -20}, + {106, 13, 2, -80, -20}, + {107, 21, 2, -70, -20}, + {108, 14, 2, -60, -20}, + {111, 17, 1, -40, -20}, + {112, 25, 1, -35, -20}, + {113, 19, 1, -25, -20}, + {114, 26, 1, -20, -20}, + {115, 18, 1, -10, -20}, + {203, 20, 2, -100, 4}, + {204, 20, 2, -120, 4}, + {207, 20, 2, -60, 4}, + {208, 20, 2, -80, 4}, + {212, 16, 1, -10, 0}, + {213, 24, 1, -20, 0}, + {214, 22, 1, -25, 4}, + {215, 23, 1, -35, 0}, + {216, 15, 1, -40, 0}, + {311, 16, 0, 75, 0}, + {312, 24, 0, 70, 0}, + {313, 22, 0, 67.5, 4}, + {314, 23, 0, 62.5, 0}, + {315, 15, 0, 60, 0}, + {316, 16, 0, 55, 0}, + {317, 24, 0, 50, 0}, + {318, 22, 0, 47.5, 4}, + {319, 23, 0, 42.5, 0}, + {320, 15, 0, 40, 0}, + {328, 16, 1, 30, 0}, + {329, 24, 1, 20, 0}, + {330, 22, 1, 15, 4}, + {331, 23, 1, 5, 0}, + {332, 15, 1, 0, 0}, + {401, 11, 0, 112.5, 0.5}, + {402, 12, 0, 110, 0}, + {403, 0, 0, 105, -1.5}, + {404, 1, 0, 102.5, -2.5}, + {405, 2, 0, 100, -3.5}, + {406, 3, 0, 95, -6.5}, + {407, 4, 0, 95, -5}, + {407, 27, 0, 95, -8.5}, + {407, 28, 0, 92.5, -9.5}, + {408, 5, 0, 92.5, -8.5}, + {408, 29, 0, 90, -11}, + {409, 6, 0, 90, -10}, + {410, 7, 0, 87.5, -12}, + {411, 7, 0, 85, -12}, + {412, 8, 0, 82.5, -11.5}, + {413, 9, 0, 80, 
-11.5}}, /* PGT */ - {/* E10 */ { - 4, 43, {-1, 34, -1, -1, -1, 37, -1, -1, -1, 32, 18, -1, -1, 39, -1, -1, -1, 40, -1, -1, -1, 42, -1, -1, -1, 48, -1, -1, -1, 49, -1, -1, -1, 52, -1, -1, -1, 54, -1, -1, -1, 19, -1, -1, -1, 12, -1, -1, -1, 14, -1, -1, 63, 11, -1, -1, 62, 13, -1, -1, 61, 7, -1, -1, 60, 8, -1, -1, 53, 5, -1, -1, 51, 3, -1, -1, 50, 0, -1, -1, 47, 4, -1, -1, 45, 9, -1, -1, 44, 17, -1, -1, 33, 15, -1, -1, 36, 10, -1, -1, 41, 2, -1, -1, 59, 55, -1, -1, -1, 43, 22, -1, -1, 38, 6, -1, -1, 57, 58, -1, -1, -1, 46, 25, -1, -1, 35, 1, -1, -1, 56, 16, -1, -1, -1, 21, -1, -1, -1, 20, -1, -1, -1, 23, -1, -1, -1, 24, -1, -1, -1, 26, -1, -1, -1, 27, -1, -1, -1, 28, -1, -1, -1, 29, -1, -1, -1, 30, -1, -1, -1, 31}}, + {/* E10 */ {4, 43, {-1, 34, -1, -1, -1, 37, -1, -1, -1, 32, 18, -1, -1, 39, -1, -1, -1, 40, -1, -1, -1, 42, -1, -1, -1, 48, -1, -1, -1, 49, -1, -1, -1, 52, -1, -1, -1, 54, -1, -1, -1, 19, -1, -1, -1, 12, -1, -1, -1, 14, -1, -1, 63, 11, -1, -1, 62, 13, -1, -1, 61, 7, -1, -1, 60, 8, -1, -1, 53, 5, -1, -1, 51, 3, -1, -1, 50, 0, -1, -1, 47, 4, -1, -1, 45, 9, -1, -1, 44, 17, -1, -1, 33, 15, -1, -1, 36, 10, -1, -1, 41, 2, -1, -1, 59, 55, -1, -1, -1, 43, 22, -1, -1, 38, 6, -1, -1, 57, 58, -1, -1, -1, 46, 25, -1, -1, 35, 1, -1, -1, 56, 16, -1, -1, -1, 21, -1, -1, -1, 20, -1, -1, -1, 23, -1, -1, -1, 24, -1, -1, -1, 26, -1, -1, -1, 27, -1, -1, -1, 28, -1, -1, -1, 29, -1, -1, -1, 30, -1, -1, -1, 31}}, /* E11 */ {4, 45, {-1, 12, -1, -1, -1, 14, -1, -1, -1, 11, -1, -1, -1, 13, -1, -1, -1, 7, -1, -1, -1, 2, -1, -1, -1, 6, -1, -1, -1, 1, -1, -1, -1, 3, -1, -1, -1, 0, -1, -1, 51, 4, -1, -1, 50, 9, -1, -1, 41, 18, -1, -1, 38, 22, -1, -1, 35, 25, -1, -1, 36, -1, -1, -1, 33, -1, -1, -1, 34, -1, -1, -1, 37, -1, -1, -1, 32, -1, -1, -1, 43, -1, -1, -1, 46, -1, -1, -1, 48, -1, -1, -1, 49, -1, -1, -1, 52, -1, -1, -1, 21, -1, -1, -1, 16, -1, -1, -1, 19, -1, -1, -1, 54, -1, -1, -1, 39, 10, -1, -1, 47, 8, -1, -1, -1, 55, -1, -1, -1, 40, 15, -1, -1, 45, 5, -1, -1, -1, 58, -1, -1, 
-1, 42, 17, -1, -1, 44, 20, -1, -1, 53, 23, -1, -1, 56, 24, -1, -1, 57, 26, -1, -1, 59, 27, -1, -1, 60, 28, -1, -1, 61, 29, -1, -1, 62, 30, -1, -1, 63, 31}}, /* E12 */ {3, 47, {56, 5, -1, 53, 2, -1, 51, 6, -1, 50, 1, -1, 47, 3, -1, 41, 0, -1, 38, 10, -1, 35, 15, -1, 36, 17, -1, 33, 18, -1, 34, 22, -1, 37, 25, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 30, -1, -1, 29, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 8, -1, -1, 7, -1, -1, 14, -1, -1, 12, -1, -1, 28, -1, -1, 55, -1, -1, 32, 4, -1, 45, 11, -1, -1, 27, -1, -1, 58, 9, -1, 39, 13, -1, 44, 26, -1, -1, 31, -1, -1, 40, -1, -1, 57, -1, -1, 59, -1, -1, 60, -1, -1, 61, -1, -1, 62, -1, -1, 63}}, /* E13 */ {4, 53, {-1, 60, -1, -1, 61, 59, -1, -1, 62, 57, -1, -1, -1, 56, 15, -1, -1, 50, 22, -1, -1, 47, 25, -1, -1, 45, -1, -1, -1, 44, -1, -1, -1, 41, -1, -1, -1, 38, -1, -1, -1, 35, -1, -1, -1, 34, -1, -1, -1, 10, -1, -1, -1, 9, -1, -1, -1, 4, -1, -1, -1, 0, -1, -1, -1, 3, -1, -1, -1, 1, -1, -1, -1, 5, -1, -1, -1, 8, -1, -1, -1, 7, -1, -1, -1, 13, -1, -1, -1, 11, -1, -1, -1, 14, -1, -1, -1, 16, -1, -1, -1, 21, -1, -1, -1, 20, -1, -1, -1, 23, -1, -1, -1, 24, -1, -1, -1, 26, -1, -1, -1, 55, -1, -1, -1, 54, -1, -1, -1, 52, -1, -1, -1, 49, -1, -1, -1, 48, -1, -1, -1, 43, -1, -1, -1, 42, -1, -1, -1, 40, -1, -1, -1, 39, -1, -1, -1, 32, -1, -1, -1, 37, -1, -1, -1, 36, 17, -1, -1, 53, 2, -1, -1, -1, 19, -1, -1, -1, 58, -1, -1, -1, 46, 18, -1, -1, 33, 6, -1, -1, 51, 12, -1, -1, 63, 27, -1, -1, -1, 28, -1, -1, -1, 29, -1, -1, -1, 30, -1, -1, -1, 31}}, @@ -63,7 +128,9 @@ CathodeSegmentation* createSegType11(bool isBendingPlane) /* E14 */ {1, 2, {62, 63}}, /* E15 */ {2, 3, {-1, 62, 63, 61, -1, 60}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}, {10, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 11, @@ -145,7 +212,9 @@ CathodeSegmentation* createSegType11(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 
58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType11 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType12.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType12.cxx index 83db3a7b77399..e57a7dcd5c600 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType12.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType12.cxx @@ -29,7 +29,71 @@ CathodeSegmentation* createSegType12(bool isBendingPlane) 12, true, /* PG */ - {{1, 3, 0, -100, -20}, {2, 12, 0, -97.5, -20}, {3, 6, 0, -92.5, -20}, {4, 13, 0, -90, -20}, {5, 4, 0, -85, -20}, {6, 3, 0, -80, -20}, {7, 12, 0, -77.5, -20}, {8, 6, 0, -72.5, -20}, {9, 13, 0, -70, -20}, {10, 4, 0, -65, -20}, {18, 3, 0, -60, -20}, {19, 12, 0, -57.5, -20}, {20, 6, 0, -52.5, -20}, {21, 13, 0, -50, -20}, {22, 4, 0, -45, -20}, {23, 3, 0, -40, -20}, {24, 12, 0, -37.5, -20}, {25, 6, 0, -32.5, -20}, {26, 13, 0, -30, -20}, {27, 4, 0, -25, -20}, {103, 8, 2, 60, -20}, {104, 8, 2, 80, -20}, {108, 3, 1, 20, -20}, {109, 12, 1, 25, -20}, {110, 6, 1, 35, -20}, {111, 13, 1, 40, -20}, {112, 4, 1, 50, -20}, {116, 3, 1, -20, -20}, {117, 12, 1, -15, -20}, {118, 6, 1, -5, -20}, {119, 
13, 1, 0, -20}, {120, 4, 1, 10, -20}, {201, 0, 2, 80, -4}, {202, 7, 2, 70, 4}, {203, 5, 2, 60, -4}, {206, 2, 1, 50, 0}, {207, 11, 1, 40, 0}, {208, 9, 1, 35, 4}, {209, 10, 1, 25, 0}, {210, 1, 1, 20, 0}, {215, 2, 1, 10, 0}, {216, 11, 1, 0, 0}, {217, 9, 1, -5, 4}, {218, 10, 1, -15, 0}, {219, 1, 1, -20, 0}, {308, 2, 0, -65, 0}, {309, 11, 0, -70, 0}, {310, 9, 0, -72.5, 4}, {311, 10, 0, -77.5, 0}, {312, 1, 0, -80, 0}, {313, 2, 0, -85, 0}, {314, 11, 0, -90, 0}, {315, 9, 0, -92.5, 4}, {316, 10, 0, -97.5, 0}, {317, 1, 0, -100, 0}, {325, 2, 0, -25, 0}, {326, 11, 0, -30, 0}, {327, 9, 0, -32.5, 4}, {328, 10, 0, -37.5, 0}, {329, 1, 0, -40, 0}, {330, 2, 0, -45, 0}, {331, 11, 0, -50, 0}, {332, 9, 0, -52.5, 4}, {333, 10, 0, -57.5, 0}, {334, 1, 0, -60, 0}}, + {{1, 3, 0, -100, -20}, + {2, 12, 0, -97.5, -20}, + {3, 6, 0, -92.5, -20}, + {4, 13, 0, -90, -20}, + {5, 4, 0, -85, -20}, + {6, 3, 0, -80, -20}, + {7, 12, 0, -77.5, -20}, + {8, 6, 0, -72.5, -20}, + {9, 13, 0, -70, -20}, + {10, 4, 0, -65, -20}, + {18, 3, 0, -60, -20}, + {19, 12, 0, -57.5, -20}, + {20, 6, 0, -52.5, -20}, + {21, 13, 0, -50, -20}, + {22, 4, 0, -45, -20}, + {23, 3, 0, -40, -20}, + {24, 12, 0, -37.5, -20}, + {25, 6, 0, -32.5, -20}, + {26, 13, 0, -30, -20}, + {27, 4, 0, -25, -20}, + {103, 8, 2, 60, -20}, + {104, 8, 2, 80, -20}, + {108, 3, 1, 20, -20}, + {109, 12, 1, 25, -20}, + {110, 6, 1, 35, -20}, + {111, 13, 1, 40, -20}, + {112, 4, 1, 50, -20}, + {116, 3, 1, -20, -20}, + {117, 12, 1, -15, -20}, + {118, 6, 1, -5, -20}, + {119, 13, 1, 0, -20}, + {120, 4, 1, 10, -20}, + {201, 0, 2, 80, -4}, + {202, 7, 2, 70, 4}, + {203, 5, 2, 60, -4}, + {206, 2, 1, 50, 0}, + {207, 11, 1, 40, 0}, + {208, 9, 1, 35, 4}, + {209, 10, 1, 25, 0}, + {210, 1, 1, 20, 0}, + {215, 2, 1, 10, 0}, + {216, 11, 1, 0, 0}, + {217, 9, 1, -5, 4}, + {218, 10, 1, -15, 0}, + {219, 1, 1, -20, 0}, + {308, 2, 0, -65, 0}, + {309, 11, 0, -70, 0}, + {310, 9, 0, -72.5, 4}, + {311, 10, 0, -77.5, 0}, + {312, 1, 0, -80, 0}, + {313, 2, 0, -85, 0}, + {314, 11, 0, -90, 
0}, + {315, 9, 0, -92.5, 4}, + {316, 10, 0, -97.5, 0}, + {317, 1, 0, -100, 0}, + {325, 2, 0, -25, 0}, + {326, 11, 0, -30, 0}, + {327, 9, 0, -32.5, 4}, + {328, 10, 0, -37.5, 0}, + {329, 1, 0, -40, 0}, + {330, 2, 0, -45, 0}, + {331, 11, 0, -50, 0}, + {332, 9, 0, -52.5, 4}, + {333, 10, 0, -57.5, 0}, + {334, 1, 0, -60, 0}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, @@ -44,19 +108,11 @@ CathodeSegmentation* createSegType12(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 
26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}, {10, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 12, @@ -123,7 +179,9 @@ CathodeSegmentation* createSegType12(bool isBendingPlane) /* Q1 */ {14, 5, {-1, -1, -1, -1, 19, 24, 30, 52, 42, 34, 41, 51, -1, -1, 17, 4, 6, 7, 12, 23, 29, 54, 43, 37, 38, 50, 59, 63, 18, 9, 1, 8, 14, 20, 28, 55, 46, 32, 35, 47, 
57, 62, 22, 10, 3, 5, 11, 21, 27, 58, 48, 39, 36, 45, 56, 61, 25, 15, 0, 2, 13, 16, 26, 31, 49, 40, 33, 44, 53, 60}}, /* Q2 */ {14, 5, {-1, -1, 2, 11, 21, 27, 58, 48, 39, 36, -1, -1, -1, -1, 17, 4, 6, 13, 16, 26, 31, 49, 40, 33, 44, 51, 59, 63, 18, 9, 1, 7, 19, 24, 30, 52, 42, 34, 41, 50, 57, 62, 22, 10, 3, 8, 12, 23, 29, 54, 43, 37, 38, 47, 56, 61, 25, 15, 0, 5, 14, 20, 28, 55, 46, 32, 35, 45, 53, 60}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType12 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType13.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType13.cxx index e1acc4ba2c4f3..c15762af0b9ed 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType13.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType13.cxx @@ -29,7 +29,46 @@ CathodeSegmentation* createSegType13(bool isBendingPlane) 13, true, /* PG */ - {{1, 3, 0, -100, -20}, {2, 12, 0, -95, -20}, {3, 6, 0, -85, -20}, {4, 13, 0, -80, -20}, {5, 4, 0, -70, -20}, {10, 3, 0, -60, -20}, {11, 12, 0, -55, -20}, {12, 6, 0, -45, -20}, {13, 13, 0, -40, -20}, {14, 4, 0, -30, -20}, {103, 8, 1, 60, -20}, {104, 8, 1, 80, -20}, {107, 8, 1, 20, -20}, {108, 8, 1, 40, -20}, {112, 3, 0, -20, -20}, {113, 12, 0, -15, -20}, {114, 6, 0, -5, -20}, {115, 13, 0, 0, -20}, {116, 4, 0, 10, -20}, {201, 0, 1, 80, -4}, {202, 7, 1, 70, 4}, {203, 5, 1, 60, -4}, {206, 0, 1, 40, -4}, {207, 7, 1, 30, 4}, {208, 5, 1, 20, -4}, {211, 2, 0, 10, 0}, {212, 11, 0, 0, 0}, {213, 9, 0, -5, 4}, {214, 10, 0, -15, 0}, {215, 1, 0, -20, 0}, {304, 2, 0, -70, 0}, {305, 11, 0, -80, 0}, {306, 9, 0, -85, 4}, {307, 10, 0, -95, 0}, {308, 1, 0, -100, 0}, {312, 2, 0, -30, 0}, {313, 11, 0, -40, 0}, {314, 9, 0, -45, 4}, {315, 10, 0, -55, 0}, {316, 1, 0, -60, 0}}, + {{1, 3, 0, -100, -20}, + 
{2, 12, 0, -95, -20}, + {3, 6, 0, -85, -20}, + {4, 13, 0, -80, -20}, + {5, 4, 0, -70, -20}, + {10, 3, 0, -60, -20}, + {11, 12, 0, -55, -20}, + {12, 6, 0, -45, -20}, + {13, 13, 0, -40, -20}, + {14, 4, 0, -30, -20}, + {103, 8, 1, 60, -20}, + {104, 8, 1, 80, -20}, + {107, 8, 1, 20, -20}, + {108, 8, 1, 40, -20}, + {112, 3, 0, -20, -20}, + {113, 12, 0, -15, -20}, + {114, 6, 0, -5, -20}, + {115, 13, 0, 0, -20}, + {116, 4, 0, 10, -20}, + {201, 0, 1, 80, -4}, + {202, 7, 1, 70, 4}, + {203, 5, 1, 60, -4}, + {206, 0, 1, 40, -4}, + {207, 7, 1, 30, 4}, + {208, 5, 1, 20, -4}, + {211, 2, 0, 10, 0}, + {212, 11, 0, 0, 0}, + {213, 9, 0, -5, 4}, + {214, 10, 0, -15, 0}, + {215, 1, 0, -20, 0}, + {304, 2, 0, -70, 0}, + {305, 11, 0, -80, 0}, + {306, 9, 0, -85, 4}, + {307, 10, 0, -95, 0}, + {308, 1, 0, -100, 0}, + {312, 2, 0, -30, 0}, + {313, 11, 0, -40, 0}, + {314, 9, 0, -45, 4}, + {315, 10, 0, -55, 0}, + {316, 1, 0, -60, 0}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, @@ -44,19 +83,10 @@ CathodeSegmentation* createSegType13(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, 
-1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, 
-1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{5, 0.5}, {10, 0.5}}}; + {{5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 13, @@ -111,7 +141,8 @@ CathodeSegmentation* createSegType13(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType13 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType14.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType14.cxx index c7cf594bf0769..db0b5377acb10 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType14.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType14.cxx @@ -29,7 +29,36 @@ CathodeSegmentation* createSegType14(bool isBendingPlane) 14, true, /* PG */ - {{1, 3, 0, -80, -20}, {2, 12, 0, -75, -20}, {3, 6, 0, -65, -20}, {4, 13, 0, -60, -20}, {5, 4, 0, -50, -20}, {103, 8, 1, 40, -20}, {104, 8, 1, 60, -20}, {107, 8, 1, 0, -20}, {108, 8, 1, 20, -20}, {112, 3, 0, -40, -20}, {113, 12, 0, -35, -20}, {114, 6, 0, -25, -20}, {115, 13, 0, -20, -20}, 
{116, 4, 0, -10, -20}, {201, 0, 1, 60, -4}, {202, 7, 1, 50, 4}, {203, 5, 1, 40, -4}, {206, 0, 1, 20, -4}, {207, 7, 1, 10, 4}, {208, 5, 1, 0, -4}, {211, 2, 0, -10, 0}, {212, 11, 0, -20, 0}, {213, 9, 0, -25, 4}, {214, 10, 0, -35, 0}, {215, 1, 0, -40, 0}, {304, 2, 0, -50, 0}, {305, 11, 0, -60, 0}, {306, 9, 0, -65, 4}, {307, 10, 0, -75, 0}, {308, 1, 0, -80, 0}}, + {{1, 3, 0, -80, -20}, + {2, 12, 0, -75, -20}, + {3, 6, 0, -65, -20}, + {4, 13, 0, -60, -20}, + {5, 4, 0, -50, -20}, + {103, 8, 1, 40, -20}, + {104, 8, 1, 60, -20}, + {107, 8, 1, 0, -20}, + {108, 8, 1, 20, -20}, + {112, 3, 0, -40, -20}, + {113, 12, 0, -35, -20}, + {114, 6, 0, -25, -20}, + {115, 13, 0, -20, -20}, + {116, 4, 0, -10, -20}, + {201, 0, 1, 60, -4}, + {202, 7, 1, 50, 4}, + {203, 5, 1, 40, -4}, + {206, 0, 1, 20, -4}, + {207, 7, 1, 10, 4}, + {208, 5, 1, 0, -4}, + {211, 2, 0, -10, 0}, + {212, 11, 0, -20, 0}, + {213, 9, 0, -25, 4}, + {214, 10, 0, -35, 0}, + {215, 1, 0, -40, 0}, + {304, 2, 0, -50, 0}, + {305, 11, 0, -60, 0}, + {306, 9, 0, -65, 4}, + {307, 10, 0, -75, 0}, + {308, 1, 0, -80, 0}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, @@ -44,19 +73,10 @@ CathodeSegmentation* createSegType14(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, 
-1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, 
-1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{5, 0.5}, {10, 0.5}}}; + {{5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 14, @@ -104,7 +124,8 @@ CathodeSegmentation* createSegType14(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType14 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType15.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType15.cxx index a9d03a72d8dff..65c55da52d57d 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType15.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType15.cxx @@ -48,8 +48,7 @@ CathodeSegmentation* createSegType15(bool isBendingPlane) {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, 
-1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L9 */ {2, 48, {13, 7, 11, 8, 14, 5, 12, 2, 19, 6, 16, 1, 21, 3, 20, 0, 23, 4, 24, 9, 26, 10, 27, 15, 28, 17, 29, 18, 30, 22, 31, 25, 58, -1, 55, -1, 54, -1, 52, -1, 49, -1, 48, -1, 46, -1, 43, -1, 42, -1, 40, -1, 39, -1, 32, -1, 37, -1, 34, -1, 33, -1, 36, -1, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, /* O11 */ {2, 32, {58, 31, 55, 30, 54, 29, 52, 28, 49, 27, 48, 26, 46, 24, 43, 23, 42, 20, 40, 21, 39, 16, 32, 19, 37, 12, 34, 14, 33, 11, 36, 13, 35, 7, 38, 8, 41, 5, 44, 2, 45, 6, 47, 1, 50, 3, 51, 0, 53, 4, 56, 9, 57, 10, 59, 15, 60, 17, 61, 18, 62, 22, 63, 25}}, - /* O12 */ - {2, 32, {25, 63, 22, 62, 18, 61, 17, 60, 15, 59, 10, 57, 9, 56, 4, 53, 0, 51, 3, 50, 1, 47, 6, 45, 2, 44, 5, 41, 8, 38, 7, 35, 13, 36, 11, 33, 14, 34, 12, 37, 19, 32, 16, 39, 21, 40, 20, 42, 23, 43, 24, 46, 26, 48, 27, 49, 28, 52, 29, 54, 30, 55, 31, 58}}}, + /* O12 */ {2, 32, {25, 63, 22, 62, 18, 61, 17, 60, 15, 59, 10, 57, 9, 56, 4, 53, 0, 51, 3, 50, 1, 47, 6, 45, 2, 44, 5, 41, 8, 38, 7, 35, 13, 36, 11, 33, 14, 34, 12, 37, 19, 32, 16, 39, 21, 40, 20, 42, 23, 43, 24, 46, 26, 48, 27, 49, 28, 52, 29, 54, 30, 55, 31, 58}}}, /* PS */ {{10, 0.5}}}; } else { diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType16.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType16.cxx index cf884ef4cb265..32d7adc1b44c8 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType16.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType16.cxx @@ -43,8 +43,7 @@ CathodeSegmentation* createSegType16(bool isBendingPlane) {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 
37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L9 */ {2, 48, {13, 7, 11, 8, 14, 5, 12, 2, 19, 6, 16, 1, 21, 3, 20, 0, 23, 4, 24, 9, 26, 10, 27, 15, 28, 17, 29, 18, 30, 22, 31, 25, 58, -1, 55, -1, 54, -1, 52, -1, 49, -1, 48, -1, 46, -1, 43, -1, 42, -1, 40, -1, 39, -1, 32, -1, 37, -1, 34, -1, 33, -1, 36, -1, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, /* O11 */ {2, 32, {58, 31, 55, 30, 54, 29, 52, 28, 49, 27, 48, 26, 46, 24, 43, 23, 42, 20, 40, 21, 39, 16, 32, 19, 37, 12, 34, 14, 33, 11, 36, 13, 35, 7, 38, 8, 41, 5, 44, 2, 45, 6, 47, 1, 50, 3, 51, 0, 53, 4, 56, 9, 57, 10, 59, 15, 60, 17, 61, 18, 62, 22, 63, 25}}, - /* O12 */ - {2, 32, {25, 63, 22, 62, 18, 61, 17, 60, 15, 59, 10, 57, 9, 56, 4, 53, 0, 51, 3, 50, 1, 47, 6, 45, 2, 44, 5, 41, 8, 38, 7, 35, 13, 36, 11, 33, 14, 34, 12, 37, 19, 32, 16, 39, 21, 40, 20, 42, 23, 43, 24, 46, 26, 48, 27, 49, 28, 52, 29, 54, 30, 55, 31, 58}}}, + /* O12 */ {2, 32, {25, 63, 22, 62, 18, 61, 17, 60, 15, 59, 10, 57, 9, 56, 4, 53, 0, 51, 3, 50, 1, 47, 6, 45, 2, 44, 5, 41, 8, 38, 7, 35, 13, 36, 11, 33, 14, 34, 12, 37, 19, 32, 16, 39, 21, 40, 20, 42, 23, 43, 24, 46, 26, 48, 27, 49, 28, 52, 29, 54, 30, 55, 31, 58}}}, /* PS */ {{10, 0.5}}}; } else { diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType17.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType17.cxx index adeb2a7a1f3c7..3429fe59f04dc 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType17.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType17.cxx @@ -29,7 +29,76 
@@ CathodeSegmentation* createSegType17(bool isBendingPlane) 17, true, /* PG */ - {{1, 3, 0, -120, -20}, {2, 12, 0, -117.5, -20}, {3, 6, 0, -112.5, -20}, {4, 13, 0, -110, -20}, {5, 4, 0, -105, -20}, {6, 3, 0, -100, -20}, {7, 12, 0, -97.5, -20}, {8, 6, 0, -92.5, -20}, {9, 13, 0, -90, -20}, {10, 4, 0, -85, -20}, {18, 3, 0, -80, -20}, {19, 12, 0, -77.5, -20}, {20, 6, 0, -72.5, -20}, {21, 13, 0, -70, -20}, {22, 4, 0, -65, -20}, {23, 3, 0, -60, -20}, {24, 12, 0, -57.5, -20}, {25, 6, 0, -52.5, -20}, {26, 13, 0, -50, -20}, {27, 4, 0, -45, -20}, {35, 3, 1, -40, -20}, {36, 12, 1, -35, -20}, {37, 6, 1, -25, -20}, {38, 13, 1, -20, -20}, {39, 4, 1, -10, -20}, {103, 8, 2, 80, -20}, {104, 8, 2, 100, -20}, {107, 8, 2, 40, -20}, {108, 8, 2, 60, -20}, {112, 3, 1, 0, -20}, {113, 12, 1, 5, -20}, {114, 6, 1, 15, -20}, {115, 13, 1, 20, -20}, {116, 4, 1, 30, -20}, {201, 0, 2, 100, -4}, {202, 7, 2, 90, 4}, {203, 5, 2, 80, -4}, {206, 0, 2, 60, -4}, {207, 7, 2, 50, 4}, {208, 5, 2, 40, -4}, {211, 2, 1, 30, 0}, {212, 11, 1, 20, 0}, {213, 9, 1, 15, 4}, {214, 10, 1, 5, 0}, {215, 1, 1, 0, 0}, {308, 2, 0, -85, 0}, {309, 11, 0, -90, 0}, {310, 9, 0, -92.5, 4}, {311, 10, 0, -97.5, 0}, {312, 1, 0, -100, 0}, {313, 2, 0, -105, 0}, {314, 11, 0, -110, 0}, {315, 9, 0, -112.5, 4}, {316, 10, 0, -117.5, 0}, {317, 1, 0, -120, 0}, {325, 2, 0, -45, 0}, {326, 11, 0, -50, 0}, {327, 9, 0, -52.5, 4}, {328, 10, 0, -57.5, 0}, {329, 1, 0, -60, 0}, {330, 2, 0, -65, 0}, {331, 11, 0, -70, 0}, {332, 9, 0, -72.5, 4}, {333, 10, 0, -77.5, 0}, {334, 1, 0, -80, 0}, {338, 2, 1, -10, 0}, {339, 11, 1, -20, 0}, {340, 9, 1, -25, 4}, {341, 10, 1, -35, 0}, {342, 1, 1, -40, 0}}, + {{1, 3, 0, -120, -20}, + {2, 12, 0, -117.5, -20}, + {3, 6, 0, -112.5, -20}, + {4, 13, 0, -110, -20}, + {5, 4, 0, -105, -20}, + {6, 3, 0, -100, -20}, + {7, 12, 0, -97.5, -20}, + {8, 6, 0, -92.5, -20}, + {9, 13, 0, -90, -20}, + {10, 4, 0, -85, -20}, + {18, 3, 0, -80, -20}, + {19, 12, 0, -77.5, -20}, + {20, 6, 0, -72.5, -20}, + {21, 13, 0, -70, -20}, + {22, 4, 
0, -65, -20}, + {23, 3, 0, -60, -20}, + {24, 12, 0, -57.5, -20}, + {25, 6, 0, -52.5, -20}, + {26, 13, 0, -50, -20}, + {27, 4, 0, -45, -20}, + {35, 3, 1, -40, -20}, + {36, 12, 1, -35, -20}, + {37, 6, 1, -25, -20}, + {38, 13, 1, -20, -20}, + {39, 4, 1, -10, -20}, + {103, 8, 2, 80, -20}, + {104, 8, 2, 100, -20}, + {107, 8, 2, 40, -20}, + {108, 8, 2, 60, -20}, + {112, 3, 1, 0, -20}, + {113, 12, 1, 5, -20}, + {114, 6, 1, 15, -20}, + {115, 13, 1, 20, -20}, + {116, 4, 1, 30, -20}, + {201, 0, 2, 100, -4}, + {202, 7, 2, 90, 4}, + {203, 5, 2, 80, -4}, + {206, 0, 2, 60, -4}, + {207, 7, 2, 50, 4}, + {208, 5, 2, 40, -4}, + {211, 2, 1, 30, 0}, + {212, 11, 1, 20, 0}, + {213, 9, 1, 15, 4}, + {214, 10, 1, 5, 0}, + {215, 1, 1, 0, 0}, + {308, 2, 0, -85, 0}, + {309, 11, 0, -90, 0}, + {310, 9, 0, -92.5, 4}, + {311, 10, 0, -97.5, 0}, + {312, 1, 0, -100, 0}, + {313, 2, 0, -105, 0}, + {314, 11, 0, -110, 0}, + {315, 9, 0, -112.5, 4}, + {316, 10, 0, -117.5, 0}, + {317, 1, 0, -120, 0}, + {325, 2, 0, -45, 0}, + {326, 11, 0, -50, 0}, + {327, 9, 0, -52.5, 4}, + {328, 10, 0, -57.5, 0}, + {329, 1, 0, -60, 0}, + {330, 2, 0, -65, 0}, + {331, 11, 0, -70, 0}, + {332, 9, 0, -72.5, 4}, + {333, 10, 0, -77.5, 0}, + {334, 1, 0, -80, 0}, + {338, 2, 1, -10, 0}, + {339, 11, 1, -20, 0}, + {340, 9, 1, -25, 4}, + {341, 10, 1, -35, 0}, + {342, 1, 1, -40, 0}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, 
-1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, @@ -44,19 +113,11 @@ CathodeSegmentation* createSegType17(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, 
-1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}, {10, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 17, @@ -134,7 +195,9 @@ CathodeSegmentation* createSegType17(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType17 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType18.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType18.cxx index 74cd7c6c52cab..9a7099ac66e1e 100644 --- 
a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType18.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType18.cxx @@ -29,7 +29,51 @@ CathodeSegmentation* createSegType18(bool isBendingPlane) 18, true, /* PG */ - {{1, 3, 0, -120, -20}, {2, 12, 0, -115, -20}, {3, 6, 0, -105, -20}, {4, 13, 0, -100, -20}, {5, 4, 0, -90, -20}, {10, 3, 0, -80, -20}, {11, 12, 0, -75, -20}, {12, 6, 0, -65, -20}, {13, 13, 0, -60, -20}, {14, 4, 0, -50, -20}, {19, 3, 0, -40, -20}, {20, 12, 0, -35, -20}, {21, 6, 0, -25, -20}, {22, 13, 0, -20, -20}, {23, 4, 0, -10, -20}, {103, 8, 1, 80, -20}, {104, 8, 1, 100, -20}, {107, 8, 1, 40, -20}, {108, 8, 1, 60, -20}, {111, 8, 1, 0, -20}, {112, 8, 1, 20, -20}, {201, 0, 1, 100, -4}, {202, 7, 1, 90, 4}, {203, 5, 1, 80, -4}, {206, 0, 1, 60, -4}, {207, 7, 1, 50, 4}, {208, 5, 1, 40, -4}, {211, 0, 1, 20, -4}, {212, 7, 1, 10, 4}, {213, 5, 1, 0, -4}, {304, 2, 0, -90, 0}, {305, 11, 0, -100, 0}, {306, 9, 0, -105, 4}, {307, 10, 0, -115, 0}, {308, 1, 0, -120, 0}, {312, 2, 0, -50, 0}, {313, 11, 0, -60, 0}, {314, 9, 0, -65, 4}, {315, 10, 0, -75, 0}, {316, 1, 0, -80, 0}, {320, 2, 0, -10, 0}, {321, 11, 0, -20, 0}, {322, 9, 0, -25, 4}, {323, 10, 0, -35, 0}, {324, 1, 0, -40, 0}}, + {{1, 3, 0, -120, -20}, + {2, 12, 0, -115, -20}, + {3, 6, 0, -105, -20}, + {4, 13, 0, -100, -20}, + {5, 4, 0, -90, -20}, + {10, 3, 0, -80, -20}, + {11, 12, 0, -75, -20}, + {12, 6, 0, -65, -20}, + {13, 13, 0, -60, -20}, + {14, 4, 0, -50, -20}, + {19, 3, 0, -40, -20}, + {20, 12, 0, -35, -20}, + {21, 6, 0, -25, -20}, + {22, 13, 0, -20, -20}, + {23, 4, 0, -10, -20}, + {103, 8, 1, 80, -20}, + {104, 8, 1, 100, -20}, + {107, 8, 1, 40, -20}, + {108, 8, 1, 60, -20}, + {111, 8, 1, 0, -20}, + {112, 8, 1, 20, -20}, + {201, 0, 1, 100, -4}, + {202, 7, 1, 90, 4}, + {203, 5, 1, 80, -4}, + {206, 0, 1, 60, -4}, + {207, 7, 1, 50, 4}, + {208, 5, 1, 40, -4}, + {211, 0, 1, 20, -4}, + {212, 7, 1, 10, 4}, + {213, 5, 1, 0, -4}, + {304, 2, 0, -90, 0}, + {305, 
11, 0, -100, 0}, + {306, 9, 0, -105, 4}, + {307, 10, 0, -115, 0}, + {308, 1, 0, -120, 0}, + {312, 2, 0, -50, 0}, + {313, 11, 0, -60, 0}, + {314, 9, 0, -65, 4}, + {315, 10, 0, -75, 0}, + {316, 1, 0, -80, 0}, + {320, 2, 0, -10, 0}, + {321, 11, 0, -20, 0}, + {322, 9, 0, -25, 4}, + {323, 10, 0, -35, 0}, + {324, 1, 0, -40, 0}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, @@ -44,19 +88,10 @@ CathodeSegmentation* createSegType18(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 
26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{5, 0.5}, {10, 0.5}}}; + {{5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 18, @@ -108,7 +143,8 @@ CathodeSegmentation* createSegType18(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 
9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType18 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType19.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType19.cxx index 8ad57f452001a..9ed65c527d1b0 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType19.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType19.cxx @@ -29,7 +29,41 @@ CathodeSegmentation* createSegType19(bool isBendingPlane) 19, true, /* PG */ - {{1, 3, 0, -100, -20}, {2, 12, 0, -95, -20}, {3, 6, 0, -85, -20}, {4, 13, 0, -80, -20}, {5, 4, 0, -70, -20}, {10, 3, 0, -60, -20}, {11, 12, 0, -55, -20}, {12, 6, 0, -45, -20}, {13, 13, 0, -40, -20}, {14, 4, 0, -30, -20}, {103, 8, 1, 60, -20}, {104, 8, 1, 80, -20}, {107, 8, 1, 20, -20}, {108, 8, 1, 40, -20}, {111, 8, 1, -20, -20}, {112, 8, 1, 0, -20}, {201, 0, 1, 80, -4}, {202, 7, 1, 70, 4}, {203, 5, 1, 60, -4}, {206, 0, 1, 40, -4}, {207, 7, 1, 30, 4}, {208, 5, 1, 20, -4}, {211, 0, 1, 0, -4}, {212, 7, 1, -10, 4}, {213, 5, 1, -20, -4}, {304, 2, 0, -70, 0}, {305, 11, 0, -80, 0}, {306, 9, 0, -85, 4}, {307, 10, 0, -95, 0}, {308, 1, 0, -100, 0}, {312, 2, 0, -30, 0}, {313, 11, 0, -40, 0}, {314, 9, 0, -45, 4}, {315, 10, 0, -55, 0}, {316, 1, 0, -60, 0}}, + {{1, 3, 0, -100, -20}, + {2, 12, 0, -95, -20}, + {3, 6, 0, -85, -20}, + {4, 13, 0, -80, -20}, + {5, 4, 0, -70, -20}, + 
{10, 3, 0, -60, -20}, + {11, 12, 0, -55, -20}, + {12, 6, 0, -45, -20}, + {13, 13, 0, -40, -20}, + {14, 4, 0, -30, -20}, + {103, 8, 1, 60, -20}, + {104, 8, 1, 80, -20}, + {107, 8, 1, 20, -20}, + {108, 8, 1, 40, -20}, + {111, 8, 1, -20, -20}, + {112, 8, 1, 0, -20}, + {201, 0, 1, 80, -4}, + {202, 7, 1, 70, 4}, + {203, 5, 1, 60, -4}, + {206, 0, 1, 40, -4}, + {207, 7, 1, 30, 4}, + {208, 5, 1, 20, -4}, + {211, 0, 1, 0, -4}, + {212, 7, 1, -10, 4}, + {213, 5, 1, -20, -4}, + {304, 2, 0, -70, 0}, + {305, 11, 0, -80, 0}, + {306, 9, 0, -85, 4}, + {307, 10, 0, -95, 0}, + {308, 1, 0, -100, 0}, + {312, 2, 0, -30, 0}, + {313, 11, 0, -40, 0}, + {314, 9, 0, -45, 4}, + {315, 10, 0, -55, 0}, + {316, 1, 0, -60, 0}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, @@ -44,19 +78,10 @@ CathodeSegmentation* createSegType19(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 
36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* 
PS */ - {{5, 0.5}, {10, 0.5}}}; + {{5, 0.5}, + {10, 0.5}}}; } else { return new CathodeSegmentation{ 19, @@ -101,7 +126,8 @@ CathodeSegmentation* createSegType19(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 5}, {0.714285714, 10}}}; + {{0.714285714, 5}, + {0.714285714, 10}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType19 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType2.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType2.cxx index 2e9c9d146d155..bd80de35ccb73 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType2.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType2.cxx @@ -29,7 +29,52 @@ CathodeSegmentation* createSegType2(bool isBendingPlane) 2, true, /* PG */ - {{1, 0, 0, 50, 2}, {2, 12, 0, 45, 4}, {3, 12, 0, 40, 4}, {6, 8, 0, 35, 0}, {7, 15, 0, 30, 0}, {8, 13, 0, 27.5, 4}, {9, 14, 0, 22.5, 0}, {10, 7, 0, 20, 0}, {11, 8, 0, 15, 0}, {12, 15, 0, 10, 0}, {13, 13, 0, 7.5, 4}, {14, 14, 0, 2.5, 0}, {15, 7, 0, 0, 0}, {104, 8, 1, -50, 0}, {105, 15, 1, -60, 0}, {106, 13, 1, -65, 4}, {107, 18, 1, -75, 0}, {111, 8, 1, -10, 0}, {112, 15, 1, -20, 0}, {113, 13, 1, -25, 4}, {114, 14, 1, -35, 0}, {115, 7, 1, -40, 0}, {201, 5, 1, -75, -20}, {202, 6, 1, -70, -20}, {203, 11, 1, 
-65, -20}, {204, 17, 1, -60, -20}, {205, 10, 1, -50, -20}, {209, 9, 1, -40, -20}, {210, 16, 1, -35, -20}, {211, 11, 1, -25, -20}, {212, 17, 1, -20, -20}, {213, 10, 1, -10, -20}, {304, 1, 0, 40, -20}, {305, 2, 0, 42.5, -20}, {306, 3, 0, 45, -20}, {307, 4, 0, 50, -20}, {315, 9, 0, 0, -20}, {316, 16, 0, 2.5, -20}, {317, 11, 0, 7.5, -20}, {318, 17, 0, 10, -20}, {319, 10, 0, 15, -20}, {320, 9, 0, 20, -20}, {321, 16, 0, 22.5, -20}, {322, 11, 0, 27.5, -20}, {323, 17, 0, 30, -20}, {324, 10, 0, 35, -20}}, + {{1, 0, 0, 50, 2}, + {2, 12, 0, 45, 4}, + {3, 12, 0, 40, 4}, + {6, 8, 0, 35, 0}, + {7, 15, 0, 30, 0}, + {8, 13, 0, 27.5, 4}, + {9, 14, 0, 22.5, 0}, + {10, 7, 0, 20, 0}, + {11, 8, 0, 15, 0}, + {12, 15, 0, 10, 0}, + {13, 13, 0, 7.5, 4}, + {14, 14, 0, 2.5, 0}, + {15, 7, 0, 0, 0}, + {104, 8, 1, -50, 0}, + {105, 15, 1, -60, 0}, + {106, 13, 1, -65, 4}, + {107, 18, 1, -75, 0}, + {111, 8, 1, -10, 0}, + {112, 15, 1, -20, 0}, + {113, 13, 1, -25, 4}, + {114, 14, 1, -35, 0}, + {115, 7, 1, -40, 0}, + {201, 5, 1, -75, -20}, + {202, 6, 1, -70, -20}, + {203, 11, 1, -65, -20}, + {204, 17, 1, -60, -20}, + {205, 10, 1, -50, -20}, + {209, 9, 1, -40, -20}, + {210, 16, 1, -35, -20}, + {211, 11, 1, -25, -20}, + {212, 17, 1, -20, -20}, + {213, 10, 1, -10, -20}, + {304, 1, 0, 40, -20}, + {305, 2, 0, 42.5, -20}, + {306, 3, 0, 45, -20}, + {307, 4, 0, 50, -20}, + {315, 9, 0, 0, -20}, + {316, 16, 0, 2.5, -20}, + {317, 11, 0, 7.5, -20}, + {318, 17, 0, 10, -20}, + {319, 10, 0, 15, -20}, + {320, 9, 0, 20, -20}, + {321, 16, 0, 22.5, -20}, + {322, 11, 0, 27.5, -20}, + {323, 17, 0, 30, -20}, + {324, 10, 0, 35, -20}}, /* PGT */ {/* C10 */ {3, 36, {28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, 4, -1, 37, 0, -1, 34, 3, -1, 33, 1, -1, 36, 6, -1, 35, 2, -1, 38, 5, -1, 41, 8, -1, 44, 7, -1, 45, 13, -1, 47, 11, -1, 50, 14, -1, 51, 12, -1, 53, 19, -1, 56, 16, 25, 57, 21, 22, 
59, 20, 18, 60, 23, 17, 61, 24, 15, 62, 26, 10, 63, 27, 9}}, /* C6 */ {2, 48, {25, 35, 22, 38, 18, 41, 17, 44, 15, 45, 10, 47, 9, 50, 4, 51, 0, 53, 3, 56, 1, 57, 6, 59, 2, 60, 5, 61, 8, 62, 7, 63, 13, -1, 11, -1, 14, -1, 12, -1, 19, -1, 16, -1, 21, -1, 20, -1, 23, -1, 24, -1, 26, -1, 27, -1, 28, -1, 29, -1, 30, -1, 31, -1, 58, -1, 55, -1, 54, -1, 52, -1, 49, -1, 48, -1, 46, -1, 43, -1, 42, -1, 40, -1, 39, -1, 32, -1, 37, -1, 34, -1, 33, -1, 36, -1}}, @@ -49,19 +94,10 @@ CathodeSegmentation* createSegType2(bool isBendingPlane) /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, 
-1}}, - /* Z5 */ - {3, - 40, - {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, - -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, - -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, - -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, - -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, - 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, - 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, - 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}}, + /* Z5 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}}}; } else { return new CathodeSegmentation{ 2, @@ -102,8 +138,7 @@ CathodeSegmentation* createSegType2(bool isBendingPlane) /* PGT */ {/* C1 */ {7, 10, {51, 33, 49, 26, 13, 9, -1, 53, 36, 48, 27, 11, 4, -1, 56, 35, 46, 28, 14, 0, -1, 57, 38, 43, 29, 12, 3, -1, 59, 41, 42, 30, 19, 1, 25, 60, 44, 40, 31, 16, 6, 22, 61, 45, 39, 58, 21, 2, 18, 62, 47, 32, 55, 20, 5, 17, 63, 50, 37, 54, 23, 8, 15, -1, -1, 34, 52, 24, 7, 10}}, /* C2 */ {7, 10, {60, 41, 42, 30, 19, 1, 25, 61, 44, 40, 31, 16, 6, 22, 62, 45, 39, 58, 21, 2, 18, 63, 47, 32, 55, 20, 5, 17, -1, 50, 37, 54, 23, 8, 15, -1, 51, 34, 52, 24, 7, 10, -1, 53, 33, 49, 26, 13, 9, -1, 56, 36, 48, 27, 11, 4, -1, 57, 35, 46, 28, 14, 0, -1, 59, 38, 43, 29, 12, 3}}, - /* C3 */ - {13, 10, {50, 37, 54, 23, 14, 8, 1, 4, 10, 17, 18, 22, 25, 51, 34, 52, 24, 12, 7, 6, 0, 9, 15, -1, -1, -1, 53, 33, 49, 26, 19, 13, 2, 3, -1, -1, -1, -1, -1, 56, 36, 48, 27, 16, 11, 5, -1, -1, -1, 
-1, -1, -1, 57, 35, 46, 28, 21, -1, -1, -1, -1, -1, -1, -1, -1, 59, 38, 43, 29, 20, -1, -1, -1, -1, -1, -1, -1, -1, 60, 41, 42, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 61, 44, 40, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, 45, 39, 58, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, 47, 32, 55, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + /* C3 */ {13, 10, {50, 37, 54, 23, 14, 8, 1, 4, 10, 17, 18, 22, 25, 51, 34, 52, 24, 12, 7, 6, 0, 9, 15, -1, -1, -1, 53, 33, 49, 26, 19, 13, 2, 3, -1, -1, -1, -1, -1, 56, 36, 48, 27, 16, 11, 5, -1, -1, -1, -1, -1, -1, 57, 35, 46, 28, 21, -1, -1, -1, -1, -1, -1, -1, -1, 59, 38, 43, 29, 20, -1, -1, -1, -1, -1, -1, -1, -1, 60, 41, 42, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 61, 44, 40, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, 45, 39, 58, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, 47, 32, 55, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, /* C4 */ {16, 6, {-1, 15, 1, 13, 21, 28, 54, 42, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 6, 11, 20, 29, 52, 40, -1, -1, -1, -1, -1, -1, -1, -1, 25, 9, 2, 14, 23, 30, 49, 39, 33, 41, -1, -1, -1, -1, -1, -1, 22, 4, 5, 12, 24, 31, 48, 32, 36, 44, 50, -1, -1, -1, -1, -1, 18, 0, 8, 19, 26, 58, 46, 37, 35, 45, 51, 56, 59, -1, -1, -1, 17, 3, 7, 16, 27, 55, 43, 34, 38, 47, 53, 57, 60, 61, 62, 63}}, /* C5 */ {11, 7, {25, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, 0, 8, 19, 26, 58, 46, 37, 41, 53, 62, 18, 3, 7, 16, 27, 55, 43, 34, 44, 56, 63, 17, 1, 13, 21, 28, 54, 42, 33, 45, 57, -1, 15, 6, 11, 20, 29, 52, 40, 36, 47, 59, -1, 10, 2, 14, 23, 30, 49, 39, 35, 50, 60, -1, 9, 5, 12, 24, 31, 48, 32, 38, 51, 61, -1}}, /* L3 */ {20, 4, {17, 4, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 9, 1, 8, 14, 16, 23, 27, 30, 55, 49, 43, 39, 34, 35, 44, 50, 56, 60, 63, 22, 10, 3, 5, 11, 19, 20, 26, 29, 58, 52, 46, 40, 37, 36, 41, 47, 53, 59, 62, 25, 15, 0, 2, 13, 12, 21, 24, 28, 31, 54, 48, 42, 32, 33, 38, 45, 51, 57, 61}}, @@ -117,7 +152,8 @@ CathodeSegmentation* createSegType2(bool isBendingPlane) /* Q3 */ {16, 5, {-1, 
-1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType2 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType20.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType20.cxx index b4f58cd366888..760af4b1a88f8 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType20.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType20.cxx @@ -29,15 +29,33 @@ CathodeSegmentation* createSegType20(bool isBendingPlane) 20, true, /* PG */ - {{1, 1, 0, -80, -20}, {2, 5, 0, -70, -20}, {3, 2, 0, -60, -20}, {103, 5, 0, 40, -20}, {104, 5, 0, 60, -20}, {107, 5, 0, 0, -20}, {108, 5, 0, 20, -20}, {111, 5, 0, -40, -20}, {112, 5, 0, -20, -20}, {201, 0, 0, 60, -4}, {202, 4, 0, 50, 4}, {203, 3, 0, 40, -4}, {206, 0, 0, 20, -4}, {207, 4, 0, 10, 4}, {208, 3, 0, 0, -4}, {211, 0, 0, -20, -4}, {212, 4, 0, -30, 4}, {213, 3, 0, -40, -4}, {303, 4, 0, -60, 4}, {304, 4, 0, -80, 4}}, + {{1, 1, 0, -80, -20}, + {2, 5, 0, -70, -20}, + {3, 2, 0, -60, -20}, + {103, 5, 0, 40, -20}, + {104, 5, 0, 60, -20}, + {107, 5, 0, 0, -20}, + {108, 5, 0, 20, -20}, + {111, 5, 0, -40, -20}, + {112, 5, 0, -20, -20}, + {201, 0, 0, 60, -4}, + {202, 4, 0, 50, 4}, + {203, 3, 0, 
40, -4}, + {206, 0, 0, 20, -4}, + {207, 4, 0, 10, 4}, + {208, 3, 0, 0, -4}, + {211, 0, 0, -20, -4}, + {212, 4, 0, -30, 4}, + {213, 3, 0, -40, -4}, + {303, 4, 0, -60, 4}, + {304, 4, 0, -80, 4}}, /* PGT */ {/* L10 */ {2, 48, {35, 36, 38, 33, 41, 34, 44, 37, 45, 32, 47, 39, 50, 40, 51, 42, 53, 43, 56, 46, 57, 48, 59, 49, 60, 52, 61, 54, 62, 55, 63, 58, -1, 31, -1, 30, -1, 29, -1, 28, -1, 27, -1, 26, -1, 24, -1, 23, -1, 20, -1, 21, -1, 16, -1, 19, -1, 12, -1, 14, -1, 11, -1, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, /* L19 */ {2, 48, {25, -1, 22, -1, 18, -1, 17, -1, 15, -1, 10, -1, 9, -1, 4, -1, 0, -1, 3, -1, 1, -1, 6, -1, 2, -1, 5, -1, 8, -1, 7, -1, 13, -1, 11, -1, 14, -1, 12, -1, 19, -1, 16, -1, 21, -1, 20, -1, 23, -1, 24, -1, 26, -1, 27, -1, 28, -1, 29, -1, 30, -1, 31, -1, 58, 63, 55, 62, 54, 61, 52, 60, 49, 59, 48, 57, 46, 56, 43, 53, 42, 51, 40, 50, 39, 47, 32, 45, 37, 44, 34, 41, 33, 38, 36, 35}}, /* L20 */ {2, 48, {-1, 63, -1, 62, -1, 61, -1, 60, -1, 59, -1, 57, -1, 56, -1, 53, -1, 51, -1, 50, -1, 47, -1, 45, -1, 44, -1, 41, -1, 38, -1, 35, -1, 36, -1, 33, -1, 34, -1, 37, -1, 32, -1, 39, -1, 40, -1, 42, -1, 43, -1, 46, -1, 48, -1, 49, -1, 52, -1, 54, -1, 55, -1, 58, 25, 31, 22, 30, 18, 29, 17, 28, 15, 27, 10, 26, 9, 24, 4, 23, 0, 20, 3, 21, 1, 16, 6, 19, 2, 12, 5, 14, 8, 11, 7, 13}}, /* L9 */ {2, 48, {13, 7, 11, 8, 14, 5, 12, 2, 19, 6, 16, 1, 21, 3, 20, 0, 23, 4, 24, 9, 26, 10, 27, 15, 28, 17, 29, 18, 30, 22, 31, 25, 58, -1, 55, -1, 54, -1, 52, -1, 49, -1, 48, -1, 46, -1, 43, -1, 42, -1, 40, -1, 39, -1, 32, -1, 37, -1, 34, -1, 33, -1, 36, -1, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, /* O11 */ {2, 32, {58, 31, 55, 30, 54, 29, 52, 28, 49, 27, 48, 26, 46, 24, 43, 23, 42, 20, 40, 21, 39, 16, 32, 19, 37, 12, 34, 14, 33, 11, 36, 13, 35, 7, 38, 8, 41, 5, 44, 2, 45, 6, 47, 1, 50, 3, 51, 0, 53, 4, 56, 
9, 57, 10, 59, 15, 60, 17, 61, 18, 62, 22, 63, 25}}, - /* O12 */ - {2, 32, {25, 63, 22, 62, 18, 61, 17, 60, 15, 59, 10, 57, 9, 56, 4, 53, 0, 51, 3, 50, 1, 47, 6, 45, 2, 44, 5, 41, 8, 38, 7, 35, 13, 36, 11, 33, 14, 34, 12, 37, 19, 32, 16, 39, 21, 40, 20, 42, 23, 43, 24, 46, 26, 48, 27, 49, 28, 52, 29, 54, 30, 55, 31, 58}}}, + /* O12 */ {2, 32, {25, 63, 22, 62, 18, 61, 17, 60, 15, 59, 10, 57, 9, 56, 4, 53, 0, 51, 3, 50, 1, 47, 6, 45, 2, 44, 5, 41, 8, 38, 7, 35, 13, 36, 11, 33, 14, 34, 12, 37, 19, 32, 16, 39, 21, 40, 20, 42, 23, 43, 24, 46, 26, 48, 27, 49, 28, 52, 29, 54, 30, 55, 31, 58}}}, /* PS */ {{10, 0.5}}}; } else { diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType3.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType3.cxx index e5f88c6ad0c84..bd7bd37f11cb0 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType3.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType3.cxx @@ -29,7 +29,62 @@ CathodeSegmentation* createSegType3(bool isBendingPlane) 3, true, /* PG */ - {{4, 20, 0, 40, -20}, {5, 21, 0, 45, -20}, {6, 21, 0, 50, -20}, {7, 11, 0, 55, -17.5}, {101, 13, 1, -75, -20}, {102, 14, 1, -70, -20}, {103, 19, 1, -65, -20}, {104, 26, 1, -60, -20}, {105, 18, 1, -50, -20}, {109, 17, 1, -40, -20}, {110, 25, 1, -35, -20}, {111, 19, 1, -25, -20}, {112, 26, 1, -20, -20}, {113, 18, 1, -10, -20}, {118, 17, 0, 0, -20}, {119, 25, 0, 2.5, -20}, {120, 19, 0, 7.5, -20}, {121, 26, 0, 10, -20}, {122, 18, 0, 15, -20}, {123, 17, 0, 20, -20}, {124, 25, 0, 22.5, -20}, {125, 19, 0, 27.5, -20}, {126, 26, 0, 30, -20}, {127, 18, 0, 35, -20}, {204, 16, 1, -50, 0}, {205, 24, 1, -60, 0}, {206, 22, 1, -65, 4}, {207, 27, 1, -75, 0}, {211, 16, 1, -10, 0}, {212, 24, 1, -20, 0}, {213, 22, 1, -25, 4}, {214, 23, 1, -35, 0}, {215, 15, 1, -40, 0}, {223, 16, 0, 35, 0}, {224, 24, 0, 30, 0}, {225, 22, 0, 27.5, 4}, {226, 23, 0, 22.5, 0}, {227, 15, 0, 20, 0}, {228, 16, 
0, 15, 0}, {229, 24, 0, 10, 0}, {230, 22, 0, 7.5, 4}, {231, 23, 0, 2.5, 0}, {232, 15, 0, 0, 0}, {401, 12, 0, 75, -7}, {402, 0, 0, 72.5, -7.5}, {403, 1, 0, 70, -8}, {404, 2, 0, 67.5, 1}, {405, 3, 0, 65, -8.5}, {406, 4, 0, 62.5, -10}, {407, 5, 0, 60, -11}, {408, 6, 0, 55, -4}, {409, 7, 0, 52.5, -4}, {410, 8, 0, 50, -4}, {411, 10, 0, 45, -4}, {412, 7, 0, 42.5, -4}, {413, 9, 0, 40, -4}}, + {{4, 20, 0, 40, -20}, + {5, 21, 0, 45, -20}, + {6, 21, 0, 50, -20}, + {7, 11, 0, 55, -17.5}, + {101, 13, 1, -75, -20}, + {102, 14, 1, -70, -20}, + {103, 19, 1, -65, -20}, + {104, 26, 1, -60, -20}, + {105, 18, 1, -50, -20}, + {109, 17, 1, -40, -20}, + {110, 25, 1, -35, -20}, + {111, 19, 1, -25, -20}, + {112, 26, 1, -20, -20}, + {113, 18, 1, -10, -20}, + {118, 17, 0, 0, -20}, + {119, 25, 0, 2.5, -20}, + {120, 19, 0, 7.5, -20}, + {121, 26, 0, 10, -20}, + {122, 18, 0, 15, -20}, + {123, 17, 0, 20, -20}, + {124, 25, 0, 22.5, -20}, + {125, 19, 0, 27.5, -20}, + {126, 26, 0, 30, -20}, + {127, 18, 0, 35, -20}, + {204, 16, 1, -50, 0}, + {205, 24, 1, -60, 0}, + {206, 22, 1, -65, 4}, + {207, 27, 1, -75, 0}, + {211, 16, 1, -10, 0}, + {212, 24, 1, -20, 0}, + {213, 22, 1, -25, 4}, + {214, 23, 1, -35, 0}, + {215, 15, 1, -40, 0}, + {223, 16, 0, 35, 0}, + {224, 24, 0, 30, 0}, + {225, 22, 0, 27.5, 4}, + {226, 23, 0, 22.5, 0}, + {227, 15, 0, 20, 0}, + {228, 16, 0, 15, 0}, + {229, 24, 0, 10, 0}, + {230, 22, 0, 7.5, 4}, + {231, 23, 0, 2.5, 0}, + {232, 15, 0, 0, 0}, + {401, 12, 0, 75, -7}, + {402, 0, 0, 72.5, -7.5}, + {403, 1, 0, 70, -8}, + {404, 2, 0, 67.5, 1}, + {405, 3, 0, 65, -8.5}, + {406, 4, 0, 62.5, -10}, + {407, 5, 0, 60, -11}, + {408, 6, 0, 55, -4}, + {409, 7, 0, 52.5, -4}, + {410, 8, 0, 50, -4}, + {411, 10, 0, 45, -4}, + {412, 7, 0, 42.5, -4}, + {413, 9, 0, 40, -4}}, /* PGT */ {/* A10 */ {2, 55, {32, -1, 37, -1, 34, -1, 33, -1, 36, -1, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, 25, 53, 22, 56, 18, 57, 17, 59, 15, 60, 10, 61, 9, 62, 4, 63, 0, -1, 3, -1, 1, -1, 6, -1, 2, -1, 5, -1, 
8, -1, 7, -1, 13, -1, 11, -1, 14, -1, 12, -1, 19, -1, 16, -1, 21, -1, 20, -1, 23, -1, 24, -1, 26, -1, 27, -1, 28, -1, 29, -1, 30, -1, 31, -1, 39, -1, 40, -1, 42, -1, 43, -1, 46, -1, 48, -1, 49, -1, 52, -1, 54, -1, 55, -1, 58}}, /* A11 */ {2, 56, {54, -1, 52, -1, 49, -1, 48, -1, 46, -1, 43, -1, 42, -1, 40, -1, 39, -1, 32, -1, 37, -1, 34, -1, 33, -1, 36, -1, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, 25, 56, 22, 57, 18, 59, 17, 60, 15, 61, 10, 62, 9, 63, 4, -1, 0, -1, 3, -1, 1, -1, 6, -1, 2, -1, 5, -1, 8, -1, 7, -1, 13, -1, 11, -1, 14, -1, 12, -1, 19, -1, 16, -1, 21, -1, 20, -1, 23, -1, 24, -1, 26, -1, 27, -1, 28, -1, 29, -1, 30, -1, 31, -1, 55, -1, 58}}, @@ -58,19 +113,10 @@ CathodeSegmentation* createSegType3(bool isBendingPlane) /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, 
-1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}, - /* Z5 */ - {3, - 40, - {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, - -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, - -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, - -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, - -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, - 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, - 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, - 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}}, + /* Z5 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}}}; } else { return new CathodeSegmentation{ 3, @@ -118,8 +164,7 @@ CathodeSegmentation* createSegType3(bool isBendingPlane) /* PGT */ {/* A1 */ {9, 8, {53, 35, 42, 58, 23, 13, -1, -1, -1, 56, 38, 40, 55, 24, 11, 3, 18, 25, 57, 41, 39, 54, 26, 14, 1, 17, 22, 59, 44, 32, 52, 27, 12, 6, 15, -1, 60, 45, 37, 49, 28, 19, 2, 10, -1, 61, 47, 34, 48, 29, 16, 5, 9, -1, 62, 50, 33, 46, 30, 21, 8, 4, -1, 63, 51, 36, 43, 31, 20, 7, 0, -1}}, /* A2 */ {5, 14, {-1, 5, 27, 40, 51, 25, 8, 28, 39, 53, 22, 7, 29, 32, 56, 18, 13, 30, 37, 57, 17, 11, 31, 34, 59, 15, 14, 58, 33, 60, 10, 12, 55, 36, 61, 9, 19, 54, 35, 62, 4, 16, 52, 38, 63, 0, 21, 49, 41, -1, 3, 20, 48, 44, -1, 1, 23, 46, 45, -1, 6, 24, 43, 
47, -1, 2, 26, 42, 50, -1}}, - /* A3 */ - {6, 13, {-1, 10, 14, 31, 37, 56, -1, 9, 12, 58, 34, 57, -1, 4, 19, 55, 33, 59, -1, 0, 16, 54, 36, 60, -1, 3, 21, 52, 35, 61, -1, 1, 20, 49, 38, 62, -1, 6, 23, 48, 41, 63, -1, 2, 24, 46, 44, -1, 25, 5, 26, 43, 45, -1, 22, 8, 27, 42, 47, -1, 18, 7, 28, 40, 50, -1, 17, 13, 29, 39, 51, -1, 15, 11, 30, 32, 53, -1}}, + /* A3 */ {6, 13, {-1, 10, 14, 31, 37, 56, -1, 9, 12, 58, 34, 57, -1, 4, 19, 55, 33, 59, -1, 0, 16, 54, 36, 60, -1, 3, 21, 52, 35, 61, -1, 1, 20, 49, 38, 62, -1, 6, 23, 48, 41, 63, -1, 2, 24, 46, 44, -1, 25, 5, 26, 43, 45, -1, 22, 8, 27, 42, 47, -1, 18, 7, 28, 40, 50, -1, 17, 13, 29, 39, 51, -1, 15, 11, 30, 32, 53, -1}}, /* A4 */ {6, 12, {-1, 9, 14, 30, 39, 50, -1, 4, 12, 31, 32, 51, -1, 0, 19, 58, 37, 53, -1, 3, 16, 55, 34, 56, -1, 1, 21, 54, 33, 57, -1, 6, 20, 52, 36, 59, 25, 2, 23, 49, 35, 60, 22, 5, 24, 48, 38, 61, 18, 8, 26, 46, 41, 62, 17, 7, 27, 43, 44, 63, 15, 13, 28, 42, 45, -1, 10, 11, 29, 40, 47, -1}}, /* A5 */ {7, 12, {-1, 18, 8, 26, -1, -1, -1, -1, 17, 7, 27, 46, 38, 60, -1, 15, 13, 28, 43, 41, 61, -1, 10, 11, 29, 42, 44, 62, -1, 9, 14, 30, 40, 45, 63, -1, 4, 12, 31, 39, 47, -1, -1, 0, 19, 58, 32, 50, -1, -1, 3, 16, 55, 37, 51, -1, -1, 1, 21, 54, 34, 53, -1, -1, 6, 20, 52, 33, 56, -1, 25, 2, 23, 49, 36, 57, -1, 22, 5, 24, 48, 35, 59, -1}}, /* A6 */ {7, 11, {-1, 4, 14, 29, 42, 44, 62, -1, 0, 12, 30, 40, 45, 63, -1, 3, 19, 31, 39, 47, -1, -1, 1, 16, 58, 32, 50, -1, 25, 6, 21, 55, 37, 51, -1, 22, 2, 20, 54, 34, 53, -1, 18, 5, 23, 52, 33, 56, -1, 17, 8, 24, 49, 36, 57, -1, 15, 7, 26, 48, 35, 59, -1, 10, 13, 27, 46, 38, 60, -1, 9, 11, 28, 43, 41, 61, -1}}, @@ -137,7 +182,8 @@ CathodeSegmentation* createSegType3(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 
44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType3 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType4.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType4.cxx index 11a1b04709fb4..b51d1d884f8c2 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType4.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType4.cxx @@ -29,7 +29,55 @@ CathodeSegmentation* createSegType4(bool isBendingPlane) 4, true, /* PG */ - {{1, 2, 0, -80, -20}, {2, 17, 0, -77.5, -20}, {3, 4, 0, -72.5, -20}, {4, 18, 0, -70, -20}, {5, 3, 0, -65, -20}, {6, 2, 0, -60, -20}, {7, 17, 0, -57.5, -20}, {8, 4, 0, -52.5, -20}, {9, 18, 0, -50, -20}, {10, 3, 0, -45, -20}, {104, 6, 1, 40, -20}, {105, 7, 1, 45, -20}, {106, 8, 1, 55, -20}, {107, 9, 1, 60, -20}, {111, 2, 1, 0, -20}, {112, 17, 1, 5, -20}, {113, 4, 1, 15, -20}, {114, 18, 1, 20, -20}, {115, 3, 1, 30, -20}, {119, 2, 1, -40, -20}, {120, 17, 1, -35, -20}, {121, 4, 1, -25, -20}, {122, 18, 1, -20, -20}, {123, 3, 1, -10, -20}, {201, 10, 1, 70, -12}, {202, 11, 1, 60, 0}, {203, 12, 1, 55, 4}, {204, 13, 1, 45, 0}, {205, 14, 1, 40, 0}, {209, 1, 1, 30, 0}, {210, 16, 1, 20, 0}, {211, 5, 1, 15, 4}, {212, 15, 1, 5, 0}, {213, 0, 1, 0, 0}, {218, 1, 1, -10, 0}, {219, 16, 1, -20, 0}, {220, 5, 1, -25, 4}, {221, 15, 1, -35, 0}, {222, 0, 1, -40, 0}, {308, 1, 0, -45, 0}, {309, 16, 0, -50, 0}, {310, 5, 0, -52.5, 4}, {311, 15, 0, -57.5, 0}, {312, 0, 0, -60, 0}, {313, 
1, 0, -65, 0}, {314, 16, 0, -70, 0}, {315, 5, 0, -72.5, 4}, {316, 15, 0, -77.5, 0}, {317, 0, 0, -80, 0}}, + {{1, 2, 0, -80, -20}, + {2, 17, 0, -77.5, -20}, + {3, 4, 0, -72.5, -20}, + {4, 18, 0, -70, -20}, + {5, 3, 0, -65, -20}, + {6, 2, 0, -60, -20}, + {7, 17, 0, -57.5, -20}, + {8, 4, 0, -52.5, -20}, + {9, 18, 0, -50, -20}, + {10, 3, 0, -45, -20}, + {104, 6, 1, 40, -20}, + {105, 7, 1, 45, -20}, + {106, 8, 1, 55, -20}, + {107, 9, 1, 60, -20}, + {111, 2, 1, 0, -20}, + {112, 17, 1, 5, -20}, + {113, 4, 1, 15, -20}, + {114, 18, 1, 20, -20}, + {115, 3, 1, 30, -20}, + {119, 2, 1, -40, -20}, + {120, 17, 1, -35, -20}, + {121, 4, 1, -25, -20}, + {122, 18, 1, -20, -20}, + {123, 3, 1, -10, -20}, + {201, 10, 1, 70, -12}, + {202, 11, 1, 60, 0}, + {203, 12, 1, 55, 4}, + {204, 13, 1, 45, 0}, + {205, 14, 1, 40, 0}, + {209, 1, 1, 30, 0}, + {210, 16, 1, 20, 0}, + {211, 5, 1, 15, 4}, + {212, 15, 1, 5, 0}, + {213, 0, 1, 0, 0}, + {218, 1, 1, -10, 0}, + {219, 16, 1, -20, 0}, + {220, 5, 1, -25, 4}, + {221, 15, 1, -35, 0}, + {222, 0, 1, -40, 0}, + {308, 1, 0, -45, 0}, + {309, 16, 0, -50, 0}, + {310, 5, 0, -52.5, 4}, + {311, 15, 0, -57.5, 0}, + {312, 0, 0, -60, 0}, + {313, 1, 0, -65, 0}, + {314, 16, 0, -70, 0}, + {315, 5, 0, -72.5, 4}, + {316, 15, 0, -77.5, 0}, + {317, 0, 0, -80, 0}}, /* PGT */ {/* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, /* L6 */ {2, 40, {42, 43, 40, 46, 39, 48, 32, 49, 37, 52, 34, 54, 33, 55, 36, 58, 35, 31, 38, 30, 41, 29, 44, 28, 45, 27, 47, 26, 50, 24, 51, 23, 53, 20, 56, 21, 57, 16, 59, 19, 60, 12, 61, 14, 62, 11, 63, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, @@ -49,19 +97,10 @@ CathodeSegmentation* 
createSegType4(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 
3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}}}; } else { return new CathodeSegmentation{ 4, @@ -113,7 +152,8 @@ CathodeSegmentation* createSegType4(bool isBendingPlane) /* Q1 */ {14, 5, {-1, -1, -1, -1, 19, 24, 30, 52, 42, 34, 41, 51, -1, -1, 17, 4, 6, 7, 12, 23, 29, 54, 43, 37, 38, 50, 59, 63, 18, 9, 1, 8, 14, 20, 28, 55, 46, 32, 35, 47, 57, 62, 22, 10, 3, 5, 11, 21, 27, 58, 48, 39, 36, 45, 56, 61, 25, 15, 0, 2, 13, 16, 26, 31, 49, 40, 33, 44, 53, 60}}, /* Q2 */ {14, 5, {-1, -1, 2, 11, 21, 27, 58, 48, 39, 36, -1, -1, -1, -1, 17, 4, 6, 13, 16, 26, 31, 49, 40, 33, 44, 51, 59, 63, 18, 9, 1, 7, 19, 24, 30, 52, 42, 34, 41, 50, 57, 62, 22, 10, 3, 8, 12, 23, 29, 54, 43, 37, 38, 47, 56, 61, 25, 15, 0, 5, 14, 20, 28, 55, 46, 32, 35, 45, 53, 60}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType4 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType5.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType5.cxx index d20301c88b869..955bc1c0fced6 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType5.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType5.cxx @@ -29,7 +29,36 @@ CathodeSegmentation* createSegType5(bool isBendingPlane) 5, true, /* PG */ - {{4, 2, 0, 20, -20}, {5, 8, 0, 25, -20}, {6, 4, 0, 35, -20}, {7, 9, 0, 
40, -20}, {8, 3, 0, 50, -20}, {12, 2, 0, -20, -20}, {13, 8, 0, -15, -20}, {14, 4, 0, -5, -20}, {15, 9, 0, 0, -20}, {16, 3, 0, 10, -20}, {20, 2, 0, -60, -20}, {21, 8, 0, -55, -20}, {22, 4, 0, -45, -20}, {23, 9, 0, -40, -20}, {24, 3, 0, -30, -20}, {101, 1, 0, 50, 0}, {102, 7, 0, 40, 0}, {103, 5, 0, 35, 4}, {104, 6, 0, 25, 0}, {105, 0, 0, 20, 0}, {110, 1, 0, 10, 0}, {111, 7, 0, 0, 0}, {112, 5, 0, -5, 4}, {113, 6, 0, -15, 0}, {114, 0, 0, -20, 0}, {119, 1, 0, -30, 0}, {120, 7, 0, -40, 0}, {121, 5, 0, -45, 4}, {122, 6, 0, -55, 0}, {123, 0, 0, -60, 0}}, + {{4, 2, 0, 20, -20}, + {5, 8, 0, 25, -20}, + {6, 4, 0, 35, -20}, + {7, 9, 0, 40, -20}, + {8, 3, 0, 50, -20}, + {12, 2, 0, -20, -20}, + {13, 8, 0, -15, -20}, + {14, 4, 0, -5, -20}, + {15, 9, 0, 0, -20}, + {16, 3, 0, 10, -20}, + {20, 2, 0, -60, -20}, + {21, 8, 0, -55, -20}, + {22, 4, 0, -45, -20}, + {23, 9, 0, -40, -20}, + {24, 3, 0, -30, -20}, + {101, 1, 0, 50, 0}, + {102, 7, 0, 40, 0}, + {103, 5, 0, 35, 4}, + {104, 6, 0, 25, 0}, + {105, 0, 0, 20, 0}, + {110, 1, 0, 10, 0}, + {111, 7, 0, 0, 0}, + {112, 5, 0, -5, 4}, + {113, 6, 0, -15, 0}, + {114, 0, 0, -20, 0}, + {119, 1, 0, -30, 0}, + {120, 7, 0, -40, 0}, + {121, 5, 0, -45, 4}, + {122, 6, 0, -55, 0}, + {123, 0, 0, -60, 0}}, /* PGT */ {/* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, /* L6 */ {2, 40, {42, 43, 40, 46, 39, 48, 32, 49, 37, 52, 34, 54, 33, 55, 36, 58, 35, 31, 38, 30, 41, 29, 44, 28, 45, 27, 47, 26, 50, 24, 51, 23, 53, 20, 56, 21, 57, 16, 59, 19, 60, 12, 61, 14, 62, 11, 63, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, @@ -40,17 +69,7 @@ CathodeSegmentation* createSegType5(bool isBendingPlane) /* Z1 */ {3, 
40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 
35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ {{5, 0.5}}}; } else { diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType6.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType6.cxx index 41b8cfdb9b1cb..63422f44f511f 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType6.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType6.cxx @@ -29,7 +29,26 @@ CathodeSegmentation* createSegType6(bool isBendingPlane) 6, true, /* PG */ - {{4, 2, 0, 0, -20}, {5, 8, 0, 5, -20}, {6, 4, 0, 15, -20}, {7, 9, 0, 20, -20}, {8, 3, 0, 30, -20}, {12, 2, 0, -40, -20}, {13, 8, 0, -35, -20}, {14, 4, 0, -25, -20}, {15, 9, 0, -20, -20}, {16, 3, 0, -10, -20}, {101, 1, 0, 30, 0}, {102, 7, 0, 20, 0}, {103, 5, 0, 15, 4}, {104, 6, 0, 5, 0}, {105, 0, 0, 0, 0}, {110, 1, 0, -10, 0}, {111, 7, 0, -20, 0}, {112, 5, 0, -25, 4}, {113, 6, 0, -35, 0}, {114, 0, 0, -40, 0}}, + {{4, 2, 0, 0, -20}, + {5, 8, 0, 5, -20}, + {6, 4, 0, 15, -20}, + {7, 9, 0, 20, -20}, + {8, 3, 0, 30, -20}, + {12, 2, 0, -40, -20}, + {13, 8, 0, -35, -20}, + {14, 4, 0, -25, -20}, + {15, 9, 0, -20, -20}, + {16, 3, 0, -10, -20}, + {101, 1, 0, 30, 0}, + {102, 7, 0, 20, 0}, + {103, 5, 0, 15, 4}, + {104, 6, 0, 5, 0}, + {105, 0, 0, 0, 0}, + {110, 1, 0, -10, 0}, + {111, 7, 0, -20, 0}, + {112, 5, 0, -25, 4}, + {113, 6, 0, -35, 0}, + {114, 0, 0, -40, 0}}, /* PGT */ {/* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 
43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, /* L6 */ {2, 40, {42, 43, 40, 46, 39, 48, 32, 49, 37, 52, 34, 54, 33, 55, 36, 58, 35, 31, 38, 30, 41, 29, 44, 28, 45, 27, 47, 26, 50, 24, 51, 23, 53, 20, 56, 21, 57, 16, 59, 19, 60, 12, 61, 14, 62, 11, 63, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, @@ -40,17 +59,7 @@ CathodeSegmentation* createSegType6(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 
62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ {{5, 0.5}}}; } else { diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType7.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType7.cxx index f9ec568acee5e..527f8e03c777e 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType7.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType7.cxx @@ -29,7 +29,53 @@ CathodeSegmentation* createSegType7(bool isBendingPlane) 7, true, /* PG */ - {{1, 0, 0, 50, 2}, {2, 10, 0, 45, 4}, {3, 10, 0, 40, 4}, {6, 6, 0, 35, 0}, {7, 13, 0, 30, 0}, {8, 11, 0, 27.5, 4}, {9, 12, 0, 22.5, 0}, {10, 5, 0, 20, 0}, {11, 6, 0, 15, 0}, {12, 13, 0, 10, 0}, {13, 11, 0, 7.5, 4}, {14, 12, 0, 2.5, 0}, {15, 5, 0, 0, 0}, {104, 6, 1, -50, 0}, {105, 13, 1, -60, 0}, {106, 11, 1, -65, 4}, {107, 12, 1, -75, 0}, {108, 5, 1, -80, 0}, {112, 6, 1, -10, 0}, {113, 
13, 1, -20, 0}, {114, 11, 1, -25, 4}, {115, 12, 1, -35, 0}, {116, 5, 1, -40, 0}, {201, 7, 1, -80, -20}, {202, 14, 1, -75, -20}, {203, 9, 1, -65, -20}, {204, 15, 1, -60, -20}, {205, 8, 1, -50, -20}, {210, 7, 1, -40, -20}, {211, 14, 1, -35, -20}, {212, 9, 1, -25, -20}, {213, 15, 1, -20, -20}, {214, 8, 1, -10, -20}, {304, 1, 0, 40, -20}, {305, 2, 0, 42.5, -20}, {306, 3, 0, 45, -20}, {307, 4, 0, 50, -20}, {315, 7, 0, 0, -20}, {316, 14, 0, 2.5, -20}, {317, 9, 0, 7.5, -20}, {318, 15, 0, 10, -20}, {319, 8, 0, 15, -20}, {320, 7, 0, 20, -20}, {321, 14, 0, 22.5, -20}, {322, 9, 0, 27.5, -20}, {323, 15, 0, 30, -20}, {324, 8, 0, 35, -20}}, + {{1, 0, 0, 50, 2}, + {2, 10, 0, 45, 4}, + {3, 10, 0, 40, 4}, + {6, 6, 0, 35, 0}, + {7, 13, 0, 30, 0}, + {8, 11, 0, 27.5, 4}, + {9, 12, 0, 22.5, 0}, + {10, 5, 0, 20, 0}, + {11, 6, 0, 15, 0}, + {12, 13, 0, 10, 0}, + {13, 11, 0, 7.5, 4}, + {14, 12, 0, 2.5, 0}, + {15, 5, 0, 0, 0}, + {104, 6, 1, -50, 0}, + {105, 13, 1, -60, 0}, + {106, 11, 1, -65, 4}, + {107, 12, 1, -75, 0}, + {108, 5, 1, -80, 0}, + {112, 6, 1, -10, 0}, + {113, 13, 1, -20, 0}, + {114, 11, 1, -25, 4}, + {115, 12, 1, -35, 0}, + {116, 5, 1, -40, 0}, + {201, 7, 1, -80, -20}, + {202, 14, 1, -75, -20}, + {203, 9, 1, -65, -20}, + {204, 15, 1, -60, -20}, + {205, 8, 1, -50, -20}, + {210, 7, 1, -40, -20}, + {211, 14, 1, -35, -20}, + {212, 9, 1, -25, -20}, + {213, 15, 1, -20, -20}, + {214, 8, 1, -10, -20}, + {304, 1, 0, 40, -20}, + {305, 2, 0, 42.5, -20}, + {306, 3, 0, 45, -20}, + {307, 4, 0, 50, -20}, + {315, 7, 0, 0, -20}, + {316, 14, 0, 2.5, -20}, + {317, 9, 0, 7.5, -20}, + {318, 15, 0, 10, -20}, + {319, 8, 0, 15, -20}, + {320, 7, 0, 20, -20}, + {321, 14, 0, 22.5, -20}, + {322, 9, 0, 27.5, -20}, + {323, 15, 0, 30, -20}, + {324, 8, 0, 35, -20}}, /* PGT */ {/* C10 */ {3, 36, {28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, 4, -1, 37, 0, -1, 34, 3, -1, 33, 
1, -1, 36, 6, -1, 35, 2, -1, 38, 5, -1, 41, 8, -1, 44, 7, -1, 45, 13, -1, 47, 11, -1, 50, 14, -1, 51, 12, -1, 53, 19, -1, 56, 16, 25, 57, 21, 22, 59, 20, 18, 60, 23, 17, 61, 24, 15, 62, 26, 10, 63, 27, 9}}, /* C6 */ {2, 48, {25, 35, 22, 38, 18, 41, 17, 44, 15, 45, 10, 47, 9, 50, 4, 51, 0, 53, 3, 56, 1, 57, 6, 59, 2, 60, 5, 61, 8, 62, 7, 63, 13, -1, 11, -1, 14, -1, 12, -1, 19, -1, 16, -1, 21, -1, 20, -1, 23, -1, 24, -1, 26, -1, 27, -1, 28, -1, 29, -1, 30, -1, 31, -1, 58, -1, 55, -1, 54, -1, 52, -1, 49, -1, 48, -1, 46, -1, 43, -1, 42, -1, 40, -1, 39, -1, 32, -1, 37, -1, 34, -1, 33, -1, 36, -1}}, @@ -46,19 +92,10 @@ CathodeSegmentation* createSegType7(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, 
-1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}}}; } else { return new CathodeSegmentation{ 7, @@ -100,8 +137,7 @@ CathodeSegmentation* createSegType7(bool isBendingPlane) /* PGT */ {/* C1 */ {7, 10, {51, 33, 49, 26, 13, 9, -1, 53, 36, 48, 27, 11, 4, -1, 56, 35, 46, 28, 14, 0, -1, 57, 38, 43, 29, 12, 3, -1, 59, 41, 42, 30, 19, 1, 25, 60, 44, 40, 31, 16, 6, 22, 61, 45, 39, 58, 21, 2, 18, 62, 47, 32, 55, 20, 5, 17, 63, 50, 37, 54, 23, 8, 15, -1, -1, 34, 52, 24, 7, 10}}, /* C2 */ {7, 10, {60, 41, 42, 30, 19, 1, 25, 61, 44, 40, 31, 16, 6, 22, 62, 45, 39, 58, 21, 2, 18, 63, 47, 32, 55, 20, 5, 17, -1, 50, 37, 54, 23, 8, 15, -1, 51, 34, 52, 24, 7, 10, -1, 53, 33, 49, 26, 13, 9, -1, 56, 36, 48, 27, 11, 4, -1, 57, 35, 46, 28, 14, 0, -1, 59, 38, 43, 29, 12, 3}}, - /* C3 */ - {13, 10, {50, 37, 54, 23, 14, 8, 1, 4, 10, 17, 18, 
22, 25, 51, 34, 52, 24, 12, 7, 6, 0, 9, 15, -1, -1, -1, 53, 33, 49, 26, 19, 13, 2, 3, -1, -1, -1, -1, -1, 56, 36, 48, 27, 16, 11, 5, -1, -1, -1, -1, -1, -1, 57, 35, 46, 28, 21, -1, -1, -1, -1, -1, -1, -1, -1, 59, 38, 43, 29, 20, -1, -1, -1, -1, -1, -1, -1, -1, 60, 41, 42, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 61, 44, 40, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, 45, 39, 58, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, 47, 32, 55, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, + /* C3 */ {13, 10, {50, 37, 54, 23, 14, 8, 1, 4, 10, 17, 18, 22, 25, 51, 34, 52, 24, 12, 7, 6, 0, 9, 15, -1, -1, -1, 53, 33, 49, 26, 19, 13, 2, 3, -1, -1, -1, -1, -1, 56, 36, 48, 27, 16, 11, 5, -1, -1, -1, -1, -1, -1, 57, 35, 46, 28, 21, -1, -1, -1, -1, -1, -1, -1, -1, 59, 38, 43, 29, 20, -1, -1, -1, -1, -1, -1, -1, -1, 60, 41, 42, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 61, 44, 40, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, 45, 39, 58, -1, -1, -1, -1, -1, -1, -1, -1, -1, 63, 47, 32, 55, -1, -1, -1, -1, -1, -1, -1, -1, -1}}, /* C4 */ {16, 6, {-1, 15, 1, 13, 21, 28, 54, 42, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 6, 11, 20, 29, 52, 40, -1, -1, -1, -1, -1, -1, -1, -1, 25, 9, 2, 14, 23, 30, 49, 39, 33, 41, -1, -1, -1, -1, -1, -1, 22, 4, 5, 12, 24, 31, 48, 32, 36, 44, 50, -1, -1, -1, -1, -1, 18, 0, 8, 19, 26, 58, 46, 37, 35, 45, 51, 56, 59, -1, -1, -1, 17, 3, 7, 16, 27, 55, 43, 34, 38, 47, 53, 57, 60, 61, 62, 63}}, /* C5 */ {11, 7, {25, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, 0, 8, 19, 26, 58, 46, 37, 41, 53, 62, 18, 3, 7, 16, 27, 55, 43, 34, 44, 56, 63, 17, 1, 13, 21, 28, 54, 42, 33, 45, 57, -1, 15, 6, 11, 20, 29, 52, 40, 36, 47, 59, -1, 10, 2, 14, 23, 30, 49, 39, 35, 50, 60, -1, 9, 5, 12, 24, 31, 48, 32, 38, 51, 61, -1}}, /* L3 */ {20, 4, {17, 4, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 9, 1, 8, 14, 16, 23, 27, 30, 55, 49, 43, 39, 34, 35, 44, 50, 56, 60, 63, 22, 10, 3, 5, 11, 19, 20, 26, 29, 58, 52, 46, 40, 37, 36, 41, 47, 53, 59, 62, 25, 15, 0, 2, 13, 12, 21, 24, 28, 
31, 54, 48, 42, 32, 33, 38, 45, 51, 57, 61}}, @@ -114,7 +150,8 @@ CathodeSegmentation* createSegType7(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType7 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType8.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType8.cxx index 01414acfd81fd..eb77ea2eff1f3 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType8.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType8.cxx @@ -29,7 +29,63 @@ CathodeSegmentation* createSegType8(bool isBendingPlane) 8, true, /* PG */ - {{4, 18, 0, 40, -20}, {5, 19, 0, 45, -20}, {6, 19, 0, 50, -20}, {7, 11, 0, 55, -17.5}, {101, 15, 1, -80, -20}, {102, 23, 1, -75, -20}, {103, 17, 1, -65, -20}, {104, 24, 1, -60, -20}, {105, 16, 1, -50, -20}, {110, 15, 1, -40, -20}, {111, 23, 1, -35, -20}, {112, 17, 1, -25, -20}, {113, 24, 1, -20, -20}, {114, 16, 1, -10, -20}, {119, 15, 0, 0, -20}, {120, 23, 0, 2.5, -20}, {121, 17, 0, 7.5, -20}, {122, 24, 0, 10, -20}, {123, 16, 0, 15, -20}, {124, 15, 0, 20, -20}, {125, 23, 0, 22.5, -20}, {126, 17, 0, 27.5, -20}, {127, 24, 0, 30, -20}, {128, 16, 0, 35, -20}, 
{204, 14, 1, -50, 0}, {205, 22, 1, -60, 0}, {206, 20, 1, -65, 4}, {207, 21, 1, -75, 0}, {208, 13, 1, -80, 0}, {212, 14, 1, -10, 0}, {213, 22, 1, -20, 0}, {214, 20, 1, -25, 4}, {215, 21, 1, -35, 0}, {216, 13, 1, -40, 0}, {224, 14, 0, 35, 0}, {225, 22, 0, 30, 0}, {226, 20, 0, 27.5, 4}, {227, 21, 0, 22.5, 0}, {228, 13, 0, 20, 0}, {229, 14, 0, 15, 0}, {230, 22, 0, 10, 0}, {231, 20, 0, 7.5, 4}, {232, 21, 0, 2.5, 0}, {233, 13, 0, 0, 0}, {401, 12, 0, 75, -7}, {402, 0, 0, 72.5, -7.5}, {403, 1, 0, 70, -8}, {404, 2, 0, 67.5, 1}, {405, 3, 0, 65, -8.5}, {406, 4, 0, 62.5, -10}, {407, 5, 0, 60, -11}, {408, 6, 0, 55, -4}, {409, 7, 0, 52.5, -4}, {410, 8, 0, 50, -4}, {411, 10, 0, 45, -4}, {412, 7, 0, 42.5, -4}, {413, 9, 0, 40, -4}}, + {{4, 18, 0, 40, -20}, + {5, 19, 0, 45, -20}, + {6, 19, 0, 50, -20}, + {7, 11, 0, 55, -17.5}, + {101, 15, 1, -80, -20}, + {102, 23, 1, -75, -20}, + {103, 17, 1, -65, -20}, + {104, 24, 1, -60, -20}, + {105, 16, 1, -50, -20}, + {110, 15, 1, -40, -20}, + {111, 23, 1, -35, -20}, + {112, 17, 1, -25, -20}, + {113, 24, 1, -20, -20}, + {114, 16, 1, -10, -20}, + {119, 15, 0, 0, -20}, + {120, 23, 0, 2.5, -20}, + {121, 17, 0, 7.5, -20}, + {122, 24, 0, 10, -20}, + {123, 16, 0, 15, -20}, + {124, 15, 0, 20, -20}, + {125, 23, 0, 22.5, -20}, + {126, 17, 0, 27.5, -20}, + {127, 24, 0, 30, -20}, + {128, 16, 0, 35, -20}, + {204, 14, 1, -50, 0}, + {205, 22, 1, -60, 0}, + {206, 20, 1, -65, 4}, + {207, 21, 1, -75, 0}, + {208, 13, 1, -80, 0}, + {212, 14, 1, -10, 0}, + {213, 22, 1, -20, 0}, + {214, 20, 1, -25, 4}, + {215, 21, 1, -35, 0}, + {216, 13, 1, -40, 0}, + {224, 14, 0, 35, 0}, + {225, 22, 0, 30, 0}, + {226, 20, 0, 27.5, 4}, + {227, 21, 0, 22.5, 0}, + {228, 13, 0, 20, 0}, + {229, 14, 0, 15, 0}, + {230, 22, 0, 10, 0}, + {231, 20, 0, 7.5, 4}, + {232, 21, 0, 2.5, 0}, + {233, 13, 0, 0, 0}, + {401, 12, 0, 75, -7}, + {402, 0, 0, 72.5, -7.5}, + {403, 1, 0, 70, -8}, + {404, 2, 0, 67.5, 1}, + {405, 3, 0, 65, -8.5}, + {406, 4, 0, 62.5, -10}, + {407, 5, 0, 60, -11}, + {408, 6, 0, 
55, -4}, + {409, 7, 0, 52.5, -4}, + {410, 8, 0, 50, -4}, + {411, 10, 0, 45, -4}, + {412, 7, 0, 42.5, -4}, + {413, 9, 0, 40, -4}}, /* PGT */ {/* A10 */ {2, 55, {32, -1, 37, -1, 34, -1, 33, -1, 36, -1, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, 25, 53, 22, 56, 18, 57, 17, 59, 15, 60, 10, 61, 9, 62, 4, 63, 0, -1, 3, -1, 1, -1, 6, -1, 2, -1, 5, -1, 8, -1, 7, -1, 13, -1, 11, -1, 14, -1, 12, -1, 19, -1, 16, -1, 21, -1, 20, -1, 23, -1, 24, -1, 26, -1, 27, -1, 28, -1, 29, -1, 30, -1, 31, -1, 39, -1, 40, -1, 42, -1, 43, -1, 46, -1, 48, -1, 49, -1, 52, -1, 54, -1, 55, -1, 58}}, /* A11 */ {2, 56, {54, -1, 52, -1, 49, -1, 48, -1, 46, -1, 43, -1, 42, -1, 40, -1, 39, -1, 32, -1, 37, -1, 34, -1, 33, -1, 36, -1, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, 25, 56, 22, 57, 18, 59, 17, 60, 15, 61, 10, 62, 9, 63, 4, -1, 0, -1, 3, -1, 1, -1, 6, -1, 2, -1, 5, -1, 8, -1, 7, -1, 13, -1, 11, -1, 14, -1, 12, -1, 19, -1, 16, -1, 21, -1, 20, -1, 23, -1, 24, -1, 26, -1, 27, -1, 28, -1, 29, -1, 30, -1, 31, -1, 55, -1, 58}}, @@ -55,19 +111,10 @@ CathodeSegmentation* createSegType8(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, 
-1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}}}; } else { return new CathodeSegmentation{ 8, @@ -116,8 +163,7 @@ CathodeSegmentation* createSegType8(bool isBendingPlane) /* PGT */ {/* A1 */ {9, 8, {53, 35, 42, 58, 23, 13, -1, -1, -1, 56, 38, 40, 55, 24, 11, 3, 18, 25, 57, 41, 39, 54, 26, 14, 1, 17, 22, 59, 44, 32, 52, 27, 12, 6, 15, -1, 60, 45, 37, 49, 28, 19, 2, 10, -1, 
61, 47, 34, 48, 29, 16, 5, 9, -1, 62, 50, 33, 46, 30, 21, 8, 4, -1, 63, 51, 36, 43, 31, 20, 7, 0, -1}}, /* A2 */ {5, 14, {-1, 5, 27, 40, 51, 25, 8, 28, 39, 53, 22, 7, 29, 32, 56, 18, 13, 30, 37, 57, 17, 11, 31, 34, 59, 15, 14, 58, 33, 60, 10, 12, 55, 36, 61, 9, 19, 54, 35, 62, 4, 16, 52, 38, 63, 0, 21, 49, 41, -1, 3, 20, 48, 44, -1, 1, 23, 46, 45, -1, 6, 24, 43, 47, -1, 2, 26, 42, 50, -1}}, - /* A3 */ - {6, 13, {-1, 10, 14, 31, 37, 56, -1, 9, 12, 58, 34, 57, -1, 4, 19, 55, 33, 59, -1, 0, 16, 54, 36, 60, -1, 3, 21, 52, 35, 61, -1, 1, 20, 49, 38, 62, -1, 6, 23, 48, 41, 63, -1, 2, 24, 46, 44, -1, 25, 5, 26, 43, 45, -1, 22, 8, 27, 42, 47, -1, 18, 7, 28, 40, 50, -1, 17, 13, 29, 39, 51, -1, 15, 11, 30, 32, 53, -1}}, + /* A3 */ {6, 13, {-1, 10, 14, 31, 37, 56, -1, 9, 12, 58, 34, 57, -1, 4, 19, 55, 33, 59, -1, 0, 16, 54, 36, 60, -1, 3, 21, 52, 35, 61, -1, 1, 20, 49, 38, 62, -1, 6, 23, 48, 41, 63, -1, 2, 24, 46, 44, -1, 25, 5, 26, 43, 45, -1, 22, 8, 27, 42, 47, -1, 18, 7, 28, 40, 50, -1, 17, 13, 29, 39, 51, -1, 15, 11, 30, 32, 53, -1}}, /* A4 */ {6, 12, {-1, 9, 14, 30, 39, 50, -1, 4, 12, 31, 32, 51, -1, 0, 19, 58, 37, 53, -1, 3, 16, 55, 34, 56, -1, 1, 21, 54, 33, 57, -1, 6, 20, 52, 36, 59, 25, 2, 23, 49, 35, 60, 22, 5, 24, 48, 38, 61, 18, 8, 26, 46, 41, 62, 17, 7, 27, 43, 44, 63, 15, 13, 28, 42, 45, -1, 10, 11, 29, 40, 47, -1}}, /* A5 */ {7, 12, {-1, 18, 8, 26, -1, -1, -1, -1, 17, 7, 27, 46, 38, 60, -1, 15, 13, 28, 43, 41, 61, -1, 10, 11, 29, 42, 44, 62, -1, 9, 14, 30, 40, 45, 63, -1, 4, 12, 31, 39, 47, -1, -1, 0, 19, 58, 32, 50, -1, -1, 3, 16, 55, 37, 51, -1, -1, 1, 21, 54, 34, 53, -1, -1, 6, 20, 52, 33, 56, -1, 25, 2, 23, 49, 36, 57, -1, 22, 5, 24, 48, 35, 59, -1}}, /* A6 */ {7, 11, {-1, 4, 14, 29, 42, 44, 62, -1, 0, 12, 30, 40, 45, 63, -1, 3, 19, 31, 39, 47, -1, -1, 1, 16, 58, 32, 50, -1, 25, 6, 21, 55, 37, 51, -1, 22, 2, 20, 54, 34, 53, -1, 18, 5, 23, 52, 33, 56, -1, 17, 8, 24, 49, 36, 57, -1, 15, 7, 26, 48, 35, 59, -1, 10, 13, 27, 46, 38, 60, -1, 9, 11, 28, 43, 41, 61, 
-1}}, @@ -134,7 +180,8 @@ CathodeSegmentation* createSegType8(bool isBendingPlane) /* Q3 */ {16, 5, {-1, -1, 56, 45, 36, 39, 48, 58, 28, 23, 19, 13, 2, 0, 15, 25, -1, -1, 57, 47, 35, 32, 46, 55, 29, 24, 16, 11, 5, 3, 10, 22, -1, -1, 59, 50, 38, 37, 43, 54, 30, 26, 21, 14, 8, 1, 9, 18, -1, -1, 60, 51, 41, 34, 42, 52, 31, 27, 20, 12, 7, 6, 4, 17, 63, 62, 61, 53, 44, 33, 40, 49, -1, -1, -1, -1, -1, -1, -1, -1}}, /* Q4 */ {16, 5, {60, 53, 45, 35, 37, 42, 49, 58, 27, 21, 11, 2, 4, 18, -1, -1, 61, 56, 47, 38, 34, 40, 48, 55, 28, 20, 14, 5, 0, 17, -1, -1, 62, 57, 50, 41, 33, 39, 46, 54, 29, 23, 12, 8, 3, 15, -1, -1, 63, 59, 51, 44, 36, 32, 43, 52, 30, 24, 19, 7, 1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 31, 26, 16, 13, 6, 9, 22, 25}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType8 diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType9.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType9.cxx index 3c0d6f24138e8..7cce01d6749ee 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType9.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType9.cxx @@ -29,7 +29,56 @@ CathodeSegmentation* createSegType9(bool isBendingPlane) 9, true, /* PG */ - {{1, 2, 0, -80, -20}, {2, 8, 0, -77.5, -20}, {3, 4, 0, -72.5, -20}, {4, 9, 0, -70, -20}, {5, 3, 0, -65, -20}, {6, 2, 0, -60, -20}, {7, 8, 0, -57.5, -20}, {8, 4, 0, -52.5, -20}, {9, 9, 0, -50, -20}, {10, 3, 0, -45, -20}, {104, 2, 1, 40, -20}, {105, 8, 1, 45, -20}, {106, 4, 1, 55, -20}, {107, 9, 1, 60, -20}, {108, 3, 1, 70, -20}, {112, 2, 1, 0, -20}, {113, 8, 1, 5, -20}, {114, 4, 1, 15, -20}, {115, 9, 1, 20, -20}, {116, 3, 1, 30, -20}, {120, 2, 1, -40, -20}, {121, 8, 1, -35, -20}, {122, 4, 1, -25, -20}, {123, 9, 1, -20, -20}, {124, 3, 1, -10, -20}, {201, 1, 1, 70, 0}, {202, 7, 1, 60, 0}, {203, 5, 1, 
55, 4}, {204, 6, 1, 45, 0}, {205, 0, 1, 40, 0}, {210, 1, 1, 30, 0}, {211, 7, 1, 20, 0}, {212, 5, 1, 15, 4}, {213, 6, 1, 5, 0}, {214, 0, 1, 0, 0}, {219, 1, 1, -10, 0}, {220, 7, 1, -20, 0}, {221, 5, 1, -25, 4}, {222, 6, 1, -35, 0}, {223, 0, 1, -40, 0}, {308, 1, 0, -45, 0}, {309, 7, 0, -50, 0}, {310, 5, 0, -52.5, 4}, {311, 6, 0, -57.5, 0}, {312, 0, 0, -60, 0}, {313, 1, 0, -65, 0}, {314, 7, 0, -70, 0}, {315, 5, 0, -72.5, 4}, {316, 6, 0, -77.5, 0}, {317, 0, 0, -80, 0}}, + {{1, 2, 0, -80, -20}, + {2, 8, 0, -77.5, -20}, + {3, 4, 0, -72.5, -20}, + {4, 9, 0, -70, -20}, + {5, 3, 0, -65, -20}, + {6, 2, 0, -60, -20}, + {7, 8, 0, -57.5, -20}, + {8, 4, 0, -52.5, -20}, + {9, 9, 0, -50, -20}, + {10, 3, 0, -45, -20}, + {104, 2, 1, 40, -20}, + {105, 8, 1, 45, -20}, + {106, 4, 1, 55, -20}, + {107, 9, 1, 60, -20}, + {108, 3, 1, 70, -20}, + {112, 2, 1, 0, -20}, + {113, 8, 1, 5, -20}, + {114, 4, 1, 15, -20}, + {115, 9, 1, 20, -20}, + {116, 3, 1, 30, -20}, + {120, 2, 1, -40, -20}, + {121, 8, 1, -35, -20}, + {122, 4, 1, -25, -20}, + {123, 9, 1, -20, -20}, + {124, 3, 1, -10, -20}, + {201, 1, 1, 70, 0}, + {202, 7, 1, 60, 0}, + {203, 5, 1, 55, 4}, + {204, 6, 1, 45, 0}, + {205, 0, 1, 40, 0}, + {210, 1, 1, 30, 0}, + {211, 7, 1, 20, 0}, + {212, 5, 1, 15, 4}, + {213, 6, 1, 5, 0}, + {214, 0, 1, 0, 0}, + {219, 1, 1, -10, 0}, + {220, 7, 1, -20, 0}, + {221, 5, 1, -25, 4}, + {222, 6, 1, -35, 0}, + {223, 0, 1, -40, 0}, + {308, 1, 0, -45, 0}, + {309, 7, 0, -50, 0}, + {310, 5, 0, -52.5, 4}, + {311, 6, 0, -57.5, 0}, + {312, 0, 0, -60, 0}, + {313, 1, 0, -65, 0}, + {314, 7, 0, -70, 0}, + {315, 5, 0, -72.5, 4}, + {316, 6, 0, -77.5, 0}, + {317, 0, 0, -80, 0}}, /* PGT */ {/* L5 */ {2, 40, {23, 20, 24, 21, 26, 16, 27, 19, 28, 12, 29, 14, 30, 11, 31, 13, 58, 7, 55, 8, 54, 5, 52, 2, 49, 6, 48, 1, 46, 3, 43, 0, 42, 4, 40, 9, 39, 10, 32, 15, 37, 17, 34, 18, 33, 22, 36, 25, 35, -1, 38, -1, 41, -1, 44, -1, 45, -1, 47, -1, 50, -1, 51, -1, 53, -1, 56, -1, 57, -1, 59, -1, 60, -1, 61, -1, 62, -1, 63, -1}}, /* L6 */ {2, 
40, {42, 43, 40, 46, 39, 48, 32, 49, 37, 52, 34, 54, 33, 55, 36, 58, 35, 31, 38, 30, 41, 29, 44, 28, 45, 27, 47, 26, 50, 24, 51, 23, 53, 20, 56, 21, 57, 16, 59, 19, 60, 12, 61, 14, 62, 11, 63, 13, -1, 7, -1, 8, -1, 5, -1, 2, -1, 6, -1, 1, -1, 3, -1, 0, -1, 4, -1, 9, -1, 10, -1, 15, -1, 17, -1, 18, -1, 22, -1, 25}}, @@ -40,19 +89,10 @@ CathodeSegmentation* createSegType9(bool isBendingPlane) /* Z1 */ {3, 40, {-1, 0, 4, -1, 3, 9, -1, 1, 10, -1, 6, 15, -1, 2, 17, -1, 5, 18, -1, 8, 22, -1, 7, 25, -1, 13, -1, -1, 11, -1, -1, 14, -1, -1, 12, -1, -1, 19, -1, -1, 16, -1, -1, 21, -1, -1, 20, -1, -1, 23, -1, -1, 24, -1, -1, 26, -1, -1, 27, -1, -1, 28, -1, -1, 29, -1, -1, 30, -1, -1, 31, -1, 63, 58, -1, 62, 55, -1, 61, 54, -1, 60, 52, -1, 59, 49, -1, 57, 48, -1, 56, 46, -1, 53, 43, -1, 51, 42, -1, 50, 40, -1, 47, 39, -1, 45, 32, -1, 44, 37, -1, 41, 34, -1, 38, 33, -1, 35, 36, -1}}, /* Z2 */ {3, 40, {53, 51, -1, 56, 50, -1, 57, 47, -1, 59, 45, -1, 60, 44, -1, 61, 41, -1, 62, 38, -1, 63, 35, -1, -1, 36, -1, -1, 33, -1, -1, 34, -1, -1, 37, -1, -1, 32, -1, -1, 39, -1, -1, 40, -1, -1, 42, -1, -1, 43, -1, -1, 46, -1, -1, 48, -1, -1, 49, -1, -1, 52, -1, -1, 54, -1, -1, 55, -1, -1, 58, -1, -1, 31, 25, -1, 30, 22, -1, 29, 18, -1, 28, 17, -1, 27, 15, -1, 26, 10, -1, 24, 9, -1, 23, 4, -1, 20, 0, -1, 21, 3, -1, 16, 1, -1, 19, 6, -1, 12, 2, -1, 14, 5, -1, 11, 8, -1, 13, 7}}, /* Z3 */ {3, 40, {7, 13, -1, 8, 11, -1, 5, 14, -1, 2, 12, -1, 6, 19, -1, 1, 16, -1, 3, 21, -1, 0, 20, -1, 4, 23, -1, 9, 24, -1, 10, 26, -1, 15, 27, -1, 17, 28, -1, 18, 29, -1, 22, 30, -1, 25, 31, -1, -1, 58, -1, -1, 55, -1, -1, 54, -1, -1, 52, -1, -1, 49, -1, -1, 48, -1, -1, 46, -1, -1, 43, -1, -1, 42, -1, -1, 40, -1, -1, 39, -1, -1, 32, -1, -1, 37, -1, -1, 34, -1, -1, 33, -1, -1, 36, -1, -1, 35, 63, -1, 38, 62, -1, 41, 61, -1, 44, 60, -1, 45, 59, -1, 47, 57, -1, 50, 56, -1, 51, 53}}, - /* Z4 */ - {3, - 40, - {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, - -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, 
-1, 46, 56, - -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, - -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, - -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, - -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, - -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, - 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, + /* Z4 */ {3, 40, {-1, 36, 35, -1, 33, 38, -1, 34, 41, -1, 37, 44, -1, 32, 45, -1, 39, 47, -1, 40, 50, -1, 42, 51, -1, 43, 53, -1, 46, 56, -1, 48, 57, -1, 49, 59, -1, 52, 60, -1, 54, 61, -1, 55, 62, -1, 58, 63, -1, 31, -1, -1, 30, -1, -1, 29, -1, -1, 28, -1, -1, 27, -1, -1, 26, -1, -1, 24, -1, -1, 23, -1, -1, 20, -1, -1, 21, -1, -1, 16, -1, -1, 19, -1, -1, 12, -1, -1, 14, -1, -1, 11, -1, -1, 13, -1, 25, 7, -1, 22, 8, -1, 18, 5, -1, 17, 2, -1, 15, 6, -1, 10, 1, -1, 9, 3, -1, 4, 0, -1}}}, /* PS */ - {{2.5, 0.5}, {5, 0.5}}}; + {{2.5, 0.5}, + {5, 0.5}}}; } else { return new CathodeSegmentation{ 9, @@ -104,7 +144,8 @@ CathodeSegmentation* createSegType9(bool isBendingPlane) /* Q1 */ {14, 5, {-1, -1, -1, -1, 19, 24, 30, 52, 42, 34, 41, 51, -1, -1, 17, 4, 6, 7, 12, 23, 29, 54, 43, 37, 38, 50, 59, 63, 18, 9, 1, 8, 14, 20, 28, 55, 46, 32, 35, 47, 57, 62, 22, 10, 3, 5, 11, 21, 27, 58, 48, 39, 36, 45, 56, 61, 25, 15, 0, 2, 13, 16, 26, 31, 49, 40, 33, 44, 53, 60}}, /* Q2 */ {14, 5, {-1, -1, 2, 11, 21, 27, 58, 48, 39, 36, -1, -1, -1, -1, 17, 4, 6, 13, 16, 26, 31, 49, 40, 33, 44, 51, 59, 63, 18, 9, 1, 7, 19, 24, 30, 52, 42, 34, 41, 50, 57, 62, 22, 10, 3, 8, 12, 23, 29, 54, 43, 37, 38, 47, 56, 61, 25, 15, 0, 5, 14, 20, 28, 55, 46, 32, 35, 45, 53, 60}}}, /* PS */ - {{0.714285714, 2.5}, {0.714285714, 5}}}; + {{0.714285714, 2.5}, + {0.714285714, 5}}}; } } class CathodeSegmentationCreatorRegisterCreateSegType9 From 15a7e2f3f34b9a9fcba81ebf9a9e6bfd6d4d830a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 3 Feb 2025 00:52:31 +0100 Subject: [PATCH 0047/1914] GPU: Remove obsolete code paths --- 
GPU/GPUTracking/Base/GPUReconstruction.cxx | 14 +- GPU/GPUTracking/Definitions/GPUSettingsList.h | 3 - GPU/GPUTracking/Global/GPUChainTracking.cxx | 20 +- .../Global/GPUChainTrackingMerger.cxx | 73 +++---- .../Global/GPUChainTrackingSliceTracker.cxx | 9 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 200 +++++------------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 6 - GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 6 +- GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx | 29 +-- .../SliceTracker/GPUTPCTracker.cxx | 4 +- .../Standalone/Benchmark/standalone.cxx | 4 +- GPU/GPUTracking/qa/GPUQA.cxx | 5 - 12 files changed, 109 insertions(+), 264 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 5df69c416e858..1496300818fd8 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -282,21 +282,9 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.nDeviceHelperThreads = 0; } - if (param().rec.nonConsecutiveIDs) { - param().rec.tpc.disableRefitAttachment = 0xFF; - } - if (!(mRecoSteps.stepsGPUMask & RecoStep::TPCMerging) || !param().rec.tpc.mergerReadFromTrackerDirectly) { - mProcessingSettings.fullMergerOnGPU = false; - } - if (mProcessingSettings.debugLevel > 3 || !IsGPU() || !mProcessingSettings.fullMergerOnGPU || mProcessingSettings.deterministicGPUReconstruction) { + if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) { mProcessingSettings.delayedOutput = false; } - if (!mProcessingSettings.fullMergerOnGPU && (GetRecoStepsGPU() & RecoStep::TPCMerging)) { - param().rec.tpc.looperInterpolationInExtraPass = 0; - if (param().rec.tpc.retryRefit == 1) { - param().rec.tpc.retryRefit = 2; - } - } UpdateAutomaticProcessingSettings(); GPUCA_GPUReconstructionUpdateDefaults(); diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 
76370c17f9f53..c10793975453d 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -149,7 +149,6 @@ AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation inst AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode") AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel") AddOptionRTC(looperInterpolationInExtraPass, int8_t, -1, "", 0, "Perform looper interpolation in an extra pass") -AddOptionRTC(mergerReadFromTrackerDirectly, int8_t, 1, "", 0, "Forward data directly from tracker to merger on GPU") AddOptionRTC(dropSecondaryLegsInOutput, int8_t, 1, "", 0, "Do not store secondary legs of looping track in TrackTPC") AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response") AddOptionRTC(PID_useNsigma, int8_t, 1, "", 0, "Use nSigma instead of absolute distance in PID response") @@ -188,7 +187,6 @@ EndConfig() BeginSubConfig(GPUSettingsRec, rec, configStandalone, "REC", 0, "Reconstruction settings", rec) AddOptionRTC(maxTrackQPtB5, float, 1.f / GPUCA_MIN_TRACK_PTB5_DEFAULT, "", 0, "required max Q/Pt (==min Pt) of tracks") -AddOptionRTC(nonConsecutiveIDs, int8_t, false, "", 0, "Non-consecutive cluster IDs as in HLT, disables features that need access to slice data in TPC merger") AddOptionRTC(fwdTPCDigitsAsClusters, uint8_t, 0, "", 0, "Forward TPC digits as clusters (if they pass the ZS threshold)") AddOptionRTC(bz0Pt10MeV, uint8_t, 60, "", 0, "Nominal Pt to set when bz = 0 (in 10 MeV)") AddOptionRTC(fitInProjections, int8_t, -1, "", 0, "Fit in projection, -1 to enable full fit for all but passes but the first one") @@ -261,7 +259,6 @@ AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster AddOption(trackletSelectorSlices, int8_t, -1, "", 0, "Number of slices to processes in parallel at max") 
AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline") AddOption(trackletSelectorInPipeline, int8_t, -1, "", 0, "Run tracklet selector in the pipeline") -AddOption(fullMergerOnGPU, bool, true, "", 0, "Perform full TPC track merging on GPU instead of only refit") AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit") AddOption(mergerSortTracks, int8_t, -1, "", 0, "Sort track indizes for GPU track fit") AddOption(alternateBorderSort, int8_t, -1, "", 0, "Alternative implementation for sorting of border tracks") diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 1aa5f9ca0dad8..889e12c258cb4 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -185,12 +185,8 @@ bool GPUChainTracking::ValidateSteps() GPUError("Invalid input, TPC Clusterizer needs TPC raw input"); return false; } - if (param().rec.tpc.mergerReadFromTrackerDirectly && (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && ((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion))) { - GPUError("Invalid input / output / step, mergerReadFromTrackerDirectly cannot read/store sectors tracks and needs TPC conversion"); - return false; - } - if (!GetProcessingSettings().fullMergerOnGPU && (param().rec.tpc.mergerReadFromTrackerDirectly || GetProcessingSettings().createO2Output) && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging)) { - GPUError("createO2Output and mergerReadFromTrackerDirectly works only in combination with fullMergerOnGPU if the merger is to run on GPU"); + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && ((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || 
!(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion))) { + GPUError("Invalid input / output / step, merger cannot read/store sectors tracks and needs TPC conversion"); return false; } bool tpcClustersAvail = (GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCClusters) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCClusterFinding) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression); @@ -265,14 +261,6 @@ bool GPUChainTracking::ValidateSettings() GPUError("Cannot do error interpolation with NWays = 1!"); return false; } - if ((param().rec.tpc.mergerReadFromTrackerDirectly || !param().par.earlyTpcTransform) && param().rec.nonConsecutiveIDs) { - GPUError("incompatible settings for non consecutive ids"); - return false; - } - if (!param().rec.tpc.mergerReadFromTrackerDirectly && GetProcessingSettings().ompKernels) { - GPUError("OMP Kernels require mergerReadFromTrackerDirectly"); - return false; - } if (param().continuousMaxTimeBin > (int32_t)GPUSettings::TPC_MAX_TF_TIME_BIN) { GPUError("configured max time bin exceeds 256 orbits"); return false; @@ -743,10 +731,6 @@ int32_t GPUChainTracking::RunChain() return 1; } - for (uint32_t i = 0; i < NSLICES; i++) { - // GPUInfo("slice %d clusters %d tracks %d", i, mClusterData[i].NumberOfClusters(), processors()->tpcTrackers[i].Output()->NTracks()); - processors()->tpcMerger.SetSliceData(i, param().rec.tpc.mergerReadFromTrackerDirectly ? 
nullptr : processors()->tpcTrackers[i].Output()); - } if (runRecoStep(RecoStep::TPCMerging, &GPUChainTracking::RunTPCTrackingMerger, false)) { return 1; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 8dd5140db6952..0831b260f881d 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -24,14 +24,14 @@ using namespace o2::gpu; void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType) { GPUTPCGMMerger& Merger = processors()->tpcMerger; - bool doGPUall = GetRecoStepsGPU() & RecoStep::TPCMerging && GetProcessingSettings().fullMergerOnGPU; - GPUTPCGMMerger& MergerShadow = doGPUall ? processorsShadow()->tpcMerger : Merger; + bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging; + GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger; if (GetProcessingSettings().deterministicGPUReconstruction) { uint32_t nBorderTracks = withinSlice == 1 ? NSLICES : (2 * NSLICES); runKernel({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0); } uint32_t n = withinSlice == -1 ? 
NSLICES / 2 : NSLICES; - if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPUall)) { + if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); RecordMarker(&mEvents->single, 0); for (uint32_t i = 0; i < n; i++) { @@ -72,7 +72,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice runKernel(GetGridAuto(0, deviceType), i, withinSlice, mergeMode); } } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSlice, mergeMode); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSlice, mergeMode); mRec->ReturnVolatileDeviceMemory(); } @@ -89,12 +89,11 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) { mRec->PushNonPersistentMemory(qStr2Tag("TPCMERGE")); bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging; - bool doGPUall = doGPU && GetProcessingSettings().fullMergerOnGPU; - GPUReconstruction::krnlDeviceType deviceType = doGPUall ? GPUReconstruction::krnlDeviceType::Auto : GPUReconstruction::krnlDeviceType::CPU; - uint32_t numBlocks = (!mRec->IsGPU() || doGPUall) ? BlockCount() : 1; + GPUReconstruction::krnlDeviceType deviceType = doGPU ? GPUReconstruction::krnlDeviceType::Auto : GPUReconstruction::krnlDeviceType::CPU; + uint32_t numBlocks = (!mRec->IsGPU() || doGPU) ? BlockCount() : 1; GPUTPCGMMerger& Merger = processors()->tpcMerger; GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger; - GPUTPCGMMerger& MergerShadowAll = doGPUall ? processorsShadow()->tpcMerger : Merger; + GPUTPCGMMerger& MergerShadowAll = doGPU ? 
processorsShadow()->tpcMerger : Merger; const int32_t outputStream = OutputStream(); if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Running TPC Merger"); @@ -112,7 +111,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) memset(Merger.Memory(), 0, sizeof(*Merger.Memory())); WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0); - if (doGPUall) { + if (doGPU) { TransferMemoryResourcesToGPU(RecoStep::TPCMerging, &Merger, 0); } @@ -136,14 +135,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({{GPUCA_NSLICES, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpSliceTracks, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSliceTracks, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSLICES * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergedWithinSlices, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSlices, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); @@ -158,7 +157,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); 
RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergedBetweenSlices, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSlices, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); @@ -168,17 +167,17 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel({{1, -WarpSize(), 0, deviceType}}, 1); runKernel({{1, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); if (param().rec.tpc.mergeCE) { runKernel(GetGridAuto(0, deviceType), true); RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType); RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); } int32_t waitForTransfer = 0; - if (doGPUall) { + if (doGPU) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single); waitForTransfer = 1; } @@ -189,23 +188,21 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); } - uint32_t maxId = param().rec.nonConsecutiveIDs ? 
Merger.Memory()->nOutputTrackClusters : Merger.NMaxClusters(); + uint32_t maxId = Merger.NMaxClusters(); if (maxId > Merger.NMaxClusters()) { throw std::runtime_error("mNMaxClusters too small"); } - if (!param().rec.nonConsecutiveIDs) { - runKernel({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount())); - runKernel({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment())); - runKernel(GetGridAuto(0, deviceType)); - CondWaitEvent(waitForTransfer, &mEvents->single); - runKernel(GetGridAuto(0, deviceType)); - runKernel(GetGridAuto(0, deviceType)); - runKernel(GetGridAuto(0, deviceType)); - } + runKernel({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.SharedCount(), maxId * sizeof(*MergerShadowAll.SharedCount())); + runKernel({{numBlocks, -ThreadCount(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.ClusterAttachment(), maxId * sizeof(*MergerShadowAll.ClusterAttachment())); + runKernel(GetGridAuto(0, deviceType)); + CondWaitEvent(waitForTransfer, &mEvents->single); + runKernel(GetGridAuto(0, deviceType)); + runKernel(GetGridAuto(0, deviceType)); + runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); - if (doGPUall) { + if (doGPU) { CondWaitEvent(waitForTransfer, &mEvents->single); if (waitForTransfer) { ReleaseEvent(mEvents->single); @@ -228,29 +225,23 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (param().rec.tpc.looperInterpolationInExtraPass) { runKernel(GetGridAuto(0)); } - if (doGPU && !doGPUall) { - TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, 0); - SynchronizeStream(0); - } 
DoDebugAndDump(RecoStep::TPCMerging, 2048, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); runKernel(GetGridAuto(0, deviceType)); - if (!param().rec.nonConsecutiveIDs) { - runKernel(GetGridAuto(0, deviceType)); - runKernel(GetGridAuto(0, deviceType)); - } + runKernel(GetGridAuto(0, deviceType)); + runKernel(GetGridAuto(0, deviceType)); if (param().rec.tpc.mergeLoopersAfterburner) { - runKernel(doGPUall ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); + runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); if (doGPU) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0); SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel } runKernel(GetGridAuto(0, deviceType)); - runKernel(doGPUall ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); + runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPUall, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); - if (doGPUall) { + if (doGPU) { RecordMarker(&mEvents->single, 0); auto* waitEvent = &mEvents->single; if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().createO2Output <= 1 || mFractionalQAEnabled) { @@ -302,7 +293,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single); runKernel(GetGridAuto(0, deviceType)); mRec->ReturnVolatileDeviceMemory(); - SynchronizeEventAndRelease(mEvents->single, doGPUall); + SynchronizeEventAndRelease(mEvents->single, doGPU); if (GetProcessingSettings().clearO2OutputFromGPU) { mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile @@ 
-316,7 +307,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) AllocateRegisteredMemory(Merger.MemoryResOutputO2MC(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2Labels)]); TransferMemoryResourcesToHost(RecoStep::TPCMerging, &Merger, -1, true); runKernel(GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)); - } else if (doGPUall) { + } else if (doGPU) { RecordMarker(&mEvents->single, 0); TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2(), outputStream, nullptr, &mEvents->single); TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResOutputO2Clus(), outputStream); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index ba6ba03fca8a1..35a8c6c455048 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -30,11 +30,8 @@ int32_t GPUChainTracking::GlobalTracking(uint32_t iSlice, int32_t threadId, bool GPUInfo("GPU Tracker running Global Tracking for slice %u on thread %d\n", iSlice, threadId); } - GPUReconstruction::krnlDeviceType deviceType = GetProcessingSettings().fullMergerOnGPU ? 
GPUReconstruction::krnlDeviceType::Auto : GPUReconstruction::krnlDeviceType::CPU; - runKernel({GetGridBlk(256, iSlice % mRec->NStreams(), deviceType), {iSlice}}); - if (GetProcessingSettings().fullMergerOnGPU) { - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[iSlice].MemoryResCommon(), iSlice % mRec->NStreams()); - } + runKernel({GetGridBlk(256, iSlice % mRec->NStreams()), {iSlice}}); + TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[iSlice].MemoryResCommon(), iSlice % mRec->NStreams()); if (synchronizeOutput) { SynchronizeStream(iSlice % mRec->NStreams()); } @@ -450,7 +447,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() blocking[tmpSlice * mRec->NStreams() + sliceRight % mRec->NStreams()] = true; } } - GlobalTracking(tmpSlice, 0, !GetProcessingSettings().fullMergerOnGPU); + GlobalTracking(tmpSlice, 0, false); } } for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index fab4469eeb488..60dd18a254904 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -81,7 +81,7 @@ struct MergeLooperParam { #include "GPUMemorySizeScalers.h" GPUTPCGMMerger::GPUTPCGMMerger() - : mTrackLinks(nullptr), mNTotalSliceTracks(0), mNMaxTracks(0), mNMaxSingleSliceTracks(0), mNMaxOutputTrackClusters(0), mNMaxClusters(0), mMemoryResMemory(-1), mNClusters(0), mOutputTracks(nullptr), mSliceTrackInfos(nullptr), mSliceTrackInfoIndex(nullptr), mClusters(nullptr), mClustersXYZ(nullptr), mGlobalClusterIDs(nullptr), mClusterAttachment(nullptr), mOutputTracksTPCO2(nullptr), mOutputClusRefsTPCO2(nullptr), mOutputTracksTPCO2MC(nullptr), mTrackOrderAttach(nullptr), mTrackOrderProcess(nullptr), mBorderMemory(nullptr), mBorderRangeMemory(nullptr), mMemory(nullptr), mRetryRefitIds(nullptr), mLoopData(nullptr) + : mTrackLinks(nullptr), mNTotalSliceTracks(0), 
mNMaxTracks(0), mNMaxSingleSliceTracks(0), mNMaxOutputTrackClusters(0), mNMaxClusters(0), mMemoryResMemory(-1), mNClusters(0), mOutputTracks(nullptr), mSliceTrackInfos(nullptr), mSliceTrackInfoIndex(nullptr), mClusters(nullptr), mClustersXYZ(nullptr), mClusterAttachment(nullptr), mOutputTracksTPCO2(nullptr), mOutputClusRefsTPCO2(nullptr), mOutputTracksTPCO2MC(nullptr), mTrackOrderAttach(nullptr), mTrackOrderProcess(nullptr), mBorderMemory(nullptr), mBorderRangeMemory(nullptr), mMemory(nullptr), mRetryRefitIds(nullptr), mLoopData(nullptr) { //* constructor @@ -95,10 +95,6 @@ GPUTPCGMMerger::GPUTPCGMMerger() mPrevSliceInd[0] = mid; mNextSliceInd[last] = NSLICES / 2; mPrevSliceInd[NSLICES / 2] = last; - - for (int32_t i = 0; i < NSLICES; i++) { - mkSlices[i] = nullptr; - } } // DEBUG CODE @@ -180,13 +176,9 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const for (int32_t i = 0; i < nClusters; i++) { int32_t id; if constexpr (std::is_same::value) { - if (Param().rec.tpc.mergerReadFromTrackerDirectly) { - const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sliceTrack->Slice()]; - const GPUTPCHitId& ic = tracker.TrackHits()[sliceTrack->OrigTrack()->FirstHitID() + i]; - id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sliceTrack->Slice()][0]; - } else { - id = sliceTrack->OrigTrack()->OutTrackClusters()[i].GetId(); - } + const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sliceTrack->Slice()]; + const GPUTPCHitId& ic = tracker.TrackHits()[sliceTrack->OrigTrack()->FirstHitID() + i]; + id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sliceTrack->Slice()][0]; } else { id = mClusters[trk.FirstClusterRef() + i].num; } @@ -251,9 +243,6 @@ void* GPUTPCGMMerger::SetPointersMerger(void* mem) { computePointerWithAlignment(mem, mSliceTrackInfos, mNTotalSliceTracks); 
computePointerWithAlignment(mem, mSliceTrackInfoIndex, NSLICES * 2 + 1); - if (mRec->GetParam().rec.nonConsecutiveIDs) { - computePointerWithAlignment(mem, mGlobalClusterIDs, mNMaxOutputTrackClusters); - } if (mRec->GetProcessingSettings().deterministicGPUReconstruction) { computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSliceTracks, mNMaxTracks * 2)); } @@ -263,7 +252,7 @@ void* GPUTPCGMMerger::SetPointersMerger(void* mem) computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSliceTracks); int32_t nTracks = 0; for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - const int32_t n = mRec->GetParam().rec.tpc.mergerReadFromTrackerDirectly ? *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks() : mkSlices[iSlice]->NTracks(); + const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks(); mBorder[iSlice] = mBorderMemory + 2 * nTracks; mBorder[NSLICES + iSlice] = mBorderMemory + 2 * nTracks + n; mBorderRange[iSlice] = mBorderRangeMemory + 2 * nTracks; @@ -296,14 +285,6 @@ void* GPUTPCGMMerger::SetPointersMemory(void* mem) } void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) -{ - if (mRec->GetProcessingSettings().fullMergerOnGPU) { - mem = SetPointersRefitScratch2(mem); - } - return mem; -} - -void* GPUTPCGMMerger::SetPointersRefitScratch2(void* mem) { computePointerWithAlignment(mem, mTrackOrderAttach, mNMaxTracks); if (mRec->GetProcessingSettings().mergerSortTracks) { @@ -323,9 +304,6 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mClustersXYZ, mNMaxOutputTrackClusters); } computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters); - if (!mRec->GetProcessingSettings().fullMergerOnGPU) { - mem = SetPointersRefitScratch2(mem); - } return mem; } @@ -367,10 +345,10 @@ void* GPUTPCGMMerger::SetPointersOutputO2Scratch(void* mem) void GPUTPCGMMerger::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersMerger, 
(mRec->GetProcessingSettings().fullMergerOnGPU ? 0 : GPUMemoryResource::MEMORY_HOST) | GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCMerger"); + mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersMerger, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCMerger"); mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersRefitScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCMergerRefitScratch"); - mMemoryResOutput = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutput, (mRec->GetProcessingSettings().fullMergerOnGPU ? (mRec->GetProcessingSettings().createO2Output > 1 ? GPUMemoryResource::MEMORY_SCRATCH : GPUMemoryResource::MEMORY_OUTPUT) : GPUMemoryResource::MEMORY_INOUT) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutput"); - mMemoryResOutputState = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutputState, (mRec->GetProcessingSettings().fullMergerOnGPU ? (mRec->GetProcessingSettings().outputSharedClusterMap ? GPUMemoryResource::MEMORY_OUTPUT : GPUMemoryResource::MEMORY_GPU) : GPUMemoryResource::MEMORY_HOST) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutputState"); + mMemoryResOutput = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutput, (mRec->GetProcessingSettings().createO2Output > 1 ? GPUMemoryResource::MEMORY_SCRATCH : GPUMemoryResource::MEMORY_OUTPUT) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutput"); + mMemoryResOutputState = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutputState, (mRec->GetProcessingSettings().outputSharedClusterMap ? 
GPUMemoryResource::MEMORY_OUTPUT : GPUMemoryResource::MEMORY_GPU) | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutputState"); if (mRec->GetProcessingSettings().createO2Output) { mMemoryResOutputO2Scratch = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutputO2Scratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutputO2Scratch"); mMemoryResOutputO2 = mRec->RegisterMemoryAllocation(this, &GPUTPCGMMerger::SetPointersOutputO2, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_CUSTOM, "TPCMergerOutputO2"); @@ -388,9 +366,9 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) mNClusters = 0; mNMaxSingleSliceTracks = 0; for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t ntrk = mRec->GetParam().rec.tpc.mergerReadFromTrackerDirectly ? *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks() : mkSlices[iSlice]->NTracks(); + uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks(); mNTotalSliceTracks += ntrk; - mNClusters += mRec->GetParam().rec.tpc.mergerReadFromTrackerDirectly ? *mRec->GetConstantMem().tpcTrackers[iSlice].NTrackHits() : mkSlices[iSlice]->NTrackClusters(); + mNClusters += *mRec->GetConstantMem().tpcTrackers[iSlice].NTrackHits(); if (mNMaxSingleSliceTracks < ntrk) { mNMaxSingleSliceTracks = ntrk; } @@ -417,12 +395,12 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) int32_t GPUTPCGMMerger::CheckSlices() { for (int32_t i = 0; i < NSLICES; i++) { - if ((Param().rec.tpc.mergerReadFromTrackerDirectly ? 
mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks : mkSlices[i]->NLocalTracks()) > mNMaxSingleSliceTracks) { + if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSliceTracks) { throw std::runtime_error("mNMaxSingleSliceTracks too small"); } } - if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking) && (!Param().rec.nonConsecutiveIDs || Param().rec.tpc.mergerReadFromTrackerDirectly)) { - throw std::runtime_error("Must run also slice tracking if nonConsecutiveIDs = false or mergerReadFromTrackerDirectly"); + if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { + throw std::runtime_error("Must run also slice tracking"); } return 0; } @@ -469,32 +447,18 @@ GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, c for (int32_t i = start; i != end; i += incr) { float x, y, z; int32_t row, flags; - if (Param().rec.tpc.mergerReadFromTrackerDirectly) { - const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[slice]; - const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i]; - int32_t clusterIndex = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()); - row = ic.RowIndex(); - const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[slice][0] + clusterIndex]; - flags = cl.getFlags(); - if (Param().par.earlyTpcTransform) { - x = tracker.Data().ClusterData()[clusterIndex].x; - y = tracker.Data().ClusterData()[clusterIndex].y; - z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset(); - } else { - GetConstantMem()->calibObjects.fastTransformHelper->Transform(slice, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); - } + const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[slice]; + const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i]; + int32_t clusterIndex = 
tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()); + row = ic.RowIndex(); + const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[slice][0] + clusterIndex]; + flags = cl.getFlags(); + if (Param().par.earlyTpcTransform) { + x = tracker.Data().ClusterData()[clusterIndex].x; + y = tracker.Data().ClusterData()[clusterIndex].y; + z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset(); } else { - const GPUTPCSliceOutCluster& clo = inTrack->OutTrackCluster(i); - row = clo.GetRow(); - flags = clo.GetFlags(); - if (Param().par.earlyTpcTransform) { - x = clo.GetX(); - y = clo.GetY(); - z = clo.GetZ() - trk.TZOffset(); - } else { - const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clo.GetId()]; - GetConstantMem()->calibObjects.fastTransformHelper->Transform(slice, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); - } + GetConstantMem()->calibObjects.fastTransformHelper->Transform(slice, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); } if (prop.PropagateToXAlpha(x, alpha, true)) { return way == 0; @@ -516,25 +480,16 @@ GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, c GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSliceTrack& track, int32_t iSlice, const GPUTPCTrack* sliceTr) { - if (Param().rec.tpc.mergerReadFromTrackerDirectly) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; - const GPUTPCHitId& ic1 = trk.TrackHits()[sliceTr->FirstHitID()]; - const GPUTPCHitId& ic2 = trk.TrackHits()[sliceTr->FirstHitID() + sliceTr->NHits() - 1]; - int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex()); - int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex()); - if (Param().par.earlyTpcTransform) { - track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, 
trk.Data().ClusterData()[clusterIndex2].z); - } else { - const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; - track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); - } + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCHitId& ic1 = trk.TrackHits()[sliceTr->FirstHitID()]; + const GPUTPCHitId& ic2 = trk.TrackHits()[sliceTr->FirstHitID() + sliceTr->NHits() - 1]; + int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex()); + int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex()); + if (Param().par.earlyTpcTransform) { + track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, trk.Data().ClusterData()[clusterIndex2].z); } else { - if (Param().par.earlyTpcTransform) { - track.SetClusterZT(sliceTr->OutTrackClusters()->GetZ(), (sliceTr->OutTrackClusters() + sliceTr->NHits() - 1)->GetZ()); - } else { - const ClusterNative* cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; - track.SetClusterZT(cls[sliceTr->OutTrackClusters()->GetId()].getTime(), cls[(sliceTr->OutTrackClusters() + sliceTr->NHits() - 1)->GetId()].getTime()); - } + const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; + track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); } } @@ -548,14 +503,10 @@ GPUd() void GPUTPCGMMerger::UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; float alpha = Param().Alpha(iSlice); const GPUTPCTrack* sliceTr = mMemory->firstGlobalTracks[iSlice]; - uint32_t nLocalTracks = Param().rec.tpc.mergerReadFromTrackerDirectly ? 
trk.CommonMemory()->nLocalTracks : mkSlices[iSlice]->NLocalTracks(); - uint32_t nTracks = Param().rec.tpc.mergerReadFromTrackerDirectly ? *trk.NTracks() : mkSlices[iSlice]->NTracks(); + uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; + uint32_t nTracks = *trk.NTracks(); for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) { - if (Param().rec.tpc.mergerReadFromTrackerDirectly) { - sliceTr = &trk.Tracks()[itr]; - } else if (itr > nLocalTracks) { - sliceTr = sliceTr->GetNextTrack(); - } + sliceTr = &trk.Tracks()[itr]; int32_t localId = mTrackIDs[(sliceTr->LocalTrackId() >> 24) * mNMaxSingleSliceTracks + (sliceTr->LocalTrackId() & 0xFFFFFF)]; if (localId == -1) { continue; @@ -576,7 +527,7 @@ GPUd() void GPUTPCGMMerger::UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice) { const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; - uint32_t nLocalTracks = Param().rec.tpc.mergerReadFromTrackerDirectly ? trk.CommonMemory()->nLocalTracks : mkSlices[iSlice]->NLocalTracks(); + uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; for (uint32_t i = iBlock * nThreads + iThread; i < nLocalTracks; i += nBlocks * nThreads) { mTrackIDs[iSlice * mNMaxSingleSliceTracks + i] = -1; } @@ -585,17 +536,13 @@ GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, in GPUd() void GPUTPCGMMerger::RefitSliceTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice) { const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; - uint32_t nLocalTracks = Param().rec.tpc.mergerReadFromTrackerDirectly ? 
trk.CommonMemory()->nLocalTracks : mkSlices[iSlice]->NLocalTracks(); + uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; float alpha = Param().Alpha(iSlice); - const GPUTPCTrack* sliceTr = Param().rec.tpc.mergerReadFromTrackerDirectly ? nullptr : mkSlices[iSlice]->GetFirstTrack(); + const GPUTPCTrack* sliceTr = nullptr; for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) { - if (Param().rec.tpc.mergerReadFromTrackerDirectly) { - sliceTr = &trk.Tracks()[itr]; - } else if (itr) { - sliceTr = sliceTr->GetNextTrack(); - } + sliceTr = &trk.Tracks()[itr]; GPUTPCGMSliceTrack track; SetTrackClusterZT(track, iSlice, sliceTr); if (Param().rec.tpc.mergerCovSource == 0) { @@ -626,9 +573,6 @@ GPUd() void GPUTPCGMMerger::RefitSliceTracks(int32_t nBlocks, int32_t nThreads, mTrackIDs[iSlice * mNMaxSingleSliceTracks + sliceTr->LocalTrackId()] = myTrack; mSliceTrackInfos[myTrack] = track; } - if (!Param().rec.tpc.mergerReadFromTrackerDirectly) { - mMemory->firstGlobalTracks[iSlice] = nLocalTracks ? sliceTr->GetNextTrack() : mkSlices[iSlice]->GetFirstTrack(); - } } GPUd() void GPUTPCGMMerger::LinkGlobalTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) @@ -730,7 +674,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea { CADEBUG(GPUInfo("\nMERGING Slices %d %d NTracks %d %d CROSS %d", iSlice1, iSlice2, N1, N2, mergeMode)); GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + (Param().rec.tpc.mergerReadFromTrackerDirectly ? 
*GetConstantMem()->tpcTrackers[iSlice2].NTracks() : mkSlices[iSlice2]->NTracks()); + GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); bool sameSlice = (iSlice1 == iSlice2); for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) { GPUTPCGMBorderTrack& b = B1[itr]; @@ -774,7 +718,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea { #if !defined(GPUCA_GPUCODE_COMPILEKERNELS) GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + (Param().rec.tpc.mergerReadFromTrackerDirectly ? *GetConstantMem()->tpcTrackers[iSlice2].NTracks() : mkSlices[iSlice2]->NTracks()); + GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); if (iThread == 0) { if (iBlock == 0) { @@ -864,7 +808,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea bool sameSlice = (iSlice1 == iSlice2); GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + (Param().rec.tpc.mergerReadFromTrackerDirectly ? 
*GetConstantMem()->tpcTrackers[iSlice2].NTracks() : mkSlices[iSlice2]->NTracks()); + GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); int32_t i2 = 0; for (int32_t i1 = iBlock * nThreads + iThread; i1 < N1; i1 += nThreads * nBlocks) { @@ -1326,10 +1270,6 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSliceTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr) { - if (Param().rec.nonConsecutiveIDs) { - return; - } - if (Param().rec.tpc.mergerCERowLimit > 0 && CAMath::Abs(track->QPt()) * Param().qptB5Scaler < 0.3f && (cls.row < Param().rec.tpc.mergerCERowLimit || cls.row >= GPUCA_ROW_COUNT - Param().rec.tpc.mergerCERowLimit)) { return; } @@ -1646,16 +1586,10 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread int32_t nTrackHits = t->NClusters(); trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; for (int32_t i = 0; i < nTrackHits; i++, c2--) { - if (Param().rec.tpc.mergerReadFromTrackerDirectly) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Slice()]; - const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i]; - uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Slice()][0]; - *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Slice(), t->Leg()}; - } else { - const GPUTPCSliceOutCluster& c = t->OrigTrack()->OutTrackClusters()[i]; - uint32_t id = Param().rec.nonConsecutiveIDs ? 
((uint32_t)((uint32_t*)&c - (uint32_t*)mkSlices[t->Slice()]->GetFirstTrack())) : c.GetId(); - *c2 = trackCluster{id, c.GetRow(), t->Slice(), t->Leg()}; - } + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Slice()]; + const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i]; + uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Slice()][0]; + *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Slice(), t->Leg()}; } nHits += nTrackHits; } @@ -1771,19 +1705,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread for (int32_t i = 0; i < nHits; i++) { uint8_t state; - if (Param().rec.nonConsecutiveIDs) { - const GPUTPCSliceOutCluster* c = (const GPUTPCSliceOutCluster*)((const int32_t*)mkSlices[trackClusters[i].slice]->GetFirstTrack() + trackClusters[i].id); - clXYZ[i].x = c->GetX(); - clXYZ[i].y = c->GetY(); - clXYZ[i].z = c->GetZ(); - clXYZ[i].amp = c->GetAmp(); - trackClusters[i].id = c->GetId(); -#ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME - cl[i] XYZ.pad = c->mPad; - cl[i] XYZ.time = c->mTime; -#endif - state = c->GetFlags(); - } else if (Param().par.earlyTpcTransform) { + if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].slice].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].slice].Data().ClusterIdOffset()]; clXYZ[i].x = c.x; clXYZ[i].y = c.y; @@ -1800,16 +1722,10 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags cl[i].row = trackClusters[i].row; - if (!Param().rec.nonConsecutiveIDs) // We already have global consecutive numbers from the slice tracker, and we need to keep them for late cluster attachment - { - cl[i].num = trackClusters[i].id; - } else { // 
Produce consecutive numbers for shared cluster flagging - cl[i].num = iOutTrackFirstCluster + i; - mGlobalClusterIDs[cl[i].num] = trackClusters[i].id; - } + cl[i].num = trackClusters[i].id; cl[i].slice = trackClusters[i].slice; cl[i].leg = trackClusters[i].leg; - } // nHits + } uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nOutputTracks, 1u); if (iOutputTrack >= mNMaxTracks) { @@ -2052,17 +1968,11 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - if (Param().rec.nonConsecutiveIDs) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { - mClusters[i].num = mGlobalClusterIDs[i]; - } - } else { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { - mTrackSort[mTrackOrderAttach[i]] = i; - } - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { - mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below - } + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + mTrackSort[mTrackOrderAttach[i]] = i; + } + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { + mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index a9b510e1714ba..3e4ae535fb740 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -98,7 +98,6 @@ class GPUTPCGMMerger : public GPUProcessor void SetMaxData(const GPUTrackingInOutPointers& io); void* SetPointersMerger(void* mem); void* SetPointersRefitScratch(void* mem); - void* 
SetPointersRefitScratch2(void* mem); void* SetPointersOutput(void* mem); void* SetPointersOutputO2(void* mem); void* SetPointersOutputO2Clus(void* mem); @@ -107,8 +106,6 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersOutputState(void* mem); void* SetPointersMemory(void* mem); - void SetSliceData(int32_t index, const GPUTPCSliceOutput* sliceData) { mkSlices[index] = sliceData; } - GPUhdi() int32_t NOutputTracks() const { return mMemory->nOutputTracks; } GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } @@ -246,8 +243,6 @@ class GPUTPCGMMerger : public GPUProcessor int32_t mNextSliceInd[NSLICES]; int32_t mPrevSliceInd[NSLICES]; - const GPUTPCSliceOutput* mkSlices[NSLICES]; //* array of input slice tracks - int32_t* mTrackLinks; int32_t* mTrackCCRoots; // root of the connected component of this track @@ -273,7 +268,6 @@ class GPUTPCGMMerger : public GPUProcessor int32_t* mSliceTrackInfoIndex; GPUTPCGMMergedTrackHit* mClusters; GPUTPCGMMergedTrackHitXYZ* mClustersXYZ; - int32_t* mGlobalClusterIDs; GPUAtomic(uint32_t) * mClusterAttachment; o2::tpc::TrackTPC* mOutputTracksTPCO2; uint32_t* mOutputClusRefsTPCO2; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index a59af7529a97d..0463966c582a5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -67,7 +67,7 @@ void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSlice, int GPUTPCGMBorderTrack *b1, *b2; int32_t jSlice; MergeBorderTracksSetup(n1, n2, b1, b2, jSlice, i, withinSlice, mergeMode); - const int32_t nTrk = Param().rec.tpc.mergerReadFromTrackerDirectly ? 
*mRec->GetConstantMem().tpcTrackers[jSlice].NTracks() : mkSlices[jSlice]->NTracks(); + const int32_t nTrk = *mRec->GetConstantMem().tpcTrackers[jSlice].NTracks(); const gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = BorderRange(i); const gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = BorderRange(jSlice) + nTrk; out << "\nBorder Tracks : i " << i << " withinSlice " << withinSlice << " mergeMode " << mergeMode << "\n"; @@ -174,7 +174,7 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const } out << "\n"; } - uint32_t maxId = Param().rec.nonConsecutiveIDs ? mMemory->nOutputTrackClusters : mNMaxClusters; + uint32_t maxId = mNMaxClusters; uint32_t j = 0; for (uint32_t i = 0; i < maxId; i++) { if ((mClusterAttachment[i] & attachFlagMask) != 0) { @@ -225,7 +225,7 @@ void GPUTPCGMMerger::DumpFinal(std::ostream& out) const } out << "\n"; } - uint32_t maxId = Param().rec.nonConsecutiveIDs ? mMemory->nOutputTrackClusters : mNMaxClusters; + uint32_t maxId = mNMaxClusters; uint32_t j = 0; for (uint32_t i = 0; i < maxId; i++) { if ((mClusterAttachment[i] & attachFlagMask) != 0) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx index 3c774b13ce5b1..6c8641517b80d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx @@ -95,26 +95,15 @@ GPUd() void GPUTPCGMSliceTrack::SetParam2(const GPUTPCGMTrackParam& trk) GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* merger, int32_t iSlice, float maxSinPhi, float sinPhiMargin) { float lastX; - if (merger->Param().par.earlyTpcTransform && !merger->Param().rec.tpc.mergerReadFromTrackerDirectly) { - lastX = mOrigTrack->OutTrackCluster(mOrigTrack->NHits() - 1).GetX(); // TODO: Why is this needed, Row2X should work, but looses some tracks - } else { - //float lastX = merger->Param().tpcGeometry.Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: again, why does this reduce 
efficiency? - float y, z; - const GPUTPCSliceOutCluster* clo; - int32_t row, index; - if (merger->Param().rec.tpc.mergerReadFromTrackerDirectly) { - const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSlice]; - const GPUTPCHitId& ic = trk.TrackHits()[mOrigTrack->FirstHitID() + mOrigTrack->NHits() - 1]; - index = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + merger->GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; - row = ic.RowIndex(); - } else { - clo = &mOrigTrack->OutTrackCluster(mOrigTrack->NHits() - 1); - index = clo->GetId(); - row = clo->GetRow(); - } - const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[index]; - GPUTPCConvertImpl::convert(*merger->GetConstantMem(), iSlice, row, cl.getPad(), cl.getTime(), lastX, y, z); - } + // float lastX = merger->Param().tpcGeometry.Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: Why is this needed to be set below, Row2X should work, but looses some tracks + float y, z; + int32_t row, index; + const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCHitId& ic = trk.TrackHits()[mOrigTrack->FirstHitID() + mOrigTrack->NHits() - 1]; + index = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + merger->GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; + row = ic.RowIndex(); + const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[index]; + GPUTPCConvertImpl::convert(*merger->GetConstantMem(), iSlice, row, cl.getPad(), cl.getTime(), lastX, y, z); const int32_t N = 3; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx index c038146cf8497..d5a941b333c6e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx @@ -64,7 +64,7 @@ void GPUTPCTracker::InitializeProcessor() bool 
GPUTPCTracker::SliceDataOnGPU() { - return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) && (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) && mRec->GetParam().rec.tpc.mergerReadFromTrackerDirectly && (mRec->GetConstantMem().ioPtrs.clustersNative || mRec->GetConstantMem().ioPtrs.tpcZS || mRec->GetConstantMem().ioPtrs.tpcPackedDigits); + return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) && (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) && (mRec->GetConstantMem().ioPtrs.clustersNative || mRec->GetConstantMem().ioPtrs.tpcZS || mRec->GetConstantMem().ioPtrs.tpcPackedDigits); } void* GPUTPCTracker::SetPointersDataInput(void* mem) { return mData.SetPointersInput(mem, mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging, SliceDataOnGPU()); } @@ -117,7 +117,7 @@ void GPUTPCTracker::RegisterMemoryAllocation() mMemoryResCommon = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersCommon, GPUMemoryResource::MEMORY_PERMANENT, "TPCTrackerCommon"); mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataRows, GPUMemoryResource::MEMORY_PERMANENT, "TPCSliceRows"); - uint32_t type = mRec->GetProcessingSettings().fullMergerOnGPU ? 
GPUMemoryResource::MEMORY_SCRATCH : GPUMemoryResource::MEMORY_OUTPUT; + uint32_t type = GPUMemoryResource::MEMORY_SCRATCH; if (mRec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { // For individual scheme, we allocate tracklets separately, and change the type for the following allocations to custom type |= GPUMemoryResource::MEMORY_CUSTOM; mMemoryResTracklets = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersTracklets, type, "TPCTrackerTracklets"); diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 4bfcc312e27e7..e6017788144e0 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -164,7 +164,7 @@ int32_t ReadConfiguration(int argc, char** argv) } #endif #ifndef GPUCA_TPC_GEOMETRY_O2 - configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0; +#error Why was configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0 needed? 
configStandalone.proc.ompKernels = false; configStandalone.proc.createO2Output = 0; if (configStandalone.rundEdx == -1) { @@ -412,7 +412,7 @@ int32_t SetupReconstruction() } steps.outputs.clear(); - steps.outputs.setBits(GPUDataTypes::InOutType::TPCSectorTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TPCSliceTracking) && !recSet.tpc.mergerReadFromTrackerDirectly); + steps.outputs.setBits(GPUDataTypes::InOutType::TPCSectorTracks, false); steps.outputs.setBits(GPUDataTypes::InOutType::TPCMergedTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TPCMerging)); steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCCompression)); steps.outputs.setBits(GPUDataTypes::InOutType::TRDTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking)); diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 2aa0611b33779..70a093c7f1de7 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -909,11 +909,6 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx bool mcAvail = mcPresent() || tracksExtMC; - if (mcAvail && !tracksExtMC && mTracking->GetParam().rec.nonConsecutiveIDs) { - GPUError("QA incompatible to non-consecutive MC labels"); - return; - } - if (mcAvail) { // Assign Track MC Labels timer.Start(); From dc7e8e9625a4b61888d1f5bcd383e9712e999423 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 4 Feb 2025 01:44:02 +0100 Subject: [PATCH 0048/1914] GPU: Remove support for host helper threads (no longer used) --- Common/Topologies/o2prototype_topology.xml | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 3 - GPU/GPUTracking/Base/GPUReconstructionCPU.h | 10 +- .../Base/GPUReconstructionDeviceBase.cxx | 139 ------------------ .../Base/GPUReconstructionDeviceBase.h | 17 +-- .../Base/GPUReconstructionHelpers.h | 50 ------- GPU/GPUTracking/CMakeLists.txt | 1 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 - GPU/GPUTracking/Global/GPUChain.h | 13 -- 
GPU/GPUTracking/Global/GPUChainTracking.h | 7 +- .../Global/GPUChainTrackingSliceTracker.cxx | 65 +------- 11 files changed, 8 insertions(+), 300 deletions(-) delete mode 100644 GPU/GPUTracking/Base/GPUReconstructionHelpers.h diff --git a/Common/Topologies/o2prototype_topology.xml b/Common/Topologies/o2prototype_topology.xml index 240b8d87d469a..8d53c9eb0127a 100644 --- a/Common/Topologies/o2prototype_topology.xml +++ b/Common/Topologies/o2prototype_topology.xml @@ -74,7 +74,7 @@ The following parameters need adjustment when extending the FLP-EPN configuratio - $ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -GPUHelperThreads 4 -loglevel=0x7c' + $ALICEO2_INSTALL_DIR/bin/aliceHLTWrapper Tracker_%collectionIndex%_%taskIndex% 1 --dds --poll-period 100 --input type=pull,size=5000,method=connect,property=EPNReceiverOutputAddress,count=1 --output type=push,size=500,method=bind,property=TrackingOutputAddress,min-port=48000 --library libAliHLTTPC.so --component TPCCATracker --run 167808 --parameter '-GlobalTracking -allowGPU -loglevel=0x7c' EPNReceiverOutputAddress diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 1496300818fd8..270f092a1fd29 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -278,9 +278,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) { mProcessingSettings.mergerSortTracks = false; } - if (!IsGPU()) { - mProcessingSettings.nDeviceHelperThreads = 0; - } if (mProcessingSettings.debugLevel > 3 || !IsGPU() || 
mProcessingSettings.deterministicGPUReconstruction) { mProcessingSettings.delayedOutput = false; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 8cc753731d074..27959382e7b67 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -16,7 +16,6 @@ #define GPURECONSTRUCTIONICPU_H #include "GPUReconstruction.h" -#include "GPUReconstructionHelpers.h" #include "GPUConstantMem.h" #include #include "utils/timer.h" @@ -117,13 +116,6 @@ class GPUReconstructionCPU : public GPUReconstructionKernelsPtr(), res->PtrDevice()); } size_t TransferMemoryResourceToHost(GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { return TransferMemoryInternal(res, stream, ev, evList, nEvents, false, res->PtrDevice(), res->Ptr()); } @@ -294,7 +286,7 @@ HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num) static int32_t id = getNextTimerId(); timerMeta* timer = getTimerById(id); if (timer == nullptr) { - int32_t max = std::max({getOMPMaxThreads(), mProcessingSettings.nDeviceHelperThreads + 1, mProcessingSettings.nStreams}); + int32_t max = std::max({getOMPMaxThreads(), mProcessingSettings.nStreams}); timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); } if (num == -1) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 3522095622ad4..91715fab4f668 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -41,57 +41,6 @@ GPUReconstructionDeviceBase::GPUReconstructionDeviceBase(const GPUSettingsDevice GPUReconstructionDeviceBase::~GPUReconstructionDeviceBase() = default; -void* GPUReconstructionDeviceBase::helperWrapper_static(void* arg) -{ - GPUReconstructionHelpers::helperParam* par = (GPUReconstructionHelpers::helperParam*)arg; - 
GPUReconstructionDeviceBase* cls = par->cls; - return cls->helperWrapper(par); -} - -void* GPUReconstructionDeviceBase::helperWrapper(GPUReconstructionHelpers::helperParam* par) -{ - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("\tHelper thread %d starting", par->num); - } - - // cpu_set_t mask; //TODO add option - // CPU_ZERO(&mask); - // CPU_SET(par->num * 2 + 2, &mask); - // sched_setaffinity(0, sizeof(mask), &mask); - - par->mutex[0].lock(); - while (par->terminate == false) { - for (int32_t i = par->num + 1; i < par->count; i += mProcessingSettings.nDeviceHelperThreads + 1) { - // if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Running, Slice %d+%d, Phase %d", par->num, i, par->phase); - if ((par->functionCls->*par->function)(i, par->num + 1, par)) { - par->error = 1; - } - if (par->reset) { - break; - } - par->done = i + 1; - // if (mProcessingSettings.debugLevel >= 3) GPUInfo("\tHelper Thread %d Finished, Slice %d+%d, Phase %d", par->num, i, par->phase); - } - ResetThisHelperThread(par); - par->mutex[0].lock(); - } - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("\tHelper thread %d terminating", par->num); - } - par->mutex[1].unlock(); - pthread_exit(nullptr); - return (nullptr); -} - -void GPUReconstructionDeviceBase::ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par) -{ - if (par->reset) { - GPUImportant("GPU Helper Thread %d reseting", par->num); - } - par->reset = false; - par->mutex[1].unlock(); -} - int32_t GPUReconstructionDeviceBase::GetGlobalLock(void*& pLock) { #ifdef _WIN32 @@ -138,86 +87,6 @@ void GPUReconstructionDeviceBase::ReleaseGlobalLock(void* sem) #endif } -void GPUReconstructionDeviceBase::ResetHelperThreads(int32_t helpers) -{ - GPUImportant("Error occurred, GPU tracker helper threads will be reset (Number of threads %d (%d))", mProcessingSettings.nDeviceHelperThreads, mNSlaveThreads); - SynchronizeGPU(); - for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) { - 
mHelperParams[i].reset = true; - if (helpers || i >= mProcessingSettings.nDeviceHelperThreads) { - pthread_mutex_lock(&((pthread_mutex_t*)mHelperParams[i].mutex)[1]); - } - } - GPUImportant("GPU Tracker helper threads have ben reset"); -} - -int32_t GPUReconstructionDeviceBase::StartHelperThreads() -{ - int32_t nThreads = mProcessingSettings.nDeviceHelperThreads; - if (nThreads) { - mHelperParams = new GPUReconstructionHelpers::helperParam[nThreads]; - if (mHelperParams == nullptr) { - GPUError("Memory allocation error"); - ExitDevice(); - return (1); - } - for (int32_t i = 0; i < nThreads; i++) { - mHelperParams[i].cls = this; - mHelperParams[i].terminate = false; - mHelperParams[i].reset = false; - mHelperParams[i].num = i; - for (int32_t j = 0; j < 2; j++) { - mHelperParams[i].mutex[j].lock(); - } - - if (pthread_create(&mHelperParams[i].threadId, nullptr, helperWrapper_static, &mHelperParams[i])) { - GPUError("Error starting slave thread"); - ExitDevice(); - return (1); - } - } - } - mNSlaveThreads = nThreads; - return (0); -} - -int32_t GPUReconstructionDeviceBase::StopHelperThreads() -{ - if (mNSlaveThreads) { - for (int32_t i = 0; i < mNSlaveThreads; i++) { - mHelperParams[i].terminate = true; - mHelperParams[i].mutex[0].unlock(); - mHelperParams[i].mutex[1].lock(); - if (pthread_join(mHelperParams[i].threadId, nullptr)) { - GPUError("Error waiting for thread to terminate"); - return (1); - } - } - delete[] mHelperParams; - } - mNSlaveThreads = 0; - return (0); -} - -void GPUReconstructionDeviceBase::WaitForHelperThreads() -{ - for (int32_t i = 0; i < mProcessingSettings.nDeviceHelperThreads; i++) { - pthread_mutex_lock(&((pthread_mutex_t*)mHelperParams[i].mutex)[1]); - } -} - -void GPUReconstructionDeviceBase::RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t i, int32_t t, GPUReconstructionHelpers::helperParam* p), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) -{ - for (int32_t i = 0; i < 
mProcessingSettings.nDeviceHelperThreads; i++) { - mHelperParams[i].done = 0; - mHelperParams[i].error = 0; - mHelperParams[i].function = function; - mHelperParams[i].functionCls = functionCls; - mHelperParams[i].count = count; - pthread_mutex_unlock(&((pthread_mutex_t*)mHelperParams[i].mutex)[0]); - } -} - int32_t GPUReconstructionDeviceBase::InitDevice() { // cpu_set_t mask; @@ -262,10 +131,6 @@ int32_t GPUReconstructionDeviceBase::InitDevice() mProcShadow.mMemoryResProcessors = RegisterMemoryAllocation(&mProcShadow, &GPUProcessorProcessors::SetPointersDeviceProcessor, GPUMemoryResource::MEMORY_PERMANENT | GPUMemoryResource::MEMORY_HOST, "Processors"); AllocateRegisteredMemory(mProcShadow.mMemoryResProcessors); - if (StartHelperThreads()) { - return (1); - } - if (mMaster == nullptr || mProcessingSettings.debugLevel >= 2) { GPUInfo("GPU Tracker initialization successfull"); // Verbosity reduced because GPU backend will print GPUImportant message! } @@ -282,10 +147,6 @@ void* GPUReconstructionDeviceBase::GPUProcessorProcessors::SetPointersDeviceProc int32_t GPUReconstructionDeviceBase::ExitDevice() { - if (StopHelperThreads()) { - return (1); - } - int32_t retVal = ExitDevice_Runtime(); mProcessorsShadow = nullptr; mHostMemoryPool = mHostMemoryBase = mDeviceMemoryPool = mDeviceMemoryBase = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = mHostMemoryPermanent = mDeviceMemoryPermanent = nullptr; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 215615f558442..1381fd0f76981 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -17,7 +17,6 @@ #include "GPUReconstructionCPU.h" #include -#include "GPUReconstructionHelpers.h" #include "GPUChain.h" #include @@ -61,24 +60,10 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU size_t GPUMemCpyAlways(bool onGpu, void* dst, const void* src, size_t size, int32_t stream, 
int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override; size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override = 0; - int32_t StartHelperThreads() override; - int32_t StopHelperThreads() override; - void RunHelperThreads(int32_t (GPUReconstructionHelpers::helperDelegateBase::*function)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*), GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) override; - int32_t HelperError(int32_t iThread) const override { return mHelperParams[iThread].error; } - int32_t HelperDone(int32_t iThread) const override { return mHelperParams[iThread].done; } - void WaitForHelperThreads() override; - void ResetHelperThreads(int32_t helpers) override; - void ResetThisHelperThread(GPUReconstructionHelpers::helperParam* par); - int32_t GetGlobalLock(void*& pLock); void ReleaseGlobalLock(void* sem); - static void* helperWrapper_static(void* arg); - void* helperWrapper(GPUReconstructionHelpers::helperParam* par); - - int32_t mDeviceId = -1; // Device ID used by backend - GPUReconstructionHelpers::helperParam* mHelperParams = nullptr; // Control Struct for helper threads - int32_t mNSlaveThreads = 0; // Number of slave threads currently active + int32_t mDeviceId = -1; // Device ID used by backend struct DebugEvents { deviceEvent DebugStart, DebugStop; // Debug timer events diff --git a/GPU/GPUTracking/Base/GPUReconstructionHelpers.h b/GPU/GPUTracking/Base/GPUReconstructionHelpers.h deleted file mode 100644 index c55e81905f32f..0000000000000 --- a/GPU/GPUTracking/Base/GPUReconstructionHelpers.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionHelpers.h -/// \author David Rohr - -#ifndef GPURECONSTRUCTIONHELPERS_H -#define GPURECONSTRUCTIONHELPERS_H - -#include - -namespace o2 -{ -namespace gpu -{ -class GPUReconstructionDeviceBase; -class GPUReconstructionHelpers -{ - public: - class helperDelegateBase - { - }; - - struct helperParam { - pthread_t threadId; - GPUReconstructionDeviceBase* cls; - int32_t num; - std::mutex mutex[2]; - int8_t terminate; - helperDelegateBase* functionCls; - int32_t (helperDelegateBase::*function)(int32_t, int32_t, helperParam*); - int32_t phase; - int32_t count; - volatile int32_t done; - volatile int8_t error; - volatile int8_t reset; - }; -}; -} // namespace gpu -} // namespace o2 - -#endif diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 5dd92d41db29b..6acc7fd1dd537 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -104,7 +104,6 @@ set(HDRS_INSTALL Base/GPUConstantMem.h Base/GPUParam.inc Base/GPUParamRTC.h - Base/GPUReconstructionHelpers.h Base/GPUReconstructionIncludes.h Base/GPUReconstructionIncludesITS.h Base/GPUReconstructionKernelMacros.h diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index c10793975453d..ca6f2f370300e 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -252,7 +252,6 @@ AddOption(registerStandaloneInputMemory, bool, false, "registerInputMemory", 0, AddOption(ompThreads, int32_t, -1, "omp", 't', "Number of OMP threads to run (-1: all)", min(-1), message("Using %s OMP threads")) 
AddOption(ompKernels, uint8_t, 2, "", 0, "Parallelize with OMP inside kernels instead of over slices, 2 for nested parallelization over TPC sectors and inside kernels") AddOption(ompAutoNThreads, bool, true, "", 0, "Auto-adjust number of OMP threads, decreasing the number for small input data") -AddOption(nDeviceHelperThreads, int32_t, 1, "", 0, "Number of CPU helper threads for CPU processing") AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues") AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)") AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 06650f9d9c733..0981fea43810a 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -16,7 +16,6 @@ #define GPUCHAIN_H #include "GPUReconstructionCPU.h" -#include "GPUReconstructionHelpers.h" namespace o2 { @@ -111,12 +110,6 @@ class GPUChain } } inline void StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents = 1) { mRec->StreamWaitForEvents(stream, evList, nEvents); } - template - void RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count); - inline void WaitForHelperThreads() { mRec->WaitForHelperThreads(); } - inline int32_t HelperError(int32_t iThread) const { return mRec->HelperError(iThread); } - inline int32_t HelperDone(int32_t iThread) const { return mRec->HelperDone(iThread); } - inline void ResetHelperThreads(int32_t helpers) { mRec->ResetHelperThreads(helpers); } inline int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1) { return mRec->GPUDebug(state, stream); } // nEvents is forced to 0 if evList == nullptr inline void TransferMemoryResourceToGPU(RecoStep step, GPUMemoryResource* res, int32_t stream = -1, deviceEvent* ev = 
nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) { timeCpy(step, true, &GPUReconstructionCPU::TransferMemoryResourceToGPU, res, stream, ev, evList, nEvents); } @@ -242,12 +235,6 @@ class GPUChain void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args); }; -template -inline void GPUChain::RunHelperThreads(T function, GPUReconstructionHelpers::helperDelegateBase* functionCls, int32_t count) -{ - mRec->RunHelperThreads((int32_t(GPUReconstructionHelpers::helperDelegateBase::*)(int32_t, int32_t, GPUReconstructionHelpers::helperParam*))function, functionCls, count); -} - template inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args) { diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 6d6d82b518097..d827b095773b1 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -16,7 +16,6 @@ #define GPUCHAINTRACKING_H #include "GPUChain.h" -#include "GPUReconstructionHelpers.h" #include "GPUDataTypes.h" #include #include @@ -68,7 +67,7 @@ struct GPUTPCCFChainContext; struct GPUNewCalibValues; struct GPUTriggerOutputs; -class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelegateBase +class GPUChainTracking : public GPUChain { friend class GPUReconstruction; @@ -314,15 +313,11 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); - std::atomic_flag mLockAtomicOutputBuffer = ATOMIC_FLAG_INIT; std::mutex mMutexUpdateCalib; std::unique_ptr mPipelineFinalizationCtx; GPUChainTrackingFinalContext* mPipelineNotifyCtx = nullptr; std::function mWaitForFinalInputs; - int32_t HelperReadEvent(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par); - int32_t HelperOutput(int32_t iSlice, int32_t threadId, 
GPUReconstructionHelpers::helperParam* par); - int32_t OutputStream() const { return mRec->NStreams() - 2; } }; } // namespace gpu diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index 35a8c6c455048..174b3757d3307 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -55,9 +55,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices() if (retVal) { SynchronizeGPU(); } - if (retVal >= 2) { - ResetHelperThreads(retVal >= 3); - } return (retVal != 0); } @@ -114,9 +111,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() processorsShadow()->tpcTrackers[iSlice].SetGPUTextureBase(mRec->DeviceMemoryBase()); } - if (!doSliceDataOnGPU) { - RunHelperThreads(&GPUChainTracking::HelperReadEvent, this, NSLICES); - } if (PrepareTextures()) { return (2); } @@ -183,22 +177,12 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); streamInit[useStream] = true; - } else if (!doGPU || iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) == 0) { + } else { if (ReadEvent(iSlice, 0)) { GPUError("Error reading event"); error = 1; continue; } - } else { - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Waiting for helper thread %d", iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) - 1); - } - while (HelperDone(iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) - 1) < (int32_t)iSlice) { - } - if (HelperError(iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) - 1)) { - error = 1; - continue; - } } if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); @@ -297,9 +281,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (doGPU) { ReleaseEvent(mEvents->init); } - if (!doSliceDataOnGPU) { - WaitForHelperThreads(); - } if (!GetProcessingSettings().trackletSelectorInPipeline) { if (GetProcessingSettings().trackletConstructorInPipeline) { @@ -359,7 +340,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (param().rec.tpc.globalTracking) { mWriteOutputDone.fill(0); } - RunHelperThreads(&GPUChainTracking::HelperOutput, this, NSLICES); uint32_t tmpSlice = 0; for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { @@ -402,12 +382,12 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Data ready for slice %d, helper thread %d", iSlice, iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1)); + GPUInfo("Data ready for slice %d", iSlice); } mSliceSelectorReady = iSlice; if (param().rec.tpc.globalTracking) { - for (uint32_t tmpSlice2a = 0; tmpSlice2a <= iSlice; tmpSlice2a += GetProcessingSettings().nDeviceHelperThreads + 1) { + for (uint32_t tmpSlice2a = 0; tmpSlice2a <= iSlice; tmpSlice2a++) { uint32_t tmpSlice2 = GPUTPCGlobalTracking::GlobalTrackingSliceOrder(tmpSlice2a); 
uint32_t sliceLeft, sliceRight; GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(tmpSlice2, sliceLeft, sliceRight); @@ -419,12 +399,9 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } } } else { - if (iSlice % (GetProcessingSettings().nDeviceHelperThreads + 1) == 0) { - WriteOutput(iSlice, 0); - } + WriteOutput(iSlice, 0); } } - WaitForHelperThreads(); } if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.globalTracking) { std::vector blocking(NSLICES * mRec->NStreams()); @@ -518,43 +495,9 @@ void GPUChainTracking::WriteOutput(int32_t iSlice, int32_t threadId) if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Running WriteOutput for slice %d on thread %d\n", iSlice, threadId); } - if (GetProcessingSettings().nDeviceHelperThreads) { - while (mLockAtomicOutputBuffer.test_and_set(std::memory_order_acquire)) { - } - } processors()->tpcTrackers[iSlice].WriteOutputPrepare(); - if (GetProcessingSettings().nDeviceHelperThreads) { - mLockAtomicOutputBuffer.clear(); - } processors()->tpcTrackers[iSlice].WriteOutput(); if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Finished WriteOutput for slice %d on thread %d\n", iSlice, threadId); } } - -int32_t GPUChainTracking::HelperReadEvent(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par) { return ReadEvent(iSlice, threadId); } - -int32_t GPUChainTracking::HelperOutput(int32_t iSlice, int32_t threadId, GPUReconstructionHelpers::helperParam* par) -{ - if (param().rec.tpc.globalTracking) { - uint32_t tmpSlice = GPUTPCGlobalTracking::GlobalTrackingSliceOrder(iSlice); - uint32_t sliceLeft, sliceRight; - GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(tmpSlice, sliceLeft, sliceRight); - - while (mSliceSelectorReady < (int32_t)tmpSlice || mSliceSelectorReady < (int32_t)sliceLeft || mSliceSelectorReady < (int32_t)sliceRight) { - if (par->reset) { - return 1; - } - } - GlobalTracking(tmpSlice, 0); - WriteOutput(tmpSlice, 0); - } else { - while 
(mSliceSelectorReady < iSlice) { - if (par->reset) { - return 1; - } - } - WriteOutput(iSlice, threadId); - } - return 0; -} From 359b736df8c56b693dbba7605b211909f676ca23 Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Wed, 29 Jan 2025 17:21:05 +0100 Subject: [PATCH 0049/1914] Expand trigger macro variable in hybrid --- Generators/src/GeneratorHybrid.cxx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Generators/src/GeneratorHybrid.cxx b/Generators/src/GeneratorHybrid.cxx index 03a78eb852eb6..296f69815e4f0 100644 --- a/Generators/src/GeneratorHybrid.cxx +++ b/Generators/src/GeneratorHybrid.cxx @@ -192,13 +192,14 @@ Bool_t GeneratorHybrid::Init() if (mTriggerMacros[count][trg].empty() || mTriggerFuncs[count][trg].empty()) { continue; } + std::string expandedMacro = o2::utils::expandShellVarsInFileName(mTriggerMacros[count][trg]); LOG(info) << "Setting trigger " << trg << " of generator " << gen << " with following parameters"; - LOG(info) << "Macro filename: " << mTriggerMacros[count][trg]; + LOG(info) << "Macro filename: " << expandedMacro; LOG(info) << "Function name: " << mTriggerFuncs[count][trg]; - trigger = o2::conf::GetFromMacro(mTriggerMacros[count][trg], mTriggerFuncs[count][trg], "o2::eventgen::Trigger", "trigger"); + trigger = o2::conf::GetFromMacro(expandedMacro, mTriggerFuncs[count][trg], "o2::eventgen::Trigger", "trigger"); if (!trigger) { LOG(info) << "Trying to retrieve a \'o2::eventgen::DeepTrigger\' type"; - deeptrigger = o2::conf::GetFromMacro(mTriggerMacros[count][trg], mTriggerFuncs[count][trg], "o2::eventgen::DeepTrigger", "deeptrigger"); + deeptrigger = o2::conf::GetFromMacro(expandedMacro, mTriggerFuncs[count][trg], "o2::eventgen::DeepTrigger", "deeptrigger"); } if (!trigger && !deeptrigger) { LOG(warn) << "Failed to retrieve \'external trigger\': problem with configuration"; From 7f12fad71043b7ecc4759a7f599f607663b4b686 Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Sun, 2 Feb 2025 20:01:00 +0100 Subject: 
[PATCH 0050/1914] Added static variable for number of events in Generator --- Generators/include/Generators/Generator.h | 6 ++++++ Generators/include/Generators/GeneratorHybrid.h | 3 --- Generators/src/Generator.cxx | 2 +- Generators/src/GeneratorFactory.cxx | 1 - Generators/src/GeneratorHybrid.cxx | 2 +- Generators/src/GeneratorService.cxx | 2 ++ macro/o2sim.C | 4 ++++ run/O2PrimaryServerDevice.h | 5 +++++ run/dpl_eventgen.cxx | 7 +++++++ 9 files changed, 26 insertions(+), 6 deletions(-) diff --git a/Generators/include/Generators/Generator.h b/Generators/include/Generators/Generator.h index 7181bcbc6682d..640cc80931862 100644 --- a/Generators/include/Generators/Generator.h +++ b/Generators/include/Generators/Generator.h @@ -85,9 +85,12 @@ class Generator : public FairGenerator void setTriggerMode(ETriggerMode_t val) { mTriggerMode = val; }; void addTrigger(Trigger trigger) { mTriggers.push_back(trigger); }; void addDeepTrigger(DeepTrigger trigger) { mDeepTriggers.push_back(trigger); }; + // setter for global number of events + static void setTotalNEvents(unsigned int& n) { gTotalNEvents = n; } /** getters **/ const std::vector& getParticles() const { return mParticles; }; //! 
+ static unsigned int getTotalNEvents() { return gTotalNEvents; }; /** other **/ void clearParticles() { mParticles.clear(); }; @@ -152,6 +155,9 @@ class Generator : public FairGenerator // the current ID of the sub-generator used in the current event (if applicable) int mSubGeneratorId = -1; + // global static information about (upper limit of) number of events to be generated + static unsigned int gTotalNEvents; + ClassDefOverride(Generator, 2); }; /** class Generator **/ diff --git a/Generators/include/Generators/GeneratorHybrid.h b/Generators/include/Generators/GeneratorHybrid.h index 21f669776d944..955240d6a28fa 100644 --- a/Generators/include/Generators/GeneratorHybrid.h +++ b/Generators/include/Generators/GeneratorHybrid.h @@ -63,8 +63,6 @@ class GeneratorHybrid : public Generator Bool_t importParticles() override; void updateHeader(o2::dataformats::MCEventHeader* eventHeader) override; - void setNEvents(int n) { mNEvents = n; } - Bool_t parseJSON(const std::string& path); Bool_t confSetter(const auto& gen); template @@ -116,7 +114,6 @@ class GeneratorHybrid : public Generator std::atomic mStopFlag; bool mIsInitialized = false; - int mNEvents = -1; // the number of events to be done, if known (helps initiating cleanup) o2::dataformats::MCEventHeader mMCEventHeader; // to capture event headers enum class GenMode { diff --git a/Generators/src/Generator.cxx b/Generators/src/Generator.cxx index ed7bf0a99bbe3..9204ede98215e 100644 --- a/Generators/src/Generator.cxx +++ b/Generators/src/Generator.cxx @@ -29,7 +29,7 @@ namespace eventgen { std::atomic Generator::InstanceCounter{0}; - +unsigned int Generator::gTotalNEvents = 0; /*****************************************************************/ /*****************************************************************/ diff --git a/Generators/src/GeneratorFactory.cxx b/Generators/src/GeneratorFactory.cxx index 5db1354a12908..8a6001b2cd5e6 100644 --- a/Generators/src/GeneratorFactory.cxx +++ 
b/Generators/src/GeneratorFactory.cxx @@ -285,7 +285,6 @@ void GeneratorFactory::setPrimaryGenerator(o2::conf::SimConfig const& conf, Fair return; } auto hybrid = new o2::eventgen::GeneratorHybrid(config); - hybrid->setNEvents(conf.getNEvents()); primGen->AddGenerator(hybrid); #endif } else { diff --git a/Generators/src/GeneratorHybrid.cxx b/Generators/src/GeneratorHybrid.cxx index 296f69815e4f0..f968a9c4b3513 100644 --- a/Generators/src/GeneratorHybrid.cxx +++ b/Generators/src/GeneratorHybrid.cxx @@ -408,7 +408,7 @@ bool GeneratorHybrid::importParticles() mseqCounter++; mEventCounter++; - if (mEventCounter == mNEvents) { + if (mEventCounter == getTotalNEvents()) { LOG(info) << "HybridGen: Stopping TBB task pool"; mStopFlag = true; } diff --git a/Generators/src/GeneratorService.cxx b/Generators/src/GeneratorService.cxx index 21c25aeb73720..902b482dc839b 100644 --- a/Generators/src/GeneratorService.cxx +++ b/Generators/src/GeneratorService.cxx @@ -12,6 +12,7 @@ #include "Generators/GeneratorService.h" #include "Generators/GeneratorFactory.h" #include "SimConfig/SimConfig.h" +#include "Generators/Generator.h" #include "DataFormatsCalibration/MeanVertexObject.h" using namespace o2::eventgen; @@ -23,6 +24,7 @@ void GeneratorService::initService(std::string const& genName, auto localSimConfig = o2::conf::SimConfig::make(); localSimConfig.getConfigData().mGenerator = genName; localSimConfig.getConfigData().mTrigger = triggerName; + localSimConfig.getConfigData().mNEvents = o2::eventgen::Generator::getTotalNEvents(); o2::eventgen::GeneratorFactory::setPrimaryGenerator(localSimConfig, &mPrimGen); diff --git a/macro/o2sim.C b/macro/o2sim.C index 4bd2ff4e4d9cb..7d5faa544249c 100644 --- a/macro/o2sim.C +++ b/macro/o2sim.C @@ -13,6 +13,7 @@ #if !defined(__CLING__) || defined(__ROOTCLING__) #include #include +#include #include "SimulationDataFormat/O2DatabasePDG.h" #include "SimulationDataFormat/MCEventHeader.h" #include @@ -61,6 +62,9 @@ void check_notransport() FairRunSim* 
o2sim_init(bool asservice, bool evalmat = false) { auto& confref = o2::conf::SimConfig::Instance(); + // set the global information about the number of events to be generated + unsigned int nTotalEvents = confref.getNEvents(); + o2::eventgen::Generator::setTotalNEvents(nTotalEvents); // initialize CCDB service auto& ccdbmgr = o2::ccdb::BasicCCDBManager::instance(); // fix the timestamp early diff --git a/run/O2PrimaryServerDevice.h b/run/O2PrimaryServerDevice.h index 1db1109f573e8..4b313a7ca9499 100644 --- a/run/O2PrimaryServerDevice.h +++ b/run/O2PrimaryServerDevice.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,10 @@ class O2PrimaryServerDevice final : public fair::mq::Device ccdbmgr.setURL(conf.getConfigData().mCCDBUrl); ccdbmgr.setTimestamp(conf.getTimestamp()); + // set the global information about the number of events to be generated + unsigned int nTotalEvents = conf.getNEvents(); + o2::eventgen::Generator::setTotalNEvents(nTotalEvents); + // init magnetic field as it might be needed by the generator if (TGeoGlobalMagField::Instance()->GetField() == nullptr) { TGeoGlobalMagField::Instance()->SetField(o2::base::SimFieldUtils::createMagField()); diff --git a/run/dpl_eventgen.cxx b/run/dpl_eventgen.cxx index c033aafddc367..6202e965f3e8a 100644 --- a/run/dpl_eventgen.cxx +++ b/run/dpl_eventgen.cxx @@ -16,6 +16,7 @@ #include "SimulationDataFormat/MCTrack.h" #include "Framework/runDataProcessing.h" #include +#include #include #include #include // simple timer from ROOT @@ -63,6 +64,12 @@ struct GeneratorTask { // update config key params o2::conf::ConfigurableParam::updateFromFile(iniFile); o2::conf::ConfigurableParam::updateFromString((std::string)params); + // set the number of events in the static Generator variable gTotalNEvents. 
+ // Variable is unset if nEvents exceeds the uint maximum value + if (nEvents <= std::numeric_limits::max()) { + unsigned int castNEvents = static_cast(nEvents); + o2::eventgen::Generator::setTotalNEvents(castNEvents); + } // initialize the service if (vtxmode == o2::conf::VertexMode::kDiamondParam) { genservice->initService(generator, trigger, o2::eventgen::DiamondParamVertexOption()); From 7fd068194adc7e8fe85b815039478a21ca726820 Mon Sep 17 00:00:00 2001 From: wiechula Date: Mon, 13 Jan 2025 12:34:28 +0100 Subject: [PATCH 0051/1914] Add line indicating selected time bin --- Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx index 78f6b4d3e5397..23afc85f94d64 100644 --- a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx +++ b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx @@ -774,6 +774,19 @@ void SimpleEventDisplayGUI::drawPadSignal(int event, int x, int y, TObject* o) if (mCheckShowClusters->IsDown()) { showClusters(roc, row); } + const auto padTimeValsName = fmt::format("PadTimeVals{}", type[type.Length() - 1]); + TCanvas* cPadTimeVals = (TCanvas*)gROOT->GetListOfCanvases()->FindObject(padTimeValsName.data()); + if (cPadTimeVals) { + h = (TH1D*)gROOT->FindObject(("h" + padTimeValsName).data()); + if (h) { + cPadTimeVals->cd(); + delete cPadTimeVals->GetListOfPrimitives()->FindObject("TLine"); + TLine l; + l.SetLineColor(kRed); + const auto timeBin = mSelTimeBin->GetNumberEntry()->GetIntNumber(); + l.DrawLine(timeBin + 0.5, h->GetYaxis()->GetXmin(), timeBin + 0.5, h->GetYaxis()->GetXmax()); + } + } update(Form("%s;%sFFT;PadTimeVals%s;SingleTB", type.Data(), type.Data(), rocType.Data())); } // printf("bin=%03d.%03d(%03d)[%05d], name=%s, ROC=%02d content=%.1f, ev: %d\n",row,pad,cpad,chn,h->GetName(), roc, h->GetBinContent(binx,biny), event); From 7cf775bbce1cc499f4edccc9f177553c06a0f0cd 
Mon Sep 17 00:00:00 2001 From: wiechula Date: Fri, 31 Jan 2025 15:43:52 +0100 Subject: [PATCH 0052/1914] Implement usage of CCDB Redirect --- Detectors/TPC/base/include/TPCBase/CDBTypes.h | 2 +- .../include/TPCBase/DeadChannelMapCreator.h | 3 ++- .../TPC/base/include/TPCBase/FEEConfig.h | 23 +++++++++++-------- Detectors/TPC/base/src/CDBInterface.cxx | 2 +- .../TPC/base/src/DeadChannelMapCreator.cxx | 18 +++++---------- Detectors/TPC/base/src/FEEConfig.cxx | 3 +++ .../SpacePoints/SpacePointsCalibParam.h | 2 +- .../include/SpacePoints/TrackResiduals.h | 2 +- Detectors/TPC/dcs/src/DCSConfigSpec.cxx | 18 +++++++++++---- GPU/Workflow/src/GPUWorkflowTPC.cxx | 12 ++++++---- 10 files changed, 50 insertions(+), 35 deletions(-) diff --git a/Detectors/TPC/base/include/TPCBase/CDBTypes.h b/Detectors/TPC/base/include/TPCBase/CDBTypes.h index 75278f2a76902..a3c52ecd0928b 100644 --- a/Detectors/TPC/base/include/TPCBase/CDBTypes.h +++ b/Detectors/TPC/base/include/TPCBase/CDBTypes.h @@ -110,7 +110,7 @@ const std::unordered_map CDBTypeMap{ // {CDBType::ConfigFEEPad, "TPC/Config/FEEPad"}, {CDBType::ConfigFEE, "TPC/Config/FEE"}, - {CDBType::ConfigRunInfo, "TPC/Config/RunInfo"}, + {CDBType::ConfigRunInfo, "TPC/Config/RunInfoV2"}, // {CDBType::ParDetector, "TPC/Parameter/Detector"}, {CDBType::ParElectronics, "TPC/Parameter/Electronics"}, diff --git a/Detectors/TPC/base/include/TPCBase/DeadChannelMapCreator.h b/Detectors/TPC/base/include/TPCBase/DeadChannelMapCreator.h index dbdefb4ef0f37..9d4317380f4bc 100644 --- a/Detectors/TPC/base/include/TPCBase/DeadChannelMapCreator.h +++ b/Detectors/TPC/base/include/TPCBase/DeadChannelMapCreator.h @@ -58,10 +58,11 @@ class DeadChannelMapCreator void load(long timeStampOrRun); void loadFEEConfigViaRunInfoTS(long timeStamp); void loadFEEConfigViaRunInfo(long timeStampOrRun); - void loadFEEConfig(long tag, long createdNotAfter = -1); + void loadFEEConfig(long timeStamp = -1); void loadIDCPadFlags(long timeStampOrRun); void 
setDeadChannelMapIDCPadStatus(const CalDetFlag_t& padStatusMap, PadFlags mask = PadFlags::flagAllNoneGood); + void setDeadChannelMapFEEConfig(const FEEConfig& feeConfig) { mDeadChannelMapFEE = feeConfig.getDeadChannelMap(); } const CalDet& getDeadChannelMapIDC() const { return mDeadChannelMapIDC; } const CalDet& getDeadChannelMapFEE() const { return mDeadChannelMapFEE; } diff --git a/Detectors/TPC/base/include/TPCBase/FEEConfig.h b/Detectors/TPC/base/include/TPCBase/FEEConfig.h index 2065970c42ef5..80962f72e6056 100644 --- a/Detectors/TPC/base/include/TPCBase/FEEConfig.h +++ b/Detectors/TPC/base/include/TPCBase/FEEConfig.h @@ -48,16 +48,19 @@ struct CRUConfig { struct FEEConfig { /// Tag definitions for TPC/Config/FEE enum class Tags : uint8_t { - Unspecified = 0, ///< Unspecified - TestWithZS = 1, ///< Test configuration with ZS - Pedestals = 2, ///< Configuration for Pedestal data taking - Pulser = 3, ///< Configuration for Pulser data taking - Laser = 4, ///< Configuration for Laser data taking - Cosmics = 5, ///< Configuration for Cosmics data taking - Physics35sigma = 6, ///< Physics configuration with 3.5 sigma thresholds - Physics30sigma = 7, ///< Physics configuration with 3.0 sigma thresholds - Physics25sigma = 8, ///< Physics configuration with 2.5 sigma thresholds - Laser10ADCoff = 9, ///< Configuration for Laser data taking with 10ADC offset for special studies + Unspecified = 0, ///< Unspecified + TestWithZS = 1, ///< Test configuration with ZS + Pedestals = 2, ///< Configuration for Pedestal data taking + Pulser = 3, ///< Configuration for Pulser data taking + Laser = 4, ///< Configuration for Laser data taking + Cosmics = 5, ///< Configuration for Cosmics data taking + Physics35sigma = 6, ///< Physics configuration with 3.5 sigma thresholds + Physics30sigma = 7, ///< Physics configuration with 3.0 sigma thresholds + Physics25sigma = 8, ///< Physics configuration with 2.5 sigma thresholds + Laser10ADCoff = 9, ///< Configuration for Laser data taking 
with 10ADC offset for special studies + TestNoFilters = 10, ///< Test configuration without filters + Physics2025sigma = 11, ///< Physics configuration with 2.0/2.5 sigma thresholds (IROC/OROC) + Physics30sigmaPbPb = 12, ///< Physics configuration with 3.0 sigma thresholds for PbPb (using different CMC settings wrt. pp config 7) }; enum class PadConfig { diff --git a/Detectors/TPC/base/src/CDBInterface.cxx b/Detectors/TPC/base/src/CDBInterface.cxx index 357399f98fcf8..605413b205c2a 100644 --- a/Detectors/TPC/base/src/CDBInterface.cxx +++ b/Detectors/TPC/base/src/CDBInterface.cxx @@ -556,7 +556,7 @@ void CDBInterface::createFEEConfig() if (!mUseDefaults) { LOGP(info, "trying to load default FEEConfig"); - mDeadChannelMapCreator.loadFEEConfig(7, 1680525888290); // load oldest physics configuration + mDeadChannelMapCreator.loadFEEConfig(1680525888290); // load oldest physics configuration if (mFEEConfig) { return; } diff --git a/Detectors/TPC/base/src/DeadChannelMapCreator.cxx b/Detectors/TPC/base/src/DeadChannelMapCreator.cxx index 2b16abd676d31..5fd9b59c85282 100644 --- a/Detectors/TPC/base/src/DeadChannelMapCreator.cxx +++ b/Detectors/TPC/base/src/DeadChannelMapCreator.cxx @@ -43,13 +43,7 @@ void DeadChannelMapCreator::loadFEEConfigViaRunInfoTS(long timeStamp) if (mObjectValidity[CDBType::ConfigRunInfo].isValid(timeStamp)) { return; } - const auto meta = mCCDBApi.retrieveHeaders(CDBTypeMap.at(CDBType::ConfigRunInfo), {}, timeStamp); - mObjectValidity[CDBType::ConfigRunInfo].startvalidity = std::stol(meta.at("Valid-From")); - mObjectValidity[CDBType::ConfigRunInfo].endvalidity = std::stol(meta.at("Valid-Until")); - const long tag = std::stol(meta.at("Tag")); - LOGP(info, "Loading FEE config for time stamp {}, via RunInfo with Tag {}, RunType {}, runNumber {}, validity: {} - {}", - timeStamp, tag, meta.at("RunType"), meta.at("runNumber"), meta.at("Valid-From"), meta.at("Valid-Until")); - loadFEEConfig(tag, timeStamp); + loadFEEConfig(timeStamp); }
//______________________________________________________________________________ @@ -59,13 +53,13 @@ void DeadChannelMapCreator::loadFEEConfigViaRunInfo(long timeStampOrRun) } //______________________________________________________________________________ -void DeadChannelMapCreator::loadFEEConfig(long tag, long createdNotAfter) +void DeadChannelMapCreator::loadFEEConfig(long timeStamp) { - std::map mm, meta; - const std::string createdNotAfterS = (createdNotAfter < 0) ? "" : std::to_string(createdNotAfter); - mFEEConfig.reset(mCCDBApi.retrieveFromTFileAny(CDBTypeMap.at(CDBType::ConfigFEE), mm, tag, &meta, "", createdNotAfterS)); + std::map meta; + mFEEConfig.reset(mCCDBApi.retrieveFromTFileAny(CDBTypeMap.at(CDBType::ConfigRunInfo), {}, timeStamp, &meta)); + const long tag = std::stol(meta.at("Tag")); if (!mFEEConfig) { - LOGP(error, "Could not load {}/{}, createdNotAfter: {}", CDBTypeMap.at(CDBType::ConfigFEE), tag, createdNotAfterS); + LOGP(error, "Could not load {}/{}, createdNotAfter: {}", CDBTypeMap.at(CDBType::ConfigFEE), tag, timeStamp); return; } LOGP(info, "Using FEE config for Tag {}, ETag {}, Last-Modified {}", meta.at("Valid-From"), meta.at("ETag"), meta.at("Last-Modified")); diff --git a/Detectors/TPC/base/src/FEEConfig.cxx b/Detectors/TPC/base/src/FEEConfig.cxx index 8a4a45ecfa6a4..5a2420b93c2d8 100644 --- a/Detectors/TPC/base/src/FEEConfig.cxx +++ b/Detectors/TPC/base/src/FEEConfig.cxx @@ -57,6 +57,9 @@ const std::unordered_map FEEConfig::TagNames {Tags::Physics30sigma, "Physics30sigma"}, {Tags::Physics25sigma, "Physics25sigma"}, {Tags::Laser10ADCoff, "Laser10ADCoff"}, + {Tags::TestNoFilters, "TestNoFilter"}, + {Tags::Physics2025sigma, "Physics2025sigma"}, + {Tags::Physics30sigmaPbPb, "Physics30sigmaPbPb"}, }; const std::unordered_map FEEConfig::PadConfigNames{ diff --git a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h index 
94485642cc425..67226d108bae3 100644 --- a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h +++ b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h @@ -56,7 +56,7 @@ static constexpr int NZ2XBins = 5; ///< number of bins in z/x static constexpr float MaxResid = 20.f; ///< max residual in y and z static constexpr float MaxY = 50.f; ///< max value for y position (sector coordinates) static constexpr float MaxZ = 300.f; ///< max value for z position -static constexpr float MaxTgSlp = 1.f; ///< max value for phi and lambda angles +static constexpr float MaxTgSlp = 1.f; ///< max value for phi (from snp) // miscellaneous static constexpr float sEps = 1e-6f; ///< small number for float comparisons diff --git a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackResiduals.h b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackResiduals.h index 7d82a9e483215..e4d0a3a053728 100644 --- a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackResiduals.h +++ b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackResiduals.h @@ -100,7 +100,7 @@ class TrackResiduals LocalResid(short dyIn, short dzIn, short tgSlpIn, std::array bvoxIn) : dy(dyIn), dz(dzIn), tgSlp(tgSlpIn), bvox(bvoxIn) {} short dy{0}; ///< residual in y, ranges from -param::sMaxResid to +param::sMaxResid short dz{0}; ///< residual in z, ranges from -param::sMaxResid to +param::sMaxResid - short tgSlp{0}; ///< tangens of the phi angle between padrow and track, ranges from -param::sMaxAngle to +param::sMaxAngle + short tgSlp{0}; ///< tangens of the phi angle between padrow and track, ranges from -param::MaxTgSlp to +param::MaxTgSlp std::array bvox{}; ///< voxel identifier: VoxZ, VoxF, VoxX ClassDefNV(LocalResid, 1); }; diff --git a/Detectors/TPC/dcs/src/DCSConfigSpec.cxx b/Detectors/TPC/dcs/src/DCSConfigSpec.cxx index 7f08ee9179c77..dc13d4ed83081 100644 --- a/Detectors/TPC/dcs/src/DCSConfigSpec.cxx +++ 
b/Detectors/TPC/dcs/src/DCSConfigSpec.cxx @@ -189,18 +189,28 @@ void DCSConfigDevice::updateRunInfo(gsl::span configBuff) LOGP(error, "{} has wrong format: {}, expected: {}, not writing RunInformation to CCDB", RunInfoFileName, line, runInfoConf); return; } + const auto tagString = data[2]; + // + // retrieve ETag from FEEConfig to set up Redirect + const auto headers = mCCDBApi.retrieveHeaders(CDBTypeMap.at(CDBType::ConfigFEE), {}, std::stol(tagString)); + std::map md; md[o2::base::NameConf::CCDBRunTag.data()] = data[0]; - md["Tag"] = data[2]; md["RunType"] = data[3]; md[o2::ccdb::CcdbObjectInfo::AdjustableEOV] = "true"; - char tempChar{static_cast(std::stoi(md["Tag"]))}; + if (headers.find("ETag") != headers.end()) { + auto etag = headers.at("ETag"); + etag.erase(std::remove(etag.begin(), etag.end(), '"'), etag.end()); + md["Redirect"] = fmt::format("/{}/{}/{}", CDBTypeMap.at(CDBType::ConfigFEE), tagString, etag); + } else { + LOGP(error, "No ETag found for Tag {}, not setting Redirect in RunInfo", tagString); + } const long startValRCT = std::stol(data[1]); const long endValRCT = startValRCT + 48l * 60l * 60l * 1000l; if (!mDontWriteRunInfo) { o2::ccdb::CcdbObjectInfo w(CDBTypeMap.at(CDBType::ConfigRunInfo), "", "", md, startValRCT, endValRCT); - mCCDBApi.storeAsBinaryFile(&tempChar, sizeof(tempChar), "tmp.dat", "char", CDBTypeMap.at(CDBType::ConfigRunInfo), md, startValRCT, endValRCT); + mCCDBApi.storeAsBinaryFile(nullptr, 0, "ignored", "", CDBTypeMap.at(CDBType::ConfigRunInfo), md, startValRCT, endValRCT); if (!mCCDBApi.isSnapshotMode()) { o2::ccdb::adjustOverriddenEOV(mCCDBApi, w); } @@ -255,7 +265,7 @@ void DCSConfigDevice::fillFEEPad(std::string_view configFileName, gsl::span(calPad, configBuff); mFEEPadDataReceived.set(3); } else if (configFileName == "CMkValues") { - nLines = cru_calib_helpers::fillCalPad<6>(calPad, configBuff); + nLines = cru_calib_helpers::fillCalPad<0>(calPad, configBuff); mFEEPadDataReceived.set(4); } diff --git 
a/GPU/Workflow/src/GPUWorkflowTPC.cxx b/GPU/Workflow/src/GPUWorkflowTPC.cxx index f895587b8b020..319d084cbcc6a 100644 --- a/GPU/Workflow/src/GPUWorkflowTPC.cxx +++ b/GPU/Workflow/src/GPUWorkflowTPC.cxx @@ -281,11 +281,15 @@ void GPURecoWorkflowSpec::finaliseCCDBTPC(ConcreteDataMatcher& matcher, void* ob mTPCDeadChannelMapCreator->getDeadChannelMapIDC().getSum(), mTPCDeadChannelMapCreator->getDeadChannelMap().getSum()); } else if (matcher == ConcreteDataMatcher(gDataOriginTPC, "TPCRUNINFO", 0)) { copyCalibsToBuffer(); - mTPCDeadChannelMapCreator->loadFEEConfigViaRunInfoTS(mCreationForCalib); + const auto* fee = static_cast(obj); + mTPCDeadChannelMapCreator->setDeadChannelMapFEEConfig(*fee); mTPCDeadChannelMapCreator->finalizeDeadChannelMap(); mdEdxCalibContainerBufferNew.get()->setDeadChannelMap(mTPCDeadChannelMapCreator->getDeadChannelMap()); - LOGP(info, "Updating dead channel map with the FEE info loaded via TPCRUNINFO for creation time {}: {} / {} dead pads from FEE info / total", - mCreationForCalib, mTPCDeadChannelMapCreator->getDeadChannelMapFEE().getSum(), mTPCDeadChannelMapCreator->getDeadChannelMap().getSum()); + LOGP(info, + "Updating dead channel map with the FEE info (tag {}) loaded via TPCRUNINFO" + " for creation time {}: {} / {} dead pads from FEE info / total, with", + std::underlying_type_t(fee->tag), mCreationForCalib, + mTPCDeadChannelMapCreator->getDeadChannelMapFEE().getSum(), mTPCDeadChannelMapCreator->getDeadChannelMap().getSum()); } else if (mTPCVDriftHelper->accountCCDBInputs(matcher, obj)) { } else if (mCalibObjects.mFastTransformHelper->accountCCDBInputs(matcher, obj)) { } @@ -321,7 +325,7 @@ bool GPURecoWorkflowSpec::fetchCalibsCCDBTPC(ProcessingCon } if (mTPCDeadChannelMapCreator->useSource(tpc::SourcesDeadMap::FEEConfig)) { - pc.inputs().get("tpcruninfo"); + pc.inputs().get("tpcruninfo"); } if (dEdxCalibContainer->isCorrectionCCDB(o2::tpc::CalibsdEdx::CalResidualGainMap)) { From a73c085b99074de97214c91906fa323e1ea3f98c Mon Sep 17 
00:00:00 2001 From: czhang Date: Tue, 4 Feb 2025 00:15:12 +0100 Subject: [PATCH 0053/1914] Add more configurables to alignment workflow --- .../MUON/MCH/Align/src/AlignmentSpec.cxx | 56 +++++++++++++++---- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx b/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx index 948ac1bda9117..8a1df9c98bb9b 100644 --- a/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx +++ b/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx @@ -208,11 +208,21 @@ class AlignmentTask auto doEvaluation = ic.options().get("do-evaluation"); mAlign.SetDoEvaluation(doEvaluation); + // Variation range for parameters - mAlign.SetAllowedVariation(0, 2.0); - mAlign.SetAllowedVariation(1, 0.3); - mAlign.SetAllowedVariation(2, 0.002); - mAlign.SetAllowedVariation(3, 2.0); + auto AllowX = ic.options().get("variation-x"); + auto AllowY = ic.options().get("variation-y"); + auto AllowPhi = ic.options().get("variation-phi"); + auto AllowZ = ic.options().get("variation-z"); + mAlign.SetAllowedVariation(0, AllowX); + mAlign.SetAllowedVariation(1, AllowY); + mAlign.SetAllowedVariation(2, AllowPhi); + mAlign.SetAllowedVariation(3, AllowZ); + + // Sigma XY + auto SigmaX = ic.options().get("sigma-x"); + auto SigmaY = ic.options().get("sigma-y"); + mAlign.SetSigmaXY(SigmaX, SigmaY); // Configuration for track fitter const auto& trackerParam = TrackerParam::Instance(); @@ -223,14 +233,28 @@ class AlignmentTask mImproveCutChi2 = 2. 
* trackerParam.sigmaCutForImprovement * trackerParam.sigmaCutForImprovement; // Fix chambers - auto input_fixchambers = ic.options().get("fix-chamber"); - std::stringstream string_chambers(input_fixchambers); - string_chambers >> std::ws; - while (string_chambers.good()) { - string substr; - std::getline(string_chambers, substr, ','); - LOG(info) << Form("%s%d", "Fixing chamber: ", std::stoi(substr)); - mAlign.FixChamber(std::stoi(substr)); + TString chambersString = ic.options().get("fix-chamber"); + std::unique_ptr objArray(chambersString.Tokenize(",")); + if (objArray->GetEntries() > 0) { + for (int iVar = 0; iVar < objArray->GetEntries(); ++iVar) { + LOG(info) << Form("%s%d", "Fixing chamber: ", std::stoi(objArray->At(iVar)->GetName())); + mAlign.FixChamber(std::stoi(objArray->At(iVar)->GetName())); + } + } + + // Fix DEs + TString DEString = ic.options().get("fix-de"); + TString MaskDEString = ic.options().get("mask-fix-de"); + std::unique_ptr objArrayDE(DEString.Tokenize(",")); + std::unique_ptr objArrayMask(MaskDEString.Tokenize(",")); + if (objArrayDE->GetEntries() > 0) { + if (objArrayDE->GetEntries() != objArrayMask->GetEntries()) { + LOG(fatal) << "Inconsistent size of DEs and Masks!"; + } + for (int iVar = 0; iVar < objArrayDE->GetEntries(); ++iVar) { + LOG(info) << Form("%s%d%s%d", "Fixing DE: ", std::stoi(objArrayDE->At(iVar)->GetName()), " with mask: ", std::stoi(objArrayMask->At(iVar)->GetName())); + mAlign.FixDetElem(std::stoi(objArrayDE->At(iVar)->GetName()), std::stoi(objArrayMask->At(iVar)->GetName())); + } } doMatched = ic.options().get("matched"); @@ -902,6 +926,14 @@ o2::framework::DataProcessorSpec getAlignmentSpec(bool disableCCDB) {"matched", VariantType::Bool, false, {"Switch for using MCH-MID matched tracks"}}, {"fix-chamber", VariantType::String, "", {"Chamber fixing, ex 1,2,3"}}, {"use-record", VariantType::Bool, false, {"Option for directly using record in alignment if provided"}}, + {"variation-x", VariantType::Float, 2.0, {"Allowed 
variation for x axis in cm"}}, + {"variation-y", VariantType::Float, 0.3, {"Allowed variation for y axis in cm"}}, + {"variation-phi", VariantType::Float, 0.002, {"Allowed variation for phi axis in rad"}}, + {"variation-z", VariantType::Float, 2.0, {"Allowed variation for z axis in cm"}}, + {"sigma-x", VariantType::Float, 1000.0, {"Sigma cut along X"}}, + {"sigma-y", VariantType::Float, 1000.0, {"Sigma cut along Y"}}, + {"fix-de", VariantType::String, "", {"DE fixing, ex 101,1019"}}, + {"mask-fix-de", VariantType::String, "", {"Mask for DE d.o.f fixing, ex 0,2,4"}}, {"output", VariantType::String, "Alignment", {"Option for name of output file"}}}}; } From 0e29a1acd2537dc5456f1977ef4305c0d4648da6 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 5 Feb 2025 14:59:12 +0100 Subject: [PATCH 0054/1914] loadFileToMemory with std::vector instead of o2::pmr::vector For debugging purposes only --- CCDB/include/CCDB/CcdbApi.h | 5 +++++ CCDB/src/CcdbApi.cxx | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/CCDB/include/CCDB/CcdbApi.h b/CCDB/include/CCDB/CcdbApi.h index 9ba8869fb7de3..5ad56fbd50557 100644 --- a/CCDB/include/CCDB/CcdbApi.h +++ b/CCDB/include/CCDB/CcdbApi.h @@ -348,6 +348,11 @@ class CcdbApi //: public DatabaseInterface TObject* retrieveFromTFile(std::string const& path, std::map const& metadata, long timestamp, std::map* headers, std::string const& etag, const std::string& createdNotAfter, const std::string& createdNotBefore) const; + void loadFileToMemory(std::vector& dest, std::string const& path, + std::map const& metadata, long timestamp, + std::map* headers, std::string const& etag, + const std::string& createdNotAfter, const std::string& createdNotBefore, bool considerSnapshot = true) const; + #if !defined(__CINT__) && !defined(__MAKECINT__) && !defined(__ROOTCLING__) && !defined(__CLING__) typedef struct RequestContext { o2::pmr::vector& dest; diff --git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index 3b622b87e7e7b..f436172de42ff
100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -1870,6 +1870,21 @@ void CcdbApi::saveSnapshot(RequestContext& requestContext) const } } +void CcdbApi::loadFileToMemory(std::vector& dest, std::string const& path, + std::map const& metadata, long timestamp, + std::map* headers, std::string const& etag, + const std::string& createdNotAfter, const std::string& createdNotBefore, bool considerSnapshot) const +{ + o2::pmr::vector destP; + destP.reserve(dest.size()); + loadFileToMemory(destP, path, metadata, timestamp, headers, etag, createdNotAfter, createdNotBefore, considerSnapshot); + dest.clear(); + dest.reserve(destP.size()); + for (const auto c : destP) { + dest.push_back(c); + } +} + void CcdbApi::loadFileToMemory(o2::pmr::vector& dest, std::string const& path, std::map const& metadata, long timestamp, std::map* headers, std::string const& etag, From de69487869f849697e7f0cac1cfd36750ccb9f17 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 5 Feb 2025 16:09:20 +0100 Subject: [PATCH 0055/1914] aod-merger treats Zombie files like missing ones --- Framework/AODMerger/src/aodMerger.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Framework/AODMerger/src/aodMerger.cxx b/Framework/AODMerger/src/aodMerger.cxx index f474cb0d37e43..aadf5cd2100c0 100644 --- a/Framework/AODMerger/src/aodMerger.cxx +++ b/Framework/AODMerger/src/aodMerger.cxx @@ -128,8 +128,8 @@ int main(int argc, char* argv[]) printf("Processing input file: %s\n", line.Data()); auto inputFile = TFile::Open(line); - if (!inputFile) { - printf("Error: Could not open input file %s.\n", line.Data()); + if (!inputFile || inputFile->IsZombie()) { + printf("Error: %s input file %s.\n", !inputFile ? 
"Could not open" : "Zombie", line.Data()); if (skipNonExistingFiles) { continue; } else { From 9a4e70843f364e7736e1748305c5e151d1a7342e Mon Sep 17 00:00:00 2001 From: Francesco Noferini Date: Wed, 5 Feb 2025 11:37:36 +0100 Subject: [PATCH 0056/1914] TPC timeseries requiring ft0 info as mandatory --- .../TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h | 2 +- Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx | 6 ++---- Detectors/TPC/workflow/src/tpc-time-series.cxx | 4 +--- prodtests/full-system-test/calib-workflow.sh | 1 - 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h index 3a61472d4bd8a..d7da0b9acb343 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCTimeSeriesSpec.h @@ -23,7 +23,7 @@ namespace tpc static constexpr header::DataDescription getDataDescriptionTimeSeries() { return header::DataDescription{"TIMESERIES"}; } static constexpr header::DataDescription getDataDescriptionTPCTimeSeriesTFId() { return header::DataDescription{"ITPCTSTFID"}; } -o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, o2::dataformats::GlobalTrackID::mask_t src, bool useft0 = false); +o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, o2::dataformats::GlobalTrackID::mask_t src); } // end namespace tpc } // end namespace o2 diff --git a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx index b2bf6d824ab86..871fdc00555e8 100644 --- a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx +++ b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx @@ -1814,7 +1814,7 @@ class TPCTimeSeries : public Task } }; 
-o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, GTrackID::mask_t src, bool useft0) +o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, const o2::base::Propagator::MatCorrType matType, const bool enableUnbinnedWriter, GTrackID::mask_t src) { auto dataRequest = std::make_shared(); bool useMC = false; @@ -1823,9 +1823,7 @@ o2::framework::DataProcessorSpec getTPCTimeSeriesSpec(const bool disableWriter, dataRequest->requestTracks(srcTracks, useMC); dataRequest->requestClusters(GTrackID::getSourcesMask("TPC"), useMC); - if (useft0) { - dataRequest->requestFT0RecPoints(false); - } + dataRequest->requestFT0RecPoints(false); bool tpcOnly = srcTracks == GTrackID::getSourcesMask("TPC"); if (!tpcOnly) { diff --git a/Detectors/TPC/workflow/src/tpc-time-series.cxx b/Detectors/TPC/workflow/src/tpc-time-series.cxx index 65345fbfefb10..f7bcf00cb27ea 100644 --- a/Detectors/TPC/workflow/src/tpc-time-series.cxx +++ b/Detectors/TPC/workflow/src/tpc-time-series.cxx @@ -29,7 +29,6 @@ void customize(std::vector& workflowOptions) {"disable-root-output", VariantType::Bool, false, {"disable root-files output writers"}}, {"enable-unbinned-root-output", VariantType::Bool, false, {"writing out unbinned track data"}}, {"track-sources", VariantType::String, std::string{o2::dataformats::GlobalTrackID::ALL}, {"comma-separated list of sources to use"}}, - {"use-ft0", VariantType::Bool, false, {"enable FT0 rec-points"}}, {"material-type", VariantType::Int, 2, {"Type for the material budget during track propagation: 0=None, 1=Geo, 2=LUT"}}}; std::swap(workflowOptions, options); } @@ -44,8 +43,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) const bool enableUnbinnedWriter = config.options().get("enable-unbinned-root-output"); auto src = o2::dataformats::GlobalTrackID::getSourcesMask(config.options().get("track-sources")); auto materialType = 
static_cast(config.options().get("material-type")); - const bool useft0 = config.options().get("use-ft0"); - workflow.emplace_back(o2::tpc::getTPCTimeSeriesSpec(disableWriter, materialType, enableUnbinnedWriter, src, useft0)); + workflow.emplace_back(o2::tpc::getTPCTimeSeriesSpec(disableWriter, materialType, enableUnbinnedWriter, src)); if (!disableWriter) { workflow.emplace_back(o2::tpc::getTPCTimeSeriesWriterSpec()); } diff --git a/prodtests/full-system-test/calib-workflow.sh b/prodtests/full-system-test/calib-workflow.sh index 16a5209ed3514..3dcd95fd04b54 100755 --- a/prodtests/full-system-test/calib-workflow.sh +++ b/prodtests/full-system-test/calib-workflow.sh @@ -54,7 +54,6 @@ if [[ $CALIB_ASYNC_EXTRACTTPCCURRENTS == 1 ]]; then add_W o2-tpc-integrate-cluster-workflow "${CONFIG_CTPTPC}" fi if [[ $CALIB_ASYNC_EXTRACTTIMESERIES == 1 ]] ; then - CONFIG_TPCTIMESERIES=" --use-ft0" : ${CALIB_ASYNC_SAMPLINGFACTORTIMESERIES:=0.001} if [[ ! -z ${CALIB_ASYNC_ENABLEUNBINNEDTIMESERIES:-} ]]; then CONFIG_TPCTIMESERIES+=" --enable-unbinned-root-output --sample-unbinned-tsallis --threads ${TPCTIMESERIES_THREADS:-1}" From 98febeceb294512cf0908995d5627e1c26efdbd6 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 6 Feb 2025 17:17:18 +0100 Subject: [PATCH 0057/1914] Fix int8/uint8 type in LinPad2Y for GPUTPCCompressionTrackModel --- GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h index b48bce50ff5a6..b67f544f513bf 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h @@ -100,13 +100,13 @@ class GPUTPCCompressionTrackModel GPUd() void getClusterErrors2(int32_t iRow, float z, float sinPhi, float DzDs, float& ErrY2, float& ErrZ2) const; GPUd() void resetCovariance(); - GPUd() float 
LinearPad2Y(int32_t slice, float pad, float padWidth, int8_t npads) const + GPUd() float LinearPad2Y(int32_t slice, float pad, float padWidth, uint8_t npads) const { const float u = (pad - 0.5f * npads) * padWidth; return (slice >= GPUCA_NSLICES / 2) ? -u : u; } - GPUd() float LinearY2Pad(int32_t slice, float y, float padWidth, int8_t npads) const + GPUd() float LinearY2Pad(int32_t slice, float y, float padWidth, uint8_t npads) const { const float u = (slice >= GPUCA_NSLICES / 2) ? -y : y; return u / padWidth + 0.5f * npads; From e9a471802be093b877db2ef1865cb11c2c3b7371 Mon Sep 17 00:00:00 2001 From: wiechula Date: Fri, 7 Feb 2025 08:23:02 +0100 Subject: [PATCH 0058/1914] Remove access of obsolete meta data --- Detectors/TPC/base/src/DeadChannelMapCreator.cxx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Detectors/TPC/base/src/DeadChannelMapCreator.cxx b/Detectors/TPC/base/src/DeadChannelMapCreator.cxx index 5fd9b59c85282..bcbfa8fe68956 100644 --- a/Detectors/TPC/base/src/DeadChannelMapCreator.cxx +++ b/Detectors/TPC/base/src/DeadChannelMapCreator.cxx @@ -57,9 +57,12 @@ void DeadChannelMapCreator::loadFEEConfig(long timeStamp) { std::map meta; mFEEConfig.reset(mCCDBApi.retrieveFromTFileAny(CDBTypeMap.at(CDBType::ConfigRunInfo), {}, timeStamp, &meta)); - const long tag = std::stol(meta.at("Tag")); + std::string redirect = "not found"; + if (meta.find("Redirect") != meta.end()) { + redirect = meta.at("Redirect"); + } if (!mFEEConfig) { - LOGP(error, "Could not load {}/{}, createdNotAfter: {}", CDBTypeMap.at(CDBType::ConfigFEE), tag, timeStamp); + LOGP(error, "Could not load {} redirected from {} with Redirect={}, createdNotAfter: {}", CDBTypeMap.at(CDBType::ConfigFEE), CDBTypeMap.at(CDBType::ConfigRunInfo), redirect, timeStamp); return; } LOGP(info, "Using FEE config for Tag {}, ETag {}, Last-Modified {}", meta.at("Valid-From"), meta.at("ETag"), meta.at("Last-Modified")); From bb29ae9af6df33ea9728c2ccf627df7e1edf59e8 Mon Sep 17 00:00:00 
2001 From: Roman Lietava Date: Sun, 9 Feb 2025 17:13:08 +0100 Subject: [PATCH 0059/1914] ctpdev: lumi per run added to fetcher (#13945) * dev: lumi calculation in fetcher * clang --- .../include/DataFormatsCTP/CTPRateFetcher.h | 9 +- .../CTP/include/DataFormatsCTP/Scalers.h | 4 + .../Detectors/CTP/src/CTPRateFetcher.cxx | 90 ++++++++++++++++++- DataFormats/Detectors/CTP/src/Scalers.cxx | 72 ++++++++++++++- Detectors/CTP/macro/GetRates.C | 45 ++++++++-- Detectors/CTP/macro/PlotPbLumi.C | 5 ++ Detectors/CTP/macro/TestGetRates.C | 38 +++++--- 7 files changed, 238 insertions(+), 25 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h index d47e53419bdf1..6b7802feb15ad 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h @@ -33,15 +33,18 @@ class CTPRateFetcher void setupRun(int runNumber, o2::ccdb::BasicCCDBManager* ccdb, uint64_t timeStamp, bool initScalers); void updateScalers(ctp::CTPRunScalers& scalers); int getRates(std::array& rates, o2::ccdb::BasicCCDBManager* ccdb, int runNumber, const std::string sourceName); // rates at start,stop and middle of the run - void setOrbit(bool orb) { mOrbit = orb; } - void setOutsideLimits(bool qc) { mOutsideLimits = qc; } + double getLumi(o2::ccdb::BasicCCDBManager* ccdb, int runNumber, const std::string sourceName, int puCorr = 0); // total lumi for a run + double getLumiNoPuCorr(const std::string& classname, int type = 1); + double getLumiWPuCorr(const std::string& classname, int type = 1); + void setOrbit(bool orb) { mOrbit = orb; } // use orbit instead of time + void setOutsideLimits(bool qc) { mOutsideLimits = qc; } // return first/last rate of time outside of run private: double fetchCTPratesInputs(uint64_t timeStamp, int input); double fetchCTPratesClasses(uint64_t timeStamp, const std::string& className, int inputType = 
1); double fetchCTPratesInputsNoPuCorr(uint64_t timeStamp, int input); double fetchCTPratesClassesNoPuCorr(uint64_t timeStamp, const std::string& className, int inputType = 1); - + double getLumi(const std::string& classname, int type = 1, int puCorr = 0); double pileUpCorrection(double rate); int mRunNumber = -1; bool mOutsideLimits = 0; diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Scalers.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Scalers.h index c10ac070d4d35..45d54b034f8d9 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Scalers.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Scalers.h @@ -143,6 +143,10 @@ class CTPRunScalers mScalerRecordO2[mScalerRecordO2.size() - 1].scalers[i].l1After - mScalerRecordO2[0].scalers[i].l1After, }; } + /// retrieves integral - same interface as getRate, no pileup correction + uint64_t getLumiNoPuCorr(int classindex, int type) const; + /// retrieves vector of counters - same interface as getRate, needed for + std::vector> getRatesForIndex(int classindex, int type) const; /// retrieves time boundaries of this scaler object from O2 scalers std::pair getTimeLimit() const { diff --git a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx index 28da2033e7b29..43fa9dbe7f3f3 100644 --- a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx +++ b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx @@ -41,7 +41,7 @@ double CTPRateFetcher::fetchNoPuCorr(o2::ccdb::BasicCCDBManager* ccdb, uint64_t if (runNumber < 534202) { return fetchCTPratesClassesNoPuCorr(timeStamp, "minbias_TVX_L0", 3); // 2022 } else { - double_t ret = fetchCTPratesClassesNoPuCorr(timeStamp, "CMTVX-B-NOPF"); + double ret = fetchCTPratesClassesNoPuCorr(timeStamp, "CMTVX-B-NOPF"); if (ret == -2.) 
{ LOG(info) << "Trying different class"; ret = fetchCTPratesClassesNoPuCorr(timeStamp, "CMTVX-NONE"); @@ -77,6 +77,94 @@ int CTPRateFetcher::getRates(std::array& rates, o2::ccdb::BasicCCDBMa rates[2] = rateM; return 0; } +double CTPRateFetcher::getLumiNoPuCorr(const std::string& classname, int type) +{ + if (classname == "zncinp") { + return mScalers.getLumiNoPuCorr(26, 7); + } + std::vector& ctpcls = mConfig.getCTPClasses(); + std::vector clslist = mConfig.getTriggerClassList(); + int classIndex = -1; + for (size_t i = 0; i < clslist.size(); i++) { + if (ctpcls[i].name.find(classname) != std::string::npos) { + classIndex = i; + break; + } + } + if (classIndex == -1) { + LOG(warn) << "Trigger class " << classname << " not found in CTPConfiguration"; + return -1; + } + return mScalers.getLumiNoPuCorr(classIndex, type); +} +double CTPRateFetcher::getLumiWPuCorr(const std::string& classname, int type) +{ + std::vector> scals; + if (classname == "zncinp") { + scals = mScalers.getRatesForIndex(26, 7); + } else { + std::vector& ctpcls = mConfig.getCTPClasses(); + std::vector clslist = mConfig.getTriggerClassList(); + int classIndex = -1; + for (size_t i = 0; i < clslist.size(); i++) { + if (ctpcls[i].name.find(classname) != std::string::npos) { + classIndex = i; + break; + } + } + if (classIndex == -1) { + LOG(warn) << "Trigger class " << classname << " not found in CTPConfiguration"; + return -1; + } + scals = mScalers.getRatesForIndex(classIndex, type); + } + double lumi = 0; + for (auto const& ss : scals) { + // std::cout << ss.first << " " << ss.second << " " << pileUpCorrection(ss.first/ss.second) << std::endl; + lumi += pileUpCorrection(ss.first / ss.second) * ss.second; + } + return lumi; +} +double CTPRateFetcher::getLumi(const std::string& classname, int type, int puCorr) +{ + if (puCorr) { + return getLumiWPuCorr(classname, type); + } else { + return getLumiNoPuCorr(classname, type); + } +} + +double CTPRateFetcher::getLumi(o2::ccdb::BasicCCDBManager* ccdb, int 
runNumber, const std::string sourceName, int puCorr) +{ + // setupRun(runNumber, ccdb, timeStamp, 1); + if (sourceName.find("ZNC") != std::string::npos) { + if (runNumber < 544448) { + return getLumi("zncinp", 1, puCorr) / (sourceName.find("hadronic") != std::string::npos ? 28. : 1.); + } else { + return getLumi("C1ZNC-B-NOPF-CRU", 6, puCorr) / (sourceName.find("hadronic") != std::string::npos ? 28. : 1.); + } + } else if (sourceName == "T0CE") { + return getLumi("CMTVXTCE-B-NOPF", 1, puCorr); + } else if (sourceName == "T0SC") { + return getLumi("CMTVXTSC-B-NOPF", 1, puCorr); + } else if (sourceName == "T0VTX") { + if (runNumber < 534202) { + return getLumi("minbias_TVX_L0", 3, puCorr); // 2022 + } else { + double ret = getLumi("CMTVX-B-NOPF", 1, puCorr); + if (ret == -1.) { + LOG(info) << "Trying different class"; + ret = getLumi("CMTVX-NONE", 1, puCorr); + if (ret < 0) { + LOG(fatal) << "None of the classes used for lumi found"; + } + } + return ret; + } + } + LOG(error) << "CTP Lumi for " << sourceName << " not available"; + return 0; +} // double CTPRateFetcher::fetchCTPratesClasses(uint64_t timeStamp, const std::string& className, int inputType) { diff --git a/DataFormats/Detectors/CTP/src/Scalers.cxx b/DataFormats/Detectors/CTP/src/Scalers.cxx index f70a035427ade..256722fc1e5ae 100644 --- a/DataFormats/Detectors/CTP/src/Scalers.cxx +++ b/DataFormats/Detectors/CTP/src/Scalers.cxx @@ -657,7 +657,77 @@ void CTPRunScalers::printLMBRateVsT() const } } } - +// +uint64_t CTPRunScalers::getLumiNoPuCorr(int classindex, int type) const +{ + if (type < 7) { + const auto s0 = mScalerRecordO2[0].scalers[classindex]; + const auto s1 = mScalerRecordO2[mScalerRecordO2.size() - 1].scalers[classindex]; + switch (type) { + case 1: + return (s1.lmBefore - s0.lmBefore); + case 2: + return (s1.lmAfter - s0.lmAfter); + case 3: + return (s1.l0Before - s0.l0Before); + case 4: + return (s1.l0After - s0.l0After); + case 5: + return (s1.l1Before - s0.l1Before); + case 6: + return 
(s1.l1After - s0.l1After); + default: + LOG(error) << "Wrong type:" << type; + return -1; // wrong type + } + } else if (type == 7) { + auto s0 = mScalerRecordO2[0].scalersInps[classindex]; // type CTPScalerO2* + auto s1 = mScalerRecordO2[mScalerRecordO2.size() - 1].scalersInps[classindex]; + return (s1 - s0); + } else { + LOG(error) << "Wrong type:" << type; + return -1; // wrong type + } +}; +// +std::vector> CTPRunScalers::getRatesForIndex(int classindex, int type) const +{ + std::vector> scals; + for (int i = 0; i < mScalerRecordO2.size() - 1; i++) { + double_t diff = 0; + // double_t timeDiff = mScalerRecordO2[i + 1].epochTime - mScalerRecordO2[i].epochTime; + double_t timeDiff = (mScalerRecordO2[i + 1].intRecord.orbit - mScalerRecordO2[i].intRecord.orbit) * o2::constants::lhc::LHCOrbitMUS / 1.e6; + if (type < 7) { + const auto s0 = mScalerRecordO2[i].scalers[classindex]; + const auto s1 = mScalerRecordO2[i + 1].scalers[classindex]; + if (type == 1) { + diff = s1.lmBefore - s0.lmBefore; + } else if (type == 2) { + diff = s1.lmAfter - s0.lmAfter; + } else if (type == 3) { + diff = s1.l0Before - s0.l0Before; + } else if (type == 4) { + diff = s1.l0After - s0.l0After; + } else if (type == 5) { + diff = s1.l1Before - s0.l1Before; + } else if (type == 6) { + diff = s1.l1After - s0.l1After; + } else { + LOG(error) << "Wrong type:" << type; + return scals; // wrong type + } + } else if (type == 7) { + auto s0 = mScalerRecordO2[i].scalersInps[classindex]; // type CTPScalerO2* + auto s1 = mScalerRecordO2[i + 1].scalersInps[classindex]; + diff = s1 - s0; + } else { + LOG(error) << "Wrong type:" << type; + return scals; // wrong type + } + scals.emplace_back(std::pair{diff, timeDiff}); + } + return scals; +}; // returns the pair of global (levelled) interaction rate, as well as instantaneous interpolated // rate in Hz at a certain orbit number within the run // type - 7 : inputs diff --git a/Detectors/CTP/macro/GetRates.C b/Detectors/CTP/macro/GetRates.C index 
d2b65d821114a..8894d7935b99e 100644 --- a/Detectors/CTP/macro/GetRates.C +++ b/Detectors/CTP/macro/GetRates.C @@ -12,17 +12,44 @@ #if !defined(__CLING__) || defined(__ROOTCLING__) #include #include -#include +#include "CTPWorkflowScalers/ctpCCDBManager.h" +#include "Framework/Logger.h" #endif using namespace o2::ctp; -void TestFetcher(int runNumber = 535087) +void GetRates(int run = 559617) { - auto& ccdb = o2::ccdb::BasicCCDBManager::instance(); - std::pair pp = ccdb.getRunDuration(runNumber); - long ts = pp.first + 60; - std::cout << "Run duration:" << pp.first << " " << pp.second << std::endl; - // Opening run - CTPRateFetcher fetcher; - fetcher.setupRun(runNumber, &ccdb, ts, 1); + uint64_t inputmaskCum = 0, classmackCum = 0; + int ntrigSel = 0; + + auto& cmb = o2::ccdb::BasicCCDBManager::instance(); + auto ctpcfg = cmb.getSpecificForRun("CTP/Config/Config", run); + if (!ctpcfg) { + LOGP(error, "Can not get config for run {}", run); + return; + } + CTPConfiguration ctpconfig; + ctpconfig.loadConfigurationRun3(ctpcfg->getConfigString()); + ctpconfig.printStream(std::cout); + auto& triggerclasses = ctpconfig.getCTPClasses(); + LOGP(info, "Found {} trigger classes", triggerclasses.size()); + int indexInList = 0; + for (const auto& trgclass : triggerclasses) { + uint64_t inputmask = 0; + if (trgclass.descriptor != nullptr) { + inputmask = trgclass.descriptor->getInputsMask(); + // LOGP(info, "inputmask: {:#x}", inputmask); + } + trgclass.printStream(std::cout); + // std::cout << indexInList << ": " << trgclass.name << ", input mask 0x" << std::hex << inputmask << ", class mask 0x" << trgclass.classMask << std::dec << std::endl; + indexInList++; + if (trgclass.cluster->getClusterDetNames().find("TRD") != std::string::npos || trgclass.cluster->getClusterDetNames().find("trd") != std::string::npos) { + LOGP(info, "Found TRD trigger cluster, class mask: {:#x}, input mask: {:#x}", trgclass.classMask, inputmask); + inputmaskCum |= inputmask; + classmackCum |= 
trgclass.classMask; + ntrigSel++; + } + } + + LOGP(info, "Found {} triggers with TRD: classMasks: {:#x} inputMasks: {:#x}", ntrigSel, classmackCum, inputmaskCum); } diff --git a/Detectors/CTP/macro/PlotPbLumi.C b/Detectors/CTP/macro/PlotPbLumi.C index 6ffa1dd4cee2b..04666d5bd1cf6 100644 --- a/Detectors/CTP/macro/PlotPbLumi.C +++ b/Detectors/CTP/macro/PlotPbLumi.C @@ -20,6 +20,11 @@ #include "CCDB/BasicCCDBManager.h" #include "DataFormatsCTP/Scalers.h" #include "DataFormatsCTP/Configuration.h" +#include "DataFormatsParameters/GRPLHCIFData.h" +#include "TGraph.h" +#include "TMath.h" +#include "TCanvas.h" +#include "TStyle.h" #include #include #include diff --git a/Detectors/CTP/macro/TestGetRates.C b/Detectors/CTP/macro/TestGetRates.C index 47790426d66c7..19644853c568b 100644 --- a/Detectors/CTP/macro/TestGetRates.C +++ b/Detectors/CTP/macro/TestGetRates.C @@ -16,17 +16,33 @@ #endif using namespace o2::ctp; -void TestGetRates(int runNumber = 557251) +void TestGetRates(int runN = 0) { + std::vector runs; + std::vector codes = {"T0VTX", "T0VTX", "ZNChadronic", "ZNChadronic", "T0VTX"}; + if (runN == 0) { + runs = {529066, 539218, 544013, 544518, 557251}; + } else { + runs.push_back(runN); + } auto& ccdb = o2::ccdb::BasicCCDBManager::instance(); - // Opening run - std::pair pp = ccdb.getRunDuration(runNumber); - long ts = pp.first + 60; - std::cout << "Run duration:" << pp.first << " " << pp.second << std::endl; - CTPRateFetcher fetcher; - fetcher.setupRun(runNumber, &ccdb, ts, 1); - fetcher.setOrbit(1); - std::array rates; - fetcher.getRates(rates, &ccdb, runNumber, "T0VTX"); - std::cout << "Start:" << rates[0] << " End:" << rates[1] << " Middle:" << rates[2] << std::endl; + int i = 0; + for (auto const& runNumber : runs) { + // Opening run + std::pair pp = ccdb.getRunDuration(runNumber); + long ts = pp.first + 60; + // std::cout << "Run duration:" << pp.first << " " << pp.second << std::endl; + std::cout << "===> RUN:" << runNumber << " duration:" << (pp.second - 
pp.first) / 1000. << std::endl; + + CTPRateFetcher fetcher; + fetcher.setupRun(runNumber, &ccdb, ts, 1); + fetcher.setOrbit(1); + std::array rates; + fetcher.getRates(rates, &ccdb, runNumber, codes[i]); + std::cout << "Start:" << rates[0] << " End:" << rates[1] << " Middle:" << rates[2] << " code:" << codes[i] << std::endl; + double lumi1 = fetcher.getLumi(&ccdb, runNumber, codes[i], 0); + double lumi2 = fetcher.getLumi(&ccdb, runNumber, codes[i], 1); + std::cout << " Lumi NO pile up corr:" << lumi1 << " Lumi with pile upcorr:" << lumi2 << " code:" << codes[i] << std::endl; + i++; + } } From 84ba04a5149a84cad01421a6d8d8e7e55f60877f Mon Sep 17 00:00:00 2001 From: shahoian Date: Sun, 9 Feb 2025 19:32:39 +0100 Subject: [PATCH 0060/1914] store distance of the innermost pad from the sector edge --- .../include/GlobalTrackingStudy/TrackInfoExt.h | 3 ++- .../study/include/GlobalTrackingStudy/V0Ext.h | 4 +++- .../GlobalTrackingWorkflow/study/src/SVStudy.cxx | 13 +++++++++++++ .../study/src/TrackingStudy.cxx | 5 +++++ 4 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h index 6fd06e5265946..935e57873bbd9 100644 --- a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h @@ -45,6 +45,7 @@ struct TrackInfoExt { uint8_t pattITS = 0; uint8_t nClITS = 0; uint8_t rowMinTPC = 0; + uint8_t padFromEdge = -1; uint8_t rowMaxTPC = 0; uint8_t rowCountTPC = 0; @@ -55,7 +56,7 @@ struct TrackInfoExt { float getTPCInY0() const { return innerTPCPos0[1]; } float getTPCInZ0() const { return innerTPCPos0[2]; } - ClassDefNV(TrackInfoExt, 4); + ClassDefNV(TrackInfoExt, 5); }; } // namespace dataformats diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/V0Ext.h 
b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/V0Ext.h index 79221b893882d..b1a9f6923f04d 100644 --- a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/V0Ext.h +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/V0Ext.h @@ -26,11 +26,13 @@ struct ProngInfoExt { int nClITS = 0; int pattITS = 0; float chi2ITSTPC = 0.f; + uint8_t lowestRow = -1; + uint8_t padFromEdge = -1; int8_t corrGlo = -1; int8_t corrITSTPC = -1; int8_t corrITS = -1; int8_t corrTPC = -1; - ClassDefNV(ProngInfoExt, 2); + ClassDefNV(ProngInfoExt, 3); }; struct V0Ext { diff --git a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx index 12a883ec991f6..d4d8eaf92e0d6 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx @@ -226,6 +226,7 @@ o2::dataformats::V0Ext SVStudySpec::processV0(int iv, o2::globaltracking::RecoCo v0ext.v0 = v0sel; } v0ext.v0ID = v0id; + const auto clRefs = recoData.getTPCTracksClusterRefs(); o2::MCCompLabel lb[2]; const o2::MCTrack* mcTrks[2]; for (int ip = 0; ip < 2; ip++) { @@ -245,6 +246,18 @@ o2::dataformats::V0Ext SVStudySpec::processV0(int iv, o2::globaltracking::RecoCo if (lb[ip].isValid()) { prInfo.corrTPC = !lb[ip].isFake(); } + if (mParam && mUseTPCCl) { + uint8_t clSect = 0, clRow = 0; + uint32_t clIdx = 0; + tpcTr.getClusterReference(clRefs, tpcTr.getNClusterReferences() - 1, clSect, clRow, clIdx); + const auto& clus = recoData.getTPCClusters().clusters[clSect][clRow][clIdx]; + prInfo.lowestRow = clRow; + int npads = mParam->tpcGeometry.NPads(clRow); + prInfo.padFromEdge = uint8_t(clus.getPad()); + if (prInfo.padFromEdge > npads / 2) { + prInfo.padFromEdge = npads - 1 - prInfo.padFromEdge; + } + } } // get ITS tracks, if any if (gid.includesDet(DetID::ITS)) { diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx 
b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index 8df1e980ecb8a..36530bfe9238b 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -300,6 +300,11 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) trc.getClusterReference(clRefs, trc.getNClusterReferences() - 1, clSect, clRow, clIdx); trExt.rowMinTPC = clRow; const auto& clus = tpcClusAcc.clusters[clSect][clRow][clIdx]; + trExt.padFromEdge = uint8_t(clus.getPad()); + int npads = mTPCRefitter->getParam()->tpcGeometry.NPads(clRow); + if (trExt.padFromEdge > npads / 2) { + trExt.padFromEdge = npads - 1 - trExt.padFromEdge; + } this->mTPCCorrMapsLoader.Transform(clSect, clRow, clus.getPad(), clus.getTime(), trExt.innerTPCPos0[0], trExt.innerTPCPos0[1], trExt.innerTPCPos0[2], trc.getTime0()); // nominal time of the track if (timestampTB > -1e8) { this->mTPCCorrMapsLoader.Transform(clSect, clRow, clus.getPad(), clus.getTime(), trExt.innerTPCPos[0], trExt.innerTPCPos[1], trExt.innerTPCPos[2], timestampTB); // time assigned from the global track track From ef43f9f913ee4cd6e472a7d5f198eedc6ab2ff0c Mon Sep 17 00:00:00 2001 From: pillot Date: Tue, 11 Feb 2025 10:57:16 +0100 Subject: [PATCH 0061/1914] new option to print the status map content (#13951) --- Detectors/MUON/MCH/Status/CMakeLists.txt | 1 + .../Status/src/statusmap-to-rejectlist.cxx | 35 ++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/Detectors/MUON/MCH/Status/CMakeLists.txt b/Detectors/MUON/MCH/Status/CMakeLists.txt index 02fd87c1e6e52..e664e92d4c05b 100644 --- a/Detectors/MUON/MCH/Status/CMakeLists.txt +++ b/Detectors/MUON/MCH/Status/CMakeLists.txt @@ -47,6 +47,7 @@ o2_add_executable( O2::DataFormatsMCH O2::Framework O2::MCHGlobalMapping + O2::MCHMappingImpl4 O2::MCHStatus ) diff --git a/Detectors/MUON/MCH/Status/src/statusmap-to-rejectlist.cxx 
b/Detectors/MUON/MCH/Status/src/statusmap-to-rejectlist.cxx index c50d7022ad1b6..b7d5d93676ff0 100644 --- a/Detectors/MUON/MCH/Status/src/statusmap-to-rejectlist.cxx +++ b/Detectors/MUON/MCH/Status/src/statusmap-to-rejectlist.cxx @@ -115,6 +115,35 @@ void printContent(const std::string inFile, const uint32_t mask) dataFile->Close(); } +//____________________________________________________________________________________ +void dumpContent(const std::string inFile, const size_t iTF, const uint32_t mask) +{ + /// print the content of the status map of the given TF with the given mask + + auto [dataFile, dataReader] = loadData(inFile); + TTreeReaderValue statusMap(*dataReader, "statusmaps"); + + if (dataReader->SetEntry(iTF) != TTreeReader::kEntryValid) { + LOGP(error, "invalid TF index {} (number of TFs = {})", iTF, dataReader->GetEntries()); + exit(3); + } + + LOGP(info, "status map content for TF {} with statusMask=0x{:x}:", iTF, mask); + + for (const auto& status : *statusMap) { + if ((mask & status.second) != 0) { + auto channel = status.first; + if (!channel.isValid()) { + LOGP(error, "invalid channel with status {}", status.second); + } else { + LOGP(info, "{} status {}", asString(channel), status.second); + } + } + } + + dataFile->Close(); +} + //____________________________________________________________________________________ BadChannelsVector statusMap2RejectList(const std::string inFile, const size_t iTF, const uint32_t mask) { @@ -174,6 +203,7 @@ int main(int argc, char** argv) size_t iTF; uint32_t mask; bool print; + bool dump; auto tnow = std::chrono::system_clock::now().time_since_epoch(); using namespace std::chrono_literals; @@ -193,6 +223,7 @@ int main(int argc, char** argv) ("tf,i", po::value(&iTF)->default_value(0), "index of the TF to process") ("mask,m", po::value(&mask)->default_value(defaultMask), "mask to apply to the statusMap to produce the RejectList") ("print,p",po::bool_switch(&print),"print the content of the input file without 
processing it") + ("dump,d",po::bool_switch(&dump),"dump the raw content of the input file without processing it") ; // clang-format on @@ -214,7 +245,9 @@ int main(int argc, char** argv) exit(1); } - if (print) { + if (dump) { + dumpContent(inFile, iTF, mask); + } else if (print) { printContent(inFile, mask); } else { auto bv = statusMap2RejectList(inFile, iTF, mask); From 3c58a98707841ffe46ea60f38e3165387b849d1c Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 11 Feb 2025 10:32:31 +0100 Subject: [PATCH 0062/1914] Fix storing TPC occupancy in the SVStudy output --- Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx index d4d8eaf92e0d6..c28048a1f9503 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx @@ -194,6 +194,8 @@ void SVStudySpec::updateTimeDependentParams(ProcessingContext& pc) // for occupancy estimator mParam = o2::gpu::GPUO2InterfaceUtils::getFullParamShared(0.f, mNHBPerTF); } + auto& elParam = o2::tpc::ParameterElectronics::Instance(); + mTPCTBinMUSInv = 1. 
/ elParam.ZbinWidth; // 1./TPC bin in microseconds } mBz = o2::base::Propagator::Instance()->getNominalBz(); mFitterV0.setBz(mBz); @@ -353,8 +355,8 @@ void SVStudySpec::process(o2::globaltracking::RecoContainer& recoData) << "pv=" << pv << "\n"; } - tfID++; } + tfID++; } bool SVStudySpec::refitV0(const V0ID& id, o2::dataformats::V0& v0, o2::globaltracking::RecoContainer& recoData) From 867d8c2ce1ac3a3bb290cb97dde2078bbb89539c Mon Sep 17 00:00:00 2001 From: Hadi Hassan Date: Tue, 11 Feb 2025 14:02:44 +0200 Subject: [PATCH 0063/1914] [FOCAL-100] Porting the beam pipe for run 4 (around FOCAL) (#13772) * Porting the beam pipe for run 4 (around FOCAL) --- Detectors/Passive/CMakeLists.txt | 2 + .../include/DetectorsPassive/PipeRun4.h | 63 + Detectors/Passive/src/PassiveLinkDef.h | 1 + Detectors/Passive/src/PipeRun4.cxx | 3190 +++++++++++++++++ macro/build_geometry.C | 5 +- 5 files changed, 3259 insertions(+), 2 deletions(-) create mode 100644 Detectors/Passive/include/DetectorsPassive/PipeRun4.h create mode 100644 Detectors/Passive/src/PipeRun4.cxx diff --git a/Detectors/Passive/CMakeLists.txt b/Detectors/Passive/CMakeLists.txt index 95bb39118cb20..0976530bc6571 100644 --- a/Detectors/Passive/CMakeLists.txt +++ b/Detectors/Passive/CMakeLists.txt @@ -15,6 +15,7 @@ o2_add_library(DetectorsPassive src/Dipole.cxx src/Compensator.cxx src/Pipe.cxx + src/PipeRun4.cxx src/Magnet.cxx src/PassiveContFact.cxx src/FrameStructure.cxx @@ -32,6 +33,7 @@ o2_target_root_dictionary(DetectorsPassive include/DetectorsPassive/Magnet.h include/DetectorsPassive/PassiveContFact.h include/DetectorsPassive/Pipe.h + include/DetectorsPassive/PipeRun4.h include/DetectorsPassive/FrameStructure.h include/DetectorsPassive/Shil.h include/DetectorsPassive/Hall.h diff --git a/Detectors/Passive/include/DetectorsPassive/PipeRun4.h b/Detectors/Passive/include/DetectorsPassive/PipeRun4.h new file mode 100644 index 0000000000000..1943bb25a802f --- /dev/null +++ 
b/Detectors/Passive/include/DetectorsPassive/PipeRun4.h @@ -0,0 +1,63 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_PASSIVE_PIPERUN4_H +#define ALICEO2_PASSIVE_PIPERUN4_H + +#include "DetectorsPassive/PassiveBase.h" +#include "Rtypes.h" // for PipeRun4::Class, ClassDef, PipeRun4::Streamer + +class TGeoPcon; + +namespace o2 +{ +namespace passive +{ +class PipeRun4 : public PassiveBase +{ + public: + PipeRun4(const char* name, const char* Title = "Alice Pipe", float rho = 0.f, float thick = 0.f); + PipeRun4(); + + ~PipeRun4() override; + void ConstructGeometry() override; + + /// Clone this object (used in MT mode only) + FairModule* CloneModule() const override; + + float getRmin() const { return mBePipeRmax - mBePipeThick; } + float getRmax() const { return mBePipeRmax; } + float getWidth() const { return mBePipeThick; } + float getDz() const { return mIpHLength; } + + private: + void createMaterials(); + PipeRun4(const PipeRun4& orig); + PipeRun4& operator=(const PipeRun4&); + + TGeoPcon* makeMotherFromTemplate(const TGeoPcon* shape, int imin = -1, int imax = -1, float r0 = 0., + int nz = -1); + TGeoPcon* makeInsulationFromTemplate(TGeoPcon* shape); + TGeoVolume* makeBellow(const char* ext, int nc, float rMin, float rMax, float dU, float rPlie, + float dPlie); + TGeoVolume* makeBellowCside(const char* ext, int nc, float rMin, float rMax, float rPlie, float dPlie); + + TGeoVolume* makeSupportBar(const char* tag, float Rin, float Rout, float 
length, float skinLength); + + float mBePipeRmax = 0.; // outer radius of the Be section + float mBePipeThick = 0.; // Be section thickness + float mIpHLength = 0.; // half length of the beampipe around the IP // FixMe: up to now, hardcoded to 57.25cm + + ClassDefOverride(PipeRun4, 1); +}; +} // namespace passive +} // namespace o2 +#endif // ALICEO2_PASSIVE_PIPERUN4_H diff --git a/Detectors/Passive/src/PassiveLinkDef.h b/Detectors/Passive/src/PassiveLinkDef.h index 4ecb54ec2cb34..b1d674519bf2e 100644 --- a/Detectors/Passive/src/PassiveLinkDef.h +++ b/Detectors/Passive/src/PassiveLinkDef.h @@ -35,6 +35,7 @@ #pragma link C++ class o2::passive::Cave + ; #pragma link C++ class o2::passive::PassiveContFact + ; #pragma link C++ class o2::passive::Pipe + ; +#pragma link C++ class o2::passive::PipeRun4 + ; #pragma link C++ class o2::passive::FrameStructure + ; #pragma link C++ class o2::passive::Shil + ; #pragma link C++ class o2::passive::Hall + ; diff --git a/Detectors/Passive/src/PipeRun4.cxx b/Detectors/Passive/src/PipeRun4.cxx new file mode 100644 index 0000000000000..7a2ff6dcfe90b --- /dev/null +++ b/Detectors/Passive/src/PipeRun4.cxx @@ -0,0 +1,3190 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction.
+ +#include "DetectorsPassive/PipeRun4.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "TGeoManager.h" // for TGeoManager, gGeoManager +#include "TGeoMaterial.h" // for TGeoMaterial +#include "TGeoMedium.h" // for TGeoMedium +#include "TGeoVolume.h" // for TGeoVolume +#include // for TGeoTrap +#include // for TGeoTrap +// force availability of assert +#ifdef NDEBUG +#undef NDEBUG +#endif +#include + +//------------------------------------------------------------------------- +// Beam pipe class for ALICE ITS3 & FOCAL upgrade +// Imported from Pipe class +// Original Authors: +// F. Manso +// A. Morsch +// R. Tieulent +// M. Sitta +//------------------------------------------------------------------------- + +using namespace o2::passive; + +PipeRun4::~PipeRun4() = default; +PipeRun4::PipeRun4() : PassiveBase("PIPE", "") {} +PipeRun4::PipeRun4(const char* name, const char* title, float rho, float thick) + : PassiveBase(name, title), mBePipeRmax(rho), mBePipeThick(thick) +{ +} +PipeRun4::PipeRun4(const PipeRun4& rhs) = default; + +PipeRun4& PipeRun4::operator=(const PipeRun4& rhs) +{ + // self assignment + if (this == &rhs) { + return *this; + } + + // base class assignment + PassiveBase::operator=(rhs); + + return *this; +} + +void PipeRun4::ConstructGeometry() +{ + createMaterials(); + // + // Class describing the beam pipe geometry + // + float z, zsh, z0; + // + // Rotation Matrices + // + const float kDegRad = TMath::Pi() / 180.; + // Rotation by 180 deg + TGeoRotation* rot180 = new TGeoRotation("rot180", 90., 180., 90., 90., 180., 0.); + TGeoRotation* rotyz = new TGeoRotation("rotyz", 90., 180., 0., 180., 90., 90.); + TGeoRotation* rotxz = new TGeoRotation("rotxz", 0., 0., 90., 90., 90., 180.); + // + + // Media + auto& matmgr = o2::base::MaterialManager::Instance(); + const TGeoMedium* kMedAir = matmgr.getTGeoMedium("PIPE_AIR"); + const TGeoMedium* kMedAirNF = matmgr.getTGeoMedium("PIPE_AIR_NF"); + const 
TGeoMedium* kMedAirHigh = matmgr.getTGeoMedium("PIPE_AIR_HIGH"); + + const TGeoMedium* kMedVac = matmgr.getTGeoMedium("PIPE_VACUUM"); + const TGeoMedium* kMedVacNF = matmgr.getTGeoMedium("PIPE_VACUUM_NF"); + const TGeoMedium* kMedVacHC = matmgr.getTGeoMedium("PIPE_VACUUM_HC"); + const TGeoMedium* kMedVacNFHC = matmgr.getTGeoMedium("PIPE_VACUUM_NFHC"); + + const TGeoMedium* kMedInsu = matmgr.getTGeoMedium("PIPE_INS_C0"); + + const TGeoMedium* kMedSteel = matmgr.getTGeoMedium("PIPE_INOX"); + const TGeoMedium* kMedSteelNF = matmgr.getTGeoMedium("PIPE_INOX_NF"); + const TGeoMedium* kMedSteelHC = matmgr.getTGeoMedium("PIPE_INOX_HC"); + const TGeoMedium* kMedSteelNFHC = matmgr.getTGeoMedium("PIPE_INOX_NFHC"); + + const TGeoMedium* kMedBe = matmgr.getTGeoMedium("PIPE_BE"); + + const TGeoMedium* kMedCu = matmgr.getTGeoMedium("PIPE_CU"); + const TGeoMedium* kMedCuNF = matmgr.getTGeoMedium("PIPE_CU_NF"); + const TGeoMedium* kMedCuHC = matmgr.getTGeoMedium("PIPE_CU_HC"); + const TGeoMedium* kMedCuNFHC = matmgr.getTGeoMedium("PIPE_CU_NFHC"); + + const TGeoMedium* kMedAlu2219 = matmgr.getTGeoMedium("PIPE_AA2219"); + const TGeoMedium* kMedRohacell = matmgr.getTGeoMedium("PIPE_ROHACELL"); + const TGeoMedium* kMedPolyimide = matmgr.getTGeoMedium("PIPE_POLYIMIDE"); + const TGeoMedium* kMedAlBe = matmgr.getTGeoMedium("PIPE_AlBe"); + const TGeoMedium* kMedCarbonFiber = matmgr.getTGeoMedium("PIPE_M55J6K"); + const TGeoMedium* kMedTitanium = matmgr.getTGeoMedium("PIPE_TITANIUM"); + const TGeoMedium* kMedAlu7075 = matmgr.getTGeoMedium("PIPE_AA7075"); + + // Top volume + TGeoVolume* top = gGeoManager->GetVolume("cave"); + TGeoVolume* barrel = gGeoManager->GetVolume("barrel"); + TGeoVolume* caveRB24 = gGeoManager->GetVolume("caveRB24"); + // + // + //////////////////////////////////////////////////////////////////////////////// + // // + // The Central Vacuum system // + // // + //////////////////////////////////////////////////////////////////////////////// + // + // + // The ALICE 
central beam-pipe according to drawing LHCVC2C_0001 + // Drawings of sub-elements: + // + // Pos 7 - Minimised Flange: LHCVFX_P0025 + // Pos 6 - Standard Flange: STDVFUHV0009 + // Pos 8 - Bellow: LHCVBX__0001 + // + // Absolute z-coordinates -82.0 - 400.0 cm + // Total length: 482.0 cm + // It consists of 3 main parts: + // CP/2 The flange on the non-absorber side: 36.5 cm + // CP/1 The central Be pipe: 405.0 cm + // CP/3 The double-bellow and flange on the absorber side: 40.5 cm + + // + /* + // Starting position in z + const float kCPz0 = -400.0; + // Length of the CP/1 section + const float kCP1Length = 405.0; + // Length of the CP/2 section + const float kCP2Length = 36.5; + // Length of the CP/3 section + const float kCP3Length = 40.5; + // Position of the CP/2 section + // const float kCP2pos = kCPz0 + kCP2Length / 2.; + // Position of the CP/3 section + const float kCP3pos = kCPz0 + kCP2Length + kCP1Length + kCP3Length/2.; + */ + + //////////////////// NEW BEAM PIPE GEOMETRY FOR MuonForwardTracker //////////////////////// + // Authors: F. Manso, R. Tieulent + // Drawings from C. Gargiulo : + // \\cern.ch\dfs\Workspaces\c\cgargiul\EXPERIMENT\ALICE\ALICE_MECHANICS\ALICE_DATA_PACKAGE\IN\DETECTORS\ITS_UPGRADE\1-DESIGN\3D_cad_model\R14_20140311_ALI\ + // + // + // central beam pipe + //------------------- Pipe version 4.7 March 2014 ----------------------------- + TGeoVolumeAssembly* beamPipeCsideSection = new TGeoVolumeAssembly("BeamPipeCsideSection"); + // If user set Rmax=0/Thick=0 use defaults, else use user input + const float kBeryliumSectionOuterRadius = (mBePipeRmax > 0.) ? mBePipeRmax : 1.65; + const float kBeryliumSectionThickness = (mBePipeThick > 0.) ? 
mBePipeThick : 0.05; + float kBeryliumSectionZmax = 25.0; + float kBeryliumSectionZmin = -25.0; + + const float kBellowSectionOuterRadius = 2.15; + const float kCSideBPSOuterRadius = 2.22; + const float kCSideBPSWallThickness = 0.15; + const float kBellowSectionZmax = -55.35; + const float kBellowOuterRadius = 2.8; + const float kFirstConeAngle = 15. * TMath::DegToRad(); + const float kChangeThicknessAngle = 45. * TMath::DegToRad(); + const float kCSideBPSLength = 3.53; + const float kDzFirstCone = (kCSideBPSOuterRadius - kBeryliumSectionOuterRadius) / TMath::Tan(kFirstConeAngle); + const float kReduceThicknessPartAfterBPSLength = 1.52; + const float kThinPartBeforeBellowLength = 1.025; + + const float kDistanceBetweenBellows = 2.5; + + const float kAdaptConeZmax = -77.43; + const float kAdaptConeZmin = -80.6; + const float kAdaptConeRmax = 3.0; + const float kFlangeRmax = 4.3; + const float kFlangeLength = 1.4; + + const float kBellowPlieRadius = 0.17; // radius of bellow plies + const float kBellowPlieThickness = 0.03; // Thickness of bellow plies 300 microns + const int kNBellowConvolutions = 7; + + const float kZ1 = kBeryliumSectionZmin; // z of Be - Al junction on the C-side + const float kZ2 = kBellowSectionZmax + kDzFirstCone; // z of end of small diameter part (beginning of first cone before the bellow) + const float kZ3 = kBellowSectionZmax + (kCSideBPSOuterRadius - kBellowSectionOuterRadius) / TMath::Tan(kFirstConeAngle); // z of End of first cone part with 0.8mm thickness + const float kZ4 = kBellowSectionZmax; // z of End of first Cone + const float kZ5 = kBellowSectionZmax - kCSideBPSLength; // z of End of Beam Pipe support section + const float kZ6 = kBellowSectionZmax - kCSideBPSLength - (kCSideBPSOuterRadius - kBellowSectionOuterRadius) / TMath::Tan(kChangeThicknessAngle); // z of End of Beam Pipe support section after reduction of thickness + const float kZ7 = kZ6 - kReduceThicknessPartAfterBPSLength; // Z of end of 800 microns section after Beam
Pipe Support + const float kZ8 = kZ7 - (kBeryliumSectionThickness - kBellowPlieThickness) / TMath::Tan(kChangeThicknessAngle); + const float kZ9 = kZ7 - kThinPartBeforeBellowLength; // Z of the start of first bellow + const float kFirstBellowZmax = kZ9; + + //---------------- Be pipe around the IP ---------- + TGeoTube* berylliumTube = + new TGeoTube("IP_PIPEsh", kBeryliumSectionOuterRadius - kBeryliumSectionThickness, kBeryliumSectionOuterRadius, + (kBeryliumSectionZmax - kBeryliumSectionZmin) / 2); + TGeoVolume* voberylliumTube = new TGeoVolume("IP_PIPE", berylliumTube, kMedBe); + voberylliumTube->SetLineColor(kRed); + + TGeoTube* berylliumTubeVacuum = + new TGeoTube("IP_PIPEVACUUMsh", 0., kBeryliumSectionOuterRadius - kBeryliumSectionThickness, + (kBeryliumSectionZmax - kBeryliumSectionZmin) / 2); + TGeoVolume* voberylliumTubeVacuum = new TGeoVolume("IP_PIPEMOTHER", berylliumTubeVacuum, kMedVac); + voberylliumTubeVacuum->AddNode(voberylliumTube, 1, gGeoIdentity); + voberylliumTubeVacuum->SetVisibility(0); + voberylliumTubeVacuum->SetLineColor(kGreen); + + beamPipeCsideSection->AddNode(voberylliumTubeVacuum, 1, + new TGeoTranslation(0., 0., (kBeryliumSectionZmax + kBeryliumSectionZmin) / 2)); + + //---------------- Al tube ------------------ + TGeoPcon* aluBeforeBellows = new TGeoPcon(0., 360., 9); + aluBeforeBellows->DefineSection(0, kZ9, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius - kBeryliumSectionThickness + kBellowPlieThickness); + aluBeforeBellows->DefineSection(1, kZ8, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius - kBeryliumSectionThickness + kBellowPlieThickness); + aluBeforeBellows->DefineSection(2, kZ7, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius); + aluBeforeBellows->DefineSection(3, kZ6, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius); + aluBeforeBellows->DefineSection(4, kZ5, kCSideBPSOuterRadius - 
kCSideBPSWallThickness, kCSideBPSOuterRadius); + aluBeforeBellows->DefineSection(5, kZ4, kCSideBPSOuterRadius - kCSideBPSWallThickness, kCSideBPSOuterRadius); + aluBeforeBellows->DefineSection(6, kZ3, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius); + aluBeforeBellows->DefineSection(7, kZ2, kBeryliumSectionOuterRadius - kBeryliumSectionThickness, kBeryliumSectionOuterRadius); + aluBeforeBellows->DefineSection(8, kZ1, kBeryliumSectionOuterRadius - kBeryliumSectionThickness, kBeryliumSectionOuterRadius); + TGeoVolume* voaluBeforeBellows = new TGeoVolume("aluBeforeBellows", aluBeforeBellows, kMedAlu2219); + voaluBeforeBellows->SetLineColor(kBlue); + beamPipeCsideSection->AddNode(voaluBeforeBellows, 1, gGeoIdentity); + + TGeoPcon* aluBeforeBellowsVacuum = new TGeoPcon(0., 360., 7); + aluBeforeBellowsVacuum->DefineSection(0, kZ9, 0., kBellowSectionOuterRadius - kBeryliumSectionThickness); + aluBeforeBellowsVacuum->DefineSection(1, kZ6, 0., kBellowSectionOuterRadius - kBeryliumSectionThickness); + aluBeforeBellowsVacuum->DefineSection(2, kZ5, 0., kCSideBPSOuterRadius - kCSideBPSWallThickness); + aluBeforeBellowsVacuum->DefineSection(3, kZ4, 0., kCSideBPSOuterRadius - kCSideBPSWallThickness); + aluBeforeBellowsVacuum->DefineSection(4, kZ3, 0., kBellowSectionOuterRadius - kBeryliumSectionThickness); + aluBeforeBellowsVacuum->DefineSection(5, kZ2, 0., kBeryliumSectionOuterRadius - kBeryliumSectionThickness); + aluBeforeBellowsVacuum->DefineSection(6, kZ1, 0., kBeryliumSectionOuterRadius - kBeryliumSectionThickness); + TGeoVolume* voaluBeforeBellowsVacuum = new TGeoVolume("aluBeforeBellowsVacuum", aluBeforeBellowsVacuum, kMedVac); + voaluBeforeBellowsVacuum->SetVisibility(1); + voaluBeforeBellowsVacuum->SetLineColor(kGreen); + voaluBeforeBellows->AddNode(voaluBeforeBellowsVacuum, 1, gGeoIdentity); + //------------------------------------------------- + + float kBellowLength = kNBellowConvolutions * (4. * kBellowPlieRadius - 2. 
* kBellowPlieThickness); + // ------------------ First Bellow -------------------- + TGeoVolume* vobellows1 = + makeBellowCside("bellows1", kNBellowConvolutions, kBellowSectionOuterRadius - kBeryliumSectionThickness, + kBellowOuterRadius, kBellowPlieRadius, kBellowPlieThickness); + beamPipeCsideSection->AddNode( + vobellows1, 1, new TGeoTranslation(0., 0., kFirstBellowZmax - kBellowLength / 2. - 2. * kBellowPlieRadius)); + //------------------------------------------------------ + + const float kZ10 = kFirstBellowZmax - kBellowLength; // End of First bellow + const float kZ12 = kZ10 - kThinPartBeforeBellowLength; + const float kZ11 = kZ12 + + (kBeryliumSectionThickness - kBellowPlieThickness) / + TMath::Tan(kChangeThicknessAngle); // End of 300 microns thickness part after first bellow + const float kZ13 = kZ12 - kDistanceBetweenBellows; + const float kZ14 = kZ13 - (kBeryliumSectionThickness - kBellowPlieThickness) / TMath::Tan(kChangeThicknessAngle); + const float kZ15 = kZ14 - kThinPartBeforeBellowLength; + const float kSecondBellowZmax = kZ15; + + //---------- Al tube between the bellows ---------- + TGeoPcon* tube4 = new TGeoPcon(0., 360., 6); + tube4->DefineSection(0, kZ10, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius - kBeryliumSectionThickness + kBellowPlieThickness); + tube4->DefineSection(1, kZ11, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius - kBeryliumSectionThickness + kBellowPlieThickness); + tube4->DefineSection(2, kZ12, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius); + tube4->DefineSection(3, kZ13, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius); + tube4->DefineSection(4, kZ14, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius - kBeryliumSectionThickness + kBellowPlieThickness); + tube4->DefineSection(5, kZ15, kBellowSectionOuterRadius - kBeryliumSectionThickness, 
kBellowSectionOuterRadius - kBeryliumSectionThickness + kBellowPlieThickness); + TGeoVolume* votube4 = new TGeoVolume("votube4", tube4, kMedAlu2219); + votube4->SetLineColor(kBlue); + beamPipeCsideSection->AddNode(votube4, 1, gGeoIdentity); + + TGeoTube* tube4Vacuum = new TGeoTube(0., kBellowSectionOuterRadius - kBeryliumSectionThickness, -(kZ15 - kZ10) / 2.); + TGeoVolume* votube4Vacuum = new TGeoVolume("tube4Vacuum", tube4Vacuum, kMedVac); + votube4Vacuum->SetVisibility(1); + votube4->AddNode(votube4Vacuum, 1, new TGeoTranslation(0., 0., (kZ10 + kZ15) / 2.)); + + // ------------------ Second Bellow -------------------- + TGeoVolume* vobellows2 = + makeBellowCside("bellows2", kNBellowConvolutions, kBellowSectionOuterRadius - kBeryliumSectionThickness, + kBellowOuterRadius, kBellowPlieRadius, kBellowPlieThickness); + beamPipeCsideSection->AddNode( + vobellows2, 1, new TGeoTranslation(0., 0., kSecondBellowZmax - kBellowLength / 2. - 2. * kBellowPlieRadius)); + // ----------------------------------------------------- + + const float kZ16 = kSecondBellowZmax - kBellowLength; // End of Second bellow + const float kZ18 = kZ16 - kThinPartBeforeBellowLength; + const float kZ17 = kZ18 + + (kBeryliumSectionThickness - kBellowPlieThickness) / + TMath::Tan(kChangeThicknessAngle); // End of 300 microns thickness part after second bellow + const float kZ19 = kAdaptConeZmax; // Start of the Adaptation Cone + const float kZ20 = kAdaptConeZmin; // End of the Adaptation Cone + const float kZ21 = kAdaptConeZmin - kFlangeLength; // End of the Flange + + //----------- 15 deg Conical adaptator + flange ---------- + TGeoPcon* adaptator = new TGeoPcon(0., 360., 7); + adaptator->DefineSection(0, kZ16, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius - kBeryliumSectionThickness + kBellowPlieThickness); + adaptator->DefineSection(1, kZ17, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius - kBeryliumSectionThickness +
kBellowPlieThickness); + adaptator->DefineSection(2, kZ18, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius); + adaptator->DefineSection(3, kZ19, kBellowSectionOuterRadius - kBeryliumSectionThickness, kBellowSectionOuterRadius); + adaptator->DefineSection(4, kZ20, kBellowSectionOuterRadius - kBeryliumSectionThickness, kAdaptConeRmax); + adaptator->DefineSection(5, kZ20, kBellowSectionOuterRadius - kBeryliumSectionThickness, kFlangeRmax); + adaptator->DefineSection(6, kZ21, kBellowSectionOuterRadius - kBeryliumSectionThickness, kFlangeRmax); + TGeoVolume* voadaptator = new TGeoVolume("voadaptator", adaptator, kMedAlu2219); + voadaptator->SetLineColor(kBlue); + beamPipeCsideSection->AddNode(voadaptator, 1, gGeoIdentity); + + TGeoPcon* adaptatorvide = new TGeoPcon(0., 360., 4); + adaptatorvide->DefineSection(0, kZ16, 0., kBellowSectionOuterRadius - kBeryliumSectionThickness); + adaptatorvide->DefineSection(1, kZ19, 0., kBellowSectionOuterRadius - kBeryliumSectionThickness); + adaptatorvide->DefineSection(2, kZ20, 0., kAdaptConeRmax - kBeryliumSectionThickness); + adaptatorvide->DefineSection(3, kZ21, 0., kAdaptConeRmax - kBeryliumSectionThickness); + TGeoVolume* voadaptatorvide = new TGeoVolume("voadaptatorvide", adaptatorvide, kMedVac); + voadaptatorvide->SetVisibility(1); + // voadaptatorvide->SetLineColor(kGreen); + voadaptator->AddNode(voadaptatorvide, 1, gGeoIdentity); + //------------------------------------------------------ + + barrel->AddNode(beamPipeCsideSection, 1, new TGeoTranslation(0., 30., 0.)); + + /////////////////////////////////////////////////////////////////// + // Beam Pipe support F.M. 
2021 rev 2023 // + /////////////////////////////////////////////////////////////////// + + // Beam Pipe Support + TGeoVolume* beamPipeSupport = new TGeoVolumeAssembly("BeamPipeSupport"); + const float kBeamPipesupportZpos = kZ5; + + // Dimensions : + const float kSupportXdim = 20.67; + const float kBeamPipeRingZdim = 3.6; + const float kVespelRmax = 2.3; + const float kVespelRmin = 2.22; + const float kBeampipeCarbonCollarRmin = 2.5; + const float kBeampipeCarbonCollarRmax = 2.7; + const float kFixationCarbonCollarRmin = 1.5; + const float kFixationCarbonCollarRmax = 1.7; + const float kFixationCarbonCollarDZ = 2.5; + const float kSkinThickness = 0.3; + const float kSkinXdim = 14.2; + const float kSkinYdim = 1.4; + const float kSkinZdim = kFixationCarbonCollarDZ; + const float kCarbonEarsXdim = 2.8; + const float kCarbonEarsYdimIn = 1.1; + const float kCarbonEarsYdimOut = 0.6; + const float kCarbonEarsZdim = kFixationCarbonCollarDZ; + const float kScrewDiameter = 0.4; + const float kScrewHeadHeight = 0.2; + const float kScrewHeadDiameter = 0.6; + const float kScrewPositionIn = 3.25; + const float kScrewPositionOut = 21.80; + const float kScrewThreadLength = 1.0; + const float holeSightDiameterOut = 0.60; + const float holeSightDiameterIn = 0.25; + + // Support Bar + TGeoVolumeAssembly* supportBar = new TGeoVolumeAssembly("BPS_SupportBar"); + TGeoBBox* carbonSkinBPS = new TGeoBBox("carbonSkinBPS", kSkinXdim / 2., kSkinYdim / 2., kSkinZdim / 2.); + TGeoBBox* foambarBPS = new TGeoBBox("foambarBPS", kSkinXdim / 2. - kSkinThickness, kSkinYdim / 2. - kSkinThickness, + kSkinZdim / 2. 
- kSkinThickness / 2.); + TGeoBBox* carbonEarsBPSin = new TGeoBBox("carbonEarsBPSin", kCarbonEarsXdim / 2., kCarbonEarsYdimIn / 2., kCarbonEarsZdim / 2.); + TGeoBBox* carbonEarsBPSout = new TGeoBBox("carbonEarsBPSout", kCarbonEarsXdim / 2., kCarbonEarsYdimOut / 2., kCarbonEarsZdim / 2.); + + //===== building the main support bar in carbon ==== + TGeoTranslation* tBP1 = new TGeoTranslation("tBP1", (kSkinXdim + kCarbonEarsXdim) / 2., -(kSkinYdim - kCarbonEarsYdimIn) / 2., 0.); + TGeoTranslation* tBP2 = new TGeoTranslation("tBP2", -(kSkinXdim + kCarbonEarsXdim) / 2., 0., 0.); + tBP1->RegisterYourself(); + tBP2->RegisterYourself(); + + TGeoRotation* rotScrew = new TGeoRotation("rotScrew", 0., 90., 0.); + rotScrew->RegisterYourself(); + + TGeoTube* holeScrew = new TGeoTube("holeScrew", 0., kScrewDiameter / 2., kCarbonEarsYdimIn / 2. + 0.001); + TGeoTube* holeSight = new TGeoTube("holeSight", 0., holeSightDiameterOut / 2., kSkinZdim / 2. + 0.001); + TGeoTranslation* tHoleSight = new TGeoTranslation("tHoleSight", kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax - 6.55, 0., 0.); + tHoleSight->RegisterYourself(); + double kXHoleIn = kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax - kScrewPositionIn; + double kXHoleOut = kSkinXdim / 2. 
+ kCarbonEarsXdim + kBeampipeCarbonCollarRmax - kScrewPositionOut; + TGeoCombiTrans* tHoleScrew1 = new TGeoCombiTrans("tHoleScrew1", kXHoleIn, -(kSkinYdim - kCarbonEarsYdimIn) / 2., -0.7, rotScrew); + TGeoCombiTrans* tHoleScrew2 = new TGeoCombiTrans("tHoleScrew2", kXHoleIn, -(kSkinYdim - kCarbonEarsYdimIn) / 2., 0.7, rotScrew); + TGeoCombiTrans* tHoleScrew3 = new TGeoCombiTrans("tHoleScrew3", kXHoleOut, -(kSkinYdim - kCarbonEarsYdimIn) / 2., -0.7, rotScrew); + TGeoCombiTrans* tHoleScrew4 = new TGeoCombiTrans("tHoleScrew4", kXHoleOut, -(kSkinYdim - kCarbonEarsYdimIn) / 2., 0.7, rotScrew); + tHoleScrew1->RegisterYourself(); + tHoleScrew2->RegisterYourself(); + tHoleScrew3->RegisterYourself(); + tHoleScrew4->RegisterYourself(); + + TGeoCompositeShape* supportBarCarbon = new TGeoCompositeShape("BPS_supportBarCarbon", "(carbonSkinBPS-foambarBPS)+carbonEarsBPSin:tBP1-holeScrew:tHoleScrew1-holeScrew:tHoleScrew2+carbonEarsBPSout:tBP2-holeSight:tHoleSight-holeScrew:tHoleScrew3-holeScrew:tHoleScrew4"); + TGeoVolume* supportBarCarbonVol = new TGeoVolume("BPS_supportBarCarbon", supportBarCarbon, kMedCarbonFiber); + supportBarCarbonVol->SetLineColor(kGray + 2); + supportBar->AddNode(supportBarCarbonVol, 1, new TGeoTranslation(-(kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax), 0, 0)); + TGeoRotation* rotBar1 = new TGeoRotation("rotBar1", 0., 180., 180.); + rotBar1->RegisterYourself(); + TGeoCombiTrans* transBar1 = new TGeoCombiTrans("transBar1", kSkinXdim / 2. 
+ kCarbonEarsXdim + kBeampipeCarbonCollarRmax, 0, 0, rotBar1); + transBar1->RegisterYourself(); + supportBar->AddNode(supportBarCarbonVol, 2, transBar1); + //================================================== + + //==== Adding the internal foam volumes ============ + TGeoCompositeShape* foamVolume = new TGeoCompositeShape("foamVolume", "foambarBPS-holeSight:tHoleSight"); + TGeoVolume* FoamVolume = new TGeoVolume("supportBarFoam", foamVolume, kMedRohacell); + FoamVolume->SetLineColor(kGreen); + TGeoRotation* rotBar2 = new TGeoRotation("rotBar2", 0., 0., 180.); + rotBar2->RegisterYourself(); + TGeoCombiTrans* transBar2 = new TGeoCombiTrans("transBar2", kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax, 0, 0, rotBar2); + transBar2->RegisterYourself(); + supportBar->AddNode(FoamVolume, 1, transBar1); + supportBar->AddNode(FoamVolume, 2, new TGeoTranslation(-(kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax), 0, 0)); + //================================================== + + //================= Screws ==================== + TGeoVolumeAssembly* screw = new TGeoVolumeAssembly("screw"); + TGeoTube* headScrew = new TGeoTube("headScrew", 0., kScrewHeadDiameter / 2., kScrewHeadHeight / 2.); + TGeoVolume* HeadScrew = new TGeoVolume("HeadScrew", headScrew, kMedTitanium); + HeadScrew->SetLineColor(kRed); + TGeoTube* threadScrew = new TGeoTube("threadScrew", 0., kScrewDiameter / 2., kCarbonEarsYdimIn / 2.); + TGeoVolume* ThreadScrew = new TGeoVolume("ThreadScrew", threadScrew, kMedTitanium); + ThreadScrew->SetLineColor(kRed); + screw->AddNode(HeadScrew, 1, new TGeoTranslation(0., 0., -(kCarbonEarsYdimIn + kScrewHeadHeight) / 2.)); + screw->AddNode(ThreadScrew, 1); + TGeoCombiTrans* tScrew1 = new TGeoCombiTrans("transScrew1", kScrewPositionIn, (kCarbonEarsYdimIn - kSkinYdim) / 2., -0.7, rotScrew); + TGeoCombiTrans* tScrew2 = new TGeoCombiTrans("transScrew2", kScrewPositionIn, (kCarbonEarsYdimIn - kSkinYdim) / 2., 0.7, rotScrew); + TGeoCombiTrans* tScrew3 = 
new TGeoCombiTrans("transScrew3", -kScrewPositionIn, (kCarbonEarsYdimIn - kSkinYdim) / 2., -0.7, rotScrew); + TGeoCombiTrans* tScrew4 = new TGeoCombiTrans("transScrew4", -kScrewPositionIn, (kCarbonEarsYdimIn - kSkinYdim) / 2., 0.7, rotScrew); + tScrew1->RegisterYourself(); + tScrew2->RegisterYourself(); + tScrew3->RegisterYourself(); + tScrew4->RegisterYourself(); + supportBar->AddNode(screw, 1, tScrew1); + supportBar->AddNode(screw, 2, tScrew2); + supportBar->AddNode(screw, 3, tScrew3); + supportBar->AddNode(screw, 4, tScrew4); + //============================================== + + // === Optical sights (assuming the same than the MFT ones) === + TGeoVolumeAssembly* fixationSight = new TGeoVolumeAssembly("fixationSight"); + TGeoTube* screwSight = new TGeoTube("screwSight", holeSightDiameterIn / 2., holeSightDiameterOut / 2., kScrewThreadLength / 2.); + TGeoVolume* ScrewSight = new TGeoVolume("ScrewSight", screwSight, kMedSteel); + ScrewSight->SetLineColor(kBlue); + double supportSightLength = 0.5; + TGeoTube* supportSight = new TGeoTube("supportSight", holeSightDiameterIn / 2., 1.4 / 2., supportSightLength / 2.); + TGeoVolume* SupportSight = new TGeoVolume("SupportSight", supportSight, kMedSteel); + SupportSight->SetLineColor(kBlue); + fixationSight->AddNode(ScrewSight, 1); + fixationSight->AddNode(SupportSight, 1, new TGeoTranslation(0., 0., (kScrewThreadLength + supportSightLength) / 2.)); + SupportSight->SetVisibility(kTRUE); + fixationSight->SetVisibility(kTRUE); + TGeoTranslation* tSight1 = new TGeoTranslation("tSight1", 6.55, 0., (kSkinZdim - kScrewThreadLength) / 2.); + TGeoTranslation* tSight2 = new TGeoTranslation("tSight2", -6.55, 0., (kSkinZdim - kScrewThreadLength) / 2.); + tSight1->RegisterYourself(); + tSight2->RegisterYourself(); + supportBar->AddNode(fixationSight, 1, tSight1); + supportBar->AddNode(fixationSight, 2, tSight2); + // ===================== + + beamPipeSupport->AddNode(supportBar, 1); + + //======================= Fixation to pipe 
======================== + TGeoTube* pipeSupportTubeCarbon = new TGeoTube(kBeampipeCarbonCollarRmin, kBeampipeCarbonCollarRmax, kFixationCarbonCollarDZ / 2.); + TGeoVolume* FixationToPipeVol = new TGeoVolume("FixationToPipe", pipeSupportTubeCarbon, kMedCarbonFiber); + FixationToPipeVol->SetLineColor(kGray + 2); + beamPipeSupport->AddNode(FixationToPipeVol, 1); + //================================================================== + + //================ Beam Pipe Ring ================= + TGeoVolumeAssembly* beamPipeRing = new TGeoVolumeAssembly("beamPipeRing"); + TGeoTube* beamPipeRingCarbon = new TGeoTube(kVespelRmax, kBeampipeCarbonCollarRmin, kBeamPipeRingZdim / 2.); + TGeoVolume* beamPipeRingCarbonVol = new TGeoVolume("beamPipeRingCarbon", beamPipeRingCarbon, kMedCarbonFiber); + beamPipeRingCarbonVol->SetLineColor(kGray + 2); + beamPipeRing->AddNode(beamPipeRingCarbonVol, 1, + new TGeoTranslation(0., 0, (kBeamPipeRingZdim - kFixationCarbonCollarDZ) / 2.)); + TGeoTube* beamPipeRingVespel = new TGeoTube(kVespelRmin, kVespelRmax, (kBeamPipeRingZdim + 0.4) / 2.); + TGeoVolume* beamPipeRingVespelVol = new TGeoVolume("beamPipeRingVespel", beamPipeRingVespel, kMedPolyimide); + beamPipeRingVespelVol->SetLineColor(kGreen + 2); + beamPipeRing->AddNode(beamPipeRingVespelVol, 1, + new TGeoTranslation(0., 0, (kBeamPipeRingZdim - kFixationCarbonCollarDZ) / 2.)); + beamPipeSupport->AddNode(beamPipeRing, 1); + beamPipeSupport->SetVisibility(1); + beamPipeSupport->IsVisible(); + //================================================== + + //============ Wings (connecting the support bars to the cage support) =============== + TGeoVolumeAssembly* Wing = new TGeoVolumeAssembly("Wing"); + + // Rod + double lengthRod = 28.7 - 1.0 - 1.0 - 1.9; // slightly decreased to accommodate the fixation pieces + double diameterRod = 1.815; // slightly increased to account for the two ends of the rod + double xRod = 22.1; + TGeoTube* Rod = new TGeoTube(0., diameterRod / 2., lengthRod / 2.); +
TGeoVolume* rod = new TGeoVolume("rod", Rod, kMedAlu7075); + rod->SetLineColor(kGray); + + // Connector rod / beam support + double lengthFixRod = 4.0; + double diameterFixRod = 3.0; + //--------------------------------------- + TGeoTube* RodBracket = new TGeoTube("RodBracket", 0., diameterFixRod / 2., lengthFixRod / 2.); + TGeoBBox* BracketPlane = new TGeoBBox("BracketPlane", 3., 3., 3.); + TGeoTranslation* tBracketPlane = new TGeoTranslation("tBracketPlane", 0., 3. - kCarbonEarsYdimOut / 2., (lengthFixRod + 6.) / 2. - 2.6); + tBracketPlane->RegisterYourself(); + TGeoCompositeShape* Bracket = new TGeoCompositeShape("Bracket", "RodBracket-BracketPlane:tBracketPlane"); + TGeoVolume* bracket = new TGeoVolume("bracket", Bracket, kMedAlu7075); + //--------------------------------------- + + // Carbon box surrounding the aluminum rod + TGeoVolumeAssembly* carbonBox = new TGeoVolumeAssembly("carbonBox"); + double eCarbonBox = 0.1; + double trdWidth = 8.6; + double trdLength = 11.05 - 1.0 - 0.6; // on each side to accommodate the bracket and TRDPlate + TGeoTrd1* trdOut = new TGeoTrd1("trdOut", 1.405 / 2, 6.632 / 2, trdLength / 2, trdWidth / 2); + TGeoTrd1* trdIn = new TGeoTrd1("trdIn", 1.405 / 2 - eCarbonBox, 6.632 / 2 - eCarbonBox, trdLength / 2 + eCarbonBox, trdWidth / 2 - eCarbonBox); + TGeoCompositeShape* trd = new TGeoCompositeShape("trd", "trdOut-trdIn"); + TGeoVolume* TRD = new TGeoVolume("TRD", trd, kMedCarbonFiber); + TRD->SetLineColor(kGray); + + // To close the carbon box + TGeoTrd1* trdPlate = new TGeoTrd1("trdPlate", 1.405 / 2, 6.632 / 2, 1.0 / 2, trdWidth / 2); + TGeoVolume* TRDPlate = new TGeoVolume("TDRPlate", trdPlate, kMedAlu7075); + + // To connect on the main cage + TGeoBBox* plateBox = new TGeoBBox("plateBox", 7.5 / 2., 9.5 / 2., 1.9 / 2.); + TGeoBBox* removeBox = new TGeoBBox("removeBox", 2.1 / 2 + 0.0001, 2.5 / 2. + 0.0001, 1.9 / 2.
+ 0.0001); + TGeoTranslation* tRemove1 = new TGeoTranslation("tRemove1", (7.5 - 2.1) / 2, -(9.5 - 2.5) / 2, 0.); + TGeoTranslation* tRemove2 = new TGeoTranslation("tRemove2", -(7.5 - 2.1) / 2, -(9.5 - 2.5) / 2, 0.); + tRemove1->RegisterYourself(); + tRemove2->RegisterYourself(); + + // Connectors Rod / Cage + TGeoCompositeShape* PlateBox = new TGeoCompositeShape("PlateBox", "plateBox-removeBox:tRemove1-removeBox:tRemove2"); + TGeoVolume* PLATEBox = new TGeoVolume("PLATEBox", PlateBox, kMedAlu7075); + + TGeoRotation* PlateRot = new TGeoRotation("PlateRot", 0., 0., 0.); + TGeoRotation* FrontRot = new TGeoRotation("FrontRot", 180., 90., 0.); + TGeoCombiTrans* tFrontCarbonBox = new TGeoCombiTrans("tFrontCarbonBox", 0., 0., 0., FrontRot); + PlateRot->RegisterYourself(); + FrontRot->RegisterYourself(); + tFrontCarbonBox->RegisterYourself(); + TGeoCombiTrans* tTRDPlate = new TGeoCombiTrans("tTRDPlate", 0., 0., -(trdLength + 1.0) / 2, FrontRot); + tTRDPlate->RegisterYourself(); + TRDPlate->SetLineColor(kGray + 2); + TGeoCombiTrans* tPlateBox = new TGeoCombiTrans("tPlateBox", 0., 0., -(trdLength + 1.9) / 2 - 1.0, PlateRot); + tPlateBox->RegisterYourself(); + PLATEBox->SetLineColor(kGray); + + double xyOut[16] = {0}; + xyOut[0] = 3.316; + xyOut[1] = 4.3; + xyOut[2] = 0.7025; + xyOut[3] = -xyOut[1]; + xyOut[4] = -xyOut[2]; + xyOut[5] = -xyOut[1]; + xyOut[6] = -xyOut[0]; + xyOut[7] = xyOut[1]; + //-------------- + xyOut[8] = 1.3; + xyOut[9] = 1.3 - xyOut[1] + xyOut[8]; + xyOut[10] = xyOut[8]; + xyOut[11] = -xyOut[8] - xyOut[1] + xyOut[8]; + xyOut[12] = -xyOut[8]; + xyOut[13] = -xyOut[8] - xyOut[1] + xyOut[8]; + xyOut[14] = -xyOut[8]; + xyOut[15] = xyOut[8] - xyOut[1] + xyOut[8]; + double ARB8Length = 15.35; + TGeoArb8* ARB8Out = new TGeoArb8("ARB8Out", ARB8Length / 2, xyOut); + + double xyIn[16] = {0}; + xyIn[0] = xyOut[0] - eCarbonBox; + xyIn[1] = xyOut[1] - eCarbonBox; + xyIn[2] = 0.7025 - eCarbonBox; + xyIn[3] = -xyIn[1]; + xyIn[4] = -xyIn[2]; + xyIn[5] = -xyIn[1]; + 
xyIn[6] = -xyIn[0]; + xyIn[7] = xyIn[1]; + //-------------- + xyIn[8] = xyOut[8] - eCarbonBox; + xyIn[9] = xyOut[8] - xyIn[1] + xyIn[8] - eCarbonBox; + xyIn[10] = xyIn[8]; + xyIn[11] = -xyIn[8] - xyOut[1] + xyOut[8]; + xyIn[12] = -xyIn[8]; + xyIn[13] = -xyIn[8] - xyOut[1] + xyOut[8]; + xyIn[14] = -xyIn[8]; + xyIn[15] = xyIn[8] - xyOut[1] + xyOut[8]; + TGeoArb8* ARB8In = new TGeoArb8("ARB8In", ARB8Length / 2 + 0.0001, xyIn); + + TGeoCompositeShape* arb8 = new TGeoCompositeShape("arb8", "ARB8Out-ARB8In"); + TGeoVolume* ARB8 = new TGeoVolume("ARB8", arb8, kMedCarbonFiber); + ARB8->SetLineColor(kGray); + TGeoRotation* RearRot = new TGeoRotation("RearRot", 0., 0., 0.); + TGeoCombiTrans* tRearCarbonBox = new TGeoCombiTrans("tRearCarbonBox", 0., 0., (ARB8Length + trdLength) / 2, RearRot); + RearRot->RegisterYourself(); + tRearCarbonBox->RegisterYourself(); + //=============================================================== + + carbonBox->AddNode(TRD, 1, tFrontCarbonBox); + carbonBox->AddNode(ARB8, 1, tRearCarbonBox); + carbonBox->AddNode(TRDPlate, 1, tTRDPlate); + carbonBox->AddNode(PLATEBox, 1, tPlateBox); + + TGeoRotation* CarbonBoxRot1 = new TGeoRotation("CarbonBoxRot1", 90., 0., 0.); + double xCarbonBox = xRod + trdWidth / 2 - xyOut[8]; + double zCarbonBox = -trdLength / 2 - ARB8Length - lengthFixRod + 1.3; + TGeoCombiTrans* tCarbonBox1 = new TGeoCombiTrans("tCarbonBox1", -xCarbonBox, 0., zCarbonBox, CarbonBoxRot1); + CarbonBoxRot1->RegisterYourself(); + tCarbonBox1->RegisterYourself(); + TGeoRotation* CarbonBoxRot2 = new TGeoRotation("CarbonBoxRot2", 270., 0., 0.); + TGeoCombiTrans* tCarbonBox2 = new TGeoCombiTrans("tCarbonBox2", xCarbonBox, 0., zCarbonBox, CarbonBoxRot2); + CarbonBoxRot2->RegisterYourself(); + tCarbonBox2->RegisterYourself(); + + Wing->AddNode(rod, 1, new TGeoTranslation(xRod, 0., -(lengthRod / 2. + lengthFixRod) + 1.3)); + Wing->AddNode(rod, 2, new TGeoTranslation(-xRod, 0., -(lengthRod / 2. 
+ lengthFixRod) + 1.3)); + bracket->SetLineColor(kGray); + Wing->AddNode(bracket, 1, new TGeoTranslation(xRod, 0., -lengthFixRod / 2. + 1.3)); + Wing->AddNode(bracket, 2, new TGeoTranslation(-xRod, 0., -lengthFixRod / 2. + 1.3)); + Wing->AddNode(carbonBox, 1, tCarbonBox1); + Wing->AddNode(carbonBox, 2, tCarbonBox2); + + beamPipeSupport->AddNode(Wing, 1); + double mGlobalShift = 2.45; // to be closest to the first bellow according to Corrado blueprints + barrel->AddNode(beamPipeSupport, 1, new TGeoTranslation(0., 30, kBeamPipesupportZpos + kFixationCarbonCollarDZ / 2. - mGlobalShift)); + + ///////////// END NEW BEAM PIPE GEOMETRY FOR MFT //////////////////// + + ///////////////////////////////////////////////////////////////////// + // Side A section after Beryllium + // Authors: M.Sitta - 19 Sep 2014 + // Drawings from C. Gargiulo : + // \\cern.ch\dfs\Workspaces\c\cgargiul\EXPERIMENT\ALICE\ALICE_MECHANICS\ALICE_DATA_PACKAGE\IN\DETECTORS\ITS_UPGRADE\1-DESIGN\0-IF_Control_Drawing\20140207_ICD_ITS_MFT_BP + ///////////////////////////////////////////////////////////////////// + + float kConicalBerilliumMinThickness = 0.08; + float kConicalBerilliumMaxThickness = 0.1; + float kFlangeZ = 483.75; + float kFlangeWidth = 2.74; + float kFlangeThickness = 4.3; + float kConicalBerylliumEnd = 473.3; + float kSupport1 = 178.6; + float kSupport2 = 471.3; + float kSupportWidth = 5.25; + float kPipeRadiusAtSupport1 = 2.2; + float kConicalBePipeEndOuterRadius = 3.0; + + TGeoPcon* tube0 = new TGeoPcon(0., 360., 5); + tube0->DefineSection(0, kFlangeZ - kFlangeWidth / 2, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness, kConicalBePipeEndOuterRadius); + tube0->DefineSection(1, kConicalBerylliumEnd, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness, kConicalBePipeEndOuterRadius); + tube0->DefineSection(2, kSupport1 + kSupportWidth, kPipeRadiusAtSupport1 - kConicalBerilliumMinThickness, kPipeRadiusAtSupport1); + tube0->DefineSection(3, kSupport1, 
kPipeRadiusAtSupport1 - kConicalBerilliumMinThickness, kPipeRadiusAtSupport1); + tube0->DefineSection(4, kBeryliumSectionZmax, kBeryliumSectionOuterRadius - kConicalBerilliumMinThickness, kBeryliumSectionOuterRadius); // need a transition to kConicalBerilliumMaxThickness + + TGeoPcon* tube0vide = new TGeoPcon(0., 360., 5); + tube0vide->DefineSection(0, kFlangeZ - kFlangeWidth / 2, 0., kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness - 0.01); + tube0vide->DefineSection(1, kConicalBerylliumEnd, 0., kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness - 0.01); + tube0vide->DefineSection(2, kSupport1 + kSupportWidth, 0, kPipeRadiusAtSupport1 - kConicalBerilliumMinThickness - 0.01); + tube0vide->DefineSection(3, kSupport1, 0, kPipeRadiusAtSupport1 - kConicalBerilliumMinThickness - 0.01); + tube0vide->DefineSection(4, kBeryliumSectionZmax, 0., kBeryliumSectionOuterRadius - kConicalBerilliumMinThickness - 0.01); + + TGeoVolume* votube0 = new TGeoVolume("votube0", tube0, kMedBe); + votube0->SetLineColor(kRed); + TGeoVolume* votube0vide = new TGeoVolume("votube0vide", tube0vide, kMedVac); + votube0vide->SetLineColor(kGreen); + + barrel->AddNode(votube0, 1, new TGeoTranslation(0., 30., 0.)); + barrel->AddNode(votube0vide, 1, new TGeoTranslation(0., 30., 0.)); + + TGeoVolume* beampipeSupportA1 = makeSupportBar("A1", kPipeRadiusAtSupport1 + 0.01, kPipeRadiusAtSupport1 + 0.38, 20.67, 14.25); + barrel->AddNode(beampipeSupportA1, 1, new TGeoTranslation(0., 30, kSupport1 + kSupportWidth / 2.)); + + // Length is approximate + TGeoVolume* beampipeSupportA2 = makeSupportBar("A2", kConicalBePipeEndOuterRadius, kConicalBePipeEndOuterRadius + 0.38, 44, 37.5); + barrel->AddNode(beampipeSupportA2, 1, new TGeoTranslation(0., 30, kConicalBerylliumEnd + kSupportWidth / 2.)); + + TGeoPcon* Bolt1 = new TGeoPcon(0., 360, 8); + Bolt1->DefineSection(0, 0, 0, 0.5); + Bolt1->DefineSection(1, 0.515 - 0.01, 0, 0.5); + Bolt1->DefineSection(2, 0.515 - 0.01, 0, 0.25); + 
Bolt1->DefineSection(3, kFlangeWidth + 0.515 + 0.01, 0, 0.25); + Bolt1->DefineSection(4, kFlangeWidth + 0.515 + 0.01, 0, 0.5); + Bolt1->DefineSection(5, kFlangeWidth + 0.515 + 0.55, 0, 0.5); + Bolt1->DefineSection(6, kFlangeWidth + 0.515 + 0.55, 0, 0.25); + Bolt1->DefineSection(7, kFlangeWidth + 0.515 + 0.55 + 0.5, 0, 0.25); + Bolt1->SetName("BOLT"); + + TGeoVolume* volBolt1 = new TGeoVolume("volBolt1", Bolt1, kMedTitanium); + volBolt1->SetLineWidth(2); + volBolt1->SetLineColor(kRed); + + TGeoTranslation* t1 = new TGeoTranslation((kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), (kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t1->SetName("t1"); + t1->RegisterYourself(); + TGeoTranslation* t2 = new TGeoTranslation((kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), (kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t2->SetName("t2"); + t2->RegisterYourself(); + TGeoTranslation* t3 = new TGeoTranslation(-(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), (kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t3->SetName("t3"); + t3->RegisterYourself(); + TGeoTranslation* t4 = new TGeoTranslation(-(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), (kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t4->SetName("t4"); + t4->RegisterYourself(); + TGeoTranslation* t5 = new 
TGeoTranslation(-(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), -(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t5->SetName("t5"); + t5->RegisterYourself(); + TGeoTranslation* t6 = new TGeoTranslation(-(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), -(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t6->SetName("t6"); + t6->RegisterYourself(); + TGeoTranslation* t7 = new TGeoTranslation((kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), -(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t7->SetName("t7"); + t7->RegisterYourself(); + TGeoTranslation* t8 = new TGeoTranslation((kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Cos(TMath::Pi() / 8), -(kConicalBePipeEndOuterRadius + (kFlangeThickness - kConicalBePipeEndOuterRadius) / 2) * TMath::Sin(TMath::Pi() / 8), kFlangeZ - kFlangeWidth / 2 - 0.515); + t8->SetName("t8"); + t8->RegisterYourself(); + + TGeoVolumeAssembly* Bolts = new TGeoVolumeAssembly("Bolts"); + Bolts->AddNode(volBolt1, 1, t1); + Bolts->AddNode(volBolt1, 2, t2); + Bolts->AddNode(volBolt1, 3, t3); + Bolts->AddNode(volBolt1, 4, t4); + Bolts->AddNode(volBolt1, 5, t5); + Bolts->AddNode(volBolt1, 6, t6); + Bolts->AddNode(volBolt1, 7, t7); + Bolts->AddNode(volBolt1, 8, t8); + + barrel->AddNode(Bolts, 1, new TGeoTranslation(0., 30., 0.)); + + TGeoTranslation* Tflange = new TGeoTranslation(0, 0, kFlangeZ); + Tflange->SetName("Tflange"); + Tflange->RegisterYourself(); + + // Flange + 
TGeoTube* flange = new TGeoTube("voFlangeA1", kConicalBePipeEndOuterRadius + 0.01, kFlangeThickness, kFlangeWidth / 2.); + + TGeoPcon* HoleF = new TGeoPcon("HoleF", 0., 360., 2); + HoleF->DefineSection(0, 0., 0, 0.25 + 0.01); + HoleF->DefineSection(1, 4.305, 0, 0.25 + 0.01); + + // create the flange with holes for the titanium bolts + TGeoCompositeShape* FlangeWithHoles = new TGeoCompositeShape("voFlangeWithHoles", "((voFlangeA1:Tflange)-((voFlangeA1:Tflange)*(HoleF:t1+HoleF:t2+HoleF:t3+HoleF:t4+HoleF:t5+HoleF:t6+HoleF:t7+HoleF:t8)))"); + + TGeoVolume* volflange = new TGeoVolume("voFlangeHoles", FlangeWithHoles, kMedAlBe); + volflange->SetLineWidth(2); + volflange->SetLineColor(kGray); + + barrel->AddNode(volflange, 1, new TGeoTranslation(0., 30., 0.)); + + TGeoPcon* pipeSamell = new TGeoPcon(0., 360., 2); + pipeSamell->DefineSection(0, kFlangeZ + kFlangeWidth / 2, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness, kConicalBePipeEndOuterRadius); + pipeSamell->DefineSection(1, kFlangeZ + 5.13 + 0.435 + 0.4 + 0.08, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness, kConicalBePipeEndOuterRadius); + pipeSamell->SetName("pipeSamell"); + + TGeoVolume* VolpipeSmall = new TGeoVolume("voPipeSmallVac", pipeSamell, kMedAlu2219); + VolpipeSmall->SetLineWidth(2); + barrel->AddNode(VolpipeSmall, 1, new TGeoTranslation(0., 30., 0.)); + + TGeoPcon* pipeSmallVac = new TGeoPcon(0., 360., 2); + pipeSmallVac->DefineSection(0, kFlangeZ + kFlangeWidth / 2, 0, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness - 0.01); + pipeSmallVac->DefineSection(1, kFlangeZ + 5.13 + 0.435 + 0.4 + 0.08, 0, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness - 0.01); + TGeoVolume* vopipeSmallVac = new TGeoVolume("voPipeSmallVac", pipeSmallVac, kMedVac); + vopipeSmallVac->SetLineColor(kGreen); + + barrel->AddNode(vopipeSmallVac, 1, new TGeoTranslation(0., 30., 0.)); + + // -- Bellows on A side + // float plieradius = (3.72 + (2. * 7 - 2.) * 0.03) / (4. 
* 7); // radius of bellows "plis" + float plieradiusA = 0.2; // radius of bellow plies + + // ------------------ First Bellow -------------------- + // Inner: 3.0 cm, outer 3.97 cm length 8.47 cm with 10 wiggles + // check meaning of dU ; it is probably the total length, see also below + TGeoVolume* vobellows1A = makeBellow("bellows1A", 10, 3.0, 3.97, 8.47, plieradiusA, 0.03); + // Z position is rough for now. + barrel->AddNode(vobellows1A, 1, new TGeoTranslation(0., 30., kFlangeZ + 10)); + // Comments: removing 1/2 plie (see makeBellow): 0.31= 2*0.17-0.03 and 0.08: free space + + // ------------------ Outer pipe after flange -------------------- + TGeoPcon* pipeOut = new TGeoPcon(0., 360., 2); + pipeOut->DefineSection(0, kFlangeZ + 13.6 - 0.08, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness, kConicalBePipeEndOuterRadius); + pipeOut->DefineSection(1, 714.6, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness, kConicalBePipeEndOuterRadius); + + TGeoVolume* OuterPIPE = new TGeoVolume("pipeOut", pipeOut, kMedAlu2219); + barrel->AddNode(OuterPIPE, 1, new TGeoTranslation(0., 30., 0.)); + + // The end of the barrel volume is at 714.6 cm, after that we start with RB24 volume + TGeoPcon* pipeOutVac = new TGeoPcon(0., 360., 2); + pipeOutVac->DefineSection(0, kFlangeZ + 13.6 - 0.08, 0, kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness); + pipeOutVac->DefineSection(1, 714.6, 0., kConicalBePipeEndOuterRadius - kConicalBerilliumMaxThickness); + + TGeoVolume* OuterPIPEVac = new TGeoVolume("pipeOutVac", pipeOutVac, kMedAlu2219); + barrel->AddNode(OuterPIPEVac, 1, new TGeoTranslation(0., 30., 0.)); + + //------------------------------------------------- + + //////////////////////////////////////////////////////////////////////////////// + // // + // RB24/1 // + // // + //////////////////////////////////////////////////////////////////////////////// + // + // + // Drawing LHCVC2U_0001 + // Copper Tube RB24/1 393.5 cm + // Warm module VMACA 18.0 cm 
+ // Annular Ion Pump 35.0 cm + // Valve 7.5 cm + // Warm module VMABC 28.0 cm + // ================================ + // 462.0 cm + // + + // Copper Tube RB24/1 + const float kRB24CuTubeL = 381.5; + const float kRB24cCuTubeL = 155.775 + (28.375 - 18.135); + const float kRB24bCuTubeL = kRB24CuTubeL - kRB24cCuTubeL; + const float kRB24CuTubeRi = 5.8 / 2.; + const float kRB24CuTubeRo = 6.0 / 2.; + const float kRB24CuTubeFRo = 7.6; + const float kRB24CuTubeFL = 1.86; + const float kRB24CL = 2. * 597.9; + + // + // introduce cut at end of barrel 714.6m + // + // outside barrel + TGeoVolume* voRB24cCuTubeM = new TGeoVolume("voRB24cCuTubeM", new TGeoTube(0., kRB24CuTubeRi, kRB24cCuTubeL / 2.), kMedVacNFHC); + TGeoVolume* voRB24cCuTube = new TGeoVolume("voRB24cCuTube", new TGeoTube(kRB24CuTubeRi, kRB24CuTubeRo, kRB24cCuTubeL / 2.), kMedAlu2219); + voRB24cCuTubeM->AddNode(voRB24cCuTube, 1, gGeoIdentity); + + // Air outside tube with higher transport cuts + TGeoVolume* voRB24CuTubeA = new TGeoVolume("voRB24CuTubeA", new TGeoTube(80., 81., kRB24bCuTubeL / 2.), kMedAirHigh); + voRB24CuTubeA->SetVisibility(0); + + // Simplified DN 100 Flange + TGeoVolume* voRB24CuTubeF = new TGeoVolume("voRB24CuTubeF", new TGeoTube(kRB24CuTubeRo, kRB24CuTubeFRo, kRB24CuTubeFL / 2.), kMedSteelNF); + + // Warm Module Type VMACA + // LHCVMACA_0002 + // + // Pos 1 Warm Bellows DN100 LHCVBU__0012 + // Pos 2 RF Contact D80 LHCVSR__0005 + // Pos 3 Trans. Tube Flange LHCVSR__0065 + // [Pos 4 Hex. 
Countersunk Screw Bossard BN4719] + // [Pos 5 Tension spring LHCVSR__0011] + // + // + // + // Pos1 Warm Bellows DN100 + // Pos1.1 Bellows LHCVBU__0006 + // + // + // Connection Tubes + // Connection tube inner r + const float kRB24B1ConTubeRin = 10.0 / 2.; + // Connection tube outer r + const float kRB24B1ConTubeRou = 10.3 / 2.; + // Connection tube length + const float kRB24B1ConTubeL = 2.5; + // + const float kRB24B1CompL = 16.375; // Length of the compensator + const float kRB24B1BellowRi = 10.25 / 2.; // Bellow inner radius + const float kRB24B1BellowRo = 11.40 / 2.; // Bellow outer radius + const int kRB24B1NumberOfPlies = 27; // Number of plies + const float kRB24B1BellowUndL = 11.00; // Length of undulated region + const float kRB24B1PlieThickness = 0.015; // Plie thickness + + const float kRB24B1PlieRadius = + (kRB24B1BellowUndL + (2. * kRB24B1NumberOfPlies - 2.) * kRB24B1PlieThickness) / (4. * kRB24B1NumberOfPlies); + + const float kRB24B1ProtTubeThickness = 0.02; // Thickness of the protection tube + const float kRB24B1ProtTubeLength = 4.2; // Length of the protection tube + + const float kRB24B1RFlangeL = 1.86; // Length of the flanges + const float kRB24B1RFlangeLO = 0.26; // Flange overlap + const float kRB24B1RFlangeRO = 11.18 / 2; // Inner radius at Flange overlap + const float kRB24B1RFlangeRou = 15.20 / 2.; // Outer radius of flange + const float kRB24B1RFlangeRecess = 0.98; // Flange recess + const float kRB24B1L = kRB24B1CompL + 2. 
* (kRB24B1RFlangeL - kRB24B1RFlangeRecess); + + /// + // + // Bellow Section + TGeoVolume* voRB24B1Bellow = makeBellow("RB24B1", kRB24B1NumberOfPlies, kRB24B1BellowRi, kRB24B1BellowRo, + kRB24B1BellowUndL, kRB24B1PlieRadius, kRB24B1PlieThickness); + voRB24B1Bellow->SetVisibility(0); + float newRB24B1BellowUndL = 2 * (static_cast(voRB24B1Bellow->GetShape()))->GetDz(); + + // + // Bellow mother volume + TGeoPcon* shRB24B1BellowM = new TGeoPcon(0., 360., 12); + // Connection Tube and Flange + z = 0.; + shRB24B1BellowM->DefineSection(0, z, 0., kRB24B1RFlangeRou); + z += kRB24B1RFlangeLO; + shRB24B1BellowM->DefineSection(1, z, 0., kRB24B1RFlangeRou); + z = kRB24B1RFlangeL; + shRB24B1BellowM->DefineSection(2, z, 0., kRB24B1RFlangeRou); + shRB24B1BellowM->DefineSection(3, z, 0., kRB24B1ConTubeRou); + z = kRB24B1ConTubeL + kRB24B1RFlangeL - kRB24B1RFlangeRecess; + shRB24B1BellowM->DefineSection(4, z, 0., kRB24B1ConTubeRou); + // Plie + shRB24B1BellowM->DefineSection(5, z, 0., kRB24B1BellowRo + kRB24B1ProtTubeThickness); + z += newRB24B1BellowUndL; + shRB24B1BellowM->DefineSection(6, z, 0., kRB24B1BellowRo + kRB24B1ProtTubeThickness); + shRB24B1BellowM->DefineSection(7, z, 0., kRB24B1ConTubeRou); + // Connection Tube and Flange + z = kRB24B1L - shRB24B1BellowM->GetZ(3); + shRB24B1BellowM->DefineSection(8, z, 0., kRB24B1ConTubeRou); + shRB24B1BellowM->DefineSection(9, z, 0., kRB24B1RFlangeRou); + z = kRB24B1L - shRB24B1BellowM->GetZ(1); + shRB24B1BellowM->DefineSection(10, z, 0., kRB24B1RFlangeRou); + z = kRB24B1L - shRB24B1BellowM->GetZ(0); + shRB24B1BellowM->DefineSection(11, z, 0., kRB24B1RFlangeRou); + + TGeoVolume* voRB24B1BellowM = new TGeoVolume("RB24B1BellowM", shRB24B1BellowM, kMedVacNF); + voRB24B1BellowM->SetVisibility(0); + // + // End Parts (connection tube) + TGeoVolume* voRB24B1CT = new TGeoVolume("RB24B1CT", new TGeoTube(kRB24B1ConTubeRin, kRB24B1ConTubeRou, kRB24B1ConTubeL / 2.), kMedSteelNF); + // + // Protection Tube + TGeoVolume* voRB24B1PT = new 
TGeoVolume( + "RB24B1PT", new TGeoTube(kRB24B1BellowRo, kRB24B1BellowRo + kRB24B1ProtTubeThickness, kRB24B1ProtTubeLength / 2.), + kMedSteelNF); + + z = kRB24B1ConTubeL / 2. + (kRB24B1RFlangeL - kRB24B1RFlangeRecess); + + voRB24B1BellowM->AddNode(voRB24B1CT, 1, new TGeoTranslation(0., 0., z)); + z += (kRB24B1ConTubeL / 2. + newRB24B1BellowUndL / 2.); + voRB24B1BellowM->AddNode(voRB24B1Bellow, 1, new TGeoTranslation(0., 0., z)); + z += (newRB24B1BellowUndL / 2. + kRB24B1ConTubeL / 2); + voRB24B1BellowM->AddNode(voRB24B1CT, 2, new TGeoTranslation(0., 0., z)); + z = kRB24B1ConTubeL + kRB24B1ProtTubeLength / 2. + 1. + kRB24B1RFlangeLO; + voRB24B1BellowM->AddNode(voRB24B1PT, 1, new TGeoTranslation(0., 0., z)); + z += kRB24B1ProtTubeLength + 0.6; + voRB24B1BellowM->AddNode(voRB24B1PT, 2, new TGeoTranslation(0., 0., z)); + + // Pos 1/2 Rotatable Flange LHCVBU__0013 + // Pos 1/3 Flange DN100/103 LHCVBU__0018 + // The two flanges can be represented by the same volume + // Outer Radius (including the outer movable ring). 
+ // The inner ring has a diameter of 12.04 cm + + TGeoPcon* shRB24B1RFlange = new TGeoPcon(0., 360., 10); + z = 0.; + shRB24B1RFlange->DefineSection(0, z, 10.30 / 2., kRB24B1RFlangeRou); + z += 0.55; // 5.5 mm added for outer ring + z += 0.43; + shRB24B1RFlange->DefineSection(1, z, 10.30 / 2., kRB24B1RFlangeRou); + shRB24B1RFlange->DefineSection(2, z, 10.06 / 2., kRB24B1RFlangeRou); + z += 0.15; + shRB24B1RFlange->DefineSection(3, z, 10.06 / 2., kRB24B1RFlangeRou); + // In reality this part is rounded + shRB24B1RFlange->DefineSection(4, z, 10.91 / 2., kRB24B1RFlangeRou); + z += 0.15; + shRB24B1RFlange->DefineSection(5, z, 10.91 / 2., kRB24B1RFlangeRou); + shRB24B1RFlange->DefineSection(6, z, 10.06 / 2., kRB24B1RFlangeRou); + z += 0.32; + shRB24B1RFlange->DefineSection(7, z, 10.06 / 2., kRB24B1RFlangeRou); + shRB24B1RFlange->DefineSection(8, z, kRB24B1RFlangeRO, kRB24B1RFlangeRou); + z += kRB24B1RFlangeLO; + shRB24B1RFlange->DefineSection(9, z, kRB24B1RFlangeRO, kRB24B1RFlangeRou); + + TGeoVolume* voRB24B1RFlange = new TGeoVolume("RB24B1RFlange", shRB24B1RFlange, kMedSteelNF); + + z = kRB24B1L - kRB24B1RFlangeL; + voRB24B1BellowM->AddNode(voRB24B1RFlange, 1, new TGeoTranslation(0., 0., z)); + z = kRB24B1RFlangeL; + voRB24B1BellowM->AddNode(voRB24B1RFlange, 2, new TGeoCombiTrans(0., 0., z, rot180)); + // + // Pos 2 RF Contact D80 LHCVSR__0005 + // + // Pos 2.1 RF Contact Flange LHCVSR__0003 + // + TGeoPcon* shRB24B1RCTFlange = new TGeoPcon(0., 360., 6); + const float kRB24B1RCTFlangeRin = 8.06 / 2. 
+ 0.05; // Inner radius + const float kRB24B1RCTFlangeL = 1.45; // Length + + z = 0.; + shRB24B1RCTFlange->DefineSection(0, z, kRB24B1RCTFlangeRin, 8.20 / 2.); + z += 0.15; + shRB24B1RCTFlange->DefineSection(1, z, kRB24B1RCTFlangeRin, 8.20 / 2.); + shRB24B1RCTFlange->DefineSection(2, z, kRB24B1RCTFlangeRin, 8.60 / 2.); + z += 1.05; + shRB24B1RCTFlange->DefineSection(3, z, kRB24B1RCTFlangeRin, 8.60 / 2.); + shRB24B1RCTFlange->DefineSection(4, z, kRB24B1RCTFlangeRin, 11.16 / 2.); + z += 0.25; + shRB24B1RCTFlange->DefineSection(5, z, kRB24B1RCTFlangeRin, 11.16 / 2.); + TGeoVolume* voRB24B1RCTFlange = new TGeoVolume("RB24B1RCTFlange", shRB24B1RCTFlange, kMedCuNF); + z = kRB24B1L - kRB24B1RCTFlangeL; + + voRB24B1BellowM->AddNode(voRB24B1RCTFlange, 1, new TGeoTranslation(0., 0., z)); + // + // Pos 2.2 RF-Contact LHCVSR__0004 + // + TGeoPcon* shRB24B1RCT = new TGeoPcon(0., 360., 3); + const float kRB24B1RCTRin = 8.00 / 2.; // Inner radius + const float kRB24B1RCTCRin = 8.99 / 2.; // Max. inner radius conical section + const float kRB24B1RCTL = 11.78; // Length + const float kRB24B1RCTSL = 10.48; // Length of straight section + const float kRB24B1RCTd = 0.03; // Thickness + + z = 0; + shRB24B1RCT->DefineSection(0, z, kRB24B1RCTCRin, kRB24B1RCTCRin + kRB24B1RCTd); + z = kRB24B1RCTL - kRB24B1RCTSL; + // In the (VSR0004) this section is straight in (LHCVC2U_0001) it is conical ???? + shRB24B1RCT->DefineSection(1, z, kRB24B1RCTRin + 0.35, kRB24B1RCTRin + 0.35 + kRB24B1RCTd); + z = kRB24B1RCTL - 0.03; + shRB24B1RCT->DefineSection(2, z, kRB24B1RCTRin, kRB24B1RCTRin + kRB24B1RCTd); + + TGeoVolume* voRB24B1RCT = new TGeoVolume("RB24B1RCT", shRB24B1RCT, kMedCuNF); + z = kRB24B1L - kRB24B1RCTL - 0.45; + voRB24B1BellowM->AddNode(voRB24B1RCT, 1, new TGeoTranslation(0., 0., z)); + + // + // Pos 3 Trans. 
Tube Flange LHCVSR__0065 + // + // Pos 3.1 Transition Tube D53 LHCVSR__0064 + // Pos 3.2 Transition Flange LHCVSR__0060 + // Pos 3.3 Transition Tube LHCVSR__0058 + TGeoPcon* shRB24B1TTF = new TGeoPcon(0., 360., 7); + // Flange + z = 0.; + shRB24B1TTF->DefineSection(0, z, 6.30 / 2., 11.16 / 2.); + z += 0.25; + shRB24B1TTF->DefineSection(1, z, 6.30 / 2., 11.16 / 2.); + shRB24B1TTF->DefineSection(2, z, 6.30 / 2., 9.3 / 2.); + z += 0.55; + shRB24B1TTF->DefineSection(3, z, 6.30 / 2., 9.3 / 2.); + // Tube + shRB24B1TTF->DefineSection(4, z, 6.30 / 2., 6.7 / 2.); + z += 5.80; + shRB24B1TTF->DefineSection(5, z, 6.30 / 2., 6.7 / 2.); + // Transition Tube + z += 3.75; + shRB24B1TTF->DefineSection(6, z, 8.05 / 2., 8.45 / 2.); + TGeoVolume* voRB24B1TTF = new TGeoVolume("RB24B1TTF", shRB24B1TTF, kMedSteelNF); + z = 0.; + voRB24B1BellowM->AddNode(voRB24B1TTF, 1, new TGeoTranslation(0., 0., z)); + + // Annular Ion Pump + // LHCVC2U_0003 + // + // Pos 1 Rotable Flange LHCVFX__0031 + // Pos 2 RF Screen Tube LHCVC2U_0005 + // Pos 3 Shell LHCVC2U_0007 + // Pos 4 Extruded Shell LHCVC2U_0006 + // Pos 5 Feedthrough Tube LHCVC2U_0004 + // Pos 6 Tubulated Flange STDVFUHV0021 + // Pos 7 Fixed Flange LHCVFX__0032 + // Pos 8 Pumping Elements + + // + // Pos 1 Rotable Flange LHCVFX__0031 + // pos 7 Fixed Flange LHCVFX__0032 + // + // Mother volume + + // + // Length 35 cm + // Flange 2 x 1.98 = 3.96 + // Tube = 32.84 + //========================== + // 36.80 + // Overlap 2 * 0.90 = 1.80 + + const float kRB24IpRFD1 = 0.68; // Length of section 1 + const float kRB24IpRFD2 = 0.30; // Length of section 2 + const float kRB24IpRFD3 = 0.10; // Length of section 3 + const float kRB24IpRFD4 = 0.35; // Length of section 4 + const float kRB24IpRFD5 = 0.55; // Length of section 5 + + const float kRB24IpRFRo = 15.20 / 2.; // Flange outer radius + const float kRB24IpRFRi1 = 6.30 / 2.; // Flange inner radius section 1 + const float kRB24IpRFRi2 = 6.00 / 2.; // Flange inner radius section 2 + const float 
kRB24IpRFRi3 = 5.84 / 2.; // Flange inner radius section 3 + const float kRB24IpRFRi4 = 6.00 / 2.; // Flange inner radius section 1 + const float kRB24IpRFRi5 = 10.50 / 2.; // Flange inner radius section 2 + + TGeoPcon* shRB24IpRF = new TGeoPcon(0., 360., 9); + z0 = 0.; + shRB24IpRF->DefineSection(0, z0, kRB24IpRFRi1, kRB24IpRFRo); + z0 += kRB24IpRFD1; + shRB24IpRF->DefineSection(1, z0, kRB24IpRFRi2, kRB24IpRFRo); + z0 += kRB24IpRFD2; + shRB24IpRF->DefineSection(2, z0, kRB24IpRFRi2, kRB24IpRFRo); + shRB24IpRF->DefineSection(3, z0, kRB24IpRFRi3, kRB24IpRFRo); + z0 += kRB24IpRFD3; + shRB24IpRF->DefineSection(4, z0, kRB24IpRFRi3, kRB24IpRFRo); + shRB24IpRF->DefineSection(5, z0, kRB24IpRFRi4, kRB24IpRFRo); + z0 += kRB24IpRFD4; + shRB24IpRF->DefineSection(6, z0, kRB24IpRFRi4, kRB24IpRFRo); + shRB24IpRF->DefineSection(7, z0, kRB24IpRFRi5, kRB24IpRFRo); + z0 += kRB24IpRFD5; + shRB24IpRF->DefineSection(8, z0, kRB24IpRFRi5, kRB24IpRFRo); + + TGeoVolume* voRB24IpRF = new TGeoVolume("RB24IpRF", shRB24IpRF, kMedSteel); + + // + // Pos 2 RF Screen Tube LHCVC2U_0005 + // + + // + // Tube + float kRB24IpSTTL = 32.84; // Total length of the tube + float kRB24IpSTTRi = 5.80 / 2.; // Inner Radius + float kRB24IpSTTRo = 6.00 / 2.; // Outer Radius + TGeoVolume* voRB24IpSTT = new TGeoVolume("RB24IpSTT", new TGeoTube(kRB24IpSTTRi, kRB24IpSTTRo, kRB24IpSTTL / 2.), kMedSteelNF); + // Screen + float kRB24IpSTCL = 0.4; // Lenth of the crochet detail + // Length of the screen + float kRB24IpSTSL = 9.00 - 2. * kRB24IpSTCL; + // Rel. position of the screen + float kRB24IpSTSZ = 7.00 + kRB24IpSTCL; + TGeoVolume* voRB24IpSTS = new TGeoVolume("RB24IpSTS", new TGeoTube(kRB24IpSTTRi, kRB24IpSTTRo, kRB24IpSTSL / 2.), kMedSteelNF); + // + voRB24IpSTT->AddNode(voRB24IpSTS, 1, new TGeoTranslation(0., 0., kRB24IpSTSZ - kRB24IpSTTL / 2. 
+ kRB24IpSTSL / 2.)); + + // Crochets + // Inner radius + float kRB24IpSTCRi = kRB24IpSTTRo + 0.25; + // Outer radius + float kRB24IpSTCRo = kRB24IpSTTRo + 0.35; + // Length of 1stsection + float kRB24IpSTCL1 = 0.15; + // Length of 2nd section + float kRB24IpSTCL2 = 0.15; + // Length of 3rd section + float kRB24IpSTCL3 = 0.10; + // Rel. position of 1st Crochet + + TGeoPcon* shRB24IpSTC = new TGeoPcon(0., 360., 5); + z0 = 0; + shRB24IpSTC->DefineSection(0, z0, kRB24IpSTCRi, kRB24IpSTCRo); + z0 += kRB24IpSTCL1; + shRB24IpSTC->DefineSection(1, z0, kRB24IpSTCRi, kRB24IpSTCRo); + shRB24IpSTC->DefineSection(2, z0, kRB24IpSTTRo, kRB24IpSTCRo); + z0 += kRB24IpSTCL2; + shRB24IpSTC->DefineSection(3, z0, kRB24IpSTTRo, kRB24IpSTCRo); + z0 += kRB24IpSTCL3; + shRB24IpSTC->DefineSection(4, z0, kRB24IpSTTRo, kRB24IpSTTRo + 0.001); + TGeoVolume* voRB24IpSTC = new TGeoVolume("RB24IpSTC", shRB24IpSTC, kMedSteel); + + // Pos 3 Shell LHCVC2U_0007 + // Pos 4 Extruded Shell LHCVC2U_0006 + float kRB24IpShellL = 4.45; // Length of the Shell + float kRB24IpShellD = 0.10; // Wall thickness of the shell + float kRB24IpShellCTRi = 6.70 / 2.; // Inner radius of the connection tube + float kRB24IpShellCTL = 1.56; // Length of the connection tube + float kRB24IpShellCARi = 17.80 / 2.; // Inner radius of the cavity + float kRB24IpShellCCRo = 18.20 / 2.; // Inner radius at the centre + + TGeoPcon* shRB24IpShell = new TGeoPcon(0., 360., 7); + z0 = 0; + shRB24IpShell->DefineSection(0, z0, kRB24IpShellCTRi, kRB24IpShellCTRi + kRB24IpShellD); + z0 += kRB24IpShellCTL; + shRB24IpShell->DefineSection(1, z0, kRB24IpShellCTRi, kRB24IpShellCTRi + kRB24IpShellD); + shRB24IpShell->DefineSection(2, z0, kRB24IpShellCTRi, kRB24IpShellCARi + kRB24IpShellD); + z0 += kRB24IpShellD; + shRB24IpShell->DefineSection(3, z0, kRB24IpShellCARi, kRB24IpShellCARi + kRB24IpShellD); + z0 = kRB24IpShellL - kRB24IpShellD; + shRB24IpShell->DefineSection(4, z0, kRB24IpShellCARi, kRB24IpShellCARi + kRB24IpShellD); + 
shRB24IpShell->DefineSection(5, z0, kRB24IpShellCARi, kRB24IpShellCCRo); + z0 = kRB24IpShellL; + shRB24IpShell->DefineSection(6, z0, kRB24IpShellCARi, kRB24IpShellCCRo); + TGeoVolume* voRB24IpShell = new TGeoVolume("RB24IpShell", shRB24IpShell, kMedSteel); + + TGeoPcon* shRB24IpShellM = makeMotherFromTemplate(shRB24IpShell, 0, 6, kRB24IpShellCTRi, 13); + + for (int i = 0; i < 6; i++) { + z = 2. * kRB24IpShellL - shRB24IpShellM->GetZ(5 - i); + float rmin = shRB24IpShellM->GetRmin(5 - i); + float rmax = shRB24IpShellM->GetRmax(5 - i); + shRB24IpShellM->DefineSection(7 + i, z, rmin, rmax); + } + + TGeoVolume* voRB24IpShellM = new TGeoVolume("RB24IpShellM", shRB24IpShellM, kMedVac); + voRB24IpShellM->SetVisibility(0); + voRB24IpShellM->AddNode(voRB24IpShell, 1, gGeoIdentity); + voRB24IpShellM->AddNode(voRB24IpShell, 2, new TGeoCombiTrans(0., 0., 2. * kRB24IpShellL, rot180)); + // + // Pos 8 Pumping Elements + // + // Anode array + TGeoVolume* voRB24IpPE = new TGeoVolume("voRB24IpPE", new TGeoTube(0.9, 1., 2.54 / 2.), kMedSteel); + float kRB24IpPEAR = 5.5; + + for (int i = 0; i < 15; i++) { + float phi = float(i) * 24.; + float x = kRB24IpPEAR * TMath::Cos(kDegRad * phi); + float y = kRB24IpPEAR * TMath::Sin(kDegRad * phi); + voRB24IpShellM->AddNode(voRB24IpPE, i + 1, new TGeoTranslation(x, y, kRB24IpShellL)); + } + + // + // Warm Module Type VMABC + // LHCVMABC_0002 + // + // + // + // Flange 1.00 + // Central Piece 11.50 + // Bellow 14.50 + // End Flange 1.00 + //=================================== + // Total 28.00 + // + // Pos 1 Warm Bellows DN100 LHCVBU__0016 + // Pos 2 Trans. Tube Flange LHCVSR__0062 + // Pos 3 RF Contact D63 LHCVSR__0057 + // [Pos 4 Hex. 
Countersunk Screw Bossard BN4719] + // [Pos 5 Tension spring LHCVSR__00239] + // + + // Pos 1 Warm Bellows DN100 LHCVBU__0016 + // Pos 1.1 Right Body 2 Ports with Support LHCVBU__0014 + // + // Tube 1 + const float kRB24VMABCRBT1Ri = 10.0 / 2.; + const float kRB24VMABCRBT1Ro = 10.3 / 2.; + const float kRB24VMABCRBT1L = 11.5; + const float kRB24VMABCRBT1L2 = 8.; + const float kRB24VMABCL = 28.375; + + TGeoTube* shRB24VMABCRBT1 = new TGeoTube(kRB24VMABCRBT1Ri, kRB24VMABCRBT1Ro, kRB24VMABCRBT1L / 2.); + shRB24VMABCRBT1->SetName("RB24VMABCRBT1"); + TGeoTube* shRB24VMABCRBT1o = new TGeoTube(0., kRB24VMABCRBT1Ro, kRB24VMABCRBT1L / 2.); + shRB24VMABCRBT1o->SetName("RB24VMABCRBT1o"); + TGeoTube* shRB24VMABCRBT1o2 = new TGeoTube(0., kRB24VMABCRBT1Ro + 0.3, kRB24VMABCRBT1L / 2.); + shRB24VMABCRBT1o2->SetName("RB24VMABCRBT1o2"); + // Lower inforcement + TGeoVolume* voRB24VMABCRBT12 = new TGeoVolume( + "RB24VMABCRBT12", new TGeoTubeSeg(kRB24VMABCRBT1Ro, kRB24VMABCRBT1Ro + 0.3, kRB24VMABCRBT1L2 / 2., 220., 320.), + kMedSteelNF); + // + // Tube 2 + const float kRB24VMABCRBT2Ri = 6.0 / 2.; + const float kRB24VMABCRBT2Ro = 6.3 / 2.; + const float kRB24VMABCRBF2Ro = 11.4 / 2.; + const float kRB24VMABCRBT2L = 5.95 + 2.; // 2. cm added for welding + const float kRB24VMABCRBF2L = 1.75; + TGeoTube* shRB24VMABCRBT2 = new TGeoTube(kRB24VMABCRBT2Ri, kRB24VMABCRBT2Ro, kRB24VMABCRBT2L / 2.); + shRB24VMABCRBT2->SetName("RB24VMABCRBT2"); + TGeoTube* shRB24VMABCRBT2i = new TGeoTube(0., kRB24VMABCRBT2Ri, kRB24VMABCRBT2L / 2. 
+ 2.); + shRB24VMABCRBT2i->SetName("RB24VMABCRBT2i"); + TGeoCombiTrans* tRBT2 = new TGeoCombiTrans(-11.5 + kRB24VMABCRBT2L / 2., 0., 7.2 - kRB24VMABCRBT1L / 2., rotxz); + tRBT2->SetName("tRBT2"); + tRBT2->RegisterYourself(); + TGeoCompositeShape* shRB24VMABCRBT2c = new TGeoCompositeShape("shRB24VMABCRBT2c", "RB24VMABCRBT2:tRBT2-RB24VMABCRBT1o"); + TGeoVolume* voRB24VMABCRBT2 = new TGeoVolume("shRB24VMABCRBT2", shRB24VMABCRBT2c, kMedSteelNF); + // Flange + // Pos 1.4 Flange DN63 LHCVBU__0008 + TGeoVolume* voRB24VMABCRBF2 = + new TGeoVolume("RB24VMABCRBF2", new TGeoTube(kRB24VMABCRBT2Ro, kRB24VMABCRBF2Ro, kRB24VMABCRBF2L / 2.), kMedSteelNF); + // DN63 Blank Flange (my best guess) + TGeoVolume* voRB24VMABCRBF2B = new TGeoVolume("RB24VMABCRBF2B", new TGeoTube(0., kRB24VMABCRBF2Ro, kRB24VMABCRBF2L / 2.), kMedSteelNF); + // + // Tube 3 + const float kRB24VMABCRBT3Ri = 3.5 / 2.; + const float kRB24VMABCRBT3Ro = 3.8 / 2.; + const float kRB24VMABCRBF3Ro = 7.0 / 2.; + const float kRB24VMABCRBT3L = 4.95 + 2.; // 2. cm added for welding + const float kRB24VMABCRBF3L = 1.27; + TGeoTube* shRB24VMABCRBT3 = new TGeoTube(kRB24VMABCRBT3Ri, kRB24VMABCRBT3Ro, kRB24VMABCRBT3L / 2); + shRB24VMABCRBT3->SetName("RB24VMABCRBT3"); + TGeoTube* shRB24VMABCRBT3i = new TGeoTube(0., kRB24VMABCRBT3Ri, kRB24VMABCRBT3L / 2. 
+ 2.); + shRB24VMABCRBT3i->SetName("RB24VMABCRBT3i"); + TGeoCombiTrans* tRBT3 = new TGeoCombiTrans(0., 10.5 - kRB24VMABCRBT3L / 2., 7.2 - kRB24VMABCRBT1L / 2., rotyz); + tRBT3->SetName("tRBT3"); + tRBT3->RegisterYourself(); + TGeoCompositeShape* shRB24VMABCRBT3c = + new TGeoCompositeShape("shRB24VMABCRBT3c", "RB24VMABCRBT3:tRBT3-RB24VMABCRBT1o"); + TGeoVolume* voRB24VMABCRBT3 = new TGeoVolume("shRB24VMABCRBT3", shRB24VMABCRBT3c, kMedSteel); + // Flange + // Pos 1.4 Flange DN35 LHCVBU__0007 + TGeoVolume* voRB24VMABCRBF3 = new TGeoVolume("RB24VMABCRBF3", new TGeoTube(kRB24VMABCRBT3Ro, kRB24VMABCRBF3Ro, kRB24VMABCRBF3L / 2.), kMedSteelNF); + // + // Tube 4 + const float kRB24VMABCRBT4Ri = 6.0 / 2.; + const float kRB24VMABCRBT4Ro = 6.4 / 2.; + const float kRB24VMABCRBT4L = 6.6; + TGeoTube* shRB24VMABCRBT4 = new TGeoTube(kRB24VMABCRBT4Ri, kRB24VMABCRBT4Ro, kRB24VMABCRBT4L / 2.); + shRB24VMABCRBT4->SetName("RB24VMABCRBT4"); + TGeoCombiTrans* tRBT4 = new TGeoCombiTrans(0., -11. + kRB24VMABCRBT4L / 2., 7.2 - kRB24VMABCRBT1L / 2., rotyz); + tRBT4->SetName("tRBT4"); + tRBT4->RegisterYourself(); + TGeoCompositeShape* shRB24VMABCRBT4c = + new TGeoCompositeShape("shRB24VMABCRBT4c", "RB24VMABCRBT4:tRBT4-RB24VMABCRBT1o2"); + TGeoVolume* voRB24VMABCRBT4 = new TGeoVolume("shRB24VMABCRBT4", shRB24VMABCRBT4c, kMedSteelNF); + TGeoCompositeShape* shRB24VMABCRB = + new TGeoCompositeShape("shRB24VMABCRB", "RB24VMABCRBT1-(RB24VMABCRBT2i:tRBT2+RB24VMABCRBT3i:tRBT3)"); + TGeoVolume* voRB24VMABCRBI = new TGeoVolume("RB24VMABCRBI", shRB24VMABCRB, kMedSteelNF); + // + // Plate + const float kRB24VMABCRBBx = 16.0; + const float kRB24VMABCRBBy = 1.5; + const float kRB24VMABCRBBz = 15.0; + + // Relative position of tubes + const float kRB24VMABCTz = 7.2; + // Relative position of plate + const float kRB24VMABCPz = 3.6; + const float kRB24VMABCPy = -12.5; + + TGeoVolume* voRB24VMABCRBP = new TGeoVolume( + "RB24VMABCRBP", new TGeoBBox(kRB24VMABCRBBx / 2., kRB24VMABCRBBy / 2., kRB24VMABCRBBz / 2.), 
kMedSteelNF); + // + // Pirani Gauge (my best guess) + // + TGeoPcon* shRB24VMABCPirani = new TGeoPcon(0., 360., 15); + // DN35/16 Coupling + z = 0; + shRB24VMABCPirani->DefineSection(0, z, 0.8, kRB24VMABCRBF3Ro); + z += kRB24VMABCRBF3L; // 1.3 + shRB24VMABCPirani->DefineSection(1, z, 0.8, kRB24VMABCRBF3Ro); + shRB24VMABCPirani->DefineSection(2, z, 0.8, 1.0); + // Pipe + z += 2.8; + shRB24VMABCPirani->DefineSection(3, z, 0.8, 1.0); + // Flange + shRB24VMABCPirani->DefineSection(4, z, 0.8, 1.75); + z += 1.6; + shRB24VMABCPirani->DefineSection(5, z, 0.8, 1.75); + shRB24VMABCPirani->DefineSection(6, z, 0.8, 1.0); + z += 5.2; + shRB24VMABCPirani->DefineSection(7, z, 0.8, 1.0); + shRB24VMABCPirani->DefineSection(8, z, 0.8, 2.5); + z += 2.0; + shRB24VMABCPirani->DefineSection(9, z, 0.80, 2.50); + shRB24VMABCPirani->DefineSection(10, z, 1.55, 1.75); + z += 5.7; + shRB24VMABCPirani->DefineSection(11, z, 1.55, 1.75); + shRB24VMABCPirani->DefineSection(11, z, 0.00, 1.75); + z += 0.2; + shRB24VMABCPirani->DefineSection(12, z, 0.00, 1.75); + shRB24VMABCPirani->DefineSection(13, z, 0.00, 0.75); + z += 0.5; + shRB24VMABCPirani->DefineSection(14, z, 0.00, 0.75); + TGeoVolume* voRB24VMABCPirani = new TGeoVolume("RB24VMABCPirani", shRB24VMABCPirani, kMedSteelNF); + // + // + // + + // + // Positioning of elements + TGeoVolumeAssembly* voRB24VMABCRB = new TGeoVolumeAssembly("RB24VMABCRB"); + // + voRB24VMABCRB->AddNode(voRB24VMABCRBI, 1, gGeoIdentity); + // Plate + voRB24VMABCRB->AddNode(voRB24VMABCRBP, 1, + new TGeoTranslation(0., kRB24VMABCPy + kRB24VMABCRBBy / 2., + kRB24VMABCRBBz / 2. - kRB24VMABCRBT1L / 2. 
+ kRB24VMABCPz)); + // Tube 2 + voRB24VMABCRB->AddNode(voRB24VMABCRBT2, 1, gGeoIdentity); + // Flange Tube 2 + voRB24VMABCRB->AddNode(voRB24VMABCRBF2, 1, new TGeoCombiTrans(kRB24VMABCPy + kRB24VMABCRBF2L / 2., 0., kRB24VMABCTz - kRB24VMABCRBT1L / 2., rotxz)); + // Blank Flange Tube 2 + voRB24VMABCRB->AddNode(voRB24VMABCRBF2B, 1, new TGeoCombiTrans(kRB24VMABCPy - kRB24VMABCRBF2L / 2., 0., kRB24VMABCTz - kRB24VMABCRBT1L / 2., rotxz)); + // Tube 3 + voRB24VMABCRB->AddNode(voRB24VMABCRBT3, 1, gGeoIdentity); + // Flange Tube 3 + voRB24VMABCRB->AddNode(voRB24VMABCRBF3, 1, new TGeoCombiTrans(0., 11.2 - kRB24VMABCRBF3L / 2., kRB24VMABCTz - kRB24VMABCRBT1L / 2., rotyz)); + // Pirani Gauge + voRB24VMABCRB->AddNode(voRB24VMABCPirani, 1, new TGeoCombiTrans(0., 11.2, kRB24VMABCTz - kRB24VMABCRBT1L / 2., rotyz)); + // Tube 4 + voRB24VMABCRB->AddNode(voRB24VMABCRBT4, 1, gGeoIdentity); + // Inforcement + voRB24VMABCRB->AddNode(voRB24VMABCRBT12, 1, new TGeoTranslation(0., 0., kRB24VMABCRBT1L2 / 2. - kRB24VMABCRBT1L / 2. 
+ 2.8)); + + // Pos 1.3 Bellows with end part LHCVBU__0002 + // + // Connection Tube + // Connection tube inner r + const float kRB24VMABBEConTubeRin = 10.0 / 2.; + // Connection tube outer r + const float kRB24VMABBEConTubeRou = 10.3 / 2.; + // Connection tube length + const float kRB24VMABBEConTubeL1 = 0.9; + const float kRB24VMABBEConTubeL2 = 2.6; + + // Mother volume + TGeoPcon* shRB24VMABBEBellowM = new TGeoPcon(0., 360., 6); + // Connection Tube and Flange + z = 0.; + shRB24VMABBEBellowM->DefineSection(0, z, kRB24VMABBEConTubeRin, kRB24VMABBEConTubeRou); + z += kRB24VMABBEConTubeL1; + shRB24VMABBEBellowM->DefineSection(1, z, kRB24VMABBEConTubeRin, kRB24VMABBEConTubeRou); + shRB24VMABBEBellowM->DefineSection(2, z, kRB24B1BellowRi, kRB24B1BellowRo + kRB24B1ProtTubeThickness); + z += newRB24B1BellowUndL; + shRB24VMABBEBellowM->DefineSection(3, z, kRB24B1BellowRi, kRB24B1BellowRo + kRB24B1ProtTubeThickness); + shRB24VMABBEBellowM->DefineSection(4, z, kRB24VMABBEConTubeRin, kRB24VMABBEConTubeRou); + z += kRB24VMABBEConTubeL2; + shRB24VMABBEBellowM->DefineSection(5, z, kRB24VMABBEConTubeRin, kRB24VMABBEConTubeRou); + TGeoVolume* voRB24VMABBEBellowM = new TGeoVolume("RB24VMABBEBellowM", shRB24VMABBEBellowM, kMedVacNF); + voRB24VMABBEBellowM->SetVisibility(0); + + // Connection tube left + TGeoVolume* voRB24VMABBECT1 = new TGeoVolume( + "RB24VMABBECT1", new TGeoTube(kRB24VMABBEConTubeRin, kRB24VMABBEConTubeRou, kRB24VMABBEConTubeL1 / 2.), kMedSteelNF); + // Connection tube right + TGeoVolume* voRB24VMABBECT2 = new TGeoVolume( + "RB24VMABBECT2", new TGeoTube(kRB24VMABBEConTubeRin, kRB24VMABBEConTubeRou, kRB24VMABBEConTubeL2 / 2.), kMedSteelNF); + z = kRB24VMABBEConTubeL1 / 2.; + voRB24VMABBEBellowM->AddNode(voRB24VMABBECT1, 1, new TGeoTranslation(0., 0., z)); + z += kRB24VMABBEConTubeL1 / 2.; + z += newRB24B1BellowUndL / 2.; + voRB24VMABBEBellowM->AddNode(voRB24B1Bellow, 2, new TGeoTranslation(0., 0., z)); + z += newRB24B1BellowUndL / 2.; + z += kRB24VMABBEConTubeL2 / 
2.; + voRB24VMABBEBellowM->AddNode(voRB24VMABBECT2, 1, new TGeoTranslation(0., 0., z)); + z += kRB24VMABBEConTubeL2 / 2.; + + voRB24VMABCRB->AddNode(voRB24VMABBEBellowM, 1, new TGeoTranslation(0., 0., kRB24VMABCRBT1L / 2.)); + + // Pos 1.2 Rotable flange LHCVBU__0013[*] + // Front + voRB24VMABCRB->AddNode(voRB24B1RFlange, 3, new TGeoCombiTrans(0., 0., -kRB24VMABCRBT1L / 2. + 0.86, rot180)); + // End + z = kRB24VMABCRBT1L / 2. + newRB24B1BellowUndL + kRB24VMABBEConTubeL1 + kRB24VMABBEConTubeL2; + voRB24VMABCRB->AddNode(voRB24B1RFlange, 4, new TGeoTranslation(0., 0., z - 0.86)); + + // Pos 2 Trans. Tube Flange LHCVSR__0062 + // Pos 2.1 Transition Tube LHCVSR__0063 + // Pos 2.2 Transition Flange LHCVSR__0060 + // + // Transition Tube with Flange + TGeoPcon* shRB24VMABCTT = new TGeoPcon(0., 360., 7); + z = 0.; + shRB24VMABCTT->DefineSection(0, z, 6.3 / 2., 11.16 / 2.); + z += 0.25; + shRB24VMABCTT->DefineSection(1, z, 6.3 / 2., 11.16 / 2.); + shRB24VMABCTT->DefineSection(2, z, 6.3 / 2., 9.30 / 2.); + z += 0.25; + shRB24VMABCTT->DefineSection(3, z, 6.3 / 2., 9.30 / 2.); + shRB24VMABCTT->DefineSection(4, z, 6.3 / 2., 6.70 / 2.); + z += (20.35 - 0.63); + shRB24VMABCTT->DefineSection(5, z, 6.3 / 2., 6.7 / 2.); + z += 0.63; + shRB24VMABCTT->DefineSection(6, z, 6.3 / 2., 6.7 / 2.); + TGeoVolume* voRB24VMABCTT = new TGeoVolume("RB24VMABCTT", shRB24VMABCTT, kMedSteelNF); + voRB24VMABCRB->AddNode(voRB24VMABCTT, 1, new TGeoTranslation(0., 0., -kRB24VMABCRBT1L / 2. 
- 1.)); + + // Pos 3 RF Contact D63 LHCVSR__0057 + // Pos 3.1 RF Contact Flange LHCVSR__0017 + // + TGeoPcon* shRB24VMABCCTFlange = new TGeoPcon(0., 360., 6); + const float kRB24VMABCCTFlangeRin = 6.36 / 2.; // Inner radius + const float kRB24VMABCCTFlangeL = 1.30; // Length + + z = 0.; + shRB24VMABCCTFlange->DefineSection(0, z, kRB24VMABCCTFlangeRin, 6.5 / 2.); + z += 0.15; + shRB24VMABCCTFlange->DefineSection(1, z, kRB24VMABCCTFlangeRin, 6.5 / 2.); + shRB24VMABCCTFlange->DefineSection(2, z, kRB24VMABCCTFlangeRin, 6.9 / 2.); + z += 0.9; + shRB24VMABCCTFlange->DefineSection(3, z, kRB24VMABCCTFlangeRin, 6.9 / 2.); + shRB24VMABCCTFlange->DefineSection(4, z, kRB24VMABCCTFlangeRin, 11.16 / 2.); + z += 0.25; + shRB24VMABCCTFlange->DefineSection(5, z, kRB24VMABCCTFlangeRin, 11.16 / 2.); + TGeoVolume* voRB24VMABCCTFlange = new TGeoVolume("RB24VMABCCTFlange", shRB24VMABCCTFlange, kMedCuNF); + // + // Pos 3.2 RF-Contact LHCVSR__0056 + // + TGeoPcon* shRB24VMABCCT = new TGeoPcon(0., 360., 4); + const float kRB24VMABCCTRin = 6.30 / 2.; // Inner radius + const float kRB24VMABCCTCRin = 7.29 / 2.; // Max. 
inner radius conical section + const float kRB24VMABCCTL = 11.88; // Length + const float kRB24VMABCCTSL = 10.48; // Length of straight section + const float kRB24VMABCCTd = 0.03; // Thickness + z = 0; + shRB24VMABCCT->DefineSection(0, z, kRB24VMABCCTCRin, kRB24VMABCCTCRin + kRB24VMABCCTd); + z = kRB24VMABCCTL - kRB24VMABCCTSL; + shRB24VMABCCT->DefineSection(1, z, kRB24VMABCCTRin + 0.35, kRB24VMABCCTRin + 0.35 + kRB24VMABCCTd); + z = kRB24VMABCCTL - kRB24VMABCCTFlangeL; + shRB24VMABCCT->DefineSection(2, z, kRB24VMABCCTRin, kRB24VMABCCTRin + kRB24VMABCCTd); + z = kRB24VMABCCTL; + shRB24VMABCCT->DefineSection(3, z, kRB24VMABCCTRin, kRB24VMABCCTRin + kRB24VMABCCTd); + + TGeoVolume* voRB24VMABCCT = new TGeoVolume("RB24VMABCCT", shRB24VMABCCT, kMedCuNF); + + TGeoVolumeAssembly* voRB24VMABRFCT = new TGeoVolumeAssembly("RB24VMABRFCT"); + voRB24VMABRFCT->AddNode(voRB24VMABCCT, 1, gGeoIdentity); + voRB24VMABRFCT->AddNode(voRB24VMABCCTFlange, 1, new TGeoTranslation(0., 0., kRB24VMABCCTL - kRB24VMABCCTFlangeL)); + + z = kRB24VMABCRBT1L / 2. + newRB24B1BellowUndL + kRB24VMABBEConTubeL1 + kRB24VMABBEConTubeL2 - kRB24VMABCCTL + 1.; + voRB24VMABCRB->AddNode(voRB24VMABRFCT, 1, new TGeoTranslation(0., 0., z)); + + // + // Assembling RB24/1 + // + + // part which is placed in the cave + // -> + TGeoVolumeAssembly* voRB24C = new TGeoVolumeAssembly("RB24C"); + voRB24C->AddNode(voRB24cCuTubeM, 1, gGeoIdentity); + z = -kRB24cCuTubeL / 2 + kRB24CuTubeFL / 2.; + voRB24C->AddNode(voRB24CuTubeF, 1, new TGeoTranslation(0., 0., z)); + // VMABC close to compensator magnet + // z = -kRB24cCuTubeL / 2. - (kRB24VMABCL - kRB24VMABCRBT1L / 2) + 1.; + // voRB24C->AddNode(voRB24VMABCRB, 2, new TGeoTranslation(0., 0., z)); + z = -kRB24cCuTubeL / 2. 
- kRB24B1L; + voRB24C->AddNode(voRB24B1BellowM, 2, new TGeoTranslation(0., 0., z)); + + // <- + + // + // RB24/2 + // + // Copper Tube RB24/2 + // mainly inside the compensator magnet + const float kRB242CuTubeL = 350.0; + // 20 cm straight - 20 cm transition to final oval - 270 oval - 20 cm transition to final oval - 20 cm straight + // + // mother volume for transition region + TGeoVolume* voRB242CuOvTransMo = new TGeoVolume("voRB24CuOvTransMo", new TGeoTube(0., 4.75, 10.), kMedAir); + const int nTrans = 10; + TGeoVolume* voRB242CuOvTransV[nTrans]; + TGeoVolume* voRB242CuOvTransI[nTrans]; + float dovX = 4.; + float dovY = 4.; + float dovZ = -9.0; + for (int i = 0; i < nTrans; i++) { + dovX -= 0.0625; + dovY += 0.075; + char vname[20]; + snprintf(vname, 20, "voRB242CuOvTransV%d", i); + voRB242CuOvTransV[i] = new TGeoVolume(vname, new TGeoEltu(dovX, dovY, 1.0), kMedCuHC); + snprintf(vname, 20, "voRB242CuOvTransI%d", i); + voRB242CuOvTransI[i] = new TGeoVolume(vname, new TGeoEltu(dovX - 0.2, dovY - 0.2, 1.0), kMedVacHC); + voRB242CuOvTransV[i]->AddNode(voRB242CuOvTransI[i], 1, gGeoIdentity); + voRB242CuOvTransMo->AddNode(voRB242CuOvTransV[i], 1, new TGeoTranslation(0., 0., dovZ)); + dovZ += 2.; + } + // + TGeoVolume* voRB242CuTubeM = new TGeoVolume("voRB242CuTubeM", new TGeoTube(0., kRB24CuTubeRo, 10.), kMedVacHC); + TGeoVolume* voRB242CuTube = new TGeoVolume("voRB242CuTube", new TGeoTube(kRB24CuTubeRi, kRB24CuTubeRo, 10.), kMedCuHC); + voRB242CuTubeM->AddNode(voRB242CuTube, 1, gGeoIdentity); + TGeoVolume* voRB242CuOvalM = new TGeoVolume("voRB242CuOvalM", new TGeoEltu(3.375, 4.75, 135.), kMedCuHC); + TGeoVolume* voRB242CuOval = new TGeoVolume("voRB242CuOval", new TGeoEltu(3.175, 4.55, 135.), kMedVacHC); + voRB242CuOvalM->AddNode(voRB242CuOval, 1, gGeoIdentity); + // + TGeoVolumeAssembly* voRB242 = new TGeoVolumeAssembly("RB242"); + voRB242->AddNode(voRB242CuOvalM, 1, gGeoIdentity); + z = -kRB242CuTubeL / 2 + kRB24CuTubeFL / 2.; + voRB242->AddNode(voRB24CuTubeF, 3, 
new TGeoTranslation(0., 0., z)); + z = +kRB242CuTubeL / 2 - kRB24CuTubeFL / 2.; + voRB242->AddNode(voRB24CuTubeF, 4, new TGeoTranslation(0., 0., z)); + z = 135. + 10.; + voRB242->AddNode(voRB242CuOvTransMo, 1, new TGeoCombiTrans(0., 0., z, rot180)); + z = -135. - 10.; + voRB242->AddNode(voRB242CuOvTransMo, 2, new TGeoTranslation(0., 0., z)); + z = -135. - 30.; + voRB242->AddNode(voRB242CuTubeM, 1, new TGeoTranslation(0., 0., z)); + z = 135. + 30.; + voRB242->AddNode(voRB242CuTubeM, 2, new TGeoTranslation(0., 0., z)); + z = -kRB24cCuTubeL / 2 - kRB24B1L - kRB242CuTubeL / 2.; + voRB24C->AddNode(voRB242, 1, new TGeoTranslation(0., 0., z)); + // + // RB24/3 + // + // Copper Tube RB24/3 + // the lenth of the tube is 296.85 on the drawing but this is inconsistent with the total length tube + bellow + const float kRB243CuTubeL = 297.85 - (kRB24VMABCL - kRB24B1L); + + TGeoVolume* voRB243CuTubeM = new TGeoVolume("voRB243CuTubeM", new TGeoTube(0., kRB24CuTubeRo, (kRB243CuTubeL) / 2.), kMedVacNF); + TGeoVolume* voRB243CuTube = new TGeoVolume("voRB243CuTube", new TGeoTube(kRB24CuTubeRi, kRB24CuTubeRo, (kRB243CuTubeL) / 2.), kMedCuNF); + voRB243CuTubeM->AddNode(voRB243CuTube, 1, gGeoIdentity); + + TGeoVolumeAssembly* voRB243 = new TGeoVolumeAssembly("RB243"); + TGeoVolumeAssembly* voRB243A = new TGeoVolumeAssembly("RB243A"); + + voRB243A->AddNode(voRB243CuTube, 1, gGeoIdentity); + z = -kRB243CuTubeL / 2 + kRB24CuTubeFL / 2.; + voRB243A->AddNode(voRB24CuTubeF, 5, new TGeoTranslation(0., 0., z)); + z = +kRB243CuTubeL / 2 - kRB24CuTubeFL / 2.; + voRB243A->AddNode(voRB24CuTubeF, 6, new TGeoTranslation(0., 0., z)); + + z = +kRB243CuTubeL / 2 + (kRB24VMABCRBT1L / 2) + 1; + voRB243A->AddNode(voRB24VMABCRB, 2, new TGeoTranslation(0., 0., z)); + + z = -kRB243CuTubeL / 2. - kRB24VMABCL; + voRB243->AddNode(voRB243A, 1, new TGeoTranslation(0., 0., z)); + z = -(1.5 * kRB243CuTubeL + 2. * kRB24VMABCL); + voRB243->AddNode(voRB243A, 2, new TGeoTranslation(0., 0., z)); + + z = -2. 
* (kRB243CuTubeL + kRB24VMABCL) - (kRB24VMABCL - kRB24VMABCRBT1L / 2) + 1.; + voRB243->AddNode(voRB24VMABCRB, 3, new TGeoTranslation(0., 0., z)); + + z = -kRB24cCuTubeL / 2 - kRB24B1L - kRB242CuTubeL; + voRB24C->AddNode(voRB243, 1, new TGeoTranslation(0., 0., z)); + + // + // + caveRB24->AddNode(voRB24C, 1, new TGeoCombiTrans(0., 0., -kRB24CL / 2 + kRB24cCuTubeL / 2, rot180)); + + // + //////////////////////////////////////////////////////////////////////////////// + // // + // The Absorber Vacuum system // + // // + //////////////////////////////////////////////////////////////////////////////// + // + // Rotable Flange starts at: 82.00 cm from IP + // Length of rotable flange section: 10.68 cm + // Weld 0.08 cm + // Length of straight section 207.21 cm + // ======================================================================= + // 299.97 cm [0.03 cm missing ?] + // Length of opening cone 252.09 cm + // Weld 0.15 cm + // Length of compensator 30.54 cm + // Weld 0.15 cm + // Length of fixed flange 2.13 - 0.97 1.16 cm + // ======================================================================= + // 584.06 cm [584.80 installed] [0.74 cm missing] + // RB26/3 + // Length of split flange 2.13 - 1.2 0.93 cm + // Weld 0.15 cm + // Length of fixed point section 16.07 cm + // Weld 0.15 cm + // Length of opening cone 629.20 cm + // Weld 0.30 cm + // Length of the compensator 41.70 cm + // Weld 0.30 cm + // Length of fixed flange 2.99 - 1.72 1.27 cm + // ================================================= + // Length of RB26/3 690.07 cm [689.20 installed] [0.87 cm too much] + // + // RB26/4-5 + // Length of split flange 2.13 - 1.2 0.93 cm + // Weld 0.15 cm + // Length of fixed point section 16.07 cm + // Weld 0.15 cm + // Length of opening cone 629.20 cm + // Weld 0.30 cm + // Length of closing cone + // Weld + // Length of straight section + // Length of the compensator 41.70 cm + // Weld 0.30 cm + // Length of fixed flange 2.99 - 1.72 1.27 cm + // 
================================================= + // Length of RB26/3 690.07 cm [689.20 installed] [0.87 cm too much] + + /////////////////////////////////////////// + // // + // RB26/1-2 // + // Drawing LHCV2a_0050 [as installed] // + // Drawing LHCV2a_0008 // + // Drawing LHCV2a_0001 // + /////////////////////////////////////////// + // Pos1 Vacuum Tubes LHCVC2A__0010 + // Pos2 Compensator LHCVC2A__0064 + // Pos3 Rotable Flange LHCVFX___0016 + // Pos4 Fixed Flange LHCVFX___0006 + // Pos5 Bellow Tooling LHCVFX___0003 + // + // + // + /////////////////////////////////// + // RB26/1-2 Vacuum Tubes // + // Drawing LHCVC2a_0010 // + /////////////////////////////////// + const float kRB26s12TubeL0 = 459.45; // 0.15 cm added for welding + const float kRB26s12TubeL2 = 47.21; // part of this tube outside barrel region + const float kRB26s12TubeL = kRB26s12TubeL0 - kRB26s12TubeL2; // 392.115 + // + // 184.905 + // 0.877 + // Add 1 cm on outer diameter for insulation + // + // + // the section which is placed into the central barrel (ending at z = -505) + TGeoPcon* shRB26s12Tube = new TGeoPcon(0., 360., 4); + // Section 1: straight section + shRB26s12Tube->DefineSection(0, 0.00, 5.84 / 2., 6.00 / 2.); + shRB26s12Tube->DefineSection(1, 207.21, 5.84 / 2., 6.00 / 2.); + // Section 2: 0.72 deg opening cone + shRB26s12Tube->DefineSection(2, 207.21, 5.84 / 2., 6.14 / 2.); + shRB26s12Tube->DefineSection(3, kRB26s12TubeL, 5.84 / 2 + 2.576, 6.14 / 2. 
+ 2.576); + + // the section which is placed into the muon spectrometer (starting at z = -505) + TGeoPcon* shRB26s12msTube = new TGeoPcon(0., 360., 3); + // conical part + shRB26s12msTube->DefineSection(0, 0.00, shRB26s12Tube->GetRmin(3), shRB26s12Tube->GetRmax(3)); + shRB26s12msTube->DefineSection(1, 452.30 - kRB26s12TubeL, 12.0 / 2., 12.3 / 2.); + // straight part until compensator + shRB26s12msTube->DefineSection(2, kRB26s12TubeL2, 12.0 / 2., 12.3 / 2.); + + TGeoVolume* voRB26s12Tube = new TGeoVolume("RB26s12Tube", shRB26s12Tube, kMedSteelHC); + TGeoVolume* voRB26s12msTube = new TGeoVolume("RB26s12msTube", shRB26s12msTube, kMedSteelHC); + // Add the insulation layer + TGeoVolume* voRB26s12TubeIns = new TGeoVolume("RB26s12TubeIns", makeInsulationFromTemplate(shRB26s12Tube), kMedInsu); + TGeoVolume* voRB26s12msTubeIns = new TGeoVolume("RB26s12msTubeIns", makeInsulationFromTemplate(shRB26s12msTube), kMedInsu); + voRB26s12Tube->AddNode(voRB26s12TubeIns, 1, gGeoIdentity); + voRB26s12msTube->AddNode(voRB26s12msTubeIns, 1, gGeoIdentity); + + TGeoVolume* voRB26s12TubeM = new TGeoVolume("RB26s12TubeM", makeMotherFromTemplate(shRB26s12Tube), kMedVacHC); + voRB26s12TubeM->AddNode(voRB26s12Tube, 1, gGeoIdentity); + TGeoVolume* voRB26s12msTubeM = new TGeoVolume("RB26s12msTubeM", makeMotherFromTemplate(shRB26s12msTube), kMedVacHC); + voRB26s12msTubeM->AddNode(voRB26s12msTube, 1, gGeoIdentity); + + /////////////////////////////////// + // RB26/2 Axial Compensator // + // Drawing LHCVC2a_0064 // + /////////////////////////////////// + const float kRB26s2CompL = 30.65; // Length of the compensator + const float kRB26s2BellowRo = 14.38 / 2.; // Bellow outer radius [Pos 1] + const float kRB26s2BellowRi = 12.12 / 2.; // Bellow inner radius [Pos 1] + const int kRB26s2NumberOfPlies = 14; // Number of plies [Pos 1] + const float kRB26s2BellowUndL = 10.00; // Length of undulated region [Pos 1] [+10 mm installed including pretension ?] 
+ const float kRB26s2PlieThickness = 0.025; // Plie thickness [Pos 1] + const float kRB26s2ConnectionPlieR = 0.21; // Connection plie radius [Pos 1] + // Plie radius + const float kRB26s2PlieR = (kRB26s2BellowUndL - 4. * kRB26s2ConnectionPlieR + 2. * kRB26s2PlieThickness + + (2. * kRB26s2NumberOfPlies - 2.) * kRB26s2PlieThickness) / + (4. * kRB26s2NumberOfPlies - 2.); + const float kRB26s2CompTubeInnerR = 12.00 / 2.; // Connection tubes inner radius [Pos 2 + 3] + const float kRB26s2CompTubeOuterR = 12.30 / 2.; // Connection tubes outer radius [Pos 2 + 3] + const float kRB26s2WeldingTubeLeftL = 9.00 / 2.; // Left connection tube half length [Pos 2] + const float kRB26s2WeldingTubeRightL = 11.65 / 2.; // Right connection tube half length [Pos 3] [+ 0.15 cm for welding] + const float kRB26s2RingOuterR = 18.10 / 2.; // Ring inner radius [Pos 4] + const float kRB26s2RingL = 0.40 / 2.; // Ring half length [Pos 4] + const float kRB26s2RingZ = 6.50; // Ring z-position [Pos 4] + const float kRB26s2ProtOuterR = 18.20 / 2.; // Protection tube outer radius [Pos 5] + const float kRB26s2ProtL = 15.00 / 2.; // Protection tube half length [Pos 5] + const float kRB26s2ProtZ = 6.70; // Protection tube z-position [Pos 5] + + // Mother volume + // + TGeoPcon* shRB26s2Compensator = new TGeoPcon(0., 360., 6); + shRB26s2Compensator->DefineSection(0, 0.0, 0., kRB26s2CompTubeOuterR); + shRB26s2Compensator->DefineSection(1, kRB26s2RingZ, 0., kRB26s2CompTubeOuterR); + shRB26s2Compensator->DefineSection(2, kRB26s2RingZ, 0., kRB26s2ProtOuterR); + shRB26s2Compensator->DefineSection(3, kRB26s2ProtZ + 2. * kRB26s2ProtL, 0., kRB26s2ProtOuterR); + shRB26s2Compensator->DefineSection(4, kRB26s2ProtZ + 2. 
* kRB26s2ProtL, 0., kRB26s2CompTubeOuterR); + shRB26s2Compensator->DefineSection(5, kRB26s2CompL, 0., kRB26s2CompTubeOuterR); + TGeoVolume* voRB26s2Compensator = new TGeoVolume("RB26s2Compensator", shRB26s2Compensator, kMedVacHC); + + // + // [Pos 1] Bellow + // + // + TGeoVolume* voRB26s2Bellow = + new TGeoVolume("RB26s2Bellow", new TGeoTube(kRB26s2BellowRi, kRB26s2BellowRo, kRB26s2BellowUndL / 2.), kMedVacHC); + // + // Upper part of the undulation + // + TGeoTorus* shRB26s2PlieTorusU = new TGeoTorus(kRB26s2BellowRo - kRB26s2PlieR, kRB26s2PlieR - kRB26s2PlieThickness, kRB26s2PlieR); + shRB26s2PlieTorusU->SetName("RB26s2TorusU"); + TGeoTube* shRB26s2PlieTubeU = new TGeoTube(kRB26s2BellowRo - kRB26s2PlieR, kRB26s2BellowRo, kRB26s2PlieR); + shRB26s2PlieTubeU->SetName("RB26s2TubeU"); + TGeoCompositeShape* shRB26s2UpperPlie = new TGeoCompositeShape("RB26s2UpperPlie", "RB26s2TorusU*RB26s2TubeU"); + + TGeoVolume* voRB26s2WiggleU = new TGeoVolume("RB26s2UpperPlie", shRB26s2UpperPlie, kMedSteelHC); + // + // Lower part of the undulation + TGeoTorus* shRB26s2PlieTorusL = new TGeoTorus(kRB26s2BellowRi + kRB26s2PlieR, kRB26s2PlieR - kRB26s2PlieThickness, kRB26s2PlieR); + shRB26s2PlieTorusL->SetName("RB26s2TorusL"); + TGeoTube* shRB26s2PlieTubeL = new TGeoTube(kRB26s2BellowRi, kRB26s2BellowRi + kRB26s2PlieR, kRB26s2PlieR); + shRB26s2PlieTubeL->SetName("RB26s2TubeL"); + TGeoCompositeShape* shRB26s2LowerPlie = new TGeoCompositeShape("RB26s2LowerPlie", "RB26s2TorusL*RB26s2TubeL"); + + TGeoVolume* voRB26s2WiggleL = new TGeoVolume("RB26s2LowerPlie", shRB26s2LowerPlie, kMedSteelHC); + + // + // Connection between upper and lower part of undulation + TGeoVolume* voRB26s2WiggleC1 = new TGeoVolume( + "RB26s2PlieConn1", + new TGeoTube(kRB26s2BellowRi + kRB26s2PlieR, kRB26s2BellowRo - kRB26s2PlieR, kRB26s2PlieThickness / 2.), kMedSteelHC); + // + // One wiggle + TGeoVolumeAssembly* voRB26s2Wiggle = new TGeoVolumeAssembly("RB26s2Wiggle"); + z0 = -kRB26s2PlieThickness / 2.; + 
voRB26s2Wiggle->AddNode(voRB26s2WiggleC1, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s2PlieR - kRB26s2PlieThickness / 2.; + voRB26s2Wiggle->AddNode(voRB26s2WiggleU, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s2PlieR - kRB26s2PlieThickness / 2.; + voRB26s2Wiggle->AddNode(voRB26s2WiggleC1, 2, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s2PlieR - kRB26s2PlieThickness; + voRB26s2Wiggle->AddNode(voRB26s2WiggleL, 1, new TGeoTranslation(0., 0., z0)); + // Positioning of the volumes + z0 = -kRB26s2BellowUndL / 2. + kRB26s2ConnectionPlieR; + voRB26s2Bellow->AddNode(voRB26s2WiggleL, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s2ConnectionPlieR; + zsh = 4. * kRB26s2PlieR - 2. * kRB26s2PlieThickness; + for (int iw = 0; iw < kRB26s2NumberOfPlies; iw++) { + float zpos = z0 + iw * zsh; + voRB26s2Bellow->AddNode(voRB26s2Wiggle, iw + 1, new TGeoTranslation(0., 0., zpos - kRB26s2PlieThickness)); + } + + voRB26s2Compensator->AddNode(voRB26s2Bellow, 1, new TGeoTranslation(0., 0., 2. * kRB26s2WeldingTubeLeftL + kRB26s2BellowUndL / 2.)); + + // + // [Pos 2] Left Welding Tube + // + TGeoTube* shRB26s2CompLeftTube = new TGeoTube(kRB26s2CompTubeInnerR, kRB26s2CompTubeOuterR, kRB26s2WeldingTubeLeftL); + TGeoVolume* voRB26s2CompLeftTube = new TGeoVolume("RB26s2CompLeftTube", shRB26s2CompLeftTube, kMedSteelHC); + voRB26s2Compensator->AddNode(voRB26s2CompLeftTube, 1, new TGeoTranslation(0., 0., kRB26s2WeldingTubeLeftL)); + // + // [Pos 3] Right Welding Tube + // + TGeoTube* shRB26s2CompRightTube = + new TGeoTube(kRB26s2CompTubeInnerR, kRB26s2CompTubeOuterR, kRB26s2WeldingTubeRightL); + TGeoVolume* voRB26s2CompRightTube = new TGeoVolume("RB26s2CompRightTube", shRB26s2CompRightTube, kMedSteelHC); + voRB26s2Compensator->AddNode(voRB26s2CompRightTube, 1, new TGeoTranslation(0., 0., kRB26s2CompL - kRB26s2WeldingTubeRightL)); + // + // [Pos 4] Ring + // + TGeoTube* shRB26s2CompRing = new TGeoTube(kRB26s2CompTubeOuterR, kRB26s2RingOuterR, kRB26s2RingL); + TGeoVolume* 
voRB26s2CompRing = new TGeoVolume("RB26s2CompRing", shRB26s2CompRing, kMedSteelHC); + voRB26s2Compensator->AddNode(voRB26s2CompRing, 1, new TGeoTranslation(0., 0., kRB26s2RingZ + kRB26s2RingL)); + + // + // [Pos 5] Outer Protecting Tube + // + TGeoTube* shRB26s2CompProtTube = new TGeoTube(kRB26s2RingOuterR, kRB26s2ProtOuterR, kRB26s2ProtL); + TGeoVolume* voRB26s2CompProtTube = new TGeoVolume("RB26s2CompProtTube", shRB26s2CompProtTube, kMedSteelHC); + voRB26s2Compensator->AddNode(voRB26s2CompProtTube, 1, new TGeoTranslation(0., 0., kRB26s2ProtZ + kRB26s2ProtL)); + + /////////////////////////////////// + // Rotable Flange // + // Drawing LHCVFX_0016 // + /////////////////////////////////// + const float kRB26s1RFlangeTubeRi = 5.84 / 2.; // Tube inner radius + const float kRB26s1RFlangeTubeRo = 6.00 / 2.; // Tube outer radius + + // Pos 1 Clamp Ring LHCVFX__0015 + const float kRB26s1RFlangeCrL = 1.40; // Lenth of the clamp ring + const float kRB26s1RFlangeCrRi1 = 6.72 / 2.; // Ring inner radius section 1 + const float kRB26s1RFlangeCrRi2 = 6.06 / 2.; // Ring inner radius section 2 + const float kRB26s1RFlangeCrRo = 8.60 / 2.; // Ring outer radius + const float kRB26s1RFlangeCrD = 0.800; // Width section 1 + + TGeoPcon* shRB26s1RFlangeCr = new TGeoPcon(0., 360., 4); + z0 = 0.; + shRB26s1RFlangeCr->DefineSection(0, z0, kRB26s1RFlangeCrRi1, kRB26s1RFlangeCrRo); + z0 += kRB26s1RFlangeCrD; + shRB26s1RFlangeCr->DefineSection(1, z0, kRB26s1RFlangeCrRi1, kRB26s1RFlangeCrRo); + shRB26s1RFlangeCr->DefineSection(2, z0, kRB26s1RFlangeCrRi2, kRB26s1RFlangeCrRo); + z0 = kRB26s1RFlangeCrL; + shRB26s1RFlangeCr->DefineSection(3, z0, kRB26s1RFlangeCrRi2, kRB26s1RFlangeCrRo); + TGeoVolume* voRB26s1RFlangeCr = new TGeoVolume("RB26s1RFlangeCr", shRB26s1RFlangeCr, kMedSteelHC); + + // Pos 2 Insert LHCVFX__0015 + const float kRB26s1RFlangeIsL = 4.88; // Lenth of the insert + const float kRB26s1RFlangeIsR = 6.70 / 2.; // Ring radius + const float kRB26s1RFlangeIsD = 0.80; // Ring Width + + 
TGeoPcon* shRB26s1RFlangeIs = new TGeoPcon(0., 360., 4); + z0 = 0.; + shRB26s1RFlangeIs->DefineSection(0, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeIsR); + z0 += kRB26s1RFlangeIsD; + shRB26s1RFlangeIs->DefineSection(1, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeIsR); + shRB26s1RFlangeIs->DefineSection(2, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeTubeRo); + z0 = kRB26s1RFlangeIsL; + shRB26s1RFlangeIs->DefineSection(3, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeTubeRo); + TGeoVolume* voRB26s1RFlangeIs = new TGeoVolume("RB26s1RFlangeIs", shRB26s1RFlangeIs, kMedSteelHC); + // 4.88 + 3.7 = 8.58 (8.7 to avoid overlap) + // Pos 3 Fixed Point Section LHCVC2A_0021 + const float kRB26s1RFlangeFpL = 5.88; // Length of the fixed point section (0.08 cm added for welding) + const float kRB26s1RFlangeFpZ = 3.82; // Position of the ring + const float kRB26s1RFlangeFpD = 0.59; // Width of the ring + const float kRB26s1RFlangeFpR = 7.00 / 2.; // Radius of the ring + + TGeoPcon* shRB26s1RFlangeFp = new TGeoPcon(0., 360., 6); + z0 = 0.; + shRB26s1RFlangeFp->DefineSection(0, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeTubeRo); + z0 += kRB26s1RFlangeFpZ; + shRB26s1RFlangeFp->DefineSection(1, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeTubeRo); + shRB26s1RFlangeFp->DefineSection(2, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeFpR); + z0 += kRB26s1RFlangeFpD; + shRB26s1RFlangeFp->DefineSection(3, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeFpR); + shRB26s1RFlangeFp->DefineSection(4, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeTubeRo); + z0 = kRB26s1RFlangeFpL; + shRB26s1RFlangeFp->DefineSection(5, z0, kRB26s1RFlangeTubeRi, kRB26s1RFlangeTubeRo); + TGeoVolume* voRB26s1RFlangeFp = new TGeoVolume("RB26s1RFlangeFp", shRB26s1RFlangeFp, kMedSteelHC); + + // Put everything in a mother volume + TGeoPcon* shRB26s1RFlange = new TGeoPcon(0., 360., 8); + z0 = 0.; + shRB26s1RFlange->DefineSection(0, z0, 0., kRB26s1RFlangeCrRo); + z0 += kRB26s1RFlangeCrL; + shRB26s1RFlange->DefineSection(1, z0, 0., kRB26s1RFlangeCrRo); + 
shRB26s1RFlange->DefineSection(2, z0, 0., kRB26s1RFlangeTubeRo); + z0 = kRB26s1RFlangeIsL + kRB26s1RFlangeFpZ; + shRB26s1RFlange->DefineSection(3, z0, 0., kRB26s1RFlangeTubeRo); + shRB26s1RFlange->DefineSection(4, z0, 0., kRB26s1RFlangeFpR); + z0 += kRB26s1RFlangeFpD; + shRB26s1RFlange->DefineSection(5, z0, 0., kRB26s1RFlangeFpR); + shRB26s1RFlange->DefineSection(6, z0, 0., kRB26s1RFlangeTubeRo); + z0 = kRB26s1RFlangeIsL + kRB26s1RFlangeFpL; + shRB26s1RFlange->DefineSection(7, z0, 0., kRB26s1RFlangeTubeRo); + TGeoVolume* voRB26s1RFlange = new TGeoVolume("RB26s1RFlange", shRB26s1RFlange, kMedVacHC); + + voRB26s1RFlange->AddNode(voRB26s1RFlangeIs, 1, gGeoIdentity); + voRB26s1RFlange->AddNode(voRB26s1RFlangeCr, 1, gGeoIdentity); + voRB26s1RFlange->AddNode(voRB26s1RFlangeFp, 1, new TGeoTranslation(0., 0., kRB26s1RFlangeIsL)); + + /////////////////////////////////// + // Fixed Flange // + // Drawing LHCVFX_0006 // + /////////////////////////////////// + const float kRB26s2FFlangeL = 2.13; // Length of the flange + const float kRB26s2FFlangeD1 = 0.97; // Length of section 1 + const float kRB26s2FFlangeD2 = 0.29; // Length of section 2 + const float kRB26s2FFlangeD3 = 0.87; // Length of section 3 + const float kRB26s2FFlangeRo = 17.15 / 2.; // Flange outer radius + const float kRB26s2FFlangeRi1 = 12.30 / 2.; // Flange inner radius section 1 + const float kRB26s2FFlangeRi2 = 12.00 / 2.; // Flange inner radius section 2 + const float kRB26s2FFlangeRi3 = 12.30 / 2.; // Flange inner radius section 3 + z0 = 0; + TGeoPcon* shRB26s2FFlange = new TGeoPcon(0., 360., 6); + z0 = 0.; + shRB26s2FFlange->DefineSection(0, z0, kRB26s2FFlangeRi1, kRB26s2FFlangeRo); + z0 += kRB26s2FFlangeD1; + shRB26s2FFlange->DefineSection(1, z0, kRB26s2FFlangeRi1, kRB26s2FFlangeRo); + shRB26s2FFlange->DefineSection(2, z0, kRB26s2FFlangeRi2, kRB26s2FFlangeRo); + z0 += kRB26s2FFlangeD2; + shRB26s2FFlange->DefineSection(3, z0, kRB26s2FFlangeRi2, kRB26s2FFlangeRo); + shRB26s2FFlange->DefineSection(4, z0, 
kRB26s2FFlangeRi3, kRB26s2FFlangeRo); + z0 += kRB26s2FFlangeD3; + shRB26s2FFlange->DefineSection(5, z0, kRB26s2FFlangeRi3, kRB26s2FFlangeRo); + TGeoVolume* voRB26s2FFlange = new TGeoVolume("RB26s2FFlange", shRB26s2FFlange, kMedSteelHC); + + TGeoVolume* voRB26s2FFlangeM = new TGeoVolume("RB26s2FFlangeM", makeMotherFromTemplate(shRB26s2FFlange, 2, 5), kMedVacHC); + voRB26s2FFlangeM->AddNode(voRB26s2FFlange, 1, gGeoIdentity); + + //////////////////////////////////////// + // // + // RB26/3 // + // Drawing LHCV2a_0048 // + // Drawing LHCV2a_0002 // + //////////////////////////////////////// + // + // Pos 1 Vacuum Tubes LHCVC2A__0003 + // Pos 2 Fixed Point LHCVFX___0005 + // Pos 3 Split Flange LHCVFX___0007 + // Pos 4 Fixed Flange LHCVFX___0004 + // Pos 5 Axial Compensator LHCVC2A__0065 + // + // + // + // + /////////////////////////////////// + // Vacuum Tube // + // Drawing LHCVC2A_0003 // + /////////////////////////////////// + const float kRB26s3TubeL = 629.35 + 0.3; // 0.3 cm added for welding + const float kRB26s3TubeR1 = 12. / 2.; + const float kRB26s3TubeR2 = kRB26s3TubeR1 + 215.8 * TMath::Tan(0.829 / 180. 
* TMath::Pi()); + + TGeoPcon* shRB26s3Tube = new TGeoPcon(0., 360., 7); + // Section 1: straight section + shRB26s3Tube->DefineSection(0, 0.00, kRB26s3TubeR1, kRB26s3TubeR1 + 0.15); + shRB26s3Tube->DefineSection(1, 2.00, kRB26s3TubeR1, kRB26s3TubeR1 + 0.15); + // Section 2: 0.829 deg opening cone + shRB26s3Tube->DefineSection(2, 2.00, kRB26s3TubeR1, kRB26s3TubeR1 + 0.20); + + shRB26s3Tube->DefineSection(3, 217.80, kRB26s3TubeR2, kRB26s3TubeR2 + 0.20); + shRB26s3Tube->DefineSection(4, 217.80, kRB26s3TubeR2, kRB26s3TubeR2 + 0.30); + + shRB26s3Tube->DefineSection(5, 622.20, 30.00 / 2., 30.60 / 2.); + shRB26s3Tube->DefineSection(6, kRB26s3TubeL, 30.00 / 2., 30.60 / 2.); + + TGeoVolume* voRB26s3Tube = new TGeoVolume("RB26s3Tube", shRB26s3Tube, kMedSteelHC); + // Add the insulation layer + TGeoVolume* voRB26s3TubeIns = new TGeoVolume("RB26s3TubeIns", makeInsulationFromTemplate(shRB26s3Tube), kMedInsu); + voRB26s3Tube->AddNode(voRB26s3TubeIns, 1, gGeoIdentity); + + TGeoVolume* voRB26s3TubeM = new TGeoVolume("RB26s3TubeM", makeMotherFromTemplate(shRB26s3Tube), kMedVacHC); + voRB26s3TubeM->AddNode(voRB26s3Tube, 1, gGeoIdentity); + + /////////////////////////////////// + // Fixed Point // + // Drawing LHCVFX_0005 // + /////////////////////////////////// + const float kRB26s3FixedPointL = 16.37; // Length of the fixed point section (0.3 cm added for welding) + const float kRB26s3FixedPointZ = 9.72; // Position of the ring (0.15 cm added for welding) + const float kRB26s3FixedPointD = 0.595; // Width of the ring + const float kRB26s3FixedPointR = 13.30 / 2.; // Radius of the ring + const float kRB26s3FixedPointRi = 12.00 / 2.; // Inner radius of the tube + const float kRB26s3FixedPointRo1 = 12.30 / 2.; // Outer radius of the tube (in) + const float kRB26s3FixedPointRo2 = 12.40 / 2.; // Outer radius of the tube (out) + const float kRB26s3FixedPointDs = 1.5; // Width of straight section behind ring + const float kRB26s3FixedPointDc = 3.15; // Width of conical section behind ring 
(0.15 cm added for welding) + + TGeoPcon* shRB26s3FixedPoint = new TGeoPcon(0., 360., 8); + z0 = 0.; + shRB26s3FixedPoint->DefineSection(0, z0, kRB26s3FixedPointRi, kRB26s3FixedPointRo1); + z0 += kRB26s3FixedPointZ; + shRB26s3FixedPoint->DefineSection(1, z0, kRB26s3FixedPointRi, kRB26s3FixedPointRo1); + shRB26s3FixedPoint->DefineSection(2, z0, kRB26s3FixedPointRi, kRB26s3FixedPointR); + z0 += kRB26s3FixedPointD; + shRB26s3FixedPoint->DefineSection(3, z0, kRB26s3FixedPointRi, kRB26s3FixedPointR); + shRB26s3FixedPoint->DefineSection(4, z0, kRB26s3FixedPointRi, kRB26s3FixedPointRo1); + z0 += kRB26s3FixedPointDs; + shRB26s3FixedPoint->DefineSection(5, z0, kRB26s3FixedPointRi, kRB26s3FixedPointRo1); + z0 += kRB26s3FixedPointDc; + shRB26s3FixedPoint->DefineSection(6, z0, kRB26s3FixedPointRi, kRB26s3FixedPointRo2); + z0 = kRB26s3FixedPointL; + shRB26s3FixedPoint->DefineSection(7, z0, kRB26s3FixedPointRi, kRB26s3FixedPointRo2); + TGeoVolume* voRB26s3FixedPoint = new TGeoVolume("RB26s3FixedPoint", shRB26s3FixedPoint, kMedSteelHC); + + TGeoVolume* voRB26s3FixedPointM = new TGeoVolume("RB26s3FixedPointM", makeMotherFromTemplate(shRB26s3FixedPoint), kMedVacHC); + voRB26s3FixedPointM->AddNode(voRB26s3FixedPoint, 1, gGeoIdentity); + + /////////////////////////////////// + // Split Flange // + // Drawing LHCVFX_0005 // + /////////////////////////////////// + const float kRB26s3SFlangeL = 2.13; // Length of the flange + const float kRB26s3SFlangeD1 = 0.57; // Length of section 1 + const float kRB26s3SFlangeD2 = 0.36; // Length of section 2 + const float kRB26s3SFlangeD3 = 0.50 + 0.70; // Length of section 3 + const float kRB26s3SFlangeRo = 17.15 / 2.; // Flange outer radius + const float kRB26s3SFlangeRi1 = 12.30 / 2.; // Flange inner radius section 1 + const float kRB26s3SFlangeRi2 = 12.00 / 2.; // Flange inner radius section 2 + const float kRB26s3SFlangeRi3 = 12.30 / 2.; // Flange inner radius section 3 + z0 = 0; + TGeoPcon* shRB26s3SFlange = new TGeoPcon(0., 360., 6); + z0 = 
0.; + shRB26s3SFlange->DefineSection(0, z0, kRB26s3SFlangeRi1, kRB26s3SFlangeRo); + z0 += kRB26s3SFlangeD1; + shRB26s3SFlange->DefineSection(1, z0, kRB26s3SFlangeRi1, kRB26s3SFlangeRo); + shRB26s3SFlange->DefineSection(2, z0, kRB26s3SFlangeRi2, kRB26s3SFlangeRo); + z0 += kRB26s3SFlangeD2; + shRB26s3SFlange->DefineSection(3, z0, kRB26s3SFlangeRi2, kRB26s3SFlangeRo); + shRB26s3SFlange->DefineSection(4, z0, kRB26s3SFlangeRi3, kRB26s3SFlangeRo); + z0 += kRB26s3SFlangeD3; + shRB26s3SFlange->DefineSection(5, z0, kRB26s3SFlangeRi3, kRB26s3SFlangeRo); + TGeoVolume* voRB26s3SFlange = new TGeoVolume("RB26s3SFlange", shRB26s3SFlange, kMedSteelHC); + + TGeoVolume* voRB26s3SFlangeM = new TGeoVolume("RB26s3SFlangeM", makeMotherFromTemplate(shRB26s3SFlange, 0, 3), kMedVacHC); + voRB26s3SFlangeM->AddNode(voRB26s3SFlange, 1, gGeoIdentity); + + /////////////////////////////////// + // RB26/3 Fixed Flange // + // Drawing LHCVFX___0004 // + /////////////////////////////////// + const float kRB26s3FFlangeL = 2.99; // Length of the flange + const float kRB26s3FFlangeD1 = 1.72; // Length of section 1 + const float kRB26s3FFlangeD2 = 0.30; // Length of section 2 + const float kRB26s3FFlangeD3 = 0.97; // Length of section 3 + const float kRB26s3FFlangeRo = 36.20 / 2.; // Flange outer radius + const float kRB26s3FFlangeRi1 = 30.60 / 2.; // Flange inner radius section 1 + const float kRB26s3FFlangeRi2 = 30.00 / 2.; // Flange inner radius section 2 + const float kRB26s3FFlangeRi3 = 30.60 / 2.; // Flange inner radius section 3 + z0 = 0; + TGeoPcon* shRB26s3FFlange = new TGeoPcon(0., 360., 6); + z0 = 0.; + shRB26s3FFlange->DefineSection(0, z0, kRB26s3FFlangeRi1, kRB26s3FFlangeRo); + z0 += kRB26s3FFlangeD1; + shRB26s3FFlange->DefineSection(1, z0, kRB26s3FFlangeRi1, kRB26s3FFlangeRo); + shRB26s3FFlange->DefineSection(2, z0, kRB26s3FFlangeRi2, kRB26s3FFlangeRo); + z0 += kRB26s3FFlangeD2; + shRB26s3FFlange->DefineSection(3, z0, kRB26s3FFlangeRi2, kRB26s3FFlangeRo); + 
shRB26s3FFlange->DefineSection(4, z0, kRB26s3FFlangeRi3, kRB26s3FFlangeRo); + z0 += kRB26s3FFlangeD3; + shRB26s3FFlange->DefineSection(5, z0, kRB26s3FFlangeRi3, kRB26s3FFlangeRo); + TGeoVolume* voRB26s3FFlange = new TGeoVolume("RB26s3FFlange", shRB26s3FFlange, kMedSteelHC); + + TGeoVolume* voRB26s3FFlangeM = new TGeoVolume("RB26s3FFlangeM", makeMotherFromTemplate(shRB26s3FFlange, 2, 5), kMedVacHC); + voRB26s3FFlangeM->AddNode(voRB26s3FFlange, 1, gGeoIdentity); + + /////////////////////////////////// + // RB26/3 Axial Compensator // + // Drawing LHCVC2a_0065 // + /////////////////////////////////// + const float kRB26s3CompL = 42.3; // Length of the compensator (0.3 cm added for welding) + const float kRB26s3BellowRo = 34.00 / 2.; // Bellow outer radius [Pos 1] + const float kRB26s3BellowRi = 30.10 / 2.; // Bellow inner radius [Pos 1] + const int kRB26s3NumberOfPlies = 13; // Number of plies [Pos 1] + const float kRB26s3BellowUndL = 17.70; // Length of undulated region [Pos 1] + const float kRB26s3PlieThickness = 0.06; // Plie thickness [Pos 1] + const float kRB26s3ConnectionPlieR = 0.21; // Connection plie radius [Pos 1] + // Plie radius + const float kRB26s3PlieR = (kRB26s3BellowUndL - 4. * kRB26s3ConnectionPlieR + 2. * kRB26s3PlieThickness + + (2. * kRB26s3NumberOfPlies - 2.) * kRB26s3PlieThickness) / + (4. * kRB26s3NumberOfPlies - 2.); + + // + // The welding tubes have 3 sections with different radii and 2 transition regions. 
+ // Section 1: connection to the outside + // Section 2: commection to the bellow + // Section 3: between 1 and 2 + const float kRB26s3CompTubeInnerR1 = 30.0 / 2.; // Outer Connection tubes inner radius [Pos 4 + 3] + const float kRB26s3CompTubeOuterR1 = 30.6 / 2.; // Outer Connection tubes outer radius [Pos 4 + 3] + const float kRB26s3CompTubeInnerR2 = 29.4 / 2.; // Connection tubes inner radius [Pos 4 + 3] + const float kRB26s3CompTubeOuterR2 = 30.0 / 2.; // Connection tubes outer radius [Pos 4 + 3] + const float kRB26s3CompTubeInnerR3 = 30.6 / 2.; // Connection tubes inner radius at bellow [Pos 4 + 3] + const float kRB26s3CompTubeOuterR3 = 32.2 / 2.; // Connection tubes outer radius at bellow [Pos 4 + 3] + + const float kRB26s3WeldingTubeLeftL1 = 2.0; // Left connection tube length [Pos 4] + const float kRB26s3WeldingTubeLeftL2 = 3.4; // Left connection tube length [Pos 4] + const float kRB26s3WeldingTubeLeftL = 7.0; // Left connection tube total length [Pos 4] + const float kRB26s3WeldingTubeRightL1 = 2.3; // Right connection tube length [Pos 3] (0.3 cm added for welding) + const float kRB26s3WeldingTubeRightL2 = 13.4; // Right connection tube length [Pos 3] + + const float kRB26s3WeldingTubeT1 = 0.6; // Length of first r-transition [Pos 4 + 3] + const float kRB26s3WeldingTubeT2 = 1.0; // Length of 2nd r-transition [Pos 4 + 3] + + const float kRB26s3RingOuterR = 36.1 / 2.; // Ring inner radius [Pos 4] + const float kRB26s3RingL = 0.8 / 2.; // Ring half length [Pos 4] + const float kRB26s3RingZ = 3.7; // Ring z-position [Pos 4] + const float kRB26s3ProtOuterR = 36.2 / 2.; // Protection tube outer radius [Pos 2] + const float kRB26s3ProtL = 27.0 / 2.; // Protection tube half length [Pos 2] + const float kRB26s3ProtZ = 4.0; // Protection tube z-position [Pos 2] + + // Mother volume + // + TGeoPcon* shRB26s3Compensator = new TGeoPcon(0., 360., 6); + shRB26s3Compensator->DefineSection(0, 0.0, 0., kRB26s3CompTubeOuterR1); + shRB26s3Compensator->DefineSection(1, 
kRB26s3RingZ, 0., kRB26s3CompTubeOuterR1); + shRB26s3Compensator->DefineSection(2, kRB26s3RingZ, 0., kRB26s3ProtOuterR); + shRB26s3Compensator->DefineSection(3, kRB26s3ProtZ + 2. * kRB26s3ProtL, 0., kRB26s3ProtOuterR); + shRB26s3Compensator->DefineSection(4, kRB26s3ProtZ + 2. * kRB26s3ProtL, 0., kRB26s3CompTubeOuterR1); + shRB26s3Compensator->DefineSection(5, kRB26s3CompL, 0., kRB26s3CompTubeOuterR1); + TGeoVolume* voRB26s3Compensator = new TGeoVolume("RB26s3Compensator", shRB26s3Compensator, kMedVacHC); + + // + // [Pos 1] Bellow + // + // + + // + // Upper part of the undulation + // + TGeoTorus* shRB26s3PlieTorusU = new TGeoTorus(kRB26s3BellowRo - kRB26s3PlieR, kRB26s3PlieR - kRB26s3PlieThickness, kRB26s3PlieR); + shRB26s3PlieTorusU->SetName("RB26s3TorusU"); + TGeoTube* shRB26s3PlieTubeU = new TGeoTube(kRB26s3BellowRo - kRB26s3PlieR, kRB26s3BellowRo, kRB26s3PlieR); + shRB26s3PlieTubeU->SetName("RB26s3TubeU"); + TGeoCompositeShape* shRB26s3UpperPlie = new TGeoCompositeShape("RB26s3UpperPlie", "RB26s3TorusU*RB26s3TubeU"); + + TGeoVolume* voRB26s3WiggleU = new TGeoVolume("RB26s3UpperPlie", shRB26s3UpperPlie, kMedSteelHC); + // + // Lower part of the undulation + TGeoTorus* shRB26s3PlieTorusL = new TGeoTorus(kRB26s3BellowRi + kRB26s3PlieR, kRB26s3PlieR - kRB26s3PlieThickness, kRB26s3PlieR); + shRB26s3PlieTorusL->SetName("RB26s3TorusL"); + TGeoTube* shRB26s3PlieTubeL = new TGeoTube(kRB26s3BellowRi, kRB26s3BellowRi + kRB26s3PlieR, kRB26s3PlieR); + shRB26s3PlieTubeL->SetName("RB26s3TubeL"); + TGeoCompositeShape* shRB26s3LowerPlie = new TGeoCompositeShape("RB26s3LowerPlie", "RB26s3TorusL*RB26s3TubeL"); + + TGeoVolume* voRB26s3WiggleL = new TGeoVolume("RB26s3LowerPlie", shRB26s3LowerPlie, kMedSteelHC); + + // + // Connection between upper and lower part of undulation + TGeoVolume* voRB26s3WiggleC1 = new TGeoVolume( + "RB26s3PlieConn1", + new TGeoTube(kRB26s3BellowRi + kRB26s3PlieR, kRB26s3BellowRo - kRB26s3PlieR, kRB26s3PlieThickness / 2.), kMedSteelHC); + // + // One 
wiggle + TGeoVolumeAssembly* voRB26s3Wiggle = new TGeoVolumeAssembly("RB26s3Wiggle"); + z0 = -kRB26s3PlieThickness / 2.; + voRB26s3Wiggle->AddNode(voRB26s3WiggleC1, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3PlieR - kRB26s3PlieThickness / 2.; + voRB26s3Wiggle->AddNode(voRB26s3WiggleU, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3PlieR - kRB26s3PlieThickness / 2.; + voRB26s3Wiggle->AddNode(voRB26s3WiggleC1, 2, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3PlieR - kRB26s3PlieThickness; + voRB26s3Wiggle->AddNode(voRB26s3WiggleL, 1, new TGeoTranslation(0., 0., z0)); + voRB26s3Wiggle->GetShape()->ComputeBBox(); // enforce recomputing of BBox + + // + // The bellow itself + float zBellowTot = kRB26s3NumberOfPlies * (static_cast(voRB26s3Wiggle->GetShape()))->GetDZ(); + TGeoVolume* voRB26s3Bellow = new TGeoVolume("RB26s3Bellow", new TGeoTube(kRB26s3BellowRi, kRB26s3BellowRo, zBellowTot), kMedVacHC); + + // Positioning of the volumes + z0 = -kRB26s2BellowUndL / 2. + kRB26s2ConnectionPlieR; + voRB26s2Bellow->AddNode(voRB26s2WiggleL, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s2ConnectionPlieR; + zsh = 4. * kRB26s2PlieR - 2. 
* kRB26s2PlieThickness; + for (int iw = 0; iw < kRB26s2NumberOfPlies; iw++) { + float zpos = z0 + iw * zsh; + voRB26s2Bellow->AddNode(voRB26s2Wiggle, iw + 1, new TGeoTranslation(0., 0., zpos - kRB26s2PlieThickness)); + } + + voRB26s3Compensator->AddNode(voRB26s3Bellow, 1, new TGeoTranslation(0., 0., kRB26s3WeldingTubeLeftL + zBellowTot)); + + // + // [Pos 2] Outer Protecting Tube + // + TGeoTube* shRB26s3CompProtTube = new TGeoTube(kRB26s3RingOuterR, kRB26s3ProtOuterR, kRB26s3ProtL); + TGeoVolume* voRB26s3CompProtTube = new TGeoVolume("RB26s3CompProtTube", shRB26s3CompProtTube, kMedSteelHC); + voRB26s3Compensator->AddNode(voRB26s3CompProtTube, 1, new TGeoTranslation(0., 0., kRB26s3ProtZ + kRB26s3ProtL)); + + // + // [Pos 3] Right Welding Tube + // + TGeoPcon* shRB26s3CompRightTube = new TGeoPcon(0., 360., 5); + z0 = 0.; + shRB26s3CompRightTube->DefineSection(0, z0, kRB26s3CompTubeInnerR3, kRB26s3CompTubeOuterR3); + z0 += kRB26s3WeldingTubeT2; + shRB26s3CompRightTube->DefineSection(1, z0, kRB26s3CompTubeInnerR2, kRB26s3CompTubeOuterR2); + z0 += kRB26s3WeldingTubeRightL2; + shRB26s3CompRightTube->DefineSection(2, z0, kRB26s3CompTubeInnerR2, kRB26s3CompTubeOuterR2); + z0 += kRB26s3WeldingTubeT1; + shRB26s3CompRightTube->DefineSection(3, z0, kRB26s3CompTubeInnerR1, kRB26s3CompTubeOuterR1); + z0 += kRB26s3WeldingTubeRightL1; + shRB26s3CompRightTube->DefineSection(4, z0, kRB26s3CompTubeInnerR1, kRB26s3CompTubeOuterR1); + + TGeoVolume* voRB26s3CompRightTube = new TGeoVolume("RB26s3CompRightTube", shRB26s3CompRightTube, kMedSteelHC); + voRB26s3Compensator->AddNode(voRB26s3CompRightTube, 1, new TGeoTranslation(0., 0., kRB26s3CompL - z0)); + + // + // [Pos 4] Left Welding Tube + // + TGeoPcon* shRB26s3CompLeftTube = new TGeoPcon(0., 360., 5); + z0 = 0.; + shRB26s3CompLeftTube->DefineSection(0, z0, kRB26s3CompTubeInnerR1, kRB26s3CompTubeOuterR1); + z0 += kRB26s3WeldingTubeLeftL1; + shRB26s3CompLeftTube->DefineSection(1, z0, kRB26s3CompTubeInnerR1, kRB26s3CompTubeOuterR1); + 
z0 += kRB26s3WeldingTubeT1; + shRB26s3CompLeftTube->DefineSection(2, z0, kRB26s3CompTubeInnerR2, kRB26s3CompTubeOuterR2); + z0 += kRB26s3WeldingTubeLeftL2; + shRB26s3CompLeftTube->DefineSection(3, z0, kRB26s3CompTubeInnerR2, kRB26s3CompTubeOuterR2); + z0 += kRB26s3WeldingTubeT2; + shRB26s3CompLeftTube->DefineSection(4, z0, kRB26s3CompTubeInnerR3, kRB26s3CompTubeOuterR3); + + TGeoVolume* voRB26s3CompLeftTube = new TGeoVolume("RB26s3CompLeftTube", shRB26s3CompLeftTube, kMedSteelHC); + voRB26s3Compensator->AddNode(voRB26s3CompLeftTube, 1, gGeoIdentity); + // + // [Pos 5] Ring + // + TGeoTube* shRB26s3CompRing = new TGeoTube(kRB26s3CompTubeOuterR2, kRB26s3RingOuterR, kRB26s3RingL); + TGeoVolume* voRB26s3CompRing = new TGeoVolume("RB26s3CompRing", shRB26s3CompRing, kMedSteelHC); + voRB26s3Compensator->AddNode(voRB26s3CompRing, 1, new TGeoTranslation(0., 0., kRB26s3RingZ + kRB26s3RingL)); + + /////////////////////////////////////////// + // // + // RB26/4-5 // + // Drawing LHCV2a_0012 [as installed] // + //////////////////////////////////////////// + // Pos1 Vacuum Tubes LHCVC2A__0014 + // Pos2 Compensator LHCVC2A__0066 + // Pos3 Fixed Point Section LHCVC2A__0016 + // Pos4 Split Flange LHCVFX___0005 + // Pos5 RotableFlange LHCVFX___0009 + //////////////////////////////////////////// + + /////////////////////////////////// + // RB26/4-5 Vacuum Tubes // + // Drawing LHCVC2a_0014 // + /////////////////////////////////// + const float kRB26s45TubeL = 593.12 + 0.3; // 0.3 cm added for welding + + TGeoPcon* shRB26s45Tube = new TGeoPcon(0., 360., 11); + // Section 1: straight section + shRB26s45Tube->DefineSection(0, 0.00, 30.00 / 2., 30.60 / 2.); + shRB26s45Tube->DefineSection(1, 1.20, 30.00 / 2., 30.60 / 2.); + shRB26s45Tube->DefineSection(2, 1.20, 30.00 / 2., 30.80 / 2.); + shRB26s45Tube->DefineSection(3, 25.10, 30.00 / 2., 30.80 / 2.); + // Section 2: 0.932 deg opening cone + shRB26s45Tube->DefineSection(4, 486.10, 45.00 / 2., 45.80 / 2.); + // Section 3: straight section 4 
mm + shRB26s45Tube->DefineSection(5, 512.10, 45.00 / 2., 45.80 / 2.); + // Section 4: straight section 3 mm + shRB26s45Tube->DefineSection(6, 512.10, 45.00 / 2., 45.60 / 2.); + shRB26s45Tube->DefineSection(7, 527.70, 45.00 / 2., 45.60 / 2.); + // Section 4: closing cone + shRB26s45Tube->DefineSection(8, 591.30, 10.00 / 2., 10.60 / 2.); + shRB26s45Tube->DefineSection(9, 591.89, 10.00 / 2., 10.30 / 2.); + + shRB26s45Tube->DefineSection(10, kRB26s45TubeL, 10.00 / 2., 10.30 / 2.); + TGeoVolume* voRB26s45Tube = new TGeoVolume("RB26s45Tube", shRB26s45Tube, kMedSteelHC); + + TGeoVolume* voRB26s45TubeM = new TGeoVolume("RB26s45TubeM", makeMotherFromTemplate(shRB26s45Tube), kMedVacHC); + voRB26s45TubeM->AddNode(voRB26s45Tube, 1, gGeoIdentity); + + /////////////////////////////////// + // RB26/5 Axial Compensator // + // Drawing LHCVC2a_0066 // + /////////////////////////////////// + const float kRB26s5CompL = 27.60; // Length of the compensator (0.30 cm added for welding) + const float kRB26s5BellowRo = 12.48 / 2.; // Bellow outer radius [Pos 1] + const float kRB26s5BellowRi = 10.32 / 2.; // Bellow inner radius [Pos 1] + const int kRB26s5NumberOfPlies = 15; // Number of plies [Pos 1] + const float kRB26s5BellowUndL = 10.50; // Length of undulated region [Pos 1] + const float kRB26s5PlieThickness = 0.025; // Plie thickness [Pos 1] + const float kRB26s5ConnectionPlieR = 0.21; // Connection plie radius [Pos 1] + const float kRB26s5ConnectionR = 11.2 / 2.; // Bellow connection radius [Pos 1] + // Plie radius + const float kRB26s5PlieR = (kRB26s5BellowUndL - 4. * kRB26s5ConnectionPlieR + 2. * kRB26s5PlieThickness + + (2. * kRB26s5NumberOfPlies - 2.) * kRB26s5PlieThickness) / + (4. 
* kRB26s5NumberOfPlies - 2.); + const float kRB26s5CompTubeInnerR = 10.00 / 2.; // Connection tubes inner radius [Pos 2 + 3] + const float kRB26s5CompTubeOuterR = 10.30 / 2.; // Connection tubes outer radius [Pos 2 + 3] + const float kRB26s5WeldingTubeLeftL = 3.70 / 2.; // Left connection tube half length [Pos 2] + const float kRB26s5WeldingTubeRightL = 13.40 / 2.; // Right connection tube half length [Pos 3] (0.3 cm added for welding) + const float kRB26s5RingInnerR = 11.2 / 2.; // Ring inner radius [Pos 4] + const float kRB26s5RingOuterR = 16.0 / 2.; // Ring inner radius [Pos 4] + const float kRB26s5RingL = 0.4 / 2.; // Ring half length [Pos 4] + const float kRB26s5RingZ = 14.97; // Ring z-position [Pos 4] + const float kRB26s5ProtOuterR = 16.2 / 2.; // Protection tube outer radius [Pos 5] + const float kRB26s5ProtL = 13.0 / 2.; // Protection tube half length [Pos 5] + const float kRB26s5ProtZ = 2.17; // Protection tube z-position [Pos 5] + const float kRB26s5DetailZR = 11.3 / 2.; // Detail Z max radius + + // Mother volume + // + TGeoPcon* shRB26s5Compensator = new TGeoPcon(0., 360., 8); + shRB26s5Compensator->DefineSection(0, 0.0, 0., kRB26s5CompTubeOuterR); + shRB26s5Compensator->DefineSection(1, kRB26s5ProtZ, 0., kRB26s5CompTubeOuterR); + shRB26s5Compensator->DefineSection(2, kRB26s5ProtZ, 0., kRB26s5ProtOuterR); + shRB26s5Compensator->DefineSection(3, kRB26s5ProtZ + 2. * kRB26s5ProtL + 2. * kRB26s5RingL, 0., kRB26s5ProtOuterR); + shRB26s5Compensator->DefineSection(4, kRB26s5ProtZ + 2. * kRB26s5ProtL + 2. 
* kRB26s5RingL, 0., kRB26s5DetailZR); + shRB26s5Compensator->DefineSection(5, kRB26s5CompL - 8., 0., kRB26s5DetailZR); + shRB26s5Compensator->DefineSection(6, kRB26s5CompL - 8., 0., kRB26s5CompTubeOuterR); + shRB26s5Compensator->DefineSection(7, kRB26s5CompL, 0., kRB26s5CompTubeOuterR); + TGeoVolume* voRB26s5Compensator = new TGeoVolume("RB26s5Compensator", shRB26s5Compensator, kMedVacHC); + + // + // [Pos 1] Bellow + // + // + TGeoVolume* voRB26s5Bellow = + new TGeoVolume("RB26s5Bellow", new TGeoTube(kRB26s5BellowRi, kRB26s5BellowRo, kRB26s5BellowUndL / 2.), kMedVacHC); + // + // Upper part of the undulation + // + TGeoTorus* shRB26s5PlieTorusU = new TGeoTorus(kRB26s5BellowRo - kRB26s5PlieR, kRB26s5PlieR - kRB26s5PlieThickness, kRB26s5PlieR); + shRB26s5PlieTorusU->SetName("RB26s5TorusU"); + TGeoTube* shRB26s5PlieTubeU = new TGeoTube(kRB26s5BellowRo - kRB26s5PlieR, kRB26s5BellowRo, kRB26s5PlieR); + shRB26s5PlieTubeU->SetName("RB26s5TubeU"); + TGeoCompositeShape* shRB26s5UpperPlie = new TGeoCompositeShape("RB26s5UpperPlie", "RB26s5TorusU*RB26s5TubeU"); + + TGeoVolume* voRB26s5WiggleU = new TGeoVolume("RB26s5UpperPlie", shRB26s5UpperPlie, kMedSteelHC); + // + // Lower part of the undulation + TGeoTorus* shRB26s5PlieTorusL = new TGeoTorus(kRB26s5BellowRi + kRB26s5PlieR, kRB26s5PlieR - kRB26s5PlieThickness, kRB26s5PlieR); + shRB26s5PlieTorusL->SetName("RB26s5TorusL"); + TGeoTube* shRB26s5PlieTubeL = new TGeoTube(kRB26s5BellowRi, kRB26s5BellowRi + kRB26s5PlieR, kRB26s5PlieR); + shRB26s5PlieTubeL->SetName("RB26s5TubeL"); + TGeoCompositeShape* shRB26s5LowerPlie = new TGeoCompositeShape("RB26s5LowerPlie", "RB26s5TorusL*RB26s5TubeL"); + + TGeoVolume* voRB26s5WiggleL = new TGeoVolume("RB26s5LowerPlie", shRB26s5LowerPlie, kMedSteelHC); + + // + // Connection between upper and lower part of undulation + TGeoVolume* voRB26s5WiggleC1 = new TGeoVolume("RB26s5PlieConn1", + new TGeoTube(kRB26s5BellowRi + kRB26s5PlieR, kRB26s5BellowRo - kRB26s5PlieR, kRB26s5PlieThickness / 2.), 
kMedSteelHC); + // + // One wiggle + TGeoVolumeAssembly* voRB26s5Wiggle = new TGeoVolumeAssembly("RB26s5Wiggle"); + z0 = -kRB26s5PlieThickness / 2.; + voRB26s5Wiggle->AddNode(voRB26s5WiggleC1, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s5PlieR - kRB26s5PlieThickness / 2.; + voRB26s5Wiggle->AddNode(voRB26s5WiggleU, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s5PlieR - kRB26s5PlieThickness / 2.; + voRB26s5Wiggle->AddNode(voRB26s5WiggleC1, 2, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s5PlieR - kRB26s5PlieThickness; + voRB26s5Wiggle->AddNode(voRB26s5WiggleL, 1, new TGeoTranslation(0., 0., z0)); + // Positioning of the volumes + z0 = -kRB26s5BellowUndL / 2. + kRB26s5ConnectionPlieR; + voRB26s5Bellow->AddNode(voRB26s5WiggleL, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s5ConnectionPlieR; + zsh = 4. * kRB26s5PlieR - 2. * kRB26s5PlieThickness; + for (int iw = 0; iw < kRB26s5NumberOfPlies; iw++) { + float zpos = z0 + iw * zsh; + voRB26s5Bellow->AddNode(voRB26s5Wiggle, iw + 1, new TGeoTranslation(0., 0., zpos - kRB26s5PlieThickness)); + } + + voRB26s5Compensator->AddNode(voRB26s5Bellow, 1, new TGeoTranslation(0., 0., 2. 
* kRB26s5WeldingTubeLeftL + kRB26s5BellowUndL / 2.)); + + // + // [Pos 2] Left Welding Tube + // + TGeoPcon* shRB26s5CompLeftTube = new TGeoPcon(0., 360., 3); + z0 = 0; + shRB26s5CompLeftTube->DefineSection(0, z0, kRB26s5CompTubeInnerR, kRB26s5CompTubeOuterR); + z0 += 2 * kRB26s5WeldingTubeLeftL - (kRB26s5ConnectionR - kRB26s5CompTubeOuterR); + shRB26s5CompLeftTube->DefineSection(1, z0, kRB26s5CompTubeInnerR, kRB26s5CompTubeOuterR); + z0 += (kRB26s5ConnectionR - kRB26s5CompTubeOuterR); + shRB26s5CompLeftTube->DefineSection(2, z0, kRB26s5ConnectionR - 0.15, kRB26s5ConnectionR); + TGeoVolume* voRB26s5CompLeftTube = new TGeoVolume("RB26s5CompLeftTube", shRB26s5CompLeftTube, kMedSteelHC); + voRB26s5Compensator->AddNode(voRB26s5CompLeftTube, 1, gGeoIdentity); + // + // [Pos 3] Right Welding Tube + // + TGeoPcon* shRB26s5CompRightTube = new TGeoPcon(0., 360., 11); + // Detail Z + shRB26s5CompRightTube->DefineSection(0, 0., kRB26s5CompTubeInnerR + 0.22, 11.2 / 2.); + shRB26s5CompRightTube->DefineSection(1, 0.05, kRB26s5CompTubeInnerR + 0.18, 11.2 / 2.); + shRB26s5CompRightTube->DefineSection(2, 0.22, kRB26s5CompTubeInnerR, 11.2 / 2. - 0.22); + shRB26s5CompRightTube->DefineSection(3, 0.44, kRB26s5CompTubeInnerR, 11.2 / 2.); + shRB26s5CompRightTube->DefineSection(4, 1.70, kRB26s5CompTubeInnerR, 11.2 / 2.); + shRB26s5CompRightTube->DefineSection(5, 2.10, kRB26s5CompTubeInnerR, kRB26s5CompTubeOuterR); + shRB26s5CompRightTube->DefineSection(6, 2.80, kRB26s5CompTubeInnerR, kRB26s5CompTubeOuterR); + shRB26s5CompRightTube->DefineSection(7, 2.80, kRB26s5CompTubeInnerR, 11.3 / 2.); + shRB26s5CompRightTube->DefineSection(8, 3.40, kRB26s5CompTubeInnerR, 11.3 / 2.); + // Normal pipe + shRB26s5CompRightTube->DefineSection(9, 3.50, kRB26s5CompTubeInnerR, kRB26s5CompTubeOuterR); + shRB26s5CompRightTube->DefineSection(10, 2. 
* kRB26s5WeldingTubeRightL, kRB26s5CompTubeInnerR, kRB26s5CompTubeOuterR); + + TGeoVolume* voRB26s5CompRightTube = new TGeoVolume("RB26s5CompRightTube", shRB26s5CompRightTube, kMedSteelHC); + voRB26s5Compensator->AddNode(voRB26s5CompRightTube, 1, new TGeoTranslation(0., 0., kRB26s5CompL - 2. * kRB26s5WeldingTubeRightL)); + // + // [Pos 4] Ring + // + TGeoTube* shRB26s5CompRing = new TGeoTube(kRB26s5RingInnerR, kRB26s5RingOuterR, kRB26s5RingL); + TGeoVolume* voRB26s5CompRing = new TGeoVolume("RB26s5CompRing", shRB26s5CompRing, kMedSteelHC); + voRB26s5Compensator->AddNode(voRB26s5CompRing, 1, new TGeoTranslation(0., 0., kRB26s5RingZ + kRB26s5RingL)); + + // + // [Pos 5] Outer Protecting Tube + // + TGeoTube* shRB26s5CompProtTube = new TGeoTube(kRB26s5RingOuterR, kRB26s5ProtOuterR, kRB26s5ProtL); + TGeoVolume* voRB26s5CompProtTube = new TGeoVolume("RB26s5CompProtTube", shRB26s5CompProtTube, kMedSteelHC); + voRB26s5Compensator->AddNode(voRB26s5CompProtTube, 1, new TGeoTranslation(0., 0., kRB26s5ProtZ + kRB26s5ProtL)); + + /////////////////////////////////////// + // RB26/4 Fixed Point Section // + // Drawing LHCVC2a_0016 // + /////////////////////////////////////// + const float kRB26s4TubeRi = 30.30 / 2.; // Tube inner radius (0.3 cm added for welding) + const float kRB26s4TubeRo = 30.60 / 2.; // Tube outer radius + const float kRB26s4FixedPointL = 12.63; // Length of the fixed point section + const float kRB26s4FixedPointZ = 10.53; // Position of the ring (0.15 added for welding) + const float kRB26s4FixedPointD = 0.595; // Width of the ring + const float kRB26s4FixedPointR = 31.60 / 2.; // Radius of the ring + + TGeoPcon* shRB26s4FixedPoint = new TGeoPcon(0., 360., 6); + z0 = 0.; + shRB26s4FixedPoint->DefineSection(0, z0, kRB26s4TubeRi, kRB26s4TubeRo); + z0 += kRB26s4FixedPointZ; + shRB26s4FixedPoint->DefineSection(1, z0, kRB26s4TubeRi, kRB26s4TubeRo); + shRB26s4FixedPoint->DefineSection(2, z0, kRB26s4TubeRi, kRB26s4FixedPointR); + z0 += kRB26s4FixedPointD; + 
shRB26s4FixedPoint->DefineSection(3, z0, kRB26s4TubeRi, kRB26s4FixedPointR); + shRB26s4FixedPoint->DefineSection(4, z0, kRB26s4TubeRi, kRB26s4TubeRo); + z0 = kRB26s4FixedPointL; + shRB26s4FixedPoint->DefineSection(5, z0, kRB26s4TubeRi, kRB26s4TubeRo); + TGeoVolume* voRB26s4FixedPoint = new TGeoVolume("RB26s4FixedPoint", shRB26s4FixedPoint, kMedSteelHC); + + TGeoVolume* voRB26s4FixedPointM = new TGeoVolume("RB26s4FixedPointM", makeMotherFromTemplate(shRB26s4FixedPoint), kMedVacHC); + voRB26s4FixedPointM->AddNode(voRB26s4FixedPoint, 1, gGeoIdentity); + + /////////////////////////////////////// + // RB26/4 Split Flange // + // Drawing LHCVFX__0005 // + /////////////////////////////////////// + const float kRB26s4SFlangeL = 2.99; // Length of the flange + const float kRB26s4SFlangeD1 = 0.85; // Length of section 1 + const float kRB26s4SFlangeD2 = 0.36; // Length of section 2 + const float kRB26s4SFlangeD3 = 0.73 + 1.05; // Length of section 3 + const float kRB26s4SFlangeRo = 36.20 / 2.; // Flange outer radius + const float kRB26s4SFlangeRi1 = 30.60 / 2.; // Flange inner radius section 1 + const float kRB26s4SFlangeRi2 = 30.00 / 2.; // Flange inner radius section 2 + const float kRB26s4SFlangeRi3 = 30.60 / 2.; // Flange inner radius section 3 + z0 = 0; + TGeoPcon* shRB26s4SFlange = new TGeoPcon(0., 360., 6); + z0 = 0.; + shRB26s4SFlange->DefineSection(0, z0, kRB26s4SFlangeRi1, kRB26s4SFlangeRo); + z0 += kRB26s4SFlangeD1; + shRB26s4SFlange->DefineSection(1, z0, kRB26s4SFlangeRi1, kRB26s4SFlangeRo); + shRB26s4SFlange->DefineSection(2, z0, kRB26s4SFlangeRi2, kRB26s4SFlangeRo); + z0 += kRB26s4SFlangeD2; + shRB26s4SFlange->DefineSection(3, z0, kRB26s4SFlangeRi2, kRB26s4SFlangeRo); + shRB26s4SFlange->DefineSection(4, z0, kRB26s4SFlangeRi3, kRB26s4SFlangeRo); + z0 += kRB26s4SFlangeD3; + shRB26s4SFlange->DefineSection(5, z0, kRB26s4SFlangeRi3, kRB26s4SFlangeRo); + TGeoVolume* voRB26s4SFlange = new TGeoVolume("RB26s4SFlange", shRB26s4SFlange, kMedSteelHC); + + TGeoVolume* 
voRB26s4SFlangeM = new TGeoVolume("RB26s4SFlangeM", makeMotherFromTemplate(shRB26s4SFlange, 0, 3), kMedVacHC); + voRB26s4SFlangeM->AddNode(voRB26s4SFlange, 1, gGeoIdentity); + + /////////////////////////////////////// + // RB26/5 Rotable Flange // + // Drawing LHCVFX__0009 // + /////////////////////////////////////// + const float kRB26s5RFlangeL = 1.86; // Length of the flange + const float kRB26s5RFlangeD1 = 0.61; // Length of section 1 + const float kRB26s5RFlangeD2 = 0.15; // Length of section 2 + const float kRB26s5RFlangeD3 = 0.60; // Length of section 3 + const float kRB26s5RFlangeD4 = 0.50; // Length of section 4 + const float kRB26s5RFlangeRo = 15.20 / 2.; // Flange outer radius + const float kRB26s5RFlangeRi1 = 10.30 / 2.; // Flange inner radius section 1 + const float kRB26s5RFlangeRi2 = 10.00 / 2.; // Flange inner radius section 2 + const float kRB26s5RFlangeRi3 = 10.30 / 2.; // Flange inner radius section 3 + const float kRB26s5RFlangeRi4 = 10.50 / 2.; // Flange inner radius section 4 + + z0 = 0; + TGeoPcon* shRB26s5RFlange = new TGeoPcon(0., 360., 8); + z0 = 0.; + shRB26s5RFlange->DefineSection(0, z0, kRB26s5RFlangeRi4, kRB26s5RFlangeRo); + z0 += kRB26s5RFlangeD4; + shRB26s5RFlange->DefineSection(1, z0, kRB26s5RFlangeRi4, kRB26s5RFlangeRo); + shRB26s5RFlange->DefineSection(2, z0, kRB26s5RFlangeRi3, kRB26s5RFlangeRo); + z0 += kRB26s5RFlangeD3; + shRB26s5RFlange->DefineSection(3, z0, kRB26s5RFlangeRi3, kRB26s5RFlangeRo); + shRB26s5RFlange->DefineSection(4, z0, kRB26s5RFlangeRi2, kRB26s5RFlangeRo); + z0 += kRB26s5RFlangeD2; + shRB26s5RFlange->DefineSection(5, z0, kRB26s5RFlangeRi2, kRB26s5RFlangeRo); + shRB26s5RFlange->DefineSection(6, z0, kRB26s5RFlangeRi1, kRB26s5RFlangeRo); + z0 += kRB26s5RFlangeD1; + shRB26s5RFlange->DefineSection(7, z0, kRB26s5RFlangeRi1, kRB26s5RFlangeRo); + TGeoVolume* voRB26s5RFlange = new TGeoVolume("RB26s5RFlange", shRB26s5RFlange, kMedSteelHC); + + TGeoVolume* voRB26s5RFlangeM = new TGeoVolume("RB26s5RFlangeM", 
makeMotherFromTemplate(shRB26s5RFlange, 4, 7), kMedVacHC); + voRB26s5RFlangeM->AddNode(voRB26s5RFlange, 1, gGeoIdentity); + + // + // Assemble RB26/1-2 + // + TGeoVolumeAssembly* asRB26s12 = new TGeoVolumeAssembly("RB26s12"); + z0 = 0.; + // asRB26s12->AddNode(voRB26s1RFlange, 1, gGeoIdentity); + barrel->AddNode(voRB26s1RFlange, 1, new TGeoCombiTrans(0., 30., -82, rot180)); + z0 += kRB26s1RFlangeIsL + kRB26s1RFlangeFpL; + barrel->AddNode(voRB26s12TubeM, 1, new TGeoCombiTrans(0., 30., -82. - z0, rot180)); + z0 += kRB26s12TubeL; + asRB26s12->AddNode(voRB26s12msTubeM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s12TubeL2; + asRB26s12->AddNode(voRB26s2Compensator, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s2CompL; + z0 -= kRB26s2FFlangeD1; + asRB26s12->AddNode(voRB26s2FFlangeM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s2FFlangeL; + const float kRB26s12L = z0; + + // + // Assemble RB26/3 + // + TGeoVolumeAssembly* asRB26s3 = new TGeoVolumeAssembly("RB26s3"); + z0 = 0.; + asRB26s3->AddNode(voRB26s3SFlangeM, 1, gGeoIdentity); + z0 += kRB26s3SFlangeL; + z0 -= kRB26s3SFlangeD3; + asRB26s3->AddNode(voRB26s3FixedPointM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3FixedPointL; + asRB26s3->AddNode(voRB26s3TubeM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3TubeL; + asRB26s3->AddNode(voRB26s3Compensator, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3CompL; + z0 -= kRB26s3FFlangeD1; + asRB26s3->AddNode(voRB26s3FFlangeM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3FFlangeL; + const float kRB26s3L = z0; + + // + // Assemble RB26/4-5 + // + TGeoVolumeAssembly* asRB26s45 = new TGeoVolumeAssembly("RB26s45"); + z0 = 0.; + asRB26s45->AddNode(voRB26s4SFlangeM, 1, gGeoIdentity); + z0 += kRB26s4SFlangeL; + z0 -= kRB26s4SFlangeD3; + asRB26s45->AddNode(voRB26s4FixedPointM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s4FixedPointL; + asRB26s45->AddNode(voRB26s45TubeM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s45TubeL; + 
asRB26s45->AddNode(voRB26s5Compensator, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s5CompL; + z0 -= kRB26s5RFlangeD3; + z0 -= kRB26s5RFlangeD4; + asRB26s45->AddNode(voRB26s5RFlangeM, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s5RFlangeL; + const float kRB26s45L = z0; + + // + // Assemble RB26 + // + TGeoVolumeAssembly* asRB26Pipe = new TGeoVolumeAssembly("RB26Pipe"); + z0 = 0.; + asRB26Pipe->AddNode(asRB26s12, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s12L; + asRB26Pipe->AddNode(asRB26s3, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s3L; + asRB26Pipe->AddNode(asRB26s45, 1, new TGeoTranslation(0., 0., z0)); + z0 += kRB26s45L; + top->AddNode(asRB26Pipe, 1, new TGeoCombiTrans(0., 0., -82., rot180)); +} + +void PipeRun4::createMaterials() +{ + // + // Define materials for beam pipe + // + int isxfld = 2.; + float sxmgmx = 10.; + o2::base::Detector::initFieldTrackingParams(isxfld, sxmgmx); + + // Steel (Inox) + float asteel[4] = {55.847, 51.9961, 58.6934, 28.0855}; + float zsteel[4] = {26., 24., 28., 14.}; + float wsteel[4] = {.715, .18, .1, .005}; + // AlBe - alloy + float aAlBe[2] = {26.98, 9.01}; // al=2.702 be=1.8477 + float zAlBe[2] = {13.00, 4.00}; + float wAlBe[2] = {0.4, 0.6}; + // Polyamid + float aPA[4] = {16., 14., 12., 1.}; + float zPA[4] = {8., 7., 6., 1.}; + float wPA[4] = {1., 1., 6., 11.}; + // Polyimide film + float aPI[4] = {16., 14., 12., 1.}; + float zPI[4] = {8., 7., 6., 1.}; + float wPI[4] = {5., 2., 22., 10.}; + // Rohacell + float aRohacell[4] = {16., 14., 12., 1.}; + float zRohacell[4] = {8., 7., 6., 1.}; + float wRohacell[4] = {2., 1., 9., 13.}; + // Air + float aAir[4] = {12.0107, 14.0067, 15.9994, 39.948}; + float zAir[4] = {6., 7., 8., 18.}; + float wAir[4] = {0.000124, 0.755267, 0.231781, 0.012827}; + float dAir = 1.20479E-3; + float dAir1 = 1.20479E-11; + // Insulation powder + // Si O Ti Al + float ains[4] = {28.0855, 15.9994, 47.867, 26.982}; + float zins[4] = {14., 8., 22., 13.}; + float wins[4] = {0.3019, 
0.4887, 0.1914, 0.018}; + // + // + // Anticorodal + // + // Al Si7 Mg 0.6 + // + float aaco[3] = {26.982, 28.0855, 24.035}; + float zaco[3] = {13., 14., 12.}; + float waco[3] = {0.924, 0.07, 0.006}; + // Kapton + // + float aKapton[4] = {1.00794, 12.0107, 14.010, 15.9994}; + float zKapton[4] = {1., 6., 7., 8.}; + float wKapton[4] = {0.026362, 0.69113, 0.07327, 0.209235}; + float dKapton = 1.42; + // NEG coating + // Ti V Zr + float aNEG[4] = {47.87, 50.94, 91.24}; + float zNEG[4] = {22.00, 23.00, 40.00}; + float wNEG[4] = {1. / 3., 1. / 3., 1. / 3.}; + float dNEG = 5.6; // ? + + //--------------------------------- + // Aluminium AA 5083 for MFT: Al Manganese(Mn) Magnesium(Mg) Chromium(Cr) + float aALU5083[4] = {26.982, 54.938, 24.305, 51.996}; // Mg: atomic mass here (24.305) differs from the one in the Anticorodal line (24.035)! + float zALU5083[4] = {13., 25., 12., 24.}; + float wALU5083[4] = {0.947, 0.007, 0.044, 0.0015}; + // Aluminium AA 2219 for MFT: Al Cu Mn Ti V Zr + float aALU2219[6] = {26.982, 63.546, 54.938, 47.867, 50.941, 91.224}; + float zALU2219[6] = {13., 29., 25., 22., 23., 40.}; + float wALU2219[6] = {0.93, 0.063, 0.003, 0.0006, 0.001, 0.0018}; + // Aluminium AA 7075 for beam pipe support (wings): Al Zn Mg Cu + float aALU7075[4] = {26.982, 65.38, 24.305, 63.546}; + float zALU7075[4] = {13., 30., 12., 29.}; + float wALU7075[4] = {0.902, 0.06, 0.024, 0.014}; + //--------------------------------- + + // **************** + // Defines tracking media parameters. 
+ // + float epsil = .1; // Tracking precision, + float stemax = -0.01; // Maximum displacement for multiple scat + float tmaxfd = -20.; // Maximum angle due to field deflection + float deemax = -.3; // Maximum fractional energy loss, DLS + float stmin = -.8; + // *************** + // + + auto& matmgr = o2::base::MaterialManager::Instance(); + + // Beryllium + matmgr.Material("PIPE", 5, "BERILLIUM$", 9.01, 4., 1.848, 35.3, 36.7); + matmgr.Medium("PIPE", 5, "BE", 5, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // Copper + matmgr.Material("PIPE", 10, "COPPER", 63.55, 29, 8.96, 1.43, 85.6 / 8.96); + matmgr.Material("PIPE", 30, "COPPER_NF", 63.55, 29, 8.96, 1.43, 85.6 / 8.96); + matmgr.Material("PIPE", 50, "COPPER_HC", 63.55, 29, 8.96, 1.43, 85.6 / 8.96); + matmgr.Material("PIPE", 70, "COPPER_NFHC", 63.55, 29, 8.96, 1.43, 85.6 / 8.96); + + matmgr.Medium("PIPE", 10, "CU", 10, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 30, "CU_NF", 30, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 50, "CU_HC", 50, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 70, "CU_NFHC", 70, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // Air + matmgr.Mixture("PIPE", 15, "AIR$ ", aAir, zAir, dAir, 4, wAir); + matmgr.Mixture("PIPE", 35, "AIR_HIGH$ ", aAir, zAir, dAir, 4, wAir); + matmgr.Mixture("PIPE", 55, "AIR_NF ", aAir, zAir, dAir, 4, wAir); + matmgr.Medium("PIPE", 15, "AIR", 15, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 35, "AIR_HIGH", 35, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 55, "AIR_NF", 55, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // Insulation + matmgr.Mixture("PIPE", 14, "INSULATION0$", ains, zins, 0.41, 4, wins); + matmgr.Medium("PIPE", 14, "INS_C0", 14, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // + // Vacuum + matmgr.Mixture("PIPE", 16, 
"VACUUM$ ", aAir, zAir, dAir1, 4, wAir); + matmgr.Mixture("PIPE", 36, "VACUUM$_NF", aAir, zAir, dAir1, 4, wAir); + matmgr.Mixture("PIPE", 56, "VACUUM$_HC ", aAir, zAir, dAir1, 4, wAir); + matmgr.Mixture("PIPE", 76, "VACUUM$_NFHC", aAir, zAir, dAir1, 4, wAir); + + matmgr.Medium("PIPE", 16, "VACUUM", 16, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 36, "VACUUM_NF", 36, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 56, "VACUUM_HC", 56, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 76, "VACUUM_NFHC", 76, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // + // Steel + matmgr.Mixture("PIPE", 19, "STAINLESS STEEL$", asteel, zsteel, 7.88, 4, wsteel); + matmgr.Mixture("PIPE", 39, "STAINLESS STEEL$_NF", asteel, zsteel, 7.88, 4, wsteel); + matmgr.Mixture("PIPE", 59, "STAINLESS STEEL$_HC", asteel, zsteel, 7.88, 4, wsteel); + matmgr.Mixture("PIPE", 79, "STAINLESS STEEL$_NFHC", asteel, zsteel, 7.88, 4, wsteel); + + matmgr.Medium("PIPE", 19, "INOX", 19, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 39, "INOX_NF", 39, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 59, "INOX_HC", 59, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 79, "INOX_NFHC", 79, 0, 0, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + //----------------- for the MFT ---------------------- + matmgr.Mixture("PIPE", 63, "ALUMINIUM5083$", aALU5083, zALU5083, 2.66, 4, wALU5083); // from aubertduval.fr + matmgr.Mixture("PIPE", 64, "ALUMINIUM2219$", aALU2219, zALU2219, 2.84, 6, wALU2219); // from aubertduval.fr + matmgr.Medium("PIPE", 63, "AA5083", 63, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + matmgr.Medium("PIPE", 64, "AA2219", 64, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + //---------------------------------------------------- + matmgr.Mixture("PIPE", 65, "PI$", aPI, 
zPI, 1.42, -4, wPI); + matmgr.Medium("PIPE", 65, "POLYIMIDE", 65, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + //--------------------------------- + // Carbon Fiber M55J + matmgr.Material("PIPE", 66, "M55J6K$", 12.0107, 6, 1.92, 999, 999); + matmgr.Medium("PIPE", 66, "M55J6K", 66, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // Rohacell + matmgr.Mixture("PIPE", 67, "Rohacell$", aRohacell, zRohacell, 0.03, -4, wRohacell); + matmgr.Medium("PIPE", 67, "ROHACELL", 67, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // Titanium + matmgr.Material("PIPE", 22, "Titanium$", 47.867, 22, 4.54, 3.560, 27.80); + matmgr.Medium("PIPE", 22, "TITANIUM", 22, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // Alu 7075 (ZICRAL) + matmgr.Mixture("PIPE", 68, "ALUMINIUM7075$", aALU7075, zALU7075, 2.810, -4, wALU7075); + matmgr.Medium("PIPE", 68, "AA7075", 68, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); + + // Al-Be alloy + matmgr.Mixture("PIPE", 11, "AlBe$", aAlBe, zAlBe, 2.07, 2, wAlBe); + matmgr.Medium("PIPE", 11, "AlBe", 11, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); +} + +TGeoPcon* PipeRun4::makeMotherFromTemplate(const TGeoPcon* shape, int imin, int imax, float r0, int nz) +{ + // + // Create a mother shape from a template setting some min radii to 0 + // + int nz0 = shape->GetNz(); + // if nz > -1 the number of planes is given by nz + if (nz != -1) { + nz0 = nz; + } + TGeoPcon* mother = new TGeoPcon(0., 360., nz0); + + if (imin == -1 || imax == -1) { + imin = 0; + imax = shape->GetNz(); + } else if (imax >= nz0) { + imax = nz0 - 1; + printf("Warning: imax reset to nz-1 %5d %5d %5d %5d\n", imin, imax, nz, nz0); + } + + // construct the sections dynamically since duplications have to be avoided + std::vector<double> pconparams; + pconparams.reserve(nz0); + pconparams.push_back(0.); + pconparams.push_back(360); + pconparams.push_back(nz0); + int zplanecounter = 0; + + auto addSection = [&pconparams, 
&zplanecounter](double z, double rmin, double rmax) { + pconparams.push_back(z); + pconparams.push_back(rmin); + pconparams.push_back(rmax); + zplanecounter++; + }; + + double zlast, rminlast, rmaxlast; + for (int i = 0; i < shape->GetNz(); i++) { + double rmin = shape->GetRmin(i); + if ((i >= imin) && (i <= imax)) { + rmin = r0; + } + double rmax = shape->GetRmax(i); + double z = shape->GetZ(i); + if (i == 0 || (z != zlast || rmin != rminlast || rmax != rmaxlast)) { + addSection(z, rmin, rmax); + } + zlast = z; + rminlast = rmin; + rmaxlast = rmax; + } + // correct dimension (unless the user chose the number of sections) + if (nz == -1) { + pconparams[2] = zplanecounter; + // reinit polycon from parameters + mother->SetDimensions(pconparams.data()); + } else { + for (int i = 0; i < zplanecounter; i++) { + mother->DefineSection(i, pconparams[3 + 3 * i], pconparams[4 + 3 * i], pconparams[5 + 3 * i]); + } + } + + return mother; +} + +TGeoPcon* PipeRun4::makeInsulationFromTemplate(TGeoPcon* shape) +{ + // + // Create a beam pipe insulation layer shape from a template; note that the template itself is modified (each rmax is enlarged by 0.5) + // + int nz = shape->GetNz(); + TGeoPcon* insu = new TGeoPcon(0., 360., nz); + + for (int i = 0; i < nz; i++) { + double z = shape->GetZ(i); + double rmin = shape->GetRmin(i); + double rmax = shape->GetRmax(i); + rmax += 0.5; + shape->DefineSection(i, z, rmin, rmax); + rmin = rmax - 0.5; + insu->DefineSection(i, z, rmin, rmax); + } + return insu; +} + +TGeoVolume* PipeRun4::makeBellow(const char* ext, int nc, float rMin, float rMax, float dU, float rPlie, + float dPlie) +{ + // nc Number of convolutions + // rMin Inner radius of the bellow + // rMax Outer radius of the bellow + // dU Undulation length + // rPlie Plie radius + // dPlie Plie thickness + auto& matmgr = o2::base::MaterialManager::Instance(); + const TGeoMedium* kMedVac = matmgr.getTGeoMedium("PIPE_VACUUM"); + const TGeoMedium* kMedSteel = matmgr.getTGeoMedium("PIPE_INOX"); + // + // Upper part of the undulation + // + std::string name, 
nameA, nameB; + TGeoTorus* shPlieTorusU = new TGeoTorus(rMax - rPlie, rPlie - dPlie, rPlie); + nameA = fmt::format("{:s}TorusU", ext); + shPlieTorusU->SetName(nameA.c_str()); + TGeoTube* shPlieTubeU = new TGeoTube(rMax - rPlie, rMax, rPlie); + nameB = fmt::format("{:s}TubeU", ext); + shPlieTubeU->SetName(nameB.c_str()); + name = fmt::format("{:s}UpperPlie", ext); + TGeoCompositeShape* shUpperPlie = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}*{:s}", nameA, nameB).c_str()); + + TGeoVolume* voWiggleU = new TGeoVolume(name.c_str(), shUpperPlie, kMedSteel); + // + // Lower part of the undulation + TGeoTorus* shPlieTorusL = new TGeoTorus(rMin + rPlie, rPlie - dPlie, rPlie); + nameA = fmt::format("{:s}TorusL", ext); + shPlieTorusL->SetName(nameA.c_str()); + TGeoTube* shPlieTubeL = new TGeoTube(rMin, rMin + rPlie, rPlie); + nameB = fmt::format("{:s}TubeL", ext); + shPlieTubeL->SetName(nameB.c_str()); + name = fmt::format("{:s}LowerPlie", ext); + TGeoCompositeShape* shLowerPlie = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}*{:s}", nameA, nameB).c_str()); + + TGeoVolume* voWiggleL = new TGeoVolume(name.c_str(), shLowerPlie, kMedSteel); + + // + // Connection between upper and lower part of undulation + TGeoVolume* voWiggleC1 = new TGeoVolume(fmt::format("{:s}PlieConn1", ext).c_str(), new TGeoTube(rMin + rPlie, rMax - rPlie, dPlie / 2.), kMedSteel); + // + // One wiggle + float dz = rPlie - dPlie / 2.; + float z0 = -dPlie / 2.; + TGeoVolumeAssembly* asWiggle = new TGeoVolumeAssembly(fmt::format("{:s}Wiggle", ext).c_str()); + asWiggle->AddNode(voWiggleC1, 1, new TGeoTranslation(0., 0., z0)); + z0 += dz; + asWiggle->AddNode(voWiggleU, 1, new TGeoTranslation(0., 0., z0)); + z0 += dz; + asWiggle->AddNode(voWiggleC1, 2, new TGeoTranslation(0., 0., z0)); + z0 += dz; + asWiggle->AddNode(voWiggleL, 1, new TGeoTranslation(0., 0., z0)); + asWiggle->GetShape()->ComputeBBox(); // enforce recomputing of BBox + // + float zBellowTot = nc * 
(static_cast<TGeoBBox*>(asWiggle->GetShape()))->GetDZ(); + TGeoVolume* voBellow = new TGeoVolume(fmt::format("{:s}BellowUS", ext).c_str(), new TGeoTube(rMin, rMax, zBellowTot), kMedVac); + // Positioning of the volumes + z0 = -dU / 2. + rPlie; + voBellow->AddNode(voWiggleL, 2, new TGeoTranslation(0., 0., z0)); + z0 += rPlie; + float zsh = 4. * rPlie - 2. * dPlie; + for (int iw = 0; iw < nc; iw++) { + float zpos = z0 + iw * zsh; + voBellow->AddNode(asWiggle, iw + 1, new TGeoTranslation(0., 0., zpos - dPlie)); + } + return voBellow; +} + +TGeoVolume* PipeRun4::makeBellowCside(const char* ext, int nc, float rMin, float rMax, float rPlie, float dPlie) +{ + // nc Number of convolutions + // rMin Inner radius of the bellow + // rMax Outer radius of the bellow + // dU Undulation length (not an argument here; computed below from nc, rPlie and dPlie) + // rPlie Plie radius + // dPlie Plie thickness + auto& matmgr = o2::base::MaterialManager::Instance(); + const TGeoMedium* kMedVac = matmgr.getTGeoMedium("PIPE_VACUUM"); + const TGeoMedium* kMedAlu5083 = matmgr.getTGeoMedium("PIPE_AA5083"); // fm + + float dU = nc * (4. * rPlie - 2. 
* dPlie); + + std::string name, nameA, nameB; + name = fmt::format("{:s}BellowUS", ext); + // TGeoVolume* voBellow = new TGeoVolume(name, new TGeoTube(rMin, rMax, dU/2.), kMedVac); + TGeoVolumeAssembly* voBellow = new TGeoVolumeAssembly(name.c_str()); + // + // Upper part of the undulation + // + + TGeoTorus* shPlieTorusU = new TGeoTorus(rMax - rPlie, rPlie - dPlie, rPlie); + nameA = fmt::format("{:s}TorusU", ext); + shPlieTorusU->SetName(nameA.c_str()); + TGeoTube* shPlieTubeU = new TGeoTube(rMax - rPlie, rMax, rPlie); + nameB = fmt::format("{:s}TubeU", ext); + shPlieTubeU->SetName(nameB.c_str()); + name = fmt::format("{:s}UpperPlie", ext); + TGeoCompositeShape* shUpperPlie = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}*{:s}", nameA, nameB).c_str()); + + TGeoVolume* voWiggleU = new TGeoVolume(name.c_str(), shUpperPlie, kMedAlu5083); + voWiggleU->SetLineColor(kOrange); // fm + + // First Lower part of the undulation + TGeoTorus* shPlieTorusL = new TGeoTorus(rMin + rPlie, rPlie - dPlie, rPlie); + nameA = fmt::format("{:s}TorusL", ext); + shPlieTorusL->SetName(nameA.c_str()); + TGeoTranslation* t1 = new TGeoTranslation("t1", 0, 0, -rPlie / 2.); + t1->RegisterYourself(); + + TGeoTube* shPlieTubeL = new TGeoTube(rMin, rMin + rPlie, rPlie / 2.); + nameB = fmt::format("{:s}TubeL", ext); + shPlieTubeL->SetName(nameB.c_str()); + name = fmt::format("{:s}LowerPlie", ext); + TGeoCompositeShape* shLowerPlie1 = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}*{:s}:t1", nameA, nameB).c_str()); + + TGeoVolume* voWiggleL1 = new TGeoVolume(name.c_str(), shLowerPlie1, kMedAlu5083); + voWiggleL1->SetLineColor(kOrange); // fm + + // Second Lower part of the undulation + TGeoTranslation* t2 = new TGeoTranslation("t2", 0, 0, rPlie / 2.); + t2->RegisterYourself(); + + TGeoCompositeShape* shLowerPlie2 = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}*{:s}:t2", nameA, nameB).c_str()); + + TGeoVolume* voWiggleL2 = new TGeoVolume(name.c_str(), shLowerPlie2, 
kMedAlu5083); + voWiggleL2->SetLineColor(kOrange); // fm + + // Connection between upper and lower part of undulation + name = fmt::format("{:s}PlieConn1", ext); + TGeoVolume* voWiggleC1 = new TGeoVolume(name.c_str(), new TGeoTube(rMin + rPlie, rMax - rPlie, dPlie / 2.), kMedAlu5083); + voWiggleC1->SetLineColor(kOrange); // fm + + // + // Vacuum Part + // + + //--Upper part of the undulation + + TGeoTorus* vacPlieTorusU = new TGeoTorus(rMax - rPlie, 0., rPlie - dPlie); + nameA = fmt::format("{:s}vacTorusU", ext); + vacPlieTorusU->SetName(nameA.c_str()); + TGeoTube* vacPlieTubeU = new TGeoTube(0., rMax - rPlie, rPlie - dPlie); + nameB = fmt::format("{:s}vacTubeU", ext); + vacPlieTubeU->SetName(nameB.c_str()); + name = fmt::format("{:s}vacUpperPlie", ext); + TGeoCompositeShape* vacUpperPlie = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}+{:s}", nameA, nameB).c_str()); + + TGeoVolume* voVacWiggleU = new TGeoVolume(name.c_str(), vacUpperPlie, kMedVac); + voVacWiggleU->SetVisibility(0); + + // First Lower part of the undulation + TGeoTorus* vacPlieTorusL = new TGeoTorus(rMin + rPlie, 0., rPlie); + nameA = fmt::format("{:s}vacTorusL", ext); + vacPlieTorusL->SetName(nameA.c_str()); + + TGeoTube* vacPlieTubeL = new TGeoTube(0., rMin + rPlie, rPlie / 2.); + nameB = fmt::format("{:s}vacTubeL", ext); + vacPlieTubeL->SetName(nameB.c_str()); + name = fmt::format("{:s}vacLowerPlie", ext); + TGeoCompositeShape* vacLowerPlie1 = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}:t1-{:s}", nameB, nameA).c_str()); + + TGeoVolume* voVacWiggleL1 = new TGeoVolume(name.c_str(), vacLowerPlie1, kMedVac); + voVacWiggleL1->SetVisibility(0); + + // Second Lower part of the undulation + TGeoCompositeShape* vacLowerPlie2 = new TGeoCompositeShape(name.c_str(), fmt::format("{:s}:t2-{:s}", nameB, nameA).c_str()); + + TGeoVolume* voVacWiggleL2 = new TGeoVolume(name.c_str(), vacLowerPlie2, kMedVac); + voVacWiggleL2->SetVisibility(0); + + // One wiggle + float dz = rPlie - dPlie / 2.; + 
float z0 = 2. * rPlie; + name = fmt::format("{:s}Wiggle", ext); + TGeoVolumeAssembly* asWiggle = new TGeoVolumeAssembly(name.c_str()); + + asWiggle->AddNode(voWiggleL1, 1, new TGeoTranslation(0., 0., z0)); + asWiggle->AddNode(voVacWiggleL1, 1, new TGeoTranslation(0., 0., z0)); + z0 -= dz; + asWiggle->AddNode(voWiggleC1, 1, new TGeoTranslation(0., 0., z0)); + z0 -= dz; + asWiggle->AddNode(voWiggleU, 1, new TGeoTranslation(0., 0., z0)); + asWiggle->AddNode(voVacWiggleU, 1, new TGeoTranslation(0., 0., z0)); + z0 -= dz; + asWiggle->AddNode(voWiggleC1, 2, new TGeoTranslation(0., 0., z0)); + z0 -= dz; + asWiggle->AddNode(voWiggleL2, 1, new TGeoTranslation(0., 0., z0)); + asWiggle->AddNode(voVacWiggleL2, 1, new TGeoTranslation(0., 0., z0)); + + // Positioning of the volumes + z0 = +dU / 2.; + float zsh = 4. * dz; + // for (int iw = 0; iw < 1; iw++) { + for (int iw = 0; iw < nc; iw++) { + float zpos = z0 - iw * zsh; + voBellow->AddNode(asWiggle, iw + 1, new TGeoTranslation(0., 0., zpos)); + } + return voBellow; +} + +TGeoVolume* PipeRun4::makeSupportBar(const char* tag, float Rin, float Rout, float length, float skinLength) +{ + // + // make a support bar with the specified dimensions of the collar and arms + // + + // Dimensions : + + const float kSupportXdim = length; // 20.67; + const float kBeamPipeRingZdim = 5.25 / 2; + /* thin layer of material between pipe and support; to be put back later */ + const float kVespelRmax = Rin + 0.08; + const float kVespelRmin = Rin; + const float kBeampipeCarbonCollarRmin = Rin + 0.18; // 2.4; + const float kBeampipeCarbonCollarRmax = Rout; // 2.7; + + const float kFixationCarbonCollarRmin = 1.5; + const float kFixationCarbonCollarRmax = 1.7; + const float kFixationCarbonCollarDZ = 2.5; + + const float kSkinThickness = 0.1; + const float kSkinXdim = skinLength; // 14.25; + const float kSkinYdim = 1.; + const float kSkinZdim = kFixationCarbonCollarDZ; + const float kCarbonEarsXdim = 1.01; + const float kCarbonEarsYdim = 0.2; + const 
float kCarbonEarsZdim = kFixationCarbonCollarDZ; + + const TGeoMedium* kMedRohacell = gGeoManager->GetMedium("PIPE_ROHACELL"); + const TGeoMedium* kMedPolyimide = gGeoManager->GetMedium("PIPE_POLYIMIDE"); + const TGeoMedium* kMedCarbonFiber = gGeoManager->GetMedium("PIPE_M55J6K"); + + TGeoVolume* beamPipeSupport = new TGeoVolumeAssembly(Form("BeampipeSupport_%s", tag)); + + // Support Bar + TGeoVolumeAssembly* supportBar = new TGeoVolumeAssembly(Form("BPS_SupportBar_%s", tag)); + + TGeoBBox* carbonSkinBPS = new TGeoBBox(kSkinXdim / 2., kSkinYdim / 2., kSkinZdim / 2.); + carbonSkinBPS->SetName(Form("carbonSkinBPS_%s", tag)); + + TGeoBBox* foambarBPS = new TGeoBBox(Form("foambarBPS_%s", tag), kSkinXdim / 2. - kSkinThickness, kSkinYdim / 2. - kSkinThickness, + kSkinZdim / 2. - kSkinThickness / 2.); + TGeoBBox* carbonEarsBPS = new TGeoBBox(kCarbonEarsXdim / 2., kCarbonEarsYdim / 2., kCarbonEarsZdim / 2.); + carbonEarsBPS->SetName(Form("carbonEarsBPS_%s", tag)); + + // TODO: could reuse those?.. + TGeoTranslation* transBP1 = new TGeoTranslation(Form("transBP1_%s", tag), (kSkinXdim + kCarbonEarsXdim) / 2., 0., 0.); + transBP1->RegisterYourself(); + TGeoTranslation* transBP2 = new TGeoTranslation(Form("transBP2_%s", tag), -(kSkinXdim + kCarbonEarsXdim) / 2., 0., 0.); + transBP2->RegisterYourself(); + TGeoCompositeShape* supportBarCarbon = new TGeoCompositeShape( + Form("BPS_supportBarCarbon_%s", tag), Form("(carbonSkinBPS_%s-foambarBPS_%s)+carbonEarsBPS_%s:transBP1_%s+carbonEarsBPS_%s:transBP2_%s", tag, tag, tag, tag, tag, tag)); + + TGeoVolume* supportBarCarbonVol = new TGeoVolume(Form("BPS_supportBarCarbon_%s", tag), supportBarCarbon, kMedCarbonFiber); + supportBarCarbonVol->SetLineColor(kGray + 3); + + supportBar->AddNode(supportBarCarbonVol, 1, new TGeoTranslation(kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax, 0, 0)); + supportBar->AddNode(supportBarCarbonVol, 2, new TGeoTranslation(-(kSkinXdim / 2. 
+ kCarbonEarsXdim + kBeampipeCarbonCollarRmax), 0, 0)); + + TGeoVolume* foamVol = new TGeoVolume(Form("supportBarFoam_%s", tag), foambarBPS, kMedRohacell); + foamVol->SetLineColor(kGray); + supportBar->AddNode(foamVol, 1, new TGeoTranslation(kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax, 0, 0)); + supportBar->AddNode(foamVol, 2, new TGeoTranslation(-(kSkinXdim / 2. + kCarbonEarsXdim + kBeampipeCarbonCollarRmax), 0, 0)); + + beamPipeSupport->AddNode(supportBar, 1); + + // Fixation to wings + TGeoVolumeAssembly* fixationToWings = new TGeoVolumeAssembly(Form("BPS_fixationToWings_%s", tag)); + + float delatX = 0.1; + + TGeoTubeSeg* fixationTube = new TGeoTubeSeg(kFixationCarbonCollarRmin, kFixationCarbonCollarRmax, kFixationCarbonCollarDZ / 2., -90., 90.); + fixationTube->SetName(Form("fixationTube_%s", tag)); + TGeoBBox* fixationToBar = new TGeoBBox(kCarbonEarsXdim / 2. + delatX, kCarbonEarsYdim / 2., kCarbonEarsZdim / 2.); + fixationToBar->SetName(Form("fixationToBar_%s", tag)); + + TGeoTranslation* transBP3 = new TGeoTranslation(Form("transBP3_%s", tag), kFixationCarbonCollarRmax + kCarbonEarsXdim / 2. - delatX, kCarbonEarsYdim, 0.); + transBP3->RegisterYourself(); + TGeoTranslation* transBP4 = new TGeoTranslation(Form("transBP4_%s", tag), kFixationCarbonCollarRmax + kCarbonEarsXdim / 2. 
- delatX, -kCarbonEarsYdim, 0.); + transBP4->RegisterYourself(); + TGeoCompositeShape* fixationToWing = new TGeoCompositeShape(Form("fixationToWing_%s", tag), Form("fixationTube_%s+fixationToBar_%s:transBP3_%s+fixationToBar_%s:transBP4_%s", tag, tag, tag, tag, tag)); + + TGeoVolume* fixationToWingVol = new TGeoVolume(Form("fixationToWing_%s", tag), fixationToWing, kMedCarbonFiber); + fixationToWingVol->SetLineColor(kGray + 2); + + fixationToWings->AddNode(fixationToWingVol, 1, new TGeoTranslation(-kSupportXdim, 0, 0)); + fixationToWings->AddNode(fixationToWingVol, 2, new TGeoCombiTrans(+kSupportXdim, 0, 0, new TGeoRotation("rot", 0., 0., 180.))); + + beamPipeSupport->AddNode(fixationToWings, 1); + + // Fixation to pipe + + TGeoVolumeAssembly* fixationToPipe = new TGeoVolumeAssembly(Form("fixationToPipe_%s", tag)); + + TGeoTubeSeg* pipeSupportTubeCarbon = new TGeoTubeSeg(kBeampipeCarbonCollarRmin, kBeampipeCarbonCollarRmax, kFixationCarbonCollarDZ / 2., 0., 180.); + pipeSupportTubeCarbon->SetName(Form("pipeSupportTubeCarbon_%s", tag)); + + TGeoBBox* fixationTubeToBar = new TGeoBBox(kCarbonEarsXdim / 2. + delatX, kCarbonEarsYdim / 2., kCarbonEarsZdim / 2.); + fixationTubeToBar->SetName(Form("fixationTubeToBar_%s", tag)); + TGeoBBox* hole = new TGeoBBox((kBeampipeCarbonCollarRmax - kVespelRmin) / 2., kCarbonEarsYdim / 2., kCarbonEarsZdim / 2. + 1e-3); + hole->SetName(Form("hole_%s", tag)); + + TGeoTranslation* transBP5 = new TGeoTranslation(Form("transBP5_%s", tag), kBeampipeCarbonCollarRmax + kCarbonEarsXdim / 2. - delatX, kCarbonEarsYdim, 0.); + transBP5->RegisterYourself(); + TGeoTranslation* transBP6 = new TGeoTranslation(Form("transBP6_%s", tag), -(kBeampipeCarbonCollarRmax + kCarbonEarsXdim / 2. 
- delatX), kCarbonEarsYdim, 0.); + transBP6->RegisterYourself(); + TGeoTranslation* transBP7 = new TGeoTranslation(Form("transBP7_%s", tag), (kBeampipeCarbonCollarRmax + kVespelRmin) / 2., 0., 0.); + transBP7->RegisterYourself(); + TGeoTranslation* transBP8 = new TGeoTranslation(Form("transBP8_%s", tag), -((kBeampipeCarbonCollarRmax + kVespelRmin) / 2.), 0., 0.); + transBP8->RegisterYourself(); + TGeoCompositeShape* halfFixationToPipe = new TGeoCompositeShape( + Form("halfFixationToPipe_%s", tag), + Form("(pipeSupportTubeCarbon_%s-hole_%s:transBP7_%s-hole_%s:transBP8_%s)+fixationTubeToBar_%s:transBP5_%s+fixationTubeToBar_%s:transBP6_%s", tag, tag, tag, tag, tag, tag, tag, tag, tag)); + + TGeoVolume* halfFixationToPipeVol = new TGeoVolume(Form("halfFixationToPipe_%s", tag), halfFixationToPipe, kMedCarbonFiber); + halfFixationToPipeVol->SetLineColor(kRed + 2); + + fixationToPipe->AddNode(halfFixationToPipeVol, 1); + fixationToPipe->AddNode(halfFixationToPipeVol, 2, new TGeoCombiTrans(0, 0, 0, new TGeoRotation("rot", 0., 0., 180.))); + + beamPipeSupport->AddNode(fixationToPipe, 1); + + // Beam Pipe Ring + + TGeoVolumeAssembly* beamPipeRing = new TGeoVolumeAssembly(Form("beamPipeRing_%s", tag)); + + TGeoTube* beamPipeRingCarbon = new TGeoTube(kVespelRmax, kBeampipeCarbonCollarRmin, kBeamPipeRingZdim / 2.); + TGeoVolume* beamPipeRingCarbonVol = new TGeoVolume(Form("beamPipeRingCarbon_%s", tag), beamPipeRingCarbon, kMedCarbonFiber); + beamPipeRingCarbonVol->SetLineColor(kGreen + 2); + beamPipeRing->AddNode(beamPipeRingCarbonVol, 1, new TGeoTranslation(0., 0, (kBeamPipeRingZdim - kFixationCarbonCollarDZ) / 2.)); + + TGeoTube* beamPipeRingVespel = new TGeoTube(kVespelRmin, kVespelRmax, kBeamPipeRingZdim / 2.); + TGeoVolume* beamPipeRingVespelVol = new TGeoVolume(Form("beamPipeRingVespel_%s", tag), beamPipeRingVespel, kMedPolyimide); + beamPipeRingVespelVol->SetLineColor(kGreen + 4); + beamPipeRing->AddNode(beamPipeRingVespelVol, 1, new TGeoTranslation(0., 0, 
(kBeamPipeRingZdim - kFixationCarbonCollarDZ) / 2.)); + + beamPipeSupport->AddNode(beamPipeRing, 1); + beamPipeSupport->SetVisibility(0); + + return beamPipeSupport; +} + +// ---------------------------------------------------------------------------- +FairModule* PipeRun4::CloneModule() const { return new PipeRun4(*this); } +ClassImp(o2::passive::PipeRun4); diff --git a/macro/build_geometry.C b/macro/build_geometry.C index af74e7860ba3d..12d3842239874 100644 --- a/macro/build_geometry.C +++ b/macro/build_geometry.C @@ -22,6 +22,7 @@ #include "DetectorsPassive/Shil.h" #include "DetectorsPassive/Hall.h" #include "DetectorsPassive/Pipe.h" +#include "DetectorsPassive/PipeRun4.h" #include #include #include @@ -153,8 +154,8 @@ void build_geometry(FairRunSim* run = nullptr) // beam pipe if (isActivated("PIPE")) { #ifdef ENABLE_UPGRADES - if (isActivated("IT3")) { - run->AddModule(new o2::passive::Pipe("PIPE", "Beam pipe", 1.6f, 0.05f)); + if (isActivated("IT3") || isActivated("FOC")) { + run->AddModule(new o2::passive::PipeRun4("PIPE", "Beam pipe for Run4")); } else { run->AddModule(new o2::passive::Pipe("PIPE", "Beam pipe")); } From 660b65ebab760a34397e7460c2b1a88b4379b7d4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 6 Feb 2025 02:26:28 +0100 Subject: [PATCH 0064/1914] GPU: Cleanup: Can now assume that slice data are allways on GPU --- .../Global/GPUChainTrackingSliceTracker.cxx | 17 ++++---------- .../SliceTracker/GPUTPCSliceData.cxx | 23 +++++-------------- .../SliceTracker/GPUTPCSliceData.h | 3 +-- .../SliceTracker/GPUTPCTracker.cxx | 11 ++------- GPU/GPUTracking/SliceTracker/GPUTPCTracker.h | 4 ---- 5 files changed, 14 insertions(+), 44 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index 174b3757d3307..94d5e87846b63 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -64,7 +64,6 @@ 
int32_t GPUChainTracking::RunTPCTrackingSlices_internal() GPUInfo("Running TPC Slice Tracker"); } bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSliceTracking; - bool doSliceDataOnGPU = processors()->tpcTrackers[0].SliceDataOnGPU(); if (!param().par.earlyTpcTransform) { for (uint32_t i = 0; i < NSLICES; i++) { processors()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); @@ -93,7 +92,6 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { SetupGPUProcessor(&processors()->tpcTrackers[iSlice], false); // Prepare custom allocation for 1st stack level mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSlice].MemoryResSliceScratch()); - mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSlice].MemoryResSliceInput()); } mRec->PushNonPersistentMemory(qStr2Tag("TPCSLTRK")); for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { @@ -173,7 +171,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Creating Slice Data (Slice %d)", iSlice); } - if (doSliceDataOnGPU) { + if (doGPU) { TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); streamInit[useStream] = true; @@ -194,7 +192,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (GetProcessingSettings().debugLevel >= 6) { *mDebugFile << "\n\nReconstruction: Slice " << iSlice << "/" << NSLICES << std::endl; if (GetProcessingSettings().debugMask & 1) { - if (doSliceDataOnGPU) { + if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); } trk.DumpSliceData(*mDebugFile); @@ -205,15 +203,10 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Copying Slice Data to GPU and initializing temporary memory"); } - if (GetProcessingSettings().keepDisplayMemory && !doSliceDataOnGPU) { - memset((void*)trk.Data().HitWeights(), 0, trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - } else { - runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - } + runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - // Copy Data to GPU Global Memory - if (!doSliceDataOnGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); + if (!doGPU) { + TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); // Copy Data to GPU Global Memory } if (GPUDebug("Initialization (3)", useStream)) { throw std::runtime_error("memcpy failure"); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx index e02cba2144920..8a727dc2da930 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx @@ -58,20 +58,6 @@ void GPUTPCSliceData::SetMaxData() mNumberOfHitsPlusAlign = GPUProcessor::nextMultipleOf<(kVectorAlignment 
> GPUCA_ROWALIGNMENT ? kVectorAlignment : GPUCA_ROWALIGNMENT) / sizeof(int32_t)>(hitMemCount); } -void* GPUTPCSliceData::SetPointersInput(void* mem, bool idsOnGPU, bool sliceDataOnGPU) -{ - if (sliceDataOnGPU) { - return mem; - } - const int32_t firstHitInBinSize = GetGridSize(mNumberOfHits, GPUCA_ROW_COUNT) + GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT / sizeof(int32_t); - GPUProcessor::computePointerWithAlignment(mem, mHitData, mNumberOfHitsPlusAlign); - GPUProcessor::computePointerWithAlignment(mem, mFirstHitInBin, firstHitInBinSize); - if (idsOnGPU) { - mem = SetPointersClusterIds(mem, false); // Hijack the allocation from SetPointersClusterIds - } - return mem; -} - void* GPUTPCSliceData::SetPointersLinks(void* mem) { GPUProcessor::computePointerWithAlignment(mem, mLinkUpData, mNumberOfHitsPlusAlign); @@ -85,10 +71,13 @@ void* GPUTPCSliceData::SetPointersWeights(void* mem) return mem; } -void* GPUTPCSliceData::SetPointersScratch(void* mem, bool idsOnGPU, bool sliceDataOnGPU) +void* GPUTPCSliceData::SetPointersScratch(void* mem, bool idsOnGPU) { - if (sliceDataOnGPU) { - mem = SetPointersInput(mem, idsOnGPU, false); + const int32_t firstHitInBinSize = GetGridSize(mNumberOfHits, GPUCA_ROW_COUNT) + GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT / sizeof(int32_t); + GPUProcessor::computePointerWithAlignment(mem, mHitData, mNumberOfHitsPlusAlign); + GPUProcessor::computePointerWithAlignment(mem, mFirstHitInBin, firstHitInBinSize); + if (idsOnGPU) { + mem = SetPointersClusterIds(mem, false); // Hijack the allocation from SetPointersClusterIds } return mem; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h index 72e9f9d2c19d5..200a123b9bb83 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h @@ -38,8 +38,7 @@ class GPUTPCSliceData void InitializeRows(const GPUParam& p); void SetMaxData(); void SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, 
int32_t clusterIdOffset); - void* SetPointersInput(void* mem, bool idsOnGPU, bool sliceDataOnGPU); - void* SetPointersScratch(void* mem, bool idsOnGPU, bool sliceDataOnGPU); + void* SetPointersScratch(void* mem, bool idsOnGPU); void* SetPointersLinks(void* mem); void* SetPointersWeights(void* mem); void* SetPointersClusterIds(void* mem, bool idsOnGPU); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx index d5a941b333c6e..df0c7813fa0db 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx @@ -40,7 +40,7 @@ using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) GPUTPCTracker::GPUTPCTracker() - : GPUProcessor(), mLinkTmpMemory(nullptr), mISlice(-1), mData(), mNMaxStartHits(0), mNMaxRowStartHits(0), mNMaxTracklets(0), mNMaxRowHits(0), mNMaxTracks(0), mNMaxTrackHits(0), mMemoryResLinks(-1), mMemoryResScratchHost(-1), mMemoryResCommon(-1), mMemoryResTracklets(-1), mMemoryResOutput(-1), mMemoryResSliceScratch(-1), mMemoryResSliceInput(-1), mRowStartHitCountOffset(nullptr), mTrackletTmpStartHits(nullptr), mGPUTrackletTemp(nullptr), mGPUParametersConst(), mCommonMem(nullptr), mTrackletStartHits(nullptr), mTracklets(nullptr), mTrackletRowHits(nullptr), mTracks(nullptr), mTrackHits(nullptr), mOutput(nullptr), mOutputMemory(nullptr) + : GPUProcessor(), mLinkTmpMemory(nullptr), mISlice(-1), mData(), mNMaxStartHits(0), mNMaxRowStartHits(0), mNMaxTracklets(0), mNMaxRowHits(0), mNMaxTracks(0), mNMaxTrackHits(0), mMemoryResLinks(-1), mMemoryResScratchHost(-1), mMemoryResCommon(-1), mMemoryResTracklets(-1), mMemoryResOutput(-1), mMemoryResSliceScratch(-1), mRowStartHitCountOffset(nullptr), mTrackletTmpStartHits(nullptr), mGPUTrackletTemp(nullptr), mGPUParametersConst(), mCommonMem(nullptr), mTrackletStartHits(nullptr), mTracklets(nullptr), mTrackletRowHits(nullptr), mTracks(nullptr), mTrackHits(nullptr), mOutput(nullptr), mOutputMemory(nullptr) { } @@ -62,15 +62,9 @@ 
void GPUTPCTracker::InitializeProcessor() SetupCommonMemory(); } -bool GPUTPCTracker::SliceDataOnGPU() -{ - return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) && (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) && (mRec->GetConstantMem().ioPtrs.clustersNative || mRec->GetConstantMem().ioPtrs.tpcZS || mRec->GetConstantMem().ioPtrs.tpcPackedDigits); -} - -void* GPUTPCTracker::SetPointersDataInput(void* mem) { return mData.SetPointersInput(mem, mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging, SliceDataOnGPU()); } void* GPUTPCTracker::SetPointersDataLinks(void* mem) { return mData.SetPointersLinks(mem); } void* GPUTPCTracker::SetPointersDataWeights(void* mem) { return mData.SetPointersWeights(mem); } -void* GPUTPCTracker::SetPointersDataScratch(void* mem) { return mData.SetPointersScratch(mem, mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging, SliceDataOnGPU()); } +void* GPUTPCTracker::SetPointersDataScratch(void* mem) { return mData.SetPointersScratch(mem, mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging); } void* GPUTPCTracker::SetPointersDataRows(void* mem) { return mData.SetPointersRows(mem); } void* GPUTPCTracker::SetPointersScratch(void* mem) @@ -108,7 +102,6 @@ void GPUTPCTracker::RegisterMemoryAllocation() GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSliceLinks", reLinks); mMemoryResSliceScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSliceScratch"); - mMemoryResSliceInput = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataInput, 
GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSliceInput"); GPUMemoryReuse reWeights{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataWeights, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataWeights, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSliceWeights", reWeights); GPUMemoryReuse reScratch{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerScratch, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h index 10259c80ac80c..576688a429c66 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h @@ -114,8 +114,6 @@ class GPUTPCTracker : public GPUProcessor } void SetupCommonMemory(); - bool SliceDataOnGPU(); - void* SetPointersDataInput(void* mem); void* SetPointersDataLinks(void* mem); void* SetPointersDataWeights(void* mem); void* SetPointersDataScratch(void* mem); @@ -133,7 +131,6 @@ class GPUTPCTracker : public GPUProcessor int16_t MemoryResTracklets() const { return mMemoryResTracklets; } int16_t MemoryResOutput() const { return mMemoryResOutput; } int16_t MemoryResSliceScratch() const { return mMemoryResSliceScratch; } - int16_t MemoryResSliceInput() const { return mMemoryResSliceInput; } void SetMaxData(const GPUTrackingInOutPointers& io); void UpdateMaxData(); @@ -257,7 +254,6 @@ class GPUTPCTracker : public GPUProcessor int16_t mMemoryResTracklets; int16_t mMemoryResOutput; int16_t mMemoryResSliceScratch; - int16_t mMemoryResSliceInput; // GPU Temp Arrays GPUglobalref() int32_t* mRowStartHitCountOffset; // Offset, length and new offset of start hits in row From 558dba6d1dd3c98575861cd74d4dc09423296555 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 6 Feb 2025 02:30:58 +0100 
Subject: [PATCH 0065/1914] GPU: Rename confusing globalTracking option to extrapolationTracking --- GPU/GPUTracking/CMakeLists.txt | 2 +- .../Definitions/GPUDefGPUParameters.h | 24 +++++----- GPU/GPUTracking/Definitions/GPUSettingsList.h | 16 +++---- GPU/GPUTracking/Global/GPUChainTracking.h | 2 +- .../Global/GPUChainTrackingMerger.cxx | 6 +-- .../Global/GPUChainTrackingSliceTracker.cxx | 42 +++++++--------- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 24 +++++----- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h | 32 ++++++------- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 4 +- .../Merger/GPUTPCGlobalDebugSortKernels.h | 4 +- ...ng.cxx => GPUTPCExtrapolationTracking.cxx} | 48 +++++++++---------- ...acking.h => GPUTPCExtrapolationTracking.h} | 18 +++---- GPU/GPUTracking/SliceTracker/GPUTPCTrack.h | 2 +- GPU/GPUTracking/SliceTracker/GPUTPCTracker.h | 2 +- .../GPUTPCTrackletConstructor.cxx | 4 +- .../SliceTracker/GPUTPCTrackletConstructor.h | 2 +- GPU/GPUTracking/Standalone/tools/dump.C | 2 +- GPU/GPUTracking/display/GPUDisplay.cxx | 14 +++--- GPU/GPUTracking/display/GPUDisplay.h | 4 +- .../display/frontend/GPUDisplayKeys.cxx | 10 ++-- .../display/helpers/GPUDisplayColors.inc | 2 +- .../display/helpers/GPUDisplayHelpers.cxx | 2 +- .../display/render/GPUDisplayDraw.cxx | 12 ++--- GPU/GPUTracking/kernels.cmake | 10 ++-- 27 files changed, 145 insertions(+), 153 deletions(-) rename GPU/GPUTracking/SliceTracker/{GPUTPCGlobalTracking.cxx => GPUTPCExtrapolationTracking.cxx} (74%) rename GPU/GPUTracking/SliceTracker/{GPUTPCGlobalTracking.h => GPUTPCExtrapolationTracking.h} (68%) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 6acc7fd1dd537..a5d335931af37 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -45,7 +45,7 @@ set(SRCS SliceTracker/GPUTPCGrid.cxx 
SliceTracker/GPUTPCTrackletSelector.cxx SliceTracker/GPUTPCRow.cxx - SliceTracker/GPUTPCGlobalTracking.cxx + SliceTracker/GPUTPCExtrapolationTracking.cxx SliceTracker/GPUTPCCreateSliceData.cxx Merger/GPUTPCGMMerger.cxx Merger/GPUTPCGMSliceTrack.cxx diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index d246f77a50290..cebc74fcc4a5b 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -39,7 +39,7 @@ #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCGlobalTracking 256 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE @@ -60,7 +60,7 @@ #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 - #define GPUCA_LB_GPUTPCGMMergerLinkGlobalTracks 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define GPUCA_LB_GPUTPCGMMergerCollect 512 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 @@ -105,7 +105,7 @@ #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCGlobalTracking 256 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE @@ -126,7 +126,7 @@ #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkGlobalTracks 256 + 
#define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define GPUCA_LB_GPUTPCGMMergerCollect 512 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 @@ -171,7 +171,7 @@ #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCGlobalTracking 128, 4 + #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE @@ -192,7 +192,7 @@ #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkGlobalTracks 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 @@ -237,7 +237,7 @@ #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCGlobalTracking 192, 2 + #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE @@ -258,7 +258,7 @@ #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkGlobalTracks 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 @@ -312,8 +312,8 
@@ #ifndef GPUCA_LB_GPUTPCNeighboursCleaner #define GPUCA_LB_GPUTPCNeighboursCleaner 256 #endif - #ifndef GPUCA_LB_GPUTPCGlobalTracking - #define GPUCA_LB_GPUTPCGlobalTracking 256 + #ifndef GPUCA_LB_GPUTPCExtrapolationTracking + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 #endif #ifndef GPUCA_LB_GPUTRDTrackerKernels_gpuVersion #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 512 @@ -414,8 +414,8 @@ #ifndef GPUCA_LB_GPUTPCGMMergerMergeCE #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerLinkGlobalTracks - #define GPUCA_LB_GPUTPCGMMergerLinkGlobalTracks 256 + #ifndef GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 #endif #ifndef GPUCA_LB_GPUTPCGMMergerCollect #define GPUCA_LB_GPUTPCGMMergerCollect 256 diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index ca6f2f370300e..cd2916f5df679 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -90,8 +90,8 @@ AddOptionRTC(extraClusterErrorFactorSplitPadSharedSingleY2, float, 3.0f, "", 0, AddOptionRTC(extraClusterErrorSplitTimeSharedSingleZ2, float, 0.03f, "", 0, "Additive extra cluster error for Z2 if splittime, shared, or single set") AddOptionRTC(extraClusterErrorFactorSplitTimeSharedSingleZ2, float, 3.0f, "", 0, "Multiplicative extra cluster error for Z2 if splittime, shared, or single set") AddOptionArray(errorsCECrossing, float, 5, (0.f, 0.f, 0.f, 0.f, 0.f), "", 0, "Extra errors to add to track when crossing CE, depending on addErrorsCECrossing") // BUG: CUDA cannot yet handle AddOptionArrayRTC -AddOptionRTC(globalTrackingYRangeUpper, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for global track candidates") -AddOptionRTC(globalTrackingYRangeLower, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for global track candidates") 
+AddOptionRTC(extrapolationTrackingYRangeUpper, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for extrapolated track candidates") +AddOptionRTC(extrapolationTrackingYRangeLower, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for extrapolated track candidates") AddOptionRTC(trackFollowingYFactor, float, 4.f, "", 0, "Weight of y residual vs z residual in tracklet constructor") AddOptionRTC(trackMergerFactor2YS, float, 1.5f * 1.5f, "", 0, "factor2YS for track merging") AddOptionRTC(trackMergerFactor2ZT, float, 1.5f * 1.5f, "", 0, "factor2ZT for track merging") @@ -116,9 +116,9 @@ AddOptionRTC(mergerLooperSecondHorizontalQPtB5Limit, uint8_t, 2, "", 0, "Min Q/P AddOptionRTC(trackFollowingMaxRowGap, uint8_t, 4, "", 0, "Maximum number of consecutive rows without hit in track following") AddOptionRTC(trackFollowingMaxRowGapSeed, uint8_t, 2, "", 0, "Maximum number of consecutive rows without hit in track following during fit of seed") AddOptionRTC(trackFitMaxRowMissedHard, uint8_t, 10, "", 0, "Hard limit for number of missed rows in fit / propagation") -AddOptionRTC(globalTrackingRowRange, uint8_t, 45, "", 0, "Number of rows from the upped/lower limit to search for global track candidates in for") -AddOptionRTC(globalTrackingMinRows, uint8_t, 10, "", 0, "Min num of rows an additional global track must span over") -AddOptionRTC(globalTrackingMinHits, uint8_t, 8, "", 0, "Min num of hits for an additional global track") +AddOptionRTC(extrapolationTrackingRowRange, uint8_t, 45, "", 0, "Number of rows from the upped/lower limit to search for extrapolated track candidates in for") +AddOptionRTC(extrapolationTrackingMinRows, uint8_t, 10, "", 0, "Min num of rows an additional extrapolated track must span over") +AddOptionRTC(extrapolationTrackingMinHits, uint8_t, 8, "", 0, "Min num of hits for an additional extrapolated track") AddOptionRTC(noisyPadsQuickCheck, uint8_t, 1, "", 0, "Only check first 
fragment for noisy pads instead of all fragments (when test is enabled).") AddOptionRTC(cfQMaxCutoff, uint8_t, 3, "", 0, "Cluster Finder rejects cluster with qmax below or equal to this threshold") AddOptionRTC(cfQTotCutoff, uint8_t, 5, "", 0, "Cluster Finder rejects cluster with qtot below or equal to this threshold") @@ -134,7 +134,7 @@ AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or AddOptionRTC(rejectIFCLowRadiusCluster, uint8_t, 0, "", 0, "Reject clusters that get the IFC mask error during refit") AddOptionRTC(dEdxTruncLow, uint8_t, 2, "", 0, "Low truncation threshold, fraction of 128") AddOptionRTC(dEdxTruncHigh, uint8_t, 77, "", 0, "High truncation threshold, fraction of 128") -AddOptionRTC(globalTracking, int8_t, 1, "", 0, "Enable Global Tracking (prolong tracks to adjacent sectors to find short segments)") +AddOptionRTC(extrapolationTracking, int8_t, 1, "", 0, "Enable Extrapolation Tracking (prolong tracks to adjacent sectors to find short segments)") AddOptionRTC(disableRefitAttachment, uint8_t, 0, "", 0, "Bitmask to disable certain attachment steps during refit (1: attachment, 2: propagation, 4: loop following, 8: mirroring)") AddOptionRTC(rejectionStrategy, uint8_t, o2::gpu::GPUSettings::RejectionStrategyA, "", 0, "Enable rejection of TPC clusters for compression (0 = no, 1 = strategy A, 2 = strategy B)") AddOptionRTC(mergeLoopersAfterburner, uint8_t, 1, "", 0, "Run afterburner for additional looper merging") @@ -315,7 +315,7 @@ AddOption(drawInitLinks, bool, false, "", 0, "Highlight cleaned-up links") AddOption(drawSeeds, bool, false, "", 0, "Highlight seeds") AddOption(drawTracklets, bool, false, "", 0, "Highlight tracklets") AddOption(drawTracks, bool, false, "", 0, "Highlight sector tracks") -AddOption(drawGlobalTracks, bool, false, "", 0, "Highlight global sector tracks prolonged into adjacent sector") +AddOption(drawExtrapolatedTracks, bool, false, "", 0, "Highlight global sector tracks prolonged into adjacent 
sector") AddOption(drawFinal, bool, false, "", 0, "Highlight final tracks") AddOption(excludeClusters, int32_t, 0, "", 0, "Exclude clusters from selected draw objects from display, (2 = exclude clusters but still show tracks)") AddOption(drawSlice, int32_t, -1, "", 0, "Show individual slice") @@ -349,7 +349,7 @@ AddOption(drawTracksAndFilter, bool, false, "", 0, "Use AND filter instead of OR AddOption(propagateLoopers, bool, false, "", 0, "Enabale propagation of loopers") AddOption(clustersOnly, bool, false, "", 0, "Visualize clusters only") AddOption(clustersOnNominalRow, bool, false, "", 0, "Show clusters at nominal x of pad row for early-transformed data") -AddOption(separateGlobalTracks, bool, false, "", 0, "Draw track segments propagated to adjacent sectors separately") +AddOption(separateExtrapolatedTracks, bool, false, "", 0, "Draw track segments propagated to adjacent sectors separately") AddOption(splitCETracks, int8_t, -1, "", 0, "Split CE tracks when they cross the central electrode (-1 = for triggered data)") AddOption(markClusters, int32_t, 0, "", 0, "Mark clusters") AddOption(markFakeClusters, int32_t, 0, "", 0, "Mark fake clusters") diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index d827b095773b1..6e5e0b3048140 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -233,7 +233,7 @@ class GPUChainTracking : public GPUChain int32_t ReadEvent(uint32_t iSlice, int32_t threadId); void WriteOutput(int32_t iSlice, int32_t threadId); - int32_t GlobalTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput = true); + int32_t ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput = true); int32_t PrepareProfile(); int32_t DoProfile(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 0831b260f881d..f28b99c0d8dd0 100644 --- 
a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -161,11 +161,11 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); - runKernel(GetGridAuto(0, deviceType)); + runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({{1, -WarpSize(), 0, deviceType}}, 1); - runKernel({{1, -WarpSize(), 0, deviceType}}, 1); + runKernel({{1, -WarpSize(), 0, deviceType}}, 1); + runKernel({{1, -WarpSize(), 0, deviceType}}, 1); } DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index 94d5e87846b63..b68f0797f425f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -24,21 +24,13 @@ using namespace o2::gpu; -int32_t GPUChainTracking::GlobalTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput) +int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput) { - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("GPU Tracker running Global Tracking for slice %u on thread %d\n", iSlice, threadId); - } - - runKernel({GetGridBlk(256, iSlice % mRec->NStreams()), {iSlice}}); + runKernel({GetGridBlk(256, iSlice % mRec->NStreams()), {iSlice}}); TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[iSlice].MemoryResCommon(), iSlice % mRec->NStreams()); if (synchronizeOutput) { SynchronizeStream(iSlice % mRec->NStreams()); } - - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("GPU Tracker finished Global Tracking 
for slice %u on thread %d\n", iSlice, threadId); - } return (0); } @@ -253,7 +245,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { runKernel({GetGridAuto(useStream), {iSlice}}); - runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); + runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {iSlice}}); } @@ -310,7 +302,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() GPUInfo("Running TPC Tracklet selector (Stream %d, Slice %d to %d)", useStream, iSlice, iSlice + runSlices); } runKernel({GetGridAuto(useStream), {iSlice, runSlices}}); - runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, runSlices); + runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, runSlices); for (uint32_t k = iSlice; k < iSlice + runSlices; k++) { if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {k}}); @@ -330,7 +322,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() std::array transferRunning; transferRunning.fill(true); if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { - if (param().rec.tpc.globalTracking) { + if (param().rec.tpc.extrapolationTracking) { mWriteOutputDone.fill(0); } @@ -379,14 +371,14 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } mSliceSelectorReady = iSlice; - if (param().rec.tpc.globalTracking) { + if (param().rec.tpc.extrapolationTracking) { for (uint32_t tmpSlice2a = 0; tmpSlice2a <= iSlice; tmpSlice2a++) { - uint32_t tmpSlice2 = GPUTPCGlobalTracking::GlobalTrackingSliceOrder(tmpSlice2a); + uint32_t tmpSlice2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(tmpSlice2a); uint32_t sliceLeft, sliceRight; - 
GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(tmpSlice2, sliceLeft, sliceRight); + GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(tmpSlice2, sliceLeft, sliceRight); if (tmpSlice2 <= iSlice && sliceLeft <= iSlice && sliceRight <= iSlice && mWriteOutputDone[tmpSlice2] == 0) { - GlobalTracking(tmpSlice2, 0); + ExtrapolationTracking(tmpSlice2, 0); WriteOutput(tmpSlice2, 0); mWriteOutputDone[tmpSlice2] = 1; } @@ -396,7 +388,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } } } - if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.globalTracking) { + if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.extrapolationTracking) { std::vector blocking(NSLICES * mRec->NStreams()); for (int32_t i = 0; i < NSLICES; i++) { for (int32_t j = 0; j < mRec->NStreams(); j++) { @@ -404,10 +396,10 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } } for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t tmpSlice = GPUTPCGlobalTracking::GlobalTrackingSliceOrder(iSlice); + uint32_t tmpSlice = GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(iSlice); if (!((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)))) { uint32_t sliceLeft, sliceRight; - GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(tmpSlice, sliceLeft, sliceRight); + GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(tmpSlice, sliceLeft, sliceRight); if (doGPU && !blocking[tmpSlice * mRec->NStreams() + sliceLeft % mRec->NStreams()]) { StreamWaitForEvents(tmpSlice % mRec->NStreams(), &mEvents->slice[sliceLeft]); blocking[tmpSlice * mRec->NStreams() + sliceLeft % mRec->NStreams()] = true; @@ -417,7 +409,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() blocking[tmpSlice * mRec->NStreams() + sliceRight % mRec->NStreams()] = true; } } - GlobalTracking(tmpSlice, 0, false); + 
ExtrapolationTracking(tmpSlice, 0, false); } } for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { @@ -429,8 +421,8 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() mSliceSelectorReady = NSLICES; GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, NSLICES))) for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - if (param().rec.tpc.globalTracking) { - GlobalTracking(iSlice, 0); + if (param().rec.tpc.extrapolationTracking) { + ExtrapolationTracking(iSlice, 0); } if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { WriteOutput(iSlice, 0); @@ -439,9 +431,9 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() mRec->SetNestedLoopOmpFactor(1); } - if (param().rec.tpc.globalTracking && GetProcessingSettings().debugLevel >= 3) { + if (param().rec.tpc.extrapolationTracking && GetProcessingSettings().debugLevel >= 3) { for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - GPUInfo("Slice %d - Tracks: Local %d Global %d - Hits: Local %d Global %d", iSlice, + GPUInfo("Slice %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSlice, processors()->tpcTrackers[iSlice].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSlice].CommonMemory()->nTracks, processors()->tpcTrackers[iSlice].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSlice].CommonMemory()->nTrackHits); } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 60dd18a254904..a0b2c7b12246a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -502,7 +502,7 @@ GPUd() void GPUTPCGMMerger::UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, { const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; float alpha = Param().Alpha(iSlice); - const GPUTPCTrack* sliceTr = mMemory->firstGlobalTracks[iSlice]; + const GPUTPCTrack* sliceTr = 
mMemory->firstExtrapolatedTracks[iSlice]; uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; uint32_t nTracks = *trk.NTracks(); for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) { @@ -567,21 +567,21 @@ GPUd() void GPUTPCGMMerger::RefitSliceTracks(int32_t nBlocks, int32_t nThreads, track.SetNextNeighbour(-1); track.SetNextSegmentNeighbour(-1); track.SetPrevSegmentNeighbour(-1); - track.SetGlobalTrackId(0, -1); - track.SetGlobalTrackId(1, -1); + track.SetExtrapolatedTrackId(0, -1); + track.SetExtrapolatedTrackId(1, -1); uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u); mTrackIDs[iSlice * mNMaxSingleSliceTracks + sliceTr->LocalTrackId()] = myTrack; mSliceTrackInfos[myTrack] = track; } } -GPUd() void GPUTPCGMMerger::LinkGlobalTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) +GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { for (int32_t itr = SliceTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SliceTrackInfoGlobalLast(NSLICES - 1); itr += nThreads * nBlocks) { - GPUTPCGMSliceTrack& globalTrack = mSliceTrackInfos[itr]; - GPUTPCGMSliceTrack& localTrack = mSliceTrackInfos[globalTrack.LocalTrackId()]; - if (localTrack.GlobalTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.GlobalTrackIds()[0], -1, itr)) { - localTrack.SetGlobalTrackId(1, itr); + GPUTPCGMSliceTrack& extrapolatedTrack = mSliceTrackInfos[itr]; + GPUTPCGMSliceTrack& localTrack = mSliceTrackInfos[extrapolatedTrack.LocalTrackId()]; + if (localTrack.ExtrapolatedTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.ExtrapolatedTrackIds()[0], -1, itr)) { + localTrack.SetExtrapolatedTrackId(1, itr); } } } @@ -1521,16 +1521,16 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread tr->SetLeg(leg); trackParts[nParts++] = tr; for (int32_t i = 0; i < 2; i++) { - if (tr->GlobalTrackId(i) != -1) { + if 
(tr->ExtrapolatedTrackId(i) != -1) { if (nParts >= kMaxParts) { break; } - if (nHits + mSliceTrackInfos[tr->GlobalTrackId(i)].NClusters() > kMaxClusters) { + if (nHits + mSliceTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { break; } - trackParts[nParts] = &mSliceTrackInfos[tr->GlobalTrackId(i)]; + trackParts[nParts] = &mSliceTrackInfos[tr->ExtrapolatedTrackId(i)]; trackParts[nParts++]->SetLeg(leg); - nHits += mSliceTrackInfos[tr->GlobalTrackId(i)].NClusters(); + nHits += mSliceTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); } } int32_t jtr = tr->NextSegmentNeighbour(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 3e4ae535fb740..7e309dcb79a9c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -76,7 +76,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nOutputTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; - const GPUTPCTrack* firstGlobalTracks[NSLICES]; + const GPUTPCTrack* firstExtrapolatedTracks[NSLICES]; GPUAtomic(uint32_t) tmpCounter[2 * NSLICES]; GPUAtomic(uint32_t) nLooperMatchCandidates; }; @@ -177,7 +177,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() void PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void LinkGlobalTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); + GPUd() void LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void Finalize1(int32_t 
nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index c96fab2343d82..b6f11375328d0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -137,9 +137,9 @@ GPUdii() void GPUTPCGMMergerMergeCE::Thread<0>(int32_t nBlocks, int32_t nThreads } template <> -GPUdii() void GPUTPCGMMergerLinkGlobalTracks::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) +GPUdii() void GPUTPCGMMergerLinkExtrapolatedTracks::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - merger.LinkGlobalTracks(nBlocks, nThreads, iBlock, iThread); + merger.LinkExtrapolatedTracks(nBlocks, nThreads, iBlock, iThread); } template <> diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h index e1432830117c1..dec9befa25ce2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h @@ -126,7 +126,7 @@ class GPUTPCGMMergerMergeCE : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -class GPUTPCGMMergerLinkGlobalTracks : public GPUTPCGMMergerGeneral +class GPUTPCGMMergerLinkExtrapolatedTracks : public GPUTPCGMMergerGeneral { public: template diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h index a2179b6c66b2a..47841a616a13e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h @@ -61,9 +61,9 @@ class GPUTPCGMSliceTrack GPUd() int32_t LocalTrackId() const { return mLocalTrackId; } GPUd() void 
SetLocalTrackId(int32_t v) { mLocalTrackId = v; } - GPUd() int32_t GlobalTrackId(int32_t n) const { return mGlobalTrackIds[n]; } - GPUd() void SetGlobalTrackId(int32_t n, int32_t v) { mGlobalTrackIds[n] = v; } - GPUd() int32_t* GlobalTrackIds() { return mGlobalTrackIds; } + GPUd() int32_t ExtrapolatedTrackId(int32_t n) const { return mExtrapolatedTrackIds[n]; } + GPUd() void SetExtrapolatedTrackId(int32_t n, int32_t v) { mExtrapolatedTrackIds[n] = v; } + GPUd() int32_t* ExtrapolatedTrackIds() { return mExtrapolatedTrackIds; } GPUd() float MaxClusterZT() const { return CAMath::Max(mClusterZT[0], mClusterZT[1]); } GPUd() float MinClusterZT() const { return CAMath::Min(mClusterZT[0], mClusterZT[1]); } @@ -126,19 +126,19 @@ class GPUTPCGMSliceTrack }; private: - const GPUTPCTrack* mOrigTrack; // pointer to original slice track - sliceTrackParam mParam; // Track parameters - sliceTrackParam mParam2; // Parameters at other side - float mTZOffset; // Z offset with early transform, T offset otherwise - float mAlpha; // alpha angle - float mClusterZT[2]; // Minimum maximum cluster Z / T - int32_t mNClusters; // N clusters - int32_t mNeighbour[2]; // - int32_t mSegmentNeighbour[2]; // - int32_t mLocalTrackId; // Corrected local track id in terms of GMSliceTracks array for global tracks, UNDEFINED for local tracks! 
- int32_t mGlobalTrackIds[2]; // IDs of associated global tracks - uint8_t mSlice; // slice of this track segment - uint8_t mLeg; // Leg of this track segment + const GPUTPCTrack* mOrigTrack; // pointer to original slice track + sliceTrackParam mParam; // Track parameters + sliceTrackParam mParam2; // Parameters at other side + float mTZOffset; // Z offset with early transform, T offset otherwise + float mAlpha; // alpha angle + float mClusterZT[2]; // Minimum maximum cluster Z / T + int32_t mNClusters; // N clusters + int32_t mNeighbour[2]; // + int32_t mSegmentNeighbour[2]; // + int32_t mLocalTrackId; // Corrected local track id in terms of GMSliceTracks array for extrapolated tracks, UNDEFINED for local tracks! + int32_t mExtrapolatedTrackIds[2]; // IDs of associated extrapolated tracks + uint8_t mSlice; // slice of this track segment + uint8_t mLeg; // Leg of this track segment ClassDefNV(GPUTPCGMSliceTrack, 1); }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index 9f6467923f56a..be057172a968f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -94,7 +94,7 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread -GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t parameter) +GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t parameter) { if (iThread || iBlock) { return; @@ -112,7 +112,7 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread -GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& 
GPUrestrict() merger, int8_t parameter) +GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t parameter) { if (iBlock) { return; diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h index 4daee67643cfd..7c3d4246ad303 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.h @@ -29,8 +29,8 @@ class GPUTPCGlobalDebugSortKernels : public GPUKernelTemplate enum K { defaultKernel = 0, clearIds = 0, sectorTracks = 1, - globalTracks1 = 2, - globalTracks2 = 3, + extrapolatedTracks1 = 2, + extrapolatedTracks2 = 3, borderTracks = 4 }; GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCMerging; } typedef GPUTPCGMMerger processorType; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx similarity index 74% rename from GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx rename to GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx index cdc72047ef0a4..1a5e99f0f52ca 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx @@ -9,11 +9,11 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUTPCGlobalTracking.cxx +/// \file GPUTPCExtrapolationTracking.cxx /// \author David Rohr #include "GPUTPCDef.h" -#include "GPUTPCGlobalTracking.h" +#include "GPUTPCExtrapolationTracking.h" #include "GPUTPCTrackletConstructor.h" #include "GPUTPCTrackLinearisation.h" #include "GPUTPCTracker.h" @@ -22,7 +22,7 @@ using namespace o2::gpu; -GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) +GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) { /*for (int32_t j = 0;j < Tracks()[j].NHits();j++) { @@ -68,8 +68,8 @@ GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tra } calink rowHits[GPUCA_ROW_COUNT]; - int32_t nHits = GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(tracker, smem, tParam, rowIndex, direction, 0, rowHits); - if (nHits >= tracker.Param().rec.tpc.globalTrackingMinHits) { + int32_t nHits = GPUTPCTrackletConstructor::GPUTPCTrackletConstructorExtrapolationTracking(tracker, smem, tParam, rowIndex, direction, 0, rowHits); + if (nHits >= tracker.Param().rec.tpc.extrapolationTrackingMinHits) { // GPUInfo("%d hits found", nHits); uint32_t hitId = CAMath::AtomicAdd(&tracker.CommonMemory()->nTrackHits, (uint32_t)nHits); if (hitId + nHits > tracker.NMaxTrackHits()) { @@ -91,7 +91,7 @@ GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tra if (rowHit != CALINK_INVAL && rowHit != CALINK_DEAD_CHANNEL) { // GPUInfo("New track: entry %d, row %d, hitindex %d", i, rowIndex, mTrackletRowHits[rowIndex * tracker.CommonMemory()->nTracklets]); tracker.TrackHits()[hitId + i].Set(rowIndex, rowHit); - // if (i == 0) 
tParam.TransportToX(Row(rowIndex).X(), Param().bzCLight(), GPUCA_MAX_SIN_PHI); //Use transport with new linearisation, we have changed the track in between - NOT needed, fitting will always start at outer end of global track! + // if (i == 0) tParam.TransportToX(Row(rowIndex).X(), Param().bzCLight(), GPUCA_MAX_SIN_PHI); //Use transport with new linearisation, we have changed the track in between - NOT needed, fitting will always start at outer end of the extrapolated track! i++; } rowIndex++; @@ -115,42 +115,42 @@ GPUd() int32_t GPUTPCGlobalTracking::PerformGlobalTrackingRun(GPUTPCTracker& tra track.SetLocalTrackId((sliceSource.ISlice() << 24) | sliceSource.Tracks()[iTrack].LocalTrackId()); } - return (nHits >= tracker.Param().rec.tpc.globalTrackingMinHits); + return (nHits >= tracker.Param().rec.tpc.extrapolationTrackingMinHits); } -GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) +GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) { for (int32_t i = iBlock * nThreads + iThread; i < tracker.CommonMemory()->nLocalTracks; i += nThreads * nBlocks) { { const int32_t tmpHit = tracker.Tracks()[i].FirstHitID(); - if (tracker.TrackHits()[tmpHit].RowIndex() >= tracker.Param().rec.tpc.globalTrackingMinRows && tracker.TrackHits()[tmpHit].RowIndex() < tracker.Param().rec.tpc.globalTrackingRowRange) { + if (tracker.TrackHits()[tmpHit].RowIndex() >= tracker.Param().rec.tpc.extrapolationTrackingMinRows && tracker.TrackHits()[tmpHit].RowIndex() < tracker.Param().rec.tpc.extrapolationTrackingRowRange) { int32_t rowIndex = tracker.TrackHits()[tmpHit].RowIndex(); const GPUTPCRow& 
GPUrestrict() row = tracker.Row(rowIndex); float Y = (float)tracker.Data().HitDataY(row, tracker.TrackHits()[tmpHit].HitIndex()) * row.HstepY() + row.Grid().YMin(); - if (!right && Y < -row.MaxY() * tracker.Param().rec.tpc.globalTrackingYRangeLower) { + if (!right && Y < -row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeLower) { // GPUInfo("Track %d, lower row %d, left border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, -row.MaxY()); - PerformGlobalTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, -1); + PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, -1); } - if (right && Y > row.MaxY() * tracker.Param().rec.tpc.globalTrackingYRangeLower) { + if (right && Y > row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeLower) { // GPUInfo("Track %d, lower row %d, right border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, row.MaxY()); - PerformGlobalTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, -1); + PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, -1); } } } { const int32_t tmpHit = tracker.Tracks()[i].FirstHitID() + tracker.Tracks()[i].NHits() - 1; - if (tracker.TrackHits()[tmpHit].RowIndex() < GPUCA_ROW_COUNT - tracker.Param().rec.tpc.globalTrackingMinRows && tracker.TrackHits()[tmpHit].RowIndex() >= GPUCA_ROW_COUNT - tracker.Param().rec.tpc.globalTrackingRowRange) { + if (tracker.TrackHits()[tmpHit].RowIndex() < GPUCA_ROW_COUNT - tracker.Param().rec.tpc.extrapolationTrackingMinRows && tracker.TrackHits()[tmpHit].RowIndex() >= GPUCA_ROW_COUNT - tracker.Param().rec.tpc.extrapolationTrackingRowRange) { int32_t rowIndex = tracker.TrackHits()[tmpHit].RowIndex(); const GPUTPCRow& GPUrestrict() row = tracker.Row(rowIndex); float Y = (float)tracker.Data().HitDataY(row, tracker.TrackHits()[tmpHit].HitIndex()) * row.HstepY() + row.Grid().YMin(); - if (!right && Y < 
-row.MaxY() * tracker.Param().rec.tpc.globalTrackingYRangeUpper) { + if (!right && Y < -row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeUpper) { // GPUInfo("Track %d, upper row %d, left border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, -row.MaxY()); - PerformGlobalTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, 1); + PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, 1); } - if (right && Y > row.MaxY() * tracker.Param().rec.tpc.globalTrackingYRangeUpper) { + if (right && Y > row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeUpper) { // GPUInfo("Track %d, upper row %d, right border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, row.MaxY()); - PerformGlobalTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, 1); + PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, 1); } } } @@ -158,7 +158,7 @@ GPUd() void GPUTPCGlobalTracking::PerformGlobalTracking(int32_t nBlocks, int32_t } template <> -GPUdii() void GPUTPCGlobalTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCExtrapolationTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { CA_SHARED_CACHE(&smem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); @@ -173,11 +173,11 @@ GPUdii() void GPUTPCGlobalTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, sliceLeft += GPUDataTypes::NSLICES / 2; sliceRight += GPUDataTypes::NSLICES / 2; } - PerformGlobalTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sliceLeft], smem, tracker, true); - PerformGlobalTracking(nBlocks, nThreads, iBlock, iThread, 
tracker.GetConstantMem()->tpcTrackers[sliceRight], smem, tracker, false); + PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sliceLeft], smem, tracker, true); + PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sliceRight], smem, tracker, false); } -GPUd() int32_t GPUTPCGlobalTracking::GlobalTrackingSliceOrder(int32_t iSlice) +GPUd() int32_t GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(int32_t iSlice) { iSlice++; if (iSlice == GPUDataTypes::NSLICES / 2) { @@ -189,7 +189,7 @@ GPUd() int32_t GPUTPCGlobalTracking::GlobalTrackingSliceOrder(int32_t iSlice) return iSlice; } -GPUd() void GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right) +GPUd() void GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right) { left = (iSlice + (GPUDataTypes::NSLICES / 2 - 1)) % (GPUDataTypes::NSLICES / 2); right = (iSlice + 1) % (GPUDataTypes::NSLICES / 2); @@ -200,7 +200,7 @@ GPUd() void GPUTPCGlobalTracking::GlobalTrackingSliceLeftRight(uint32_t iSlice, } template <> -GPUdii() void GPUTPCGlobalTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker, int32_t n) +GPUdii() void GPUTPCExtrapolationTrackingCopyNumbers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker, int32_t n) { for (int32_t i = get_global_id(0); i < n; i += get_global_size(0)) { GPUconstantref() GPUTPCTracker& GPUrestrict() trk = (&tracker)[i]; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h b/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h similarity index 68% rename from GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h rename to 
GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h index c3f765f42cec5..cd6533a3439ed 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGlobalTracking.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h @@ -9,11 +9,11 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCGlobalTracking.h +/// \file GPUTPCExtrapolationTracking.h /// \author David Rohr -#ifndef GPUTPCGLOBALTRACKING_H -#define GPUTPCGLOBALTRACKING_H +#ifndef GPUTPCEXTRAPOLATIONTRACKING_H +#define GPUTPCEXTRAPOLATIONTRACKING_H #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" @@ -24,7 +24,7 @@ namespace gpu { class GPUTPCTracker; -class GPUTPCGlobalTracking : public GPUKernelTemplate +class GPUTPCExtrapolationTracking : public GPUKernelTemplate { public: struct GPUSharedMemory { @@ -40,15 +40,15 @@ class GPUTPCGlobalTracking : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); - GPUd() static int32_t GlobalTrackingSliceOrder(int32_t iSlice); - GPUd() static void GlobalTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right); + GPUd() static int32_t ExtrapolationTrackingSliceOrder(int32_t iSlice); + GPUd() static void ExtrapolationTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right); private: - GPUd() static int32_t PerformGlobalTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); - GPUd() static void PerformGlobalTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sliceTarget, bool right); + GPUd() static int32_t PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() 
GPUSharedMemory& smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); + GPUd() static void PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sliceTarget, bool right); }; -class GPUTPCGlobalTrackingCopyNumbers : public GPUKernelTemplate +class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h index 18418bc031d7e..fcf9d1149c588 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h @@ -62,7 +62,7 @@ class GPUTPCTrack private: int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array int32_t mNHits; // number of track cells - int32_t mLocalTrackId; // Id of local track this global track belongs to, index of this track itself if it is a local track + int32_t mLocalTrackId; // Id of local track this extrapolated track belongs to, index of this track itself if it is a local track GPUTPCBaseTrackParam mParam; // track parameters private: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h index 576688a429c66..c5d4d40a2bef8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h @@ -79,7 +79,7 @@ class GPUTPCTracker : public GPUProcessor GPUAtomic(uint32_t) nTracklets; // number of tracklets GPUAtomic(uint32_t) nRowHits; // number of tracklet hits GPUAtomic(uint32_t) nTracks; // number of reconstructed tracks - int32_t nLocalTracks; // number of reconstructed tracks before global tracking + int32_t nLocalTracks; // number of reconstructed tracks before extrapolation tracking GPUAtomic(uint32_t) nTrackHits; // number of track hits 
int32_t nLocalTrackHits; // see above StructGPUParameters gpuParameters; // GPU parameters diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx index 8e8c82393d659..04833375ad6df 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx @@ -21,7 +21,7 @@ #include "GPUTPCTracker.h" #include "GPUTPCTracklet.h" #include "GPUTPCTrackletConstructor.h" -#include "GPUTPCGlobalTracking.h" +#include "GPUTPCExtrapolationTracking.h" #include "CorrectionMapsHelper.h" #include "CalibdEdxContainer.h" #include "GPUParam.inc" @@ -565,7 +565,7 @@ GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() GPUTPCT #endif // GPUCA_GPUCODE template <> // FIXME: GPUgeneric() needed to make the clang spirv output link correctly -GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCGlobalTracking::GPUSharedMemory& sMem, GPUTPCTrackParam& GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) +GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCExtrapolationTracking::GPUSharedMemory& sMem, GPUTPCTrackParam& GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { GPUTPCThreadMemory rMem; rMem.mISH = iTracklet; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h index 88a2d9c94d305..9af1eeb0ae7b2 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h @@ -96,7 +96,7 @@ class GPUTPCTrackletConstructor #endif // GPUCA_GPUCODE template - GPUd() static int32_t 
GPUTPCTrackletConstructorGlobalTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); + GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } diff --git a/GPU/GPUTracking/Standalone/tools/dump.C b/GPU/GPUTracking/Standalone/tools/dump.C index f5213f40a8a95..eed9e5c6c2770 100644 --- a/GPU/GPUTracking/Standalone/tools/dump.C +++ b/GPU/GPUTracking/Standalone/tools/dump.C @@ -3,5 +3,5 @@ void dump() AliHLTSystem* pHLT = AliHLTPluginBase::GetInstance(); AliHLTConfiguration overrideClusterTransformation("TPC-ClusterTransformation", "TPCClusterTransformation", "TPC-HWCFDecoder", "-use-orig-transform -do-mc"); AliHLTConfiguration dumper("Dumper", "GPUDump", "TPC-ClusterTransformation TRD-tracklet-reader", ""); - AliHLTConfiguration overrideTracker("TPC-TR", "TPCCATracker", "TPC-ClusterTransformation Dumper", "-GlobalTracking -SearchWindowDZDR 2.5"); + AliHLTConfiguration overrideTracker("TPC-TR", "TPCCATracker", "TPC-ClusterTransformation Dumper", "-ExtrapolationTracking -SearchWindowDZDR 2.5"); } diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 4c770b32ee66a..918011b85ea04 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -516,17 +516,17 @@ void GPUDisplay::DrawGLScene_drawCommands() LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tSLICETRACK][iCol], GPUDisplayBackend::POINTS)); skip2:; - if (mCfgL.drawGlobalTracks) { + if (mCfgL.drawExtrapolatedTracks) { if (mCfgL.excludeClusters) { goto skip3; } if 
(mCfgL.colorClusters) { - SetColorGlobalTracks(); + SetColorExtrapolatedTracks(); } } else { SetColorClusters(); } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tGLOBALTRACK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tEXTRAPOLATEDTRACK][iCol], GPUDisplayBackend::POINTS)); SetColorClusters(); if (mCfgL.drawFinal && mCfgL.propagateTracks < 2) { @@ -564,9 +564,9 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorTracks(); LOOP_SLICE drawVertices(mGlDLLines[iSlice][tSLICETRACK], GPUDisplayBackend::LINE_STRIP); } - if (mCfgL.drawGlobalTracks) { - SetColorGlobalTracks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tGLOBALTRACK], GPUDisplayBackend::LINE_STRIP); + if (mCfgL.drawExtrapolatedTracks) { + SetColorExtrapolatedTracks(); + LOOP_SLICE drawVertices(mGlDLLines[iSlice][tEXTRAPOLATEDTRACK], GPUDisplayBackend::LINE_STRIP); } } if (mCfgL.drawFinal) { @@ -670,7 +670,7 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) snprintf(info, 1024, "FPS: %6.2f (Slice: %d, 1:Clusters %d, 2:Prelinks %d, 3:Links %d, 4:Seeds %d, 5:Tracklets %d, 6:Tracks %d, 7:GTracks %d, 8:Merger %d) (%d frames, %d draw calls) " "(X %1.2f Y %1.2f Z %1.2f / R %1.2f Phi %1.1f Theta %1.1f) / Yaw %1.1f Pitch %1.1f Roll %1.1f)", - fps, mCfgL.drawSlice, mCfgL.drawClusters, mCfgL.drawInitLinks, mCfgL.drawLinks, mCfgL.drawSeeds, mCfgL.drawTracklets, mCfgL.drawTracks, mCfgL.drawGlobalTracks, mCfgL.drawFinal, mFramesDone, mNDrawCalls, mXYZ[0], mXYZ[1], mXYZ[2], mRPhiTheta[0], mRPhiTheta[1] * 180 / CAMath::Pi(), + fps, mCfgL.drawSlice, mCfgL.drawClusters, mCfgL.drawInitLinks, mCfgL.drawLinks, mCfgL.drawSeeds, mCfgL.drawTracklets, mCfgL.drawTracks, mCfgL.drawExtrapolatedTracks, mCfgL.drawFinal, mFramesDone, mNDrawCalls, mXYZ[0], mXYZ[1], mXYZ[2], mRPhiTheta[0], mRPhiTheta[1] * 180 / CAMath::Pi(), mRPhiTheta[2] * 180 / CAMath::Pi(), mAngle[1] * 180 / CAMath::Pi(), mAngle[0] * 180 / CAMath::Pi(), 
mAngle[2] * 180 / CAMath::Pi()); if (fpstime > 1.) { if (mPrintInfoText & 2) { diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index e7836461e4fd9..d6a65f212ecf3 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -96,7 +96,7 @@ class GPUDisplay : public GPUDisplayInterface tSEED = 3, tTRACKLET = 4, tSLICETRACK = 5, - tGLOBALTRACK = 6, + tEXTRAPOLATEDTRACK = 6, tFINALTRACK = 7, tMARKED = 8, tTRDCLUSTER = 9, @@ -188,7 +188,7 @@ class GPUDisplay : public GPUDisplayInterface void SetColorSeeds(); void SetColorTracklets(); void SetColorTracks(); - void SetColorGlobalTracks(); + void SetColorExtrapolatedTracks(); void SetColorFinal(); void SetColorGrid(); void SetColorGridTRD(); diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index 431240e93b732..acf5566489f49 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -60,7 +60,7 @@ const char* HelpText[] = { "[SHIFT] Slow Zoom / Move / Rotate", "[ALT] / [CTRL] / [ENTER] Focus camera on origin / orient y-axis upwards (combine with [SHIFT] to lock) / Cycle through modes", "[RCTRL] / [RALT] Rotate model instead of camera / rotate TPC around beamline", - "[1] ... [8] / [N] Enable display of clusters, preseeds, seeds, starthits, tracklets, tracks, global tracks, merged tracks / Show assigned clusters in colors", + "[1] ... 
[8] / [N] Enable display of clusters, preseeds, seeds, starthits, tracklets, tracks, extrapolated tracks, merged tracks / Show assigned clusters in colors", "[F1] / [F2] / [F3] / [F4] Enable / disable drawing of TPC / TRD / TOF / ITS", "[SHIFT] + [F1] to [F4] Enable / disable track detector filter", "[SHIFT] + [F12] Switch track detector filter between AND and OR mode" @@ -164,11 +164,11 @@ void GPUDisplay::HandleKey(uint8_t key) mPrintInfoText &= 3; SetInfo("Info text display - console: %s, onscreen %s", (mPrintInfoText & 2) ? "enabled" : "disabled", (mPrintInfoText & 1) ? "enabled" : "disabled"); } else if (key == 'j') { - if (mCfgH.separateGlobalTracks) { + if (mCfgH.separateExtrapolatedTracks) { mCfgH.splitCETracks ^= 1; } - mCfgH.separateGlobalTracks ^= 1; - SetInfo("Seperated display of tracks propagated to adjacent sectors %s / of CE tracks %s", mCfgH.separateGlobalTracks ? "enabled" : "disabled", mCfgH.splitCETracks ? "enabled" : "disabled"); + mCfgH.separateExtrapolatedTracks ^= 1; + SetInfo("Seperated display of tracks propagated to adjacent sectors %s / of CE tracks %s", mCfgH.separateExtrapolatedTracks ? "enabled" : "disabled", mCfgH.splitCETracks ? 
"enabled" : "disabled"); } else if (key == 'c') { if (mCfgH.markClusters == 0) { mCfgH.markClusters = 1; @@ -310,7 +310,7 @@ void GPUDisplay::HandleKey(uint8_t key) } else if (key == '6') { mCfgL.drawTracks ^= 1; } else if (key == '7') { - mCfgL.drawGlobalTracks ^= 1; + mCfgL.drawExtrapolatedTracks ^= 1; } else if (key == '8') { mCfgL.drawFinal ^= 1; } else if (key == mFrontend->KEY_F1) { diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc b/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc index c10e0d3a55876..3716a07536e20 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc +++ b/GPU/GPUTracking/display/helpers/GPUDisplayColors.inc @@ -114,7 +114,7 @@ inline void GPUDisplay::SetColorTracks() } ActivateColor(); } -inline void GPUDisplay::SetColorGlobalTracks() +inline void GPUDisplay::SetColorExtrapolatedTracks() { if (mCfgL.invertColors) { mDrawColor = {0.8, 0.2, 0, 1.f}; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx index 866d4a59aab82..764f659d07e64 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx @@ -52,7 +52,7 @@ void GPUDisplay::disableUnsupportedOptions() mCfgH.markFakeClusters = 0; } if (!mChain) { - mCfgL.excludeClusters = mCfgL.drawInitLinks = mCfgL.drawLinks = mCfgL.drawSeeds = mCfgL.drawTracklets = mCfgL.drawTracks = mCfgL.drawGlobalTracks = 0; + mCfgL.excludeClusters = mCfgL.drawInitLinks = mCfgL.drawLinks = mCfgL.drawSeeds = mCfgL.drawTracklets = mCfgL.drawTracks = mCfgL.drawExtrapolatedTracks = 0; } if (mConfig.showTPCTracksFromO2Format && mParam->par.earlyTpcTransform) { throw std::runtime_error("Cannot run GPU display with early Transform when input is O2 tracks"); diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index b1685fc61fc2c..5d4628cf0eb3f 100644 --- 
a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -414,7 +414,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* } // Print TPC part of track - int32_t separateGlobalTracksLimit = (mCfgH.separateGlobalTracks ? tGLOBALTRACK : TRACK_TYPE_ID_LIMIT); + int32_t separateExtrapolatedTracksLimit = (mCfgH.separateExtrapolatedTracks ? tEXTRAPOLATEDTRACK : TRACK_TYPE_ID_LIMIT); uint32_t lastSide = -1; for (int32_t k = 0; k < nClusters; k++) { if constexpr (std::is_same_v) { @@ -435,10 +435,10 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* drawing = false; lastCluster = -1; } else { - drawPointLinestrip(iSlice, cid, tFINALTRACK, separateGlobalTracksLimit); + drawPointLinestrip(iSlice, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } } - if (w == separateGlobalTracksLimit) { + if (w == separateExtrapolatedTracksLimit) { if (drawing) { insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); } @@ -453,9 +453,9 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* } else { lastcid = &track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative) - mIOPtrs->clustersNative->clustersLinear; } - drawPointLinestrip(iSlice, lastcid, tFINALTRACK, separateGlobalTracksLimit); + drawPointLinestrip(iSlice, lastcid, tFINALTRACK, separateExtrapolatedTracksLimit); } - drawPointLinestrip(iSlice, cid, tFINALTRACK, separateGlobalTracksLimit); + drawPointLinestrip(iSlice, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } drawing = true; } @@ -812,7 +812,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() GPUCA_OPENMP(for) for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { const GPUTPCTracker& tracker = sliceTracker(iSlice); - mGlDLLines[iSlice][tGLOBALTRACK] = DrawTracks(tracker, 1); + mGlDLLines[iSlice][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); } GPUCA_OPENMP(barrier) } diff 
--git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 57f0cce4989f3..4085bebee08c4 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -36,16 +36,16 @@ o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRAC o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB both) o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCGlobalTrackingCopyNumbers" "GPUTPCGlobalTracking TPCTRACKER" NO single int32_t n) -o2_gpu_add_kernel("GPUTPCGlobalTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) +o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO single int32_t n) +o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) o2_gpu_add_kernel("GPUTPCCreateSliceData" "= TPCTRACKER TPCSLICEDATA" LB single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER" NO single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sliceTracks" "= TPCTRACKER" NO single) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO single int8_t parameter) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks1" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks2" "= TPCMERGER" NO single int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO single int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO single int8_t parameter) 
o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO single int8_t parameter) o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map) o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) @@ -68,7 +68,7 @@ o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMM o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice int8_t withinSlice int8_t mergeMode) o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO simple gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerLinkGlobalTracks" "GPUTPCGMMergerGPU TPCMERGER" LB simple) +o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER" NO simple) o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER" NO simple) From 513970ba68fd842b1b189599c933cddded0c505c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 10 Feb 2025 15:06:13 +0100 Subject: [PATCH 0066/1914] Remove obsolete file --- GPU/GPUTracking/Standalone/tools/dump.C | 7 ------- cmake/O2RootMacroExclusionList.cmake | 1 - 2 files changed, 8 deletions(-) delete mode 100644 GPU/GPUTracking/Standalone/tools/dump.C diff --git a/GPU/GPUTracking/Standalone/tools/dump.C b/GPU/GPUTracking/Standalone/tools/dump.C deleted file mode 100644 index eed9e5c6c2770..0000000000000 --- a/GPU/GPUTracking/Standalone/tools/dump.C +++ /dev/null @@ -1,7 +0,0 @@ -void dump() -{ - AliHLTSystem* pHLT = AliHLTPluginBase::GetInstance(); - AliHLTConfiguration 
overrideClusterTransformation("TPC-ClusterTransformation", "TPCClusterTransformation", "TPC-HWCFDecoder", "-use-orig-transform -do-mc"); - AliHLTConfiguration dumper("Dumper", "GPUDump", "TPC-ClusterTransformation TRD-tracklet-reader", ""); - AliHLTConfiguration overrideTracker("TPC-TR", "TPCCATracker", "TPC-ClusterTransformation Dumper", "-ExtrapolationTracking -SearchWindowDZDR 2.5"); -} diff --git a/cmake/O2RootMacroExclusionList.cmake b/cmake/O2RootMacroExclusionList.cmake index 4b87da5b4e42e..d5596ccc424f4 100644 --- a/cmake/O2RootMacroExclusionList.cmake +++ b/cmake/O2RootMacroExclusionList.cmake @@ -42,7 +42,6 @@ list(APPEND O2_ROOT_MACRO_EXCLUSION_LIST GPU/GPUTracking/Merger/macros/fitPolynomialFieldIts.C # Needs AliRoot AliMagF GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C # Needs AliRoot AliMagF GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C # Needs AliRoot AliMagF - GPU/GPUTracking/Standalone/tools/dump.C # Needs AliRoot ALiHLTSystem GPU/GPUTracking/Standalone/tools/dumpTRDClusterMatrices.C # Needs AliRoot AliCDBManager, AliGeomManager and AliTRDgeometry GPU/GPUTracking/TRDTracking/macros/checkDbgOutput.C # Needs AliRoot TStatToolkit GPU/TPCFastTransformation/devtools/loadlibs.C # Special macro From cf85a4c5384344ac867b5b560197e5afeff65b7e Mon Sep 17 00:00:00 2001 From: swenzel Date: Tue, 11 Feb 2025 11:06:57 +0100 Subject: [PATCH 0067/1914] PrimaryGen: More consistent vertex configuration * do not apply a vertex when the mode is kNoVertex (this was buggy) * introduce a new vertex mode kCollContext to indicate the the vertex is to be taken from a collision context --- Common/SimConfig/include/SimConfig/SimConfig.h | 3 ++- Common/SimConfig/src/SimConfig.cxx | 5 ++++- Generators/src/PrimaryGenerator.cxx | 9 ++++++++- run/O2PrimaryServerDevice.h | 7 +++++-- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/Common/SimConfig/include/SimConfig/SimConfig.h b/Common/SimConfig/include/SimConfig/SimConfig.h index 
d70fca2400399..be88d9fbd8c33 100644 --- a/Common/SimConfig/include/SimConfig/SimConfig.h +++ b/Common/SimConfig/include/SimConfig/SimConfig.h @@ -37,7 +37,8 @@ enum class SimFieldMode { enum class VertexMode { kNoVertex = 0, // no vertexing should be applied in the generator kDiamondParam = 1, // Diamond param will influence vertexing - kCCDB = 2 // vertex should be taken from CCDB (Calib/MeanVertex object) + kCCDB = 2, // vertex should be taken from CCDB (Calib/MeanVertex object) + kCollCxt = 3 // vertex should be taken from collision context }; enum class TimeStampMode { diff --git a/Common/SimConfig/src/SimConfig.cxx b/Common/SimConfig/src/SimConfig.cxx index de494d565fd6a..9407a3c556179 100644 --- a/Common/SimConfig/src/SimConfig.cxx +++ b/Common/SimConfig/src/SimConfig.cxx @@ -391,8 +391,11 @@ bool SimConfig::parseVertexModeString(std::string const& vertexstring, VertexMod } else if (vertexstring == "kCCDB") { mode = VertexMode::kCCDB; return true; + } else if (vertexstring == "kCollContext") { + mode = VertexMode::kCollCxt; + return true; } - LOG(error) << "Vertex mode must be one of kNoVertex, kDiamondParam, kCCDB"; + LOG(error) << "Vertex mode must be one of kNoVertex, kDiamondParam, kCCDB, kCollContext"; return false; } diff --git a/Generators/src/PrimaryGenerator.cxx b/Generators/src/PrimaryGenerator.cxx index 21974472e7def..ee7c6a16330f9 100644 --- a/Generators/src/PrimaryGenerator.cxx +++ b/Generators/src/PrimaryGenerator.cxx @@ -270,6 +270,13 @@ void PrimaryGenerator::setVertexMode(o2::conf::VertexMode const& mode, o2::dataf LOG(info) << "The mean vertex is set to :"; mMeanVertex->print(); } + if (mVertexMode == o2::conf::VertexMode::kNoVertex) { + setApplyVertex(false); + LOG(info) << "Disabling vertexing"; + mMeanVertex = std::move(std::unique_ptr(new o2::dataformats::MeanVertexObject(0, 0, 0, 0, 0, 0, 0, 0))); + LOG(info) << "The mean vertex is set to :"; + mMeanVertex->print(); + } } 
/*****************************************************************/ @@ -298,7 +305,7 @@ void PrimaryGenerator::fixInteractionVertex() SmearGausVertexZ(false); // we use the mMeanVertexObject if initialized (initialize first) - if (!mMeanVertex) { + if (mMeanVertex.get() == nullptr) { if (mVertexMode == o2::conf::VertexMode::kDiamondParam) { auto const& param = InteractionDiamondParam::Instance(); const auto& xyz = param.position; diff --git a/run/O2PrimaryServerDevice.h b/run/O2PrimaryServerDevice.h index 4b313a7ca9499..ece3747b2c94a 100644 --- a/run/O2PrimaryServerDevice.h +++ b/run/O2PrimaryServerDevice.h @@ -127,6 +127,8 @@ class O2PrimaryServerDevice final : public fair::mq::Device } else if (vtxMode == VertexMode::kCCDB) { // we need to fetch the CCDB object mPrimGen->setVertexMode(vtxMode, ccdbmgr.getForTimeStamp("GLO/Calib/MeanVertex", conf.getTimestamp())); + } else if (vtxMode == VertexMode::kCollCxt) { + // The vertex will be injected from the outside via setExternalVertex } else { LOG(fatal) << "Unsupported vertex mode"; } @@ -186,13 +188,14 @@ class O2PrimaryServerDevice final : public fair::mq::Device const int MAX_RETRY = 100; do { mStack->Reset(); + const auto& conf = mSimConfig; // see if we the vertex comes from the collision context - if (mCollissionContext) { + if (mCollissionContext && conf.getVertexMode() == o2::conf::VertexMode::kCollCxt) { const auto& vertices = mCollissionContext->getInteractionVertices(); if (vertices.size() > 0) { auto collisionindex = mEventID_to_CollID.at(mEventCounter); auto& vertex = vertices.at(collisionindex); - LOG(info) << "Setting vertex " << vertex << " for event " << mEventCounter << " for prefix " << mSimConfig.getOutPrefix(); + LOG(info) << "Setting vertex " << vertex << " for event " << mEventCounter << " for prefix " << mSimConfig.getOutPrefix() << " from CollContext"; mPrimGen->setExternalVertexForNextEvent(vertex.X(), vertex.Y(), vertex.Z()); } } From b0f13b64ad77010d126408f5a62897aba559238f Mon Sep 17 
00:00:00 2001 From: wiechula Date: Tue, 4 Feb 2025 13:48:42 +0100 Subject: [PATCH 0068/1914] Add missing workflow param, dump MC label --- Detectors/TPC/workflow/src/TPCRefitter.cxx | 5 +++++ Detectors/TPC/workflow/src/tpc-refitter-workflow.cxx | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Detectors/TPC/workflow/src/TPCRefitter.cxx b/Detectors/TPC/workflow/src/TPCRefitter.cxx index 211f14cfae184..3ebe32d12ddb8 100644 --- a/Detectors/TPC/workflow/src/TPCRefitter.cxx +++ b/Detectors/TPC/workflow/src/TPCRefitter.cxx @@ -637,6 +637,11 @@ bool TPCRefitterSpec::processTPCTrack(o2::tpc::TrackTPC tr, o2::MCCompLabel lbl, << "dcazRef=" << dcazRef; } + if (mUseMC) { + (*streamer) << "tpc" + << "mcLabel=" << lbl; + } + (*streamer) << "tpc" << "\n"; } diff --git a/Detectors/TPC/workflow/src/tpc-refitter-workflow.cxx b/Detectors/TPC/workflow/src/tpc-refitter-workflow.cxx index 264e7d8a98c60..78bf63a44d60f 100644 --- a/Detectors/TPC/workflow/src/tpc-refitter-workflow.cxx +++ b/Detectors/TPC/workflow/src/tpc-refitter-workflow.cxx @@ -38,7 +38,8 @@ void customize(std::vector& workflowOptions) { // option allowing to set parameters std::vector options{ - {"enable-mc", o2::framework::VariantType::Bool, false, {"enable MC propagation"}}, + {"use-mc", o2::framework::VariantType::Bool, false, {"use MC information"}}, + {"disable-mc", o2::framework::VariantType::Bool, false, {"disable MC usage"}}, {"enable-cosmics", o2::framework::VariantType::Bool, false, {"enable reading cosmics"}}, {"track-sources", VariantType::String, std::string{GID::ALL}, {"comma-separated list of track sources to use"}}, {"cluster-sources", VariantType::String, std::string{GID::ALL}, {"comma-separated list of cluster sources to use"}}, @@ -61,7 +62,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) // Update the (declared) parameters if changed from the command line o2::conf::ConfigurableParam::updateFromString(configcontext.options().get("configKeyValues")); - 
auto useMC = configcontext.options().get("enable-mc"); + auto useMC = configcontext.options().get("use-mc"); auto sclOpt = o2::tpc::CorrectionMapsLoader::parseGlobalOptions(configcontext.options()); const auto enableCosmics = configcontext.options().get("enable-cosmics"); From a86638794f180a0baf6d532ce5bbe614a8a6acb1 Mon Sep 17 00:00:00 2001 From: wiechula Date: Tue, 4 Feb 2025 14:52:03 +0100 Subject: [PATCH 0069/1914] Move check of outliers in residuals to proper place --- .../SpacePoints/SpacePointsCalibParam.h | 2 +- .../include/SpacePoints/TrackInterpolation.h | 64 ++++++++--------- .../SpacePoints/src/TrackInterpolation.cxx | 71 ++++++++++++------- 3 files changed, 76 insertions(+), 61 deletions(-) diff --git a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h index 67226d108bae3..90b693820d0fa 100644 --- a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h +++ b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/SpacePointsCalibParam.h @@ -56,7 +56,7 @@ static constexpr int NZ2XBins = 5; ///< number of bins in z/x static constexpr float MaxResid = 20.f; ///< max residual in y and z static constexpr float MaxY = 50.f; ///< max value for y position (sector coordinates) static constexpr float MaxZ = 300.f; ///< max value for z position -static constexpr float MaxTgSlp = 1.f; ///< max value for phi (from snp) +static constexpr float MaxTgSlp = 1.f; ///< max value for phi (from snp, converted to tangens) // miscellaneous static constexpr float sEps = 1e-6f; ///< small number for float comparisons diff --git a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackInterpolation.h b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackInterpolation.h index 4b74eaadf5d92..9cbcf3d117661 100644 --- a/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackInterpolation.h +++ 
b/Detectors/TPC/calibration/SpacePoints/include/SpacePoints/TrackInterpolation.h @@ -60,11 +60,6 @@ struct TPCClusterResiduals { float snp{}; ///< sin of the phi angle between padrow and track unsigned char sec{}; ///< sector number 0..35 unsigned char dRow{}; ///< distance to previous row in units of pad rows - void setDY(float val) { dy = fabs(val) < param::MaxResid ? val : std::copysign(param::MaxResid, val); } - void setDZ(float val) { dz = fabs(val) < param::MaxResid ? val : std::copysign(param::MaxResid, val); } - void setY(float val) { y = fabs(val) < param::MaxY ? val : std::copysign(param::MaxY, val); } - void setZ(float val) { z = fabs(val) < param::MaxZ ? val : std::copysign(param::MaxZ, val); } - void setSnp(float val) { snp = fabs(val) < param::MaxTgSlp ? val : std::copysign(param::MaxTgSlp, val); } ClassDefNV(TPCClusterResiduals, 4); }; @@ -120,16 +115,16 @@ struct TrackDataExtended { /// Structure filled for each track with track quality information and a vector with TPCClusterResiduals struct TrackData { - o2::dataformats::GlobalTrackID gid{}; ///< global track ID for seeding track - o2::track::TrackPar par{}; ///< ITS track at inner TPC radius - float dEdxTPC{}; ///< TPC dEdx information - float chi2TPC{}; ///< chi2 of TPC track - float chi2ITS{}; ///< chi2 of ITS track - float chi2TRD{}; ///< chi2 of TRD track - unsigned short nClsTPC{}; ///< number of attached TPC clusters - unsigned short nClsITS{}; ///< number of attached ITS clusters - unsigned short nTrkltsTRD{}; ///< number of attached TRD tracklets - unsigned short clAvailTOF{}; ///< whether or not track seed has a matched TOF cluster + o2::dataformats::GlobalTrackID gid{}; ///< global track ID for seeding track + o2::track::TrackPar par{}; ///< ITS track at inner TPC radius + float dEdxTPC{}; ///< TPC dEdx information + float chi2TPC{}; ///< chi2 of TPC track + float chi2ITS{}; ///< chi2 of ITS track + float chi2TRD{}; ///< chi2 of TRD track + unsigned short nClsTPC{}; ///< number of 
attached TPC clusters + unsigned short nClsITS{}; ///< number of attached ITS clusters + unsigned short nTrkltsTRD{}; ///< number of attached TRD tracklets + unsigned short clAvailTOF{}; ///< whether or not track seed has a matched TOF cluster o2::dataformats::RangeReference<> clIdx{}; ///< index of first cluster residual and total number of cluster residuals of this track ClassDefNV(TrackData, 6); }; @@ -282,30 +277,30 @@ class TrackInterpolation static constexpr float sFloatEps{1.e-7f}; ///< float epsilon for robust linear fitting // parameters + settings const SpacePointsCalibConfParam* mParams = nullptr; - float mTPCTimeBinMUS{.2f}; ///< TPC time bin duration in us - float mTPCVDriftRef = -1.; ///< TPC nominal drift speed in cm/microseconds - float mTPCDriftTimeOffsetRef = 0.; ///< TPC nominal (e.g. at the start of run) drift time bias in cm/mus - float mSqrtS{13600.f}; ///< centre of mass energy set from LHC IF - MatCorrType mMatCorr{MatCorrType::USEMatCorrNONE}; ///< if material correction should be done - int mMaxTracksPerTF{-1}; ///< max number of tracks to be processed per TF (-1 means there is no limit) - int mAddTracksForMapPerTF{0}; ///< in case residuals from different track types are used for vDrift calibration and map creation this defines the statistics for the latter - bool mDumpTrackPoints{false}; ///< dump also track points in ITS, TRD and TOF - bool mProcessSeeds{false}; ///< in case for global tracks also their shorter parts are processed separately - bool mProcessITSTPConly{false}; ///< flag, whether or not to extrapolate ITS-only through TPC + float mTPCTimeBinMUS{.2f}; ///< TPC time bin duration in us + float mTPCVDriftRef = -1.; ///< TPC nominal drift speed in cm/microseconds + float mTPCDriftTimeOffsetRef = 0.; ///< TPC nominal (e.g. 
at the start of run) drift time bias in cm/mus + float mSqrtS{13600.f}; ///< centre of mass energy set from LHC IF + MatCorrType mMatCorr{MatCorrType::USEMatCorrNONE}; ///< if material correction should be done + int mMaxTracksPerTF{-1}; ///< max number of tracks to be processed per TF (-1 means there is no limit) + int mAddTracksForMapPerTF{0}; ///< in case residuals from different track types are used for vDrift calibration and map creation this defines the statistics for the latter + bool mDumpTrackPoints{false}; ///< dump also track points in ITS, TRD and TOF + bool mProcessSeeds{false}; ///< in case for global tracks also their shorter parts are processed separately + bool mProcessITSTPConly{false}; ///< flag, whether or not to extrapolate ITS-only through TPC o2::dataformats::GlobalTrackID::mask_t mSourcesConfigured; ///< the track sources taken into account for extra-/interpolation o2::dataformats::GlobalTrackID::mask_t mSourcesConfiguredMap; ///< possible subset of mSourcesConfigured bool mSingleSourcesConfigured{true}; ///< whether mSourcesConfigured == mSourcesConfiguredMap // input - const o2::globaltracking::RecoContainer* mRecoCont = nullptr; ///< input reco container - std::vector mGIDs{}; ///< GIDs of input tracks - std::vector mGIDtables{}; ///< GIDs of contributors from single detectors for each seed - std::vector mTrackTimes{}; ///< time estimates for all input tracks in micro seconds - std::vector mSeeds{}; ///< seeding track parameters (ITS tracks) - std::map mTrackTypes; ///< mapping of track source to array index in mTrackIndices - std::array, 4> mTrackIndices; ///< keep GIDs of input tracks separately for each track type - gsl::span mTPCTracksClusIdx; ///< input TPC cluster indices from span - const ClusterNativeAccess* mTPCClusterIdxStruct = nullptr; ///< struct holding the TPC cluster indices + const o2::globaltracking::RecoContainer* mRecoCont = nullptr; ///< input reco container + std::vector mGIDs{}; ///< GIDs of input tracks + 
std::vector mGIDtables{}; ///< GIDs of contributors from single detectors for each seed + std::vector mTrackTimes{}; ///< time estimates for all input tracks in micro seconds + std::vector mSeeds{}; ///< seeding track parameters (ITS tracks) + std::map mTrackTypes; ///< mapping of track source to array index in mTrackIndices + std::array, 4> mTrackIndices; ///< keep GIDs of input tracks separately for each track type + gsl::span mTPCTracksClusIdx; ///< input TPC cluster indices from span + const ClusterNativeAccess* mTPCClusterIdxStruct = nullptr; ///< struct holding the TPC cluster indices // ITS specific input only needed for debugging gsl::span mITSTrackClusIdx; ///< input ITS track cluster indices span std::vector> mITSClustersArray; ///< ITS clusters created in run() method from compact clusters @@ -329,6 +324,7 @@ class TrackInterpolation std::unique_ptr mFastTransform{}; ///< TPC cluster transformation float mBz; ///< required for helix approximation bool mInitDone{false}; ///< initialization done flag + size_t mRejectedResiduals{}; ///< number of rejected residuals ClassDefNV(TrackInterpolation, 1); }; diff --git a/Detectors/TPC/calibration/SpacePoints/src/TrackInterpolation.cxx b/Detectors/TPC/calibration/SpacePoints/src/TrackInterpolation.cxx index a32bf17fcd1c5..7c2110fd5b4e7 100644 --- a/Detectors/TPC/calibration/SpacePoints/src/TrackInterpolation.cxx +++ b/Detectors/TPC/calibration/SpacePoints/src/TrackInterpolation.cxx @@ -334,7 +334,8 @@ void TrackInterpolation::process() extrapolateTrack(iSeed); } } - LOG(info) << "Could process " << mTrackData.size() << " tracks successfully"; + LOG(info) << "Could process " << mTrackData.size() << " tracks successfully. " << mRejectedResiduals << " residuals were rejected. 
" << mClRes.size() << " residuals were accepted."; + mRejectedResiduals = 0; } void TrackInterpolation::interpolateTrack(int iSeed) @@ -404,7 +405,7 @@ void TrackInterpolation::interpolateTrack(int iSeed) mCache[iRow].szy[ExtOut] = trkWork.getSigmaZY(); mCache[iRow].sz2[ExtOut] = trkWork.getSigmaZ2(); mCache[iRow].snp[ExtOut] = trkWork.getSnp(); - //printf("Track alpha at row %i: %.2f, Y(%.2f), Z(%.2f)\n", iRow, trkWork.getAlpha(), trkWork.getY(), trkWork.getZ()); + // printf("Track alpha at row %i: %.2f, Y(%.2f), Z(%.2f)\n", iRow, trkWork.getAlpha(), trkWork.getY(), trkWork.getZ()); } // start from outermost cluster with outer refit and back propagation @@ -431,7 +432,7 @@ void TrackInterpolation::interpolateTrack(int iSeed) // TODO: check if reset of covariance matrix is needed here (or, in case TOF point is not available at outermost TRD layer) if (!trkWork.update(clTOFYZ, clTOFCov)) { LOG(debug) << "Failed to update extrapolated ITS track with TOF cluster"; - //LOGF(info, "trkWork.y=%f, cl.y=%f, trkWork.z=%f, cl.z=%f", trkWork.getY(), clTOFYZ[0], trkWork.getZ(), clTOFYZ[1]); + // LOGF(info, "trkWork.y=%f, cl.y=%f, trkWork.z=%f, cl.z=%f", trkWork.getY(), clTOFYZ[0], trkWork.getZ(), clTOFYZ[1]); return; } } @@ -509,7 +510,7 @@ void TrackInterpolation::interpolateTrack(int iSeed) } if (!propagator->PropagateToXBxByBz(trkWork, param::RowX[iRow], mParams->maxSnp, mParams->maxStep, mMatCorr)) { LOG(debug) << "Failed on back propagation"; - //printf("trkX(%.2f), clX(%.2f), clY(%.2f), clZ(%.2f), alphaTOF(%.2f)\n", trkWork.getX(), param::RowX[iRow], clTOFYZ[0], clTOFYZ[1], clTOFAlpha); + // printf("trkX(%.2f), clX(%.2f), clY(%.2f), clZ(%.2f), alphaTOF(%.2f)\n", trkWork.getX(), param::RowX[iRow], clTOFYZ[0], clTOFYZ[1], clTOFAlpha); return; } mCache[iRow].y[ExtIn] = trkWork.getY(); @@ -535,15 +536,14 @@ void TrackInterpolation::interpolateTrack(int iSeed) // simple average w/o weighting for angle mCache[iRow].snp[Int] = (mCache[iRow].snp[ExtOut] + 
mCache[iRow].snp[ExtIn]) / 2.f; - TPCClusterResiduals res; - res.setDY(mCache[iRow].clY - mCache[iRow].y[Int]); - res.setDZ(mCache[iRow].clZ - mCache[iRow].z[Int]); - res.setY(mCache[iRow].y[Int]); - res.setZ(mCache[iRow].z[Int]); - res.setSnp(mCache[iRow].snp[Int]); - res.sec = mCache[iRow].clSec; - res.dRow = deltaRow; - clusterResiduals.push_back(std::move(res)); + const auto dY = mCache[iRow].clY - mCache[iRow].y[Int]; + const auto dZ = mCache[iRow].clZ - mCache[iRow].z[Int]; + const auto y = mCache[iRow].y[Int]; + const auto z = mCache[iRow].z[Int]; + const auto snp = mCache[iRow].snp[Int]; + const auto sec = mCache[iRow].clSec; + clusterResiduals.emplace_back(dY, dZ, y, z, snp, sec, deltaRow); + deltaRow = 1; } trackData.chi2TRD = gidTable[GTrackID::TRD].isIndexSet() ? mRecoCont->getITSTPCTRDTrack(gidTable[GTrackID::ITSTPCTRD]).getChi2() : 0; @@ -567,8 +567,17 @@ void TrackInterpolation::interpolateTrack(int iSeed) continue; } ++nClValidated; - float tgPhi = clusterResiduals[iCl].snp / std::sqrt((1.f - clusterResiduals[iCl].snp) * (1.f + clusterResiduals[iCl].snp)); - mClRes.emplace_back(clusterResiduals[iCl].dy, clusterResiduals[iCl].dz, tgPhi, clusterResiduals[iCl].y, clusterResiduals[iCl].z, iRow, clusterResiduals[iCl].sec); + const float tgPhi = clusterResiduals[iCl].snp / std::sqrt((1.f - clusterResiduals[iCl].snp) * (1.f + clusterResiduals[iCl].snp)); + const auto dy = clusterResiduals[iCl].dy; + const auto dz = clusterResiduals[iCl].dz; + const auto y = clusterResiduals[iCl].y; + const auto z = clusterResiduals[iCl].z; + const auto sec = clusterResiduals[iCl].sec; + if ((std::abs(dy) < param::MaxResid) && (std::abs(dz) < param::MaxResid) && (std::abs(y) < param::MaxY) && (std::abs(z) < param::MaxZ) && (std::abs(tgPhi) < param::MaxTgSlp)) { + mClRes.emplace_back(dy, dz, tgPhi, y, z, iRow, sec); + } else { + ++mRejectedResiduals; + } } trackData.clIdx.setEntries(nClValidated); mTrackData.push_back(std::move(trackData)); @@ -645,16 +654,17 @@ void 
TrackInterpolation::extrapolateTrack(int iSeed) if (!propagator->PropagateToXBxByBz(trkWork, x, mParams->maxSnp, mParams->maxStep, mMatCorr)) { return; } - TPCClusterResiduals res; - res.setDY(y - trkWork.getY()); - res.setDZ(z - trkWork.getZ()); - res.setY(trkWork.getY()); - res.setZ(trkWork.getZ()); - res.setSnp(trkWork.getSnp()); - res.sec = sector; - res.dRow = row - rowPrev; + + const auto dY = y - trkWork.getY(); + const auto dZ = z - trkWork.getZ(); + const auto ty = trkWork.getY(); + const auto tz = trkWork.getZ(); + const auto snp = trkWork.getSnp(); + const auto sec = sector; + + clusterResiduals.emplace_back(dY, dZ, ty, tz, snp, sec, row - rowPrev); + rowPrev = row; - clusterResiduals.push_back(std::move(res)); ++nMeasurements; } trackData.chi2TPC = trkTPC.getChi2(); @@ -683,8 +693,17 @@ void TrackInterpolation::extrapolateTrack(int iSeed) continue; } ++nClValidated; - float tgPhi = clusterResiduals[iCl].snp / std::sqrt((1.f - clusterResiduals[iCl].snp) * (1.f + clusterResiduals[iCl].snp)); - mClRes.emplace_back(clusterResiduals[iCl].dy, clusterResiduals[iCl].dz, tgPhi, clusterResiduals[iCl].y, clusterResiduals[iCl].z, iRow, clusterResiduals[iCl].sec); + const float tgPhi = clusterResiduals[iCl].snp / std::sqrt((1.f - clusterResiduals[iCl].snp) * (1.f + clusterResiduals[iCl].snp)); + const auto dy = clusterResiduals[iCl].dy; + const auto dz = clusterResiduals[iCl].dz; + const auto y = clusterResiduals[iCl].y; + const auto z = clusterResiduals[iCl].z; + const auto sec = clusterResiduals[iCl].sec; + if ((std::abs(dy) < param::MaxResid) && (std::abs(dz) < param::MaxResid) && (std::abs(y) < param::MaxY) && (std::abs(z) < param::MaxZ) && (std::abs(tgPhi) < param::MaxTgSlp)) { + mClRes.emplace_back(dy, dz, tgPhi, y, z, iRow, sec); + } else { + ++mRejectedResiduals; + } } trackData.clIdx.setEntries(nClValidated); mTrackData.push_back(std::move(trackData)); From 9ea4d84cbdf5c0945be13dc057cb849093b58d54 Mon Sep 17 00:00:00 2001 From: wiechula Date: Fri, 7 Feb 
2025 12:31:52 +0100 Subject: [PATCH 0070/1914] Adjust object name, add error messages --- .../include/DataFormatsTPC/CalibdEdxCorrection.h | 4 ++-- .../Detectors/TPC/src/CalibdEdxCorrection.cxx | 13 +++++++++++++ Detectors/TPC/workflow/src/CalibdEdxSpec.cxx | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h index 8a731a61c8a2d..1d7b10dc965f7 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h +++ b/DataFormats/Detectors/TPC/include/DataFormatsTPC/CalibdEdxCorrection.h @@ -91,8 +91,8 @@ class CalibdEdxCorrection void clear(); - void writeToFile(std::string_view fileName, std::string_view objName = "CalibdEdxCorrection") const; - void loadFromFile(std::string_view fileName, std::string_view objName = "CalibdEdxCorrection"); + void writeToFile(std::string_view fileName, std::string_view objName = "ccdb_object") const; + void loadFromFile(std::string_view fileName, std::string_view objName = "ccdb_object"); /// \param outFileName name of the output file void dumpToTree(const char* outFileName = "calib_dedx.root") const; diff --git a/DataFormats/Detectors/TPC/src/CalibdEdxCorrection.cxx b/DataFormats/Detectors/TPC/src/CalibdEdxCorrection.cxx index c8224aca5b344..0991c8693d8e8 100644 --- a/DataFormats/Detectors/TPC/src/CalibdEdxCorrection.cxx +++ b/DataFormats/Detectors/TPC/src/CalibdEdxCorrection.cxx @@ -15,6 +15,7 @@ #include // o2 includes +#include "Framework/Logger.h" #include "DataFormatsTPC/Defs.h" #include "CommonUtils/TreeStreamRedirector.h" @@ -39,15 +40,27 @@ void CalibdEdxCorrection::clear() void CalibdEdxCorrection::writeToFile(std::string_view fileName, std::string_view objName) const { std::unique_ptr file(TFile::Open(fileName.data(), "recreate")); + if (!file) { + LOGP(error, "Failed to open file {} for writing", fileName.data()); + return; + } + 
file->WriteObject(this, objName.data()); } void CalibdEdxCorrection::loadFromFile(std::string_view fileName, std::string_view objName) { std::unique_ptr file(TFile::Open(fileName.data())); + if (!file || file->IsZombie()) { + LOGP(error, "Failed to open file {}", fileName.data()); + return; + } + auto tmp = file->Get(objName.data()); if (tmp != nullptr) { *this = *tmp; + } else { + LOGP(error, "Failed to load object with name {} from file {}", objName.data(), fileName.data()); } } diff --git a/Detectors/TPC/workflow/src/CalibdEdxSpec.cxx b/Detectors/TPC/workflow/src/CalibdEdxSpec.cxx index 97b69156a2a6d..2eaf6125e6c29 100644 --- a/Detectors/TPC/workflow/src/CalibdEdxSpec.cxx +++ b/Detectors/TPC/workflow/src/CalibdEdxSpec.cxx @@ -134,7 +134,7 @@ class CalibdEdxDevice : public Task if (mDumpToFile) { mCalib->dumpToFile("calibdEdx_Obj.root", "calib"); - mCalib->getCalib().writeToFile("calibdEdx.root", "ccdb_object"); + mCalib->getCalib().writeToFile("calibdEdx.root"); if (mDumpToFile > 1) { mCalib->writeTTree("calibdEdx.histo.tree.root"); } From 2668615ca836c9695c9f55f2a5722e7ea510c2c4 Mon Sep 17 00:00:00 2001 From: wiechula Date: Tue, 11 Feb 2025 15:46:51 +0100 Subject: [PATCH 0071/1914] Reintroduce custom caching --- Detectors/TPC/base/src/DeadChannelMapCreator.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Detectors/TPC/base/src/DeadChannelMapCreator.cxx b/Detectors/TPC/base/src/DeadChannelMapCreator.cxx index bcbfa8fe68956..8c4e754fc5327 100644 --- a/Detectors/TPC/base/src/DeadChannelMapCreator.cxx +++ b/Detectors/TPC/base/src/DeadChannelMapCreator.cxx @@ -43,6 +43,11 @@ void DeadChannelMapCreator::loadFEEConfigViaRunInfoTS(long timeStamp) if (mObjectValidity[CDBType::ConfigRunInfo].isValid(timeStamp)) { return; } + + const auto meta = mCCDBApi.retrieveHeaders(CDBTypeMap.at(CDBType::ConfigRunInfo), {}, timeStamp); + mObjectValidity[CDBType::ConfigRunInfo].startvalidity = std::stol(meta.at("Valid-From")); + 
mObjectValidity[CDBType::ConfigRunInfo].endvalidity = std::stol(meta.at("Valid-Until")); + loadFEEConfig(timeStamp); } From 07c24dc2100ad9d15b7d272201f9dffb45d3b011 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 10 Feb 2025 11:07:44 +0100 Subject: [PATCH 0072/1914] DPL Analysis: avoid leaking analysis task abstractions --- Generators/include/Generators/AODToHepMC.h | 120 ++++----------------- run/o2aod_mc_to_hepmc.cxx | 28 ++++- 2 files changed, 48 insertions(+), 100 deletions(-) diff --git a/Generators/include/Generators/AODToHepMC.h b/Generators/include/Generators/AODToHepMC.h index 5c9fd69408050..426f9c223410b 100644 --- a/Generators/include/Generators/AODToHepMC.h +++ b/Generators/include/Generators/AODToHepMC.h @@ -257,25 +257,20 @@ struct AODToHepMC { * framework::OptionManager that propagates the options * to the program. */ - struct : framework::ConfigurableGroup { + struct { /** Option for dumping HepMC event structures to disk. Takes one * argument - the name of the file to write to. */ - framework::Configurable dump{"hepmc-dump", "", - "Dump HepMC event to output"}; + std::string dump{""}; /** Option for only storing particles from the event generator. * Note, if a particle is stored down, then its mothers will also * be stored. */ - framework::Configurable onlyGen{"hepmc-only-generated", false, - "Only export generated"}; + bool onlyGen{false}; /** Use HepMC's tree parsing for building event structure */ - framework::Configurable useTree{"hepmc-use-tree", false, - "Export as tree"}; + bool useTree{false}; /** Floating point precision used when writing to disk */ - framework::Configurable precision{"hepmc-precision", 8, - "Export precision in dump"}; + int precision{8}; /** Recenter event at IP=(0,0,0,0). 
*/ - framework::Configurable recenter{"hepmc-recenter", false, - "Recenter the events at (0,0,0,0)"}; + bool recenter{false}; } configs; /** * @{ @@ -585,99 +580,26 @@ struct AODToHepMC { namespace framework { -/** - * This specialisation of o2::framework::OutputManager ensures that - * we can call the post-processing routine of o2::eventgen::AODToHepMC - * and thus ensure that the possible HepMC is written to disk. - * - * The O2 framework (via o2::framework::adoptAnalysisTask) inspects - * the members of the passed class (@c T) and creates - * o2::framework::OutputManager callbacks for every member. The - * default template for this does nothing. - * - * Thus, to delegate a call to a member of the analysis task (of class - * @c T), we can specialise the @c o2::framework::OutputManager - * template on the @e member type. We will then effectively have - * call-backs for - * - * - @c appendOutput - when the task is constructed - * - @c prepare - when a new set of data is recieved - * - @c finalize - when a set of data has been processed - * - @c postRun - when the run is over - * - * Concretely, we use the @c postRun to flush the HepMC data file - * to disk. - * - * For this to work, the AODToHepMC object must be a member of the - * "Task" class, e.g., - * - * @code - * struct Task { - * o2::eventgen::AODToHepMC mConverter; - * ... 
- * }; - * - * WorkflowSpec defineDataProcessing(ConfigContext const& cfg) { - * return WorkflowSpec{adaptAnalysisTask(cfg)}; - * } - * @endcode - */ -template <> -struct OutputManager { - /** Type of the target */ - using Target = eventgen::AODToHepMC; - /** Called when task is constructed */ - static bool appendOutput(std::vector&, Target&, uint32_t) { return true; } - /** Called when new data is received */ - static bool prepare(ProcessingContext&, Target&) { return true; } - /** Called when all data has been received */ - static bool postRun(EndOfStreamContext&, Target& t) { return t.postRun(); } - /** Called when the job finishes */ - static bool finalize(ProcessingContext&, Target& t) { return true; } -}; - -/** - * Spacialisation to pull in configurables from the converter. - * - * Ideally, the converter should simply derive from ConfigurableGroup - * and all should flow automatically, but that doesn't work for some - * reason. - * - * For this to work, the AODToHepMC object must be a member of the - * "Task" class, e.g., - * - * @code - * struct Task { - * o2::eventgen::AODToHepMC mConverter; - * ... 
- * }; - * - * WorkflowSpec defineDataProcessing(ConfigContext const& cfg) { - * return WorkflowSpec{adaptAnalysisTask(cfg)}; - * } - * @endcode - */ -template <> -struct OptionManager { - /** type of the target */ - using Target = eventgen::AODToHepMC; - /** Called when the task is constructed */ - static bool - appendOption(std::vector& options, - Target& target) +struct AODToHepMCPostRun { + static AODToHepMCPostRun& instance() { - OptionManager::appendOption(options, target.configs); - return true; + static AODToHepMCPostRun inst{}; + return inst; } - /** Called when options are processed */ - static bool - prepare(o2::framework::InitContext& ic, Target& target) + + AODToHepMCPostRun(eventgen::AODToHepMC* ptr_ = nullptr) + : ptr{ptr_} { - OptionManager::prepare(ic, target.configs); - return true; } -}; + void endOfStream() { + if (ptr != nullptr) { + ptr->postRun(); + } + } + + eventgen::AODToHepMC* ptr = nullptr; +}; } // namespace framework } // namespace o2 diff --git a/run/o2aod_mc_to_hepmc.cxx b/run/o2aod_mc_to_hepmc.cxx index 8827a38c7ff72..73bf5b6475a22 100644 --- a/run/o2aod_mc_to_hepmc.cxx +++ b/run/o2aod_mc_to_hepmc.cxx @@ -49,9 +49,33 @@ struct AodToHepmc { /** Alias the converter type */ using Converter = o2::eventgen::AODToHepMC; + struct : o2::framework::ConfigurableGroup { + /** Option for dumping HepMC event structures to disk. Takes one + * argument - the name of the file to write to. */ + o2::framework::Configurable dump{"hepmc-dump", "", + "Dump HepMC event to output"}; + /** Option for only storing particles from the event generator. + * Note, if a particle is stored down, then its mothers will also + * be stored. 
*/ + o2::framework::Configurable onlyGen{"hepmc-only-generated", false, + "Only export generated"}; + /** Use HepMC's tree parsing for building event structure */ + o2::framework::Configurable useTree{"hepmc-use-tree", false, + "Export as tree"}; + /** Floating point precision used when writing to disk */ + o2::framework::Configurable precision{"hepmc-precision", 8, + "Export precision in dump"}; + /** Recenter event at IP=(0,0,0,0). */ + o2::framework::Configurable recenter{"hepmc-recenter", false, + "Recenter the events at (0,0,0,0)"}; + } configs; + /** Our converter */ Converter mConverter; + /** Post-run trigger service **/ + o2::framework::Service trigger; + /** @{ * @name Container types */ /** Alias converter header table type */ @@ -75,9 +99,11 @@ struct AodToHepmc { /** @} */ /** Initialize the job */ - void init(o2::framework::InitContext& ic) + void init(o2::framework::InitContext&) { + mConverter.configs = {(std::string)configs.dump, (bool)configs.onlyGen, (bool)configs.useTree, (int)configs.precision, (bool)configs.recenter}; mConverter.init(); + trigger->ptr = &mConverter; } /** Processing of event to extract extra HepMC information * From f4de6bbb87a17d7ce45c2a711d65cf5392331547 Mon Sep 17 00:00:00 2001 From: ALICE Action Bot Date: Mon, 10 Feb 2025 10:41:03 +0000 Subject: [PATCH 0073/1914] Please consider the following formatting changes --- Generators/include/Generators/AODToHepMC.h | 3 ++- run/o2aod_mc_to_hepmc.cxx | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Generators/include/Generators/AODToHepMC.h b/Generators/include/Generators/AODToHepMC.h index 426f9c223410b..aef738455d9ad 100644 --- a/Generators/include/Generators/AODToHepMC.h +++ b/Generators/include/Generators/AODToHepMC.h @@ -592,7 +592,8 @@ struct AODToHepMCPostRun { { } - void endOfStream() { + void endOfStream() + { if (ptr != nullptr) { ptr->postRun(); } diff --git a/run/o2aod_mc_to_hepmc.cxx b/run/o2aod_mc_to_hepmc.cxx index 
73bf5b6475a22..230e3247821cf 100644 --- a/run/o2aod_mc_to_hepmc.cxx +++ b/run/o2aod_mc_to_hepmc.cxx @@ -53,21 +53,21 @@ struct AodToHepmc { /** Option for dumping HepMC event structures to disk. Takes one * argument - the name of the file to write to. */ o2::framework::Configurable dump{"hepmc-dump", "", - "Dump HepMC event to output"}; + "Dump HepMC event to output"}; /** Option for only storing particles from the event generator. * Note, if a particle is stored down, then its mothers will also * be stored. */ o2::framework::Configurable onlyGen{"hepmc-only-generated", false, - "Only export generated"}; + "Only export generated"}; /** Use HepMC's tree parsing for building event structure */ o2::framework::Configurable useTree{"hepmc-use-tree", false, - "Export as tree"}; + "Export as tree"}; /** Floating point precision used when writing to disk */ o2::framework::Configurable precision{"hepmc-precision", 8, - "Export precision in dump"}; + "Export precision in dump"}; /** Recenter event at IP=(0,0,0,0). 
*/ o2::framework::Configurable recenter{"hepmc-recenter", false, - "Recenter the events at (0,0,0,0)"}; + "Recenter the events at (0,0,0,0)"}; } configs; /** Our converter */ From 5f51fc707f38a17c541ba6d5891274a964537fbb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Feb 2025 09:42:00 +0100 Subject: [PATCH 0074/1914] GPU TPC: Add dEdxClusterRejectionFlagMask option --- GPU/GPUTracking/DataTypes/GPUSettings.h | 1 + GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h | 1 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index eff95e07fe09d..afde8d4128bab 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -17,6 +17,7 @@ #include "GPUCommonDef.h" #include "GPUDataTypes.h" +#include "GPUTPCGMMergedTrackHit.h" #ifndef GPUCA_GPUCODE_DEVICE #include #include diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index 2039638c81b9a..fb1a12da994da 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -26,6 +26,7 @@ struct GPUTPCGMMergedTrackHit { uint8_t slice, row, leg, state; // NOTE: the lower states must match those from ClusterNative! + // TODO: take them directly from clusterNative header. 
enum hitState { flagSplitPad = 0x1, flagSplitTime = 0x2, flagSplit = 0x3, diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index cd2916f5df679..10bb4797a1c15 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -153,6 +153,7 @@ AddOptionRTC(dropSecondaryLegsInOutput, int8_t, 1, "", 0, "Do not store secondar AddOptionRTC(enablePID, int8_t, 1, "", 0, "Enable PID response") AddOptionRTC(PID_useNsigma, int8_t, 1, "", 0, "Use nSigma instead of absolute distance in PID response") AddOptionRTC(adddEdxSubThresholdClusters, int8_t, 1, "", 0, "Add sub threshold clusters in TPC dEdx computation") +AddOptionRTC(dEdxClusterRejectionFlagMask, int8_t, o2::gpu::GPUTPCGMMergedTrackHit::flagEdge, "", 0, "OR mask of TPC flags that will reject the cluster in dEdx") AddOptionRTC(rejectEdgeClustersInSeeding, int8_t, 0, "", 0, "Reject edge clusters based on uncorrected track Y during seeding") AddOptionRTC(rejectEdgeClustersInTrackFit, int8_t, 0, "", 0, "Reject edge clusters based on uncorrected track Y during track fit") AddOptionArray(PID_remap, int8_t, 9, (0, 1, 2, 3, 4, 5, 6, 7, 8), "", 0, "Remap Ipid to PID_reamp[Ipid] (no remap if<0)") // BUG: CUDA cannot yet hand AddOptionArrayRTC diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index bb450f1f4112f..790e911a1d865 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -366,7 +366,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, prop.GetAlpha()); } - if (param.par.dodEdx && param.dodEdxDownscaled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg && !(clusterState & GPUTPCGMMergedTrackHit::flagEdge)) { // TODO: Costimize flag to remove, and option to remove double-clusters + if 
(param.par.dodEdx && param.dodEdxDownscaled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg && (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0) { // TODO: Costimize flag to remove, and option to remove double-clusters float qtot = 0, qmax = 0, pad = 0, relTime = 0; const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { From 5fe7c0b0bbc4c8e430e0f062979958bd0f1d4ad5 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 12 Feb 2025 14:50:47 +0100 Subject: [PATCH 0075/1914] DPL Analysis: refactor Preslice; move some code out of line (#13901) --- Framework/Core/include/Framework/ASoA.h | 265 +++++++++--------- .../Core/include/Framework/AnalysisHelpers.h | 4 +- .../Core/include/Framework/AnalysisManagers.h | 58 ++-- Framework/Core/src/ASoA.cxx | 47 ++++ 4 files changed, 212 insertions(+), 162 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index be6329b48b7eb..f72d26e84d82f 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1389,76 +1389,69 @@ consteval static bool relatedBySortedIndex() namespace o2::framework { -template -struct PresliceBase { - constexpr static bool sorted = SORTED; + +struct PreslicePolicyBase { + const std::string binding; + StringPair bindingKey; + + bool isMissing() const; + StringPair const& getBindingKey() const; +}; + +struct PreslicePolicySorted : public PreslicePolicyBase { + void updateSliceInfo(SliceInfoPtr&& si); + + SliceInfoPtr sliceInfo; + std::shared_ptr getSliceFor(int value, std::shared_ptr const& input, uint64_t& offset) const; +}; + +struct PreslicePolicyGeneral : public PreslicePolicyBase { + void updateSliceInfo(SliceInfoUnsortedPtr&& si); + + SliceInfoUnsortedPtr sliceInfo; + gsl::span getSliceFor(int value) const; +}; + +template +struct PresliceBase : public Policy { constexpr 
static bool optional = OPT; using target_t = T; const std::string binding; PresliceBase(expressions::BindingNode index_) - : binding{o2::soa::getLabelFromTypeForKey(index_.name)}, - bindingKey{binding, index_.name} {} - - void updateSliceInfo(std::conditional_t&& si) + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { - sliceInfo = si; } std::shared_ptr getSliceFor(int value, std::shared_ptr const& input, uint64_t& offset) const { if constexpr (OPT) { - if (isMissing()) { + if (Policy::isMissing()) { return nullptr; } } - if constexpr (SORTED) { - auto [offset_, count] = sliceInfo.getSliceFor(value); - auto output = input->Slice(offset_, count); - offset = static_cast(offset_); - return output; - } else { - static_assert(SORTED, "Wrong method called for unsorted cache"); - } + return Policy::getSliceFor(value, input, offset); } gsl::span getSliceFor(int value) const { if constexpr (OPT) { - if (isMissing()) { + if (Policy::isMissing()) { return {}; } } - if constexpr (!SORTED) { - return sliceInfo.getSliceFor(value); - } else { - static_assert(!SORTED, "Wrong method called for sorted cache"); - } + return Policy::getSliceFor(value); } - - bool isMissing() const - { - return binding == "[MISSING]"; - } - - StringPair const& getBindingKey() const - { - return bindingKey; - } - - std::conditional_t sliceInfo; - - StringPair bindingKey; }; template -using PresliceUnsorted = PresliceBase; +using PresliceUnsorted = PresliceBase; template -using PresliceUnsortedOptional = PresliceBase; +using PresliceUnsortedOptional = PresliceBase; template -using Preslice = PresliceBase; +using Preslice = PresliceBase; template -using PresliceOptional = PresliceBase; +using PresliceOptional = PresliceBase; } // namespace o2::framework @@ -1497,96 +1490,84 @@ static consteval auto extractBindings(framework::pack) SelectionVector 
selectionToVector(gandiva::Selection const& sel); -template -auto doSliceBy(T const* table, o2::framework::PresliceBase const& container, int value) +template + requires std::same_as && (o2::soa::is_binding_compatible_v()) +auto doSliceBy(T const* table, o2::framework::PresliceBase const& container, int value) { - if constexpr (o2::soa::is_binding_compatible_v()) { - if constexpr (OPT) { - if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); - } - } - if constexpr (SORTED) { - uint64_t offset = 0; - auto out = container.getSliceFor(value, table->asArrowTable(), offset); - auto t = typename T::self_t({out}, offset); - table->copyIndexBindings(t); - t.bindInternalIndicesTo(table); - return t; - } else { - auto selection = container.getSliceFor(value); - if constexpr (soa::is_filtered_table) { - auto t = soa::Filtered({table->asArrowTable()}, selection); - table->copyIndexBindings(t); - t.bindInternalIndicesTo(table); - t.intersectWithSelection(table->getSelectedRows()); // intersect filters - return t; - } else { - auto t = soa::Filtered({table->asArrowTable()}, selection); - table->copyIndexBindings(t); - t.bindInternalIndicesTo(table); - return t; - } + if constexpr (OPT) { + if (container.isMissing()) { + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); } - } else { - if constexpr (SORTED) { - static_assert(o2::framework::always_static_assert_v, "Wrong Preslice<> entry used: incompatible type"); - } else { - static_assert(o2::framework::always_static_assert_v, "Wrong PresliceUnsorted<> entry used: incompatible type"); + } + uint64_t offset = 0; + auto out = container.getSliceFor(value, table->asArrowTable(), offset); + auto t = typename T::self_t({out}, offset); + table->copyIndexBindings(t); + t.bindInternalIndicesTo(table); + return t; +} + +template +auto doSliceByHelper(T const* table, gsl::span const& selection) +{ + auto t = 
soa::Filtered({table->asArrowTable()}, selection); + table->copyIndexBindings(t); + t.bindInternalIndicesTo(table); + t.intersectWithSelection(table->getSelectedRows()); // intersect filters + return t; +} + +template + requires(!soa::is_filtered_table) +auto doSliceByHelper(T const* table, gsl::span const& selection) +{ + auto t = soa::Filtered({table->asArrowTable()}, selection); + table->copyIndexBindings(t); + t.bindInternalIndicesTo(table); + return t; +} + +template + requires std::same_as && (o2::soa::is_binding_compatible_v()) +auto doSliceBy(T const* table, o2::framework::PresliceBase const& container, int value) +{ + if constexpr (OPT) { + if (container.isMissing()) { + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); } } + auto selection = container.getSliceFor(value); + return doSliceByHelper(table, selection); } -template +SelectionVector sliceSelection(gsl::span const& mSelectedRows, int64_t nrows, uint64_t offset); + +template auto prepareFilteredSlice(T const* table, std::shared_ptr slice, uint64_t offset) { if (offset >= static_cast(table->tableSize())) { - if constexpr (soa::is_filtered_table) { - Filtered fresult{{{slice}}, SelectionVector{}, 0}; - table->copyIndexBindings(fresult); - return fresult; - } else { - typename T::self_t fresult{{{slice}}, SelectionVector{}, 0}; - table->copyIndexBindings(fresult); - return fresult; - } - } - auto start = offset; - auto end = start + slice->num_rows(); - auto mSelectedRows = table->getSelectedRows(); - auto start_iterator = std::lower_bound(mSelectedRows.begin(), mSelectedRows.end(), start); - auto stop_iterator = std::lower_bound(start_iterator, mSelectedRows.end(), end); - SelectionVector slicedSelection{start_iterator, stop_iterator}; - std::transform(slicedSelection.begin(), slicedSelection.end(), slicedSelection.begin(), - [&start](int64_t idx) { - return idx - static_cast(start); - }); - if constexpr (soa::is_filtered_table) { - Filtered fresult{{{slice}}, 
std::move(slicedSelection), start}; - table->copyIndexBindings(fresult); - return fresult; - } else { - typename T::self_t fresult{{{slice}}, std::move(slicedSelection), start}; + Filtered fresult{{{slice}}, SelectionVector{}, 0}; table->copyIndexBindings(fresult); return fresult; } + auto slicedSelection = sliceSelection(table->getSelectedRows(), slice->num_rows(), offset); + Filtered fresult{{{slice}}, std::move(slicedSelection), offset}; + table->copyIndexBindings(fresult); + return fresult; } -template -auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase const& container, int value) +template + requires(o2::soa::is_binding_compatible_v()) +auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase const& container, int value) { - if constexpr (o2::soa::is_binding_compatible_v()) { - if constexpr (OPT) { - if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.second.c_str()); - } + if constexpr (OPT) { + if (container.isMissing()) { + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.second.c_str()); } - uint64_t offset = 0; - auto slice = container.getSliceFor(value, table->asArrowTable(), offset); - return prepareFilteredSlice(table, slice, offset); - } else { - static_assert(o2::framework::always_static_assert_v, "Wrong Preslice<> entry used: incompatible type"); } + uint64_t offset = 0; + auto slice = container.getSliceFor(value, table->asArrowTable(), offset); + return prepareFilteredSlice(table, slice, offset); } template @@ -2099,8 +2080,8 @@ class Table return doSliceByCachedUnsorted(this, node, value, cache); } - template - auto sliceBy(o2::framework::PresliceBase const& container, int value) const + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const { return doSliceBy(this, container, value); } @@ -3201,8 +3182,8 @@ struct JoinFull : Table, D, o2::aod::Hash<"JOIN"_h>, Ts. 
return doSliceByCachedUnsorted(this, node, value, cache); } - template - auto sliceBy(o2::framework::PresliceBase const& container, int value) const + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const { return doSliceBy(this, container, value); } @@ -3463,14 +3444,16 @@ class FilteredBase : public T return doSliceByCachedUnsorted(this, node, value, cache); } - template - auto sliceBy(o2::framework::PresliceBase const& container, int value) const + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const { - if constexpr (SORTED) { - return doFilteredSliceBy(this, container, value); - } else { - return doSliceBy(this, container, value); - } + return doFilteredSliceBy(this, container, value); + } + + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const + { + return doSliceBy(this, container, value); } auto select(framework::expressions::Filter const& f) const @@ -3697,14 +3680,16 @@ class Filtered : public FilteredBase return doSliceByCachedUnsorted(this, node, value, cache); } - template - auto sliceBy(o2::framework::PresliceBase const& container, int value) const + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const { - if constexpr (SORTED) { - return doFilteredSliceBy(this, container, value); - } else { - return doSliceBy(this, container, value); - } + return doFilteredSliceBy(this, container, value); + } + + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const + { + return doSliceBy(this, container, value); } auto select(framework::expressions::Filter const& f) const @@ -3864,14 +3849,16 @@ class Filtered> : public FilteredBase return doSliceByCachedUnsorted(this, node, value, cache); } - template - auto sliceBy(o2::framework::PresliceBase const& container, int value) const + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const { - if constexpr (SORTED) { - 
return doFilteredSliceBy(this, container, value); - } else { - return doSliceBy(this, container, value); - } + return doFilteredSliceBy(this, container, value); + } + + template + auto sliceBy(o2::framework::PresliceBase const& container, int value) const + { + return doSliceBy(this, container, value); } private: diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index d84c9714b2f30..bb7e5e14aaa75 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -652,8 +652,8 @@ struct Partition { return mFiltered->sliceByCachedUnsorted(node, value, cache); } - template - [[nodiscard]] auto sliceBy(o2::framework::PresliceBase const& container, int value) const + template + [[nodiscard]] auto sliceBy(o2::framework::PresliceBase const& container, int value) const { return mFiltered->sliceBy(container, value); } diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index e0dd21708e841..30ebf1799b227 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -645,44 +645,60 @@ struct PresliceManager { } }; -template -struct PresliceManager> { - static bool registerCache(PresliceBase& container, std::vector& bsks, std::vector& bsksU) +template +struct PresliceManager> { + static bool registerCache(PresliceBase& container, std::vector& bsks, std::vector&) + requires std::same_as { if constexpr (OPT) { if (container.binding == "[MISSING]") { return true; } } - if constexpr (SORTED) { - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == container.bindingKey.first) && (entry.second == container.bindingKey.second); }); - if (locate == bsks.end()) { - bsks.emplace_back(container.getBindingKey()); - } - return true; - } else { - auto locate = std::find_if(bsksU.begin(), 
bsksU.end(), [&](auto const& entry) { return (entry.first == container.bindingKey.first) && (entry.second == container.bindingKey.second); }); - if (locate == bsksU.end()) { - bsksU.emplace_back(container.getBindingKey()); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == container.bindingKey.first) && (entry.second == container.bindingKey.second); }); + if (locate == bsks.end()) { + bsks.emplace_back(container.getBindingKey()); + } + return true; + } + + static bool registerCache(PresliceBase& container, std::vector&, std::vector& bsksU) + requires std::same_as + { + if constexpr (OPT) { + if (container.binding == "[MISSING]") { + return true; } - return true; } + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == container.bindingKey.first) && (entry.second == container.bindingKey.second); }); + if (locate == bsksU.end()) { + bsksU.emplace_back(container.getBindingKey()); + } + return true; } - static bool updateSliceInfo(PresliceBase& container, ArrowTableSlicingCache& cache) + static bool updateSliceInfo(PresliceBase& container, ArrowTableSlicingCache& cache) + requires std::same_as { if constexpr (OPT) { if (container.binding == "[MISSING]") { return true; } } - if constexpr (SORTED) { - container.updateSliceInfo(cache.getCacheFor(container.getBindingKey())); - return true; - } else { - container.updateSliceInfo(cache.getCacheUnsortedFor(container.getBindingKey())); - return true; + container.updateSliceInfo(cache.getCacheFor(container.getBindingKey())); + return true; + } + + static bool updateSliceInfo(PresliceBase& container, ArrowTableSlicingCache& cache) + requires std::same_as + { + if constexpr (OPT) { + if (container.binding == "[MISSING]") { + return true; + } } + container.updateSliceInfo(cache.getCacheUnsortedFor(container.getBindingKey())); + return true; } }; } // namespace o2::framework diff --git a/Framework/Core/src/ASoA.cxx 
b/Framework/Core/src/ASoA.cxx index 8f509ea17d2ba..810398747de88 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -50,6 +50,20 @@ SelectionVector selectionToVector(gandiva::Selection const& sel) return rows; } +SelectionVector sliceSelection(gsl::span const& mSelectedRows, int64_t nrows, uint64_t offset) +{ + auto start = offset; + auto end = start + nrows; + auto start_iterator = std::lower_bound(mSelectedRows.begin(), mSelectedRows.end(), start); + auto stop_iterator = std::lower_bound(start_iterator, mSelectedRows.end(), end); + SelectionVector slicedSelection{start_iterator, stop_iterator}; + std::transform(slicedSelection.begin(), slicedSelection.end(), slicedSelection.begin(), + [&start](int64_t idx) { + return idx - static_cast(start); + }); + return slicedSelection; +} + std::shared_ptr ArrowHelpers::joinTables(std::vector>&& tables) { if (tables.size() == 1) { @@ -177,4 +191,37 @@ std::string strToUpper(std::string&& str) std::transform(str.begin(), str.end(), str.begin(), [](unsigned char c) { return std::toupper(c); }); return str; } + +bool PreslicePolicyBase::isMissing() const +{ + return binding == "[MISSING]"; +} + +StringPair const& PreslicePolicyBase::getBindingKey() const +{ + return bindingKey; +} + +void PreslicePolicySorted::updateSliceInfo(SliceInfoPtr&& si) +{ + sliceInfo = si; +} + +void PreslicePolicyGeneral::updateSliceInfo(SliceInfoUnsortedPtr&& si) +{ + sliceInfo = si; +} + +std::shared_ptr PreslicePolicySorted::getSliceFor(int value, std::shared_ptr const& input, uint64_t& offset) const +{ + auto [offset_, count] = this->sliceInfo.getSliceFor(value); + auto output = input->Slice(offset_, count); + offset = static_cast(offset_); + return output; +} + +gsl::span PreslicePolicyGeneral::getSliceFor(int value) const +{ + return this->sliceInfo.getSliceFor(value); +} } // namespace o2::framework From 81c73e4c108642957ed08b762daf6662bea1554f Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 4 Feb 2025 
14:04:11 +0100 Subject: [PATCH 0076/1914] GLOQC: revert changes to mc histos @shahor02 this should fix the crash observed in MC, there the PtBin is set to 0 thus leading to inf. --- Detectors/GLOQC/src/MatchITSTPCQC.cxx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Detectors/GLOQC/src/MatchITSTPCQC.cxx b/Detectors/GLOQC/src/MatchITSTPCQC.cxx index 121df5e134817..86de9cd9c056e 100644 --- a/Detectors/GLOQC/src/MatchITSTPCQC.cxx +++ b/Detectors/GLOQC/src/MatchITSTPCQC.cxx @@ -386,11 +386,11 @@ bool MatchITSTPCQC::init() mPhiPhysPrimDen[i]->Sumw2(); mFractionITSTPCmatchPhysPrim[i] = new TEfficiency(Form("mFractionITSTPCmatchPhysPrim_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs Pt (physical primary), wrt %s tracks %s; Pt [GeV/c]; Eff", title[i].c_str(), etaSel[i].c_str()), nbinsPt, xbinsPt); - m1OverPtPhysPrimNum[i] = new TH1D(Form("m1OverPtPhysPrimNum_%s", title[i].c_str()), Form("1/Pt distribution of matched tracks (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -1. / mPtCut, 1. / mPtCut); + m1OverPtPhysPrimNum[i] = new TH1D(Form("m1OverPtPhysPrimNum_%s", title[i].c_str()), Form("1/Pt distribution of matched tracks (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -20., 20.); m1OverPtPhysPrimNum[i]->Sumw2(); - m1OverPtPhysPrimDen[i] = new TH1D(Form("m1OverPtPhysPrimDen_%s", title[i].c_str()), Form("1/PtPt distribution of %s tracks (physical primary) %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -1. / mPtCut, 1. 
/ mPtCut); + m1OverPtPhysPrimDen[i] = new TH1D(Form("m1OverPtPhysPrimDen_%s", title[i].c_str()), Form("1/PtPt distribution of %s tracks (physical primary) %s; 1/Pt [c/GeV]; dNd1/Pt", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -20., 20.); m1OverPtPhysPrimDen[i]->Sumw2(); - mFractionITSTPCmatchPhysPrim1OverPt[i] = new TEfficiency(Form("mFractionITSTPCmatchPhysPrim1OverPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs 1/Pt (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; Eff", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -1. / mPtCut, 1. / mPtCut); + mFractionITSTPCmatchPhysPrim1OverPt[i] = new TEfficiency(Form("mFractionITSTPCmatchPhysPrim1OverPt_%s", title[i].c_str()), Form("Fraction of ITSTPC matched tracks vs 1/Pt (physical primary), wrt %s tracks %s; 1/Pt [c/GeV]; Eff", title[i].c_str(), etaSel[i].c_str()), 2 * mPtBins, -20., 20.); } } From 28d9c7622ec3429fb306df2499fa2c21363c28b6 Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Tue, 28 Jan 2025 22:41:55 +0100 Subject: [PATCH 0077/1914] [MUON] use track time in global muon matching Added option for using the track time when selecting the MFT candidates to be matched with a given MCH track. This helps to reduce the number of candidates when the MCH track is already matched with MID and therefore has a precise timing. In this case the number of MCH tracks that need to be combined with MFT tracks from two adjacent ROFs is strongly reduced. 
--- .../include/GlobalTracking/MatchGlobalFwd.h | 1 + .../include/GlobalTracking/MatchGlobalFwdParam.h | 1 + Detectors/GlobalTracking/src/MatchGlobalFwd.cxx | 14 ++++++++++++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwd.h b/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwd.h index c3de0cdc74ed3..3445123385cfa 100644 --- a/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwd.h +++ b/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwd.h @@ -339,6 +339,7 @@ class MatchGlobalFwd o2::itsmft::ChipMappingMFT mMFTMapping; bool mMCTruthON = false; ///< Flag availability of MC truth bool mUseMIDMCHMatch = false; ///< Flag for using MCHMID matches (TrackMCHMID) + bool mUseTrackTime = false; ///< Flag for using the MCH or MCHMID track time information to select the MFT ROF(s) int mSaveMode = 0; ///< Output mode [0 = SaveBestMatch; 1 = SaveAllMatches; 2 = SaveTrainingData; 3 = SaveNCandidates] int mNCandidates = 5; ///< Numbers of matching candidates to save in savemode=3 MatchingType mMatchingType = MATCHINGUNDEFINED; diff --git a/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwdParam.h b/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwdParam.h index 5633decb80985..757d74ff40780 100644 --- a/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwdParam.h +++ b/Detectors/GlobalTracking/include/GlobalTracking/MatchGlobalFwdParam.h @@ -39,6 +39,7 @@ struct GlobalFwdMatchingParam : public o2::conf::ConfigurableParamHelper mMCHID2Work[MCHId]:" << mMCHID2Work[MCHId]; const auto& IR = MIDMatch.getIR(); int nBC = IR.differenceInBC(mStartIR); - float tMin = nBC * o2::constants::lhc::LHCBunchSpacingMUS; - float tMax = (nBC + 1) * o2::constants::lhc::LHCBunchSpacingMUS; + float tMin = (nBC - 1) * o2::constants::lhc::LHCBunchSpacingMUS; + float tMax = (nBC + 2) * o2::constants::lhc::LHCBunchSpacingMUS; thisMuonTrack.setMIDTrackID(MIDId); 
thisMuonTrack.setTimeMUS(MIDMatch.getTimeMUS(mStartIR).first); thisMuonTrack.tBracket.set(tMin, tMax); @@ -435,6 +438,7 @@ void MatchGlobalFwd::ROFMatch(int MFTROFId, int firstMCHROFId, int lastMCHROFId) { /// Matches MFT tracks on a given ROF with MCH tracks in a range of ROFs const auto& thisMFTROF = mMFTTrackROFRec[MFTROFId]; + const auto& thisMFTBracket = mMFTROFTimes[MFTROFId]; const auto& firstMCHROF = mMCHTrackROFRec[firstMCHROFId]; const auto& lastMCHROF = mMCHTrackROFRec[lastMCHROFId]; int nFakes = 0, nTrue = 0; @@ -464,6 +468,12 @@ void MatchGlobalFwd::ROFMatch(int MFTROFId, int firstMCHROFId, int lastMCHROFId) // loop over all MCH tracks for (auto MCHId = firstMCHTrackID; MCHId <= lastMCHTrackID; MCHId++) { auto& thisMCHTrack = mMCHWork[MCHId]; + + // If enabled, use the muon track time to check if the track is correlated with the MFT ROF + if (mUseTrackTime && (thisMFTBracket.isOutside(thisMCHTrack.tBracket))) { + continue; + } + o2::MCCompLabel matchLabel; for (auto MFTId = firstMFTTrackID; MFTId <= lastMFTTrackID; MFTId++) { auto& thisMFTTrack = mMFTWork[MFTId]; From 97c3e517174645253f85e8b94f5b730e6feec519 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 14 Feb 2025 12:23:39 +0100 Subject: [PATCH 0078/1914] DPL: add support for decompressing directly to shared memory (#13962) This PR postpones the read operations which would usually populate an intermediate RecordBatch and it performs them directly on its subsequent shared memory serialization. Doing so avoids having the intermediate representation allocate most of the memory. For the moment this is only done for the TTree plugin. RNtuple support will come in a subsequent PR. 
--- .../AnalysisSupport/src/RNTuplePlugin.cxx | 6 +- Framework/AnalysisSupport/src/TTreePlugin.cxx | 633 +++++++++++++----- .../include/Framework/RootArrowFilesystem.h | 6 + Framework/Core/src/RootArrowFilesystem.cxx | 6 + Framework/Core/test/test_Root2ArrowTable.cxx | 64 +- 5 files changed, 542 insertions(+), 173 deletions(-) diff --git a/Framework/AnalysisSupport/src/RNTuplePlugin.cxx b/Framework/AnalysisSupport/src/RNTuplePlugin.cxx index 51b585d0714bb..a910964e6527c 100644 --- a/Framework/AnalysisSupport/src/RNTuplePlugin.cxx +++ b/Framework/AnalysisSupport/src/RNTuplePlugin.cxx @@ -12,6 +12,7 @@ #include "Framework/RuntimeError.h" #include "Framework/RootArrowFilesystem.h" #include "Framework/Plugins.h" +#include "Framework/FairMQResizableBuffer.h" #include #include #include @@ -852,7 +853,10 @@ struct RNTupleObjectReadingImplementation : public RootArrowFactoryPlugin { return new RootArrowFactory{ .options = [context]() { return context->format->DefaultWriteOptions(); }, .format = [context]() { return context->format; }, - }; + .deferredOutputStreamer = [](std::shared_ptr fragment, const std::shared_ptr& buffer) -> std::shared_ptr { + auto treeFragment = std::dynamic_pointer_cast(fragment); + return std::make_shared(buffer); + }}; } }; diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index 4b130a2144253..881f7d6edc117 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -13,10 +13,15 @@ #include "Framework/Plugins.h" #include "Framework/Signpost.h" #include "Framework/Endian.h" +#include +#include +#include #include #include +#include #include #include +#include #include #include #include @@ -26,13 +31,278 @@ #include #include #include +#include +#include #include +#include +#include O2_DECLARE_DYNAMIC_LOG(root_arrow_fs); namespace o2::framework { +enum struct ReadOpKind { + Unknown, + Offsets, + Values, + Booleans, + VLA +}; + +struct 
ReadOps { + TBranch* branch = nullptr; + std::shared_ptr targetBuffer = nullptr; + int64_t rootBranchEntries = 0; + size_t typeSize = 0; + size_t listSize = 0; + // If this is an offset reading op, keep track of the actual + // range for the offsets, not only how many VLAs are there. + int64_t offsetCount = 0; + ReadOpKind kind = ReadOpKind::Unknown; +}; + +/// An OutputStream which does the reading of the input buffers directly +/// on writing, if needed. Each deferred operation is encoded in the source +/// buffer by an incremental number which can be used to lookup in the @a ops +/// vector the operation to perform. +class TTreeDeferredReadOutputStream : public arrow::io::OutputStream +{ + public: + explicit TTreeDeferredReadOutputStream(std::vector& ops, + const std::shared_ptr& buffer); + + /// \brief Create in-memory output stream with indicated capacity using a + /// memory pool + /// \param[in] initial_capacity the initial allocated internal capacity of + /// the OutputStream + /// \param[in,out] pool a MemoryPool to use for allocations + /// \return the created stream + static arrow::Result> Create( + std::vector& ops, + int64_t initial_capacity = 4096, + arrow::MemoryPool* pool = arrow::default_memory_pool()); + + // By the time we call the destructor, the contents + // of the buffer are already moved to fairmq + // for being sent. + ~TTreeDeferredReadOutputStream() override = default; + + // Implement the OutputStream interface + + /// Close the stream, preserving the buffer (retrieve it with Finish()). 
+ arrow::Status Close() override; + [[nodiscard]] bool closed() const override; + [[nodiscard]] arrow::Result Tell() const override; + arrow::Status Write(const void* data, int64_t nbytes) override; + + /// \cond FALSE + using OutputStream::Write; + /// \endcond + + /// Close the stream and return the buffer + arrow::Result> Finish(); + + /// \brief Initialize state of OutputStream with newly allocated memory and + /// set position to 0 + /// \param[in] initial_capacity the starting allocated capacity + /// \param[in,out] pool the memory pool to use for allocations + /// \return Status + arrow::Status Reset(std::vector ops, + int64_t initial_capacity, arrow::MemoryPool* pool); + + [[nodiscard]] int64_t capacity() const { return capacity_; } + + private: + TTreeDeferredReadOutputStream(); + std::vector ops_; + + // Ensures there is sufficient space available to write nbytes + arrow::Status Reserve(int64_t nbytes); + + std::shared_ptr buffer_; + bool is_open_; + int64_t capacity_; + int64_t position_; + uint8_t* mutable_data_; +}; + +static constexpr int64_t kBufferMinimumSize = 256; + +TTreeDeferredReadOutputStream::TTreeDeferredReadOutputStream() + : is_open_(false), capacity_(0), position_(0), mutable_data_(nullptr) {} + +TTreeDeferredReadOutputStream::TTreeDeferredReadOutputStream(std::vector& ops, + const std::shared_ptr& buffer) + : ops_(ops), + buffer_(buffer), + is_open_(true), + capacity_(buffer->size()), + position_(0), + mutable_data_(buffer->mutable_data()) {} + +arrow::Result> TTreeDeferredReadOutputStream::Create( + std::vector& ops, + int64_t initial_capacity, arrow::MemoryPool* pool) +{ + // ctor is private, so cannot use make_shared + auto ptr = std::shared_ptr(new TTreeDeferredReadOutputStream); + RETURN_NOT_OK(ptr->Reset(ops, initial_capacity, pool)); + return ptr; +} + +arrow::Status TTreeDeferredReadOutputStream::Reset(std::vector ops, + int64_t initial_capacity, arrow::MemoryPool* pool) +{ + ARROW_ASSIGN_OR_RAISE(buffer_, 
AllocateResizableBuffer(initial_capacity, pool)); + ops_ = ops; + is_open_ = true; + capacity_ = initial_capacity; + position_ = 0; + mutable_data_ = buffer_->mutable_data(); + return arrow::Status::OK(); +} + +arrow::Status TTreeDeferredReadOutputStream::Close() +{ + if (is_open_) { + is_open_ = false; + if (position_ < capacity_) { + RETURN_NOT_OK(buffer_->Resize(position_, false)); + } + } + return arrow::Status::OK(); +} + +bool TTreeDeferredReadOutputStream::closed() const { return !is_open_; } + +arrow::Result> TTreeDeferredReadOutputStream::Finish() +{ + RETURN_NOT_OK(Close()); + buffer_->ZeroPadding(); + is_open_ = false; + return std::move(buffer_); +} + +arrow::Result TTreeDeferredReadOutputStream::Tell() const { return position_; } + +auto readValues = [](uint8_t* target, ReadOps& op, TBufferFile& rootBuffer) { + int readEntries = 0; + rootBuffer.Reset(); + while (readEntries < op.rootBranchEntries) { + auto readLast = op.branch->GetBulkRead().GetEntriesSerialized(readEntries, rootBuffer); + if (readLast < 0) { + throw runtime_error_f("Error while reading branch %s starting from %zu.", op.branch->GetName(), readEntries); + } + int size = readLast * op.listSize; + readEntries += readLast; + swapCopy(target, rootBuffer.GetCurrent(), size, op.typeSize); + target += (ptrdiff_t)(size * op.typeSize); + } +}; + +auto readBoolValues = [](uint8_t* target, ReadOps& op, TBufferFile& rootBuffer) { + int readEntries = 0; + rootBuffer.Reset(); + // Set to 0 + memset(target, 0, op.targetBuffer->size()); + int readLast = 0; + while (readEntries < op.rootBranchEntries) { + auto beginValue = readLast; + auto readLast = op.branch->GetBulkRead().GetBulkEntries(readEntries, rootBuffer); + int size = readLast * op.listSize; + readEntries += readLast; + for (int i = beginValue; i < beginValue + size; ++i) { + auto value = static_cast(rootBuffer.GetCurrent()[i - beginValue] << (i % 8)); + target[i / 8] |= value; + } + } +}; + +auto readVLAValues = [](uint8_t* target, ReadOps& 
op, ReadOps const& offsetOp, TBufferFile& rootBuffer) { + int readEntries = 0; + auto* tPtrOffset = reinterpret_cast(offsetOp.targetBuffer->data()); + std::span const offsets{tPtrOffset, tPtrOffset + offsetOp.rootBranchEntries + 1}; + + rootBuffer.Reset(); + while (readEntries < op.rootBranchEntries) { + auto readLast = op.branch->GetBulkRead().GetEntriesSerialized(readEntries, rootBuffer); + int size = offsets[readEntries + readLast] - offsets[readEntries]; + readEntries += readLast; + swapCopy(target, rootBuffer.GetCurrent(), size, op.typeSize); + target += (ptrdiff_t)(size * op.typeSize); + } +}; + +TBufferFile& rootBuffer() +{ + // FIXME: we will need more than one once we have multithreaded reading. + static TBufferFile rootBuffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; + return rootBuffer; +} + +arrow::Status TTreeDeferredReadOutputStream::Write(const void* data, int64_t nbytes) +{ + if (ARROW_PREDICT_FALSE(!is_open_)) { + return arrow::Status::IOError("OutputStream is closed"); + } + if (ARROW_PREDICT_TRUE(nbytes == 0)) { + return arrow::Status::OK(); + } + if (ARROW_PREDICT_FALSE(position_ + nbytes >= capacity_)) { + RETURN_NOT_OK(Reserve(nbytes)); + } + // This is a real address which needs to be copied. Do it! 
+ auto ref = (int64_t)data; + if (ref >= ops_.size()) { + memcpy(mutable_data_ + position_, data, nbytes); + position_ += nbytes; + return arrow::Status::OK(); + } + auto& op = ops_[ref]; + + switch (op.kind) { + // Offsets need to be read in advance because we need to know + // how many elements are there in total (since TTree does not allow discovering such informantion) + case ReadOpKind::Offsets: + break; + case ReadOpKind::Values: + readValues(mutable_data_ + position_, op, rootBuffer()); + break; + case ReadOpKind::VLA: + readVLAValues(mutable_data_ + position_, op, ops_[ref - 1], rootBuffer()); + break; + case ReadOpKind::Booleans: + readBoolValues(mutable_data_ + position_, op, rootBuffer()); + break; + case ReadOpKind::Unknown: + throw runtime_error("Unknown Op"); + } + op.branch->SetStatus(false); + op.branch->DropBaskets("all"); + op.branch->Reset(); + op.branch->GetTransientBuffer(0)->Expand(0); + + position_ += nbytes; + return arrow::Status::OK(); +} + +arrow::Status TTreeDeferredReadOutputStream::Reserve(int64_t nbytes) +{ + // Always overallocate by doubling. It seems that it is a better growth + // strategy, at least for memory_benchmark.cc. + // This may be because it helps match the allocator's allocation buckets + // more exactly. Or perhaps it hits a sweet spot in jemalloc. + int64_t new_capacity = std::max(kBufferMinimumSize, capacity_); + new_capacity = position_ + nbytes; + if (new_capacity > capacity_) { + RETURN_NOT_OK(buffer_->Resize(new_capacity)); + capacity_ = new_capacity; + mutable_data_ = buffer_->mutable_data(); + } + return arrow::Status::OK(); +} + class TTreeFileWriteOptions : public arrow::dataset::FileWriteOptions { public: @@ -174,8 +444,21 @@ class TTreeFileFragment : public arrow::dataset::FileFragment return mTree.get(); } + std::vector& ops() + { + return mOps; + } + + /// The pointer to each allocation is an incremental number, indexing a collection to track + /// the size of each allocation. 
+ std::shared_ptr GetPlaceholderForOp(size_t size) + { + return std::make_shared((uint8_t*)(mOps.size() - 1), size); + } + private: std::unique_ptr mTree; + std::vector mOps; }; // An arrow outputstream which allows to write to a TTree. Eventually @@ -246,6 +529,9 @@ bool TTreeOutputStream::closed() const TBranch* TTreeOutputStream::CreateBranch(char const* branchName, char const* sizeBranch) { + if (mBranchPrefix.empty() == true) { + return mTree->Branch(branchName, (char*)nullptr, sizeBranch); + } return mTree->Branch((mBranchPrefix + "/" + branchName).c_str(), (char*)nullptr, (mBranchPrefix + sizeBranch).c_str()); } @@ -263,7 +549,10 @@ struct TTreeObjectReadingImplementation : public RootArrowFactoryPlugin { return new RootArrowFactory{ .options = [context]() { return context->format->DefaultWriteOptions(); }, .format = [context]() { return context->format; }, - }; + .deferredOutputStreamer = [](std::shared_ptr fragment, const std::shared_ptr& buffer) -> std::shared_ptr { + auto treeFragment = std::dynamic_pointer_cast(fragment); + return std::make_shared(treeFragment->ops(), buffer); + }}; } }; @@ -273,10 +562,36 @@ struct BranchFieldMapping { int datasetFieldIdx; }; +auto readOffsets = [](ReadOps& op, TBufferFile& rootBuffer) { + uint32_t offset = 0; + std::span offsets; + int readEntries = 0; + int count = 0; + auto* tPtrOffset = reinterpret_cast(op.targetBuffer->mutable_data()); + offsets = std::span{tPtrOffset, tPtrOffset + op.rootBranchEntries + 1}; + + // read sizes first + rootBuffer.Reset(); + while (readEntries < op.rootBranchEntries) { + auto readLast = op.branch->GetBulkRead().GetEntriesSerialized(readEntries, rootBuffer); + if (readLast == -1) { + throw runtime_error_f("Unable to read from branch %s.", op.branch->GetName()); + } + readEntries += readLast; + for (auto i = 0; i < readLast; ++i) { + offsets[count++] = (int)offset; + offset += swap32_(reinterpret_cast(rootBuffer.GetCurrent())[i]); + } + } + offsets[count] = (int)offset; + 
op.offsetCount = offset; +}; + arrow::Result TTreeFileFormat::ScanBatchesAsync( const std::shared_ptr& options, const std::shared_ptr& fragment) const { + assert(options->dataset_schema != nullptr); // This is the schema we want to read auto dataset_schema = options->dataset_schema; auto treeFragment = std::dynamic_pointer_cast(fragment); @@ -286,6 +601,8 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( auto generator = [pool = options->pool, treeFragment, dataset_schema, &totalCompressedSize = mTotCompressedSize, &totalUncompressedSize = mTotUncompressedSize]() -> arrow::Future> { + O2_SIGNPOST_ID_FROM_POINTER(tid, root_arrow_fs, treeFragment->GetTree()); + O2_SIGNPOST_START(root_arrow_fs, tid, "Generator", "Creating batch for tree %{public}s", treeFragment->GetTree()->GetName()); std::vector> columns; std::vector> fields = dataset_schema->fields(); auto physical_schema = *treeFragment->ReadPhysicalSchema(); @@ -297,201 +614,170 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( // Register physical fields into the cache std::vector mappings; + // We need to count the number of readops to avoid moving the vector. + int opsCount = 0; for (int fi = 0; fi < dataset_schema->num_fields(); ++fi) { auto dataset_field = dataset_schema->field(fi); + // This is needed because for now the dataset_field + // is actually the schema of the ttree + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Processing dataset field %{public}s.", dataset_field->name().c_str()); int physicalFieldIdx = physical_schema->GetFieldIndex(dataset_field->name()); if (physicalFieldIdx < 0) { - throw runtime_error_f("Cannot find physical field associated to %s", dataset_field->name().c_str()); + throw runtime_error_f("Cannot find physical field associated to %s. 
Possible fields: %s", + dataset_field->name().c_str(), physical_schema->ToString().c_str()); } if (physicalFieldIdx > 1 && physical_schema->field(physicalFieldIdx - 1)->name().ends_with("_size")) { + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Field %{public}s has sizes in %{public}s.", dataset_field->name().c_str(), + physical_schema->field(physicalFieldIdx - 1)->name().c_str()); mappings.push_back({physicalFieldIdx, physicalFieldIdx - 1, fi}); + opsCount += 2; } else { mappings.push_back({physicalFieldIdx, -1, fi}); + opsCount++; } } auto* tree = treeFragment->GetTree(); - tree->SetCacheSize(25000000); auto branches = tree->GetListOfBranches(); + size_t totalTreeSize = 0; + std::vector selectedBranches; for (auto& mapping : mappings) { - tree->AddBranchToCache((TBranch*)branches->At(mapping.mainBranchIdx), false); + selectedBranches.push_back((TBranch*)branches->At(mapping.mainBranchIdx)); + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Adding branch %{public}s to stream.", selectedBranches.back()->GetName()); + totalTreeSize += selectedBranches.back()->GetTotalSize(); if (mapping.vlaIdx != -1) { - tree->AddBranchToCache((TBranch*)branches->At(mapping.vlaIdx), false); + selectedBranches.push_back((TBranch*)branches->At(mapping.vlaIdx)); + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Adding branch %{public}s to stream.", selectedBranches.back()->GetName()); + totalTreeSize += selectedBranches.back()->GetTotalSize(); } } - tree->StopCacheLearningPhase(); - static TBufferFile buffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; + size_t cacheSize = std::max(std::min(totalTreeSize, 25000000UL), 1000000UL); + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Resizing cache to %zu.", cacheSize); + tree->SetCacheSize(cacheSize); + for (auto* branch : selectedBranches) { + tree->AddBranchToCache(branch, false); + } + tree->StopCacheLearningPhase(); - int64_t rows = -1; + // Intermediate buffer to bulk read. 
Two for now + std::vector& ops = treeFragment->ops(); + ops.clear(); + ops.reserve(opsCount); for (size_t mi = 0; mi < mappings.size(); ++mi) { BranchFieldMapping mapping = mappings[mi]; // The field actually on disk auto datasetField = dataset_schema->field(mapping.datasetFieldIdx); auto physicalField = physical_schema->field(mapping.mainBranchIdx); - auto* branch = (TBranch*)branches->At(mapping.mainBranchIdx); - assert(branch); - buffer.Reset(); - auto totalEntries = branch->GetEntries(); - if (rows == -1) { - rows = totalEntries; + + if (mapping.vlaIdx != -1) { + auto* branch = (TBranch*)branches->At(mapping.vlaIdx); + ops.emplace_back(ReadOps{ + .branch = branch, + .rootBranchEntries = branch->GetEntries(), + .typeSize = 4, + .listSize = 1, + .kind = ReadOpKind::Offsets, + }); + auto& op = ops.back(); + ARROW_ASSIGN_OR_RAISE(op.targetBuffer, arrow::AllocateBuffer((op.rootBranchEntries + 1) * op.typeSize, pool)); + // Offsets need to be read immediately to know how many values are there + readOffsets(op, rootBuffer()); } - if (rows != totalEntries) { - throw runtime_error_f("Unmatching number of rows for branch %s", branch->GetName()); + ops.push_back({}); + auto& valueOp = ops.back(); + valueOp.branch = (TBranch*)branches->At(mapping.mainBranchIdx); + valueOp.rootBranchEntries = valueOp.branch->GetEntries(); + // In case this is a vla, we set the offsetCount as totalEntries + // In case we read booleans we need a special coversion from bytes to bits. + auto listType = std::dynamic_pointer_cast(datasetField->type()); + valueOp.typeSize = physicalField->type()->byte_width(); + // Notice how we are not (yet) allocating buffers at this point. We merely + // create placeholders to subsequently fill. 
+ if ((datasetField->type() == arrow::boolean())) { + valueOp.kind = ReadOpKind::Booleans; + valueOp.listSize = 1; + valueOp.targetBuffer = treeFragment->GetPlaceholderForOp((valueOp.rootBranchEntries) / 8 + 1); + } else if (listType && datasetField->type()->field(0)->type() == arrow::boolean()) { + valueOp.typeSize = physicalField->type()->field(0)->type()->byte_width(); + valueOp.listSize = listType->list_size(); + valueOp.kind = ReadOpKind::Booleans; + valueOp.targetBuffer = treeFragment->GetPlaceholderForOp((valueOp.rootBranchEntries * valueOp.listSize) / 8 + 1); + } else if (mapping.vlaIdx != -1) { + valueOp.typeSize = physicalField->type()->field(0)->type()->byte_width(); + valueOp.listSize = -1; + // -1 is the current one, -2 is the one with for the offsets + valueOp.kind = ReadOpKind::VLA; + valueOp.targetBuffer = treeFragment->GetPlaceholderForOp(ops[ops.size() - 2].offsetCount * valueOp.typeSize); + } else if (listType) { + valueOp.kind = ReadOpKind::Values; + valueOp.listSize = listType->list_size(); + valueOp.typeSize = physicalField->type()->field(0)->type()->byte_width(); + valueOp.targetBuffer = treeFragment->GetPlaceholderForOp(valueOp.rootBranchEntries * valueOp.typeSize * valueOp.listSize); + } else { + valueOp.typeSize = physicalField->type()->byte_width(); + valueOp.kind = ReadOpKind::Values; + valueOp.listSize = 1; + valueOp.targetBuffer = treeFragment->GetPlaceholderForOp(valueOp.rootBranchEntries * valueOp.typeSize); } arrow::Status status; - int readEntries = 0; std::shared_ptr array; - auto listType = std::dynamic_pointer_cast(datasetField->type()); - if (datasetField->type() == arrow::boolean() || - (listType && datasetField->type()->field(0)->type() == arrow::boolean())) { - if (listType) { - std::unique_ptr builder = nullptr; - auto status = arrow::MakeBuilder(pool, datasetField->type()->field(0)->type(), &builder); - if (!status.ok()) { - throw runtime_error("Cannot create value builder"); - } - auto listBuilder = std::make_unique(pool, 
std::move(builder), listType->list_size()); - auto valueBuilder = listBuilder.get()->value_builder(); - // boolean array special case: we need to use builder to create the bitmap - status = valueBuilder->Reserve(totalEntries * listType->list_size()); - status &= listBuilder->Reserve(totalEntries); - if (!status.ok()) { - throw runtime_error("Failed to reserve memory for array builder"); - } - while (readEntries < totalEntries) { - auto readLast = branch->GetBulkRead().GetBulkEntries(readEntries, buffer); - readEntries += readLast; - status &= static_cast(valueBuilder)->AppendValues(reinterpret_cast(buffer.GetCurrent()), readLast * listType->list_size()); - } - status &= static_cast(listBuilder.get())->AppendValues(readEntries); - if (!status.ok()) { - throw runtime_error("Failed to append values to array"); - } - status &= listBuilder->Finish(&array); - if (!status.ok()) { - throw runtime_error("Failed to create array"); - } - } else if (listType == nullptr) { - std::unique_ptr builder = nullptr; - auto status = arrow::MakeBuilder(pool, datasetField->type(), &builder); - if (!status.ok()) { - throw runtime_error("Cannot create builder"); - } - auto valueBuilder = static_cast(builder.get()); - // boolean array special case: we need to use builder to create the bitmap - status = valueBuilder->Reserve(totalEntries); - if (!status.ok()) { - throw runtime_error("Failed to reserve memory for array builder"); - } - while (readEntries < totalEntries) { - auto readLast = branch->GetBulkRead().GetBulkEntries(readEntries, buffer); - readEntries += readLast; - status &= valueBuilder->AppendValues(reinterpret_cast(buffer.GetCurrent()), readLast); - } - if (!status.ok()) { - throw runtime_error("Failed to append values to array"); - } - status &= valueBuilder->Finish(&array); - if (!status.ok()) { - throw runtime_error("Failed to create array"); - } - } - } else { - // This is needed for branches which have not been persisted. 
- auto bytes = branch->GetTotBytes(); - auto branchSize = bytes ? bytes : 1000000; - auto&& result = arrow::AllocateResizableBuffer(branchSize, pool); - if (!result.ok()) { - throw runtime_error("Cannot allocate values buffer"); - } - std::shared_ptr arrowValuesBuffer = result.MoveValueUnsafe(); - auto ptr = arrowValuesBuffer->mutable_data(); - if (ptr == nullptr) { - throw runtime_error("Invalid buffer"); - } - - std::unique_ptr offsetBuffer = nullptr; - - uint32_t offset = 0; - int count = 0; - std::shared_ptr arrowOffsetBuffer; - std::span offsets; - int size = 0; - uint32_t totalSize = 0; - if (mapping.vlaIdx != -1) { - auto* mSizeBranch = (TBranch*)branches->At(mapping.vlaIdx); - offsetBuffer = std::make_unique(TBuffer::EMode::kWrite, 4 * 1024 * 1024); - result = arrow::AllocateResizableBuffer((totalEntries + 1) * (int64_t)sizeof(int), pool); - if (!result.ok()) { - throw runtime_error("Cannot allocate offset buffer"); - } - arrowOffsetBuffer = result.MoveValueUnsafe(); - unsigned char* ptrOffset = arrowOffsetBuffer->mutable_data(); - auto* tPtrOffset = reinterpret_cast(ptrOffset); - offsets = std::span{tPtrOffset, tPtrOffset + totalEntries + 1}; - - // read sizes first - while (readEntries < totalEntries) { - auto readLast = mSizeBranch->GetBulkRead().GetEntriesSerialized(readEntries, *offsetBuffer); - readEntries += readLast; - for (auto i = 0; i < readLast; ++i) { - offsets[count++] = (int)offset; - offset += swap32_(reinterpret_cast(offsetBuffer->GetCurrent())[i]); - } - } - offsets[count] = (int)offset; - totalSize = offset; - readEntries = 0; - } - int typeSize = physicalField->type()->byte_width(); - int64_t listSize = 1; - if (auto fixedSizeList = std::dynamic_pointer_cast(datasetField->type())) { - listSize = fixedSizeList->list_size(); - typeSize = physicalField->type()->field(0)->type()->byte_width(); - } else if (mapping.vlaIdx != -1) { - typeSize = physicalField->type()->field(0)->type()->byte_width(); - listSize = -1; - } - - while (readEntries < 
totalEntries) { - auto readLast = branch->GetBulkRead().GetEntriesSerialized(readEntries, buffer); - if (mapping.vlaIdx != -1) { - size = offsets[readEntries + readLast] - offsets[readEntries]; - } else { - size = readLast * listSize; - } - readEntries += readLast; - swapCopy(ptr, buffer.GetCurrent(), size, typeSize); - ptr += (ptrdiff_t)(size * typeSize); - } - if (listSize >= 1) { - totalSize = readEntries * listSize; - } - if (listSize == 1) { - array = std::make_shared(datasetField->type(), readEntries, arrowValuesBuffer); - } else { - auto varray = std::make_shared(datasetField->type()->field(0)->type(), totalSize, arrowValuesBuffer); - if (mapping.vlaIdx != -1) { - array = std::make_shared(datasetField->type(), readEntries, arrowOffsetBuffer, varray); - } else { - array = std::make_shared(datasetField->type(), readEntries, varray); - } - } + if (listType) { + auto varray = std::make_shared(datasetField->type()->field(0)->type(), valueOp.rootBranchEntries * valueOp.listSize, valueOp.targetBuffer); + array = std::make_shared(datasetField->type(), valueOp.rootBranchEntries, varray); + // This is a vla, there is also an offset op + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Op", "Created op for branch %{public}s with %lli entries, size of the buffer %lli.", + valueOp.branch->GetName(), + valueOp.rootBranchEntries, + valueOp.targetBuffer->size()); + } else if (mapping.vlaIdx != -1) { + auto& offsetOp = ops[ops.size() - 2]; + auto varray = std::make_shared(datasetField->type()->field(0)->type(), offsetOp.offsetCount, valueOp.targetBuffer); + // We have pushed an offset op if this was the case. 
+ array = std::make_shared(datasetField->type(), offsetOp.rootBranchEntries, offsetOp.targetBuffer, varray); + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Op", "Created op for branch %{public}s with %lli entries, size of the buffer %lli.", + offsetOp.branch->GetName(), offsetOp.rootBranchEntries, offsetOp.targetBuffer->size()); + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Op", "Created op for branch %{public}s with %lli entries, size of the buffer %lli.", + valueOp.branch->GetName(), + offsetOp.offsetCount, + valueOp.targetBuffer->size()); + } else { + array = std::make_shared(datasetField->type(), valueOp.rootBranchEntries, valueOp.targetBuffer); + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Op", "Created op for branch %{public}s with %lli entries, size of the buffer %lli.", + valueOp.branch->GetName(), + valueOp.rootBranchEntries, + valueOp.targetBuffer->size()); } - branch->SetStatus(false); - branch->DropBaskets("all"); - branch->Reset(); - branch->GetTransientBuffer(0)->Expand(0); - columns.push_back(array); } + + // Do the actual filling of the buffers. This happens after we have created the whole structure + // so that we can read directly in shared memory. + int64_t rows = -1; + for (size_t i = 0; i < ops.size(); ++i) { + auto& op = ops[i]; + if (rows == -1 && op.kind != ReadOpKind::VLA) { + rows = op.rootBranchEntries; + } + if (rows == -1 && op.kind == ReadOpKind::VLA) { + auto& offsetOp = ops[i - 1]; + rows = offsetOp.rootBranchEntries; + } + if (op.kind != ReadOpKind::VLA && rows != op.rootBranchEntries) { + throw runtime_error_f("Unmatching number of rows for branch %s. Expected %lli, found %lli", op.branch->GetName(), rows, op.rootBranchEntries); + } + if (op.kind == ReadOpKind::VLA && rows != ops[i - 1].rootBranchEntries) { + throw runtime_error_f("Unmatching number of rows for branch %s. 
Expected %lli, found %lli", op.branch->GetName(), rows, ops[i - 1].offsetCount); + } + } + auto batch = arrow::RecordBatch::Make(dataset_schema, rows, columns); totalCompressedSize += tree->GetZipBytes(); totalUncompressedSize += tree->GetTotBytes(); + O2_SIGNPOST_END(root_arrow_fs, tid, "Generator", "Done creating batch compressed:%zu uncompressed:%zu", totalCompressedSize, totalUncompressedSize); return batch; }; return generator; @@ -817,11 +1103,31 @@ class TTreeFileWriter : public arrow::dataset::FileWriter switch (field->type()->id()) { case arrow::Type::FIXED_SIZE_LIST: { auto list = std::static_pointer_cast(column); - valueArrays.back() = list->values(); + if (list->list_type()->field(0)->type()->id() == arrow::Type::BOOL) { + int64_t length = list->length() * list->list_type()->list_size(); + arrow::UInt8Builder builder; + auto ok = builder.Reserve(length); + // I need to build an array of uint8_t for the conversion to ROOT which uses + // bytes for boolans. + auto boolArray = std::static_pointer_cast(list->values()); + for (int64_t i = 0; i < length; ++i) { + if (boolArray->IsValid(i)) { + // Expand each boolean value (true/false) to uint8 (1/0) + uint8_t value = boolArray->Value(i) ? 
1 : 0; + auto ok = builder.Append(value); + } else { + // Append null for invalid entries + auto ok = builder.AppendNull(); + } + } + valueArrays.back() = *builder.Finish(); + } else { + valueArrays.back() = list->values(); + } } break; case arrow::Type::LIST: { auto list = std::static_pointer_cast(column); - valueArrays.back() = list; + valueArrays.back() = list->values(); } break; case arrow::Type::BOOL: { // In case of arrays of booleans, we need to go back to their @@ -867,11 +1173,12 @@ class TTreeFileWriter : public arrow::dataset::FileWriter uint8_t const* buffer = std::static_pointer_cast(valueArray)->values()->data() + array->offset() + list->value_offset(pos) * valueType->byte_width(); branch->SetAddress((void*)buffer); sizeBranch->SetAddress(&listSize); - }; - break; + } break; case arrow::Type::FIXED_SIZE_LIST: default: { - uint8_t const* buffer = std::static_pointer_cast(valueArray)->values()->data() + array->offset() + pos * listSize * valueType->byte_width(); + // needed for the boolean case, I should probably cache this. + auto byteWidth = valueType->byte_width() ? valueType->byte_width() : 1; + uint8_t const* buffer = std::static_pointer_cast(valueArray)->values()->data() + array->offset() + pos * listSize * byteWidth; branch->SetAddress((void*)buffer); }; } diff --git a/Framework/Core/include/Framework/RootArrowFilesystem.h b/Framework/Core/include/Framework/RootArrowFilesystem.h index 441b43aeca331..5aceaed077001 100644 --- a/Framework/Core/include/Framework/RootArrowFilesystem.h +++ b/Framework/Core/include/Framework/RootArrowFilesystem.h @@ -12,6 +12,7 @@ #define O2_FRAMEWORK_ROOT_ARROW_FILESYSTEM_H_ #include +#include #include #include #include @@ -96,6 +97,9 @@ class VirtualRootFileSystemBase : public arrow::fs::FileSystem struct RootArrowFactory final { std::function()> options = nullptr; std::function()> format = nullptr; + // Builds an output streamer which is able to read from the source fragment + // in a deferred way. 
+ std::function(std::shared_ptr, const std::shared_ptr& buffer)> deferredOutputStreamer = nullptr; }; struct RootArrowFactoryPlugin { @@ -144,6 +148,8 @@ class TFileFileSystem : public VirtualRootFileSystemBase TFileFileSystem(TDirectoryFile* f, size_t readahead, RootObjectReadingFactory&); + ~TFileFileSystem() override; + std::string type_name() const override { return "TDirectoryFile"; diff --git a/Framework/Core/src/RootArrowFilesystem.cxx b/Framework/Core/src/RootArrowFilesystem.cxx index c563866e802bb..403e393ec6090 100644 --- a/Framework/Core/src/RootArrowFilesystem.cxx +++ b/Framework/Core/src/RootArrowFilesystem.cxx @@ -42,6 +42,12 @@ TFileFileSystem::TFileFileSystem(TDirectoryFile* f, size_t readahead, RootObject ((TFile*)mFile)->SetReadaheadSize(50 * 1024 * 1024); } +TFileFileSystem::~TFileFileSystem() +{ + mFile->Close(); + delete mFile; +} + std::shared_ptr TFileFileSystem::GetObjectHandler(arrow::dataset::FileSource source) { // We use a plugin to create the actual objects inside the diff --git a/Framework/Core/test/test_Root2ArrowTable.cxx b/Framework/Core/test/test_Root2ArrowTable.cxx index 438f388ec86b5..663be91a1e6f3 100644 --- a/Framework/Core/test/test_Root2ArrowTable.cxx +++ b/Framework/Core/test/test_Root2ArrowTable.cxx @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -388,6 +389,7 @@ bool validatePhysicalSchema(std::shared_ptr schema) { REQUIRE(schema->num_fields() == 12); REQUIRE(schema->field(0)->type()->id() == arrow::float32()->id()); + REQUIRE(schema->field(0)->name() == "px"); REQUIRE(schema->field(1)->type()->id() == arrow::float32()->id()); REQUIRE(schema->field(2)->type()->id() == arrow::float32()->id()); REQUIRE(schema->field(3)->type()->id() == arrow::float64()->id()); @@ -541,12 +543,28 @@ TEST_CASE("RootTree2Dataset") options->dataset_schema = schema; auto scanner = format->ScanBatchesAsync(options, *fragment); REQUIRE(scanner.ok()); + + // This is batch has deferred contents. 
Therefore we need to use a DeferredOutputStream to + // write it to a real one and read it back with the BufferReader, which is hopefully zero copy + std::shared_ptr batch; + auto batches = (*scanner)(); auto result = batches.result(); REQUIRE(result.ok()); REQUIRE((*result)->columns().size() == 11); REQUIRE((*result)->num_rows() == 100); - validateContents(*result); + std::shared_ptr buffer = *arrow::AllocateResizableBuffer(1000, 64); + auto deferredWriterStream = factory.capabilities[1].factory().deferredOutputStreamer(*fragment, buffer); + auto outBatch = arrow::ipc::MakeStreamWriter(deferredWriterStream.get(), schema); + auto status = outBatch.ValueOrDie()->WriteRecordBatch(**result); + std::shared_ptr bufferReader = std::make_shared(buffer); + auto readerResult = arrow::ipc::RecordBatchStreamReader::Open(bufferReader); + auto batchReader = readerResult.ValueOrDie(); + + auto next = batchReader->ReadNext(&batch); + REQUIRE(batch != nullptr); + + validateContents(batch); auto* output = new TMemFile("foo", "RECREATE"); auto outFs = std::make_shared(output, 0, factory); @@ -558,7 +576,8 @@ TEST_CASE("RootTree2Dataset") // Write to the /DF_3 tree at top level arrow::fs::FileLocator locator{outFs, "/DF_3"}; auto writer = format->MakeWriter(*destination, schema, {}, locator); - auto success = writer->get()->Write(*result); + auto success = writer->get()->Write(batch); + REQUIRE(batch->schema()->field(0)->name() == "px"); auto rootDestination = std::dynamic_pointer_cast(*destination); SECTION("Read tree") @@ -568,7 +587,11 @@ TEST_CASE("RootTree2Dataset") auto tfileFs = std::dynamic_pointer_cast(outFs); REQUIRE(tfileFs.get()); REQUIRE(tfileFs->GetFile()); - REQUIRE(tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree"))); + auto* tree = (TTree*)tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree")); + REQUIRE(tree != nullptr); + REQUIRE(((TBranch*)tree->GetListOfBranches()->At(0))->GetEntries() == 100); + 
REQUIRE(((TBranch*)tree->GetListOfBranches()->At(0))->GetName() == std::string("px")); + arrow::dataset::FileSource source2("/DF_3", outFs); REQUIRE(format->IsSupported(source2) == true); @@ -577,6 +600,10 @@ TEST_CASE("RootTree2Dataset") REQUIRE(tfileFs->GetFile()); REQUIRE(tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree"))); + tree = (TTree*)tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree")); + REQUIRE(tree != nullptr); + REQUIRE(((TBranch*)tree->GetListOfBranches()->At(0))->GetEntries() == 100); + auto schemaOptWritten = format->Inspect(source2); tfileFs = std::dynamic_pointer_cast(source2.filesystem()); REQUIRE(tfileFs.get()); @@ -585,6 +612,10 @@ TEST_CASE("RootTree2Dataset") REQUIRE(schemaOptWritten.ok()); auto schemaWritten = *schemaOptWritten; + tree = (TTree*)tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree")); + REQUIRE(tree != nullptr); + REQUIRE(((TBranch*)tree->GetListOfBranches()->At(0))->GetEntries() == 100); + REQUIRE(validatePhysicalSchema(schemaWritten)); std::vector> fields; for (auto& field : schemaWritten->fields()) { @@ -599,23 +630,38 @@ TEST_CASE("RootTree2Dataset") auto fragmentWritten = format->MakeFragment(source2, {}, *physicalSchema); REQUIRE(fragmentWritten.ok()); auto optionsWritten = std::make_shared(); - options->dataset_schema = schema; - auto scannerWritten = format->ScanBatchesAsync(optionsWritten, *fragment); + optionsWritten->dataset_schema = schema; + auto scannerWritten = format->ScanBatchesAsync(optionsWritten, *fragmentWritten); REQUIRE(scannerWritten.ok()); - auto batchesWritten = (*scanner)(); - auto resultWritten = batches.result(); + tree = (TTree*)tfileFs->GetFile()->GetObjectChecked("/DF_3", TClass::GetClass("TTree")); + REQUIRE(tree != nullptr); + REQUIRE(((TBranch*)tree->GetListOfBranches()->At(0))->GetEntries() == 100); + auto batchesWritten = (*scannerWritten)(); + auto resultWritten = batchesWritten.result(); REQUIRE(resultWritten.ok()); 
REQUIRE((*resultWritten)->columns().size() == 11); REQUIRE((*resultWritten)->num_rows() == 100); - validateContents(*resultWritten); + + std::shared_ptr buffer = *arrow::AllocateResizableBuffer(1000, 64); + auto deferredWriterStream2 = factory.capabilities[1].factory().deferredOutputStreamer(*fragmentWritten, buffer); + auto outBatch = arrow::ipc::MakeStreamWriter(deferredWriterStream2.get(), schema); + auto status = outBatch.ValueOrDie()->WriteRecordBatch(**resultWritten); + std::shared_ptr bufferReader = std::make_shared(buffer); + auto readerResult = arrow::ipc::RecordBatchStreamReader::Open(bufferReader); + auto batchReader = readerResult.ValueOrDie(); + + auto next = batchReader->ReadNext(&batch); + REQUIRE(batch != nullptr); + validateContents(batch); } + arrow::fs::FileLocator rnTupleLocator{outFs, "/rntuple"}; // We write an RNTuple in the same TMemFile, using /rntuple as a location auto rntupleDestination = std::dynamic_pointer_cast(*destination); { auto rNtupleWriter = rNtupleFormat->MakeWriter(*destination, schema, {}, rnTupleLocator); - auto rNtupleSuccess = rNtupleWriter->get()->Write(*result); + auto rNtupleSuccess = rNtupleWriter->get()->Write(batch); REQUIRE(rNtupleSuccess.ok()); } From bf2896c2d98aa5ff977eaf425b167316af408ff5 Mon Sep 17 00:00:00 2001 From: ehellbar Date: Fri, 14 Feb 2025 16:59:57 +0100 Subject: [PATCH 0079/1914] Allow ussing FST_TMUX_DD_WAIT in start_tmux.sh script again (#13967) --- prodtests/full-system-test/start_tmux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prodtests/full-system-test/start_tmux.sh b/prodtests/full-system-test/start_tmux.sh index a58f49ac306d3..3048943241627 100755 --- a/prodtests/full-system-test/start_tmux.sh +++ b/prodtests/full-system-test/start_tmux.sh @@ -106,8 +106,8 @@ if [[ -z $SHM_MANAGER_SHMID ]]; then exit 1 fi fi -[[ ! -z $FST_TMUX_DD_WAIT ]] && FST_SLEEP2=$FST_TMUX_DD_WAIT [[ ${O2_GPU_RTC:-0} == 1 ]] && FST_SLEEP2=60 +[[ ! 
-z $FST_TMUX_DD_WAIT ]] && FST_SLEEP2=$FST_TMUX_DD_WAIT if workflow_has_parameter CALIB_PROXIES; then CALIB_COMMAND="$GEN_TOPO_MYDIR/aggregator-workflow.sh" From 28e9bf1415831e956588b88dcb00b18030c7b29b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 14 Feb 2025 21:57:49 +0100 Subject: [PATCH 0080/1914] DPL: add simple helper to reproduce the calibration workflow (#13965) --- Framework/TestWorkflows/CMakeLists.txt | 4 + .../TestWorkflows/scripts/mock-calibration.sh | 7 ++ Framework/TestWorkflows/scripts/mock-flp.sh | 3 + .../TestWorkflows/src/o2SimpleProcessor.cxx | 99 +++++++++++++++++++ .../TestWorkflows/src/o2SimpleSource.cxx | 9 +- Framework/Utils/src/raw-proxy.cxx | 2 +- 6 files changed, 121 insertions(+), 3 deletions(-) create mode 100755 Framework/TestWorkflows/scripts/mock-calibration.sh create mode 100755 Framework/TestWorkflows/scripts/mock-flp.sh create mode 100644 Framework/TestWorkflows/src/o2SimpleProcessor.cxx diff --git a/Framework/TestWorkflows/CMakeLists.txt b/Framework/TestWorkflows/CMakeLists.txt index 8548d6570e4a4..b147a4871bf26 100644 --- a/Framework/TestWorkflows/CMakeLists.txt +++ b/Framework/TestWorkflows/CMakeLists.txt @@ -119,6 +119,10 @@ o2_add_dpl_workflow(simple-sink SOURCES src/o2SimpleSink.cxx COMPONENT_NAME TestWorkflows) +o2_add_dpl_workflow(simple-processor + SOURCES src/o2SimpleProcessor.cxx + COMPONENT_NAME TestWorkflows) + o2_add_dpl_workflow(analysis-workflow SOURCES src/o2AnalysisWorkflow.cxx COMPONENT_NAME TestWorkflows) diff --git a/Framework/TestWorkflows/scripts/mock-calibration.sh b/Framework/TestWorkflows/scripts/mock-calibration.sh new file mode 100755 index 0000000000000..a56fcdf45561c --- /dev/null +++ b/Framework/TestWorkflows/scripts/mock-calibration.sh @@ -0,0 +1,7 @@ +#/bin/sh -ex +export DPL_SIGNPOSTS="calibration" +stage/bin/o2-dpl-raw-proxy --exit-transition-timeout 20 --data-processing-timeout 10 --dataspec "tst:TST/A/0" --channel-config 
"readout-proxy:address=tcp://0.0.0.0:4200,method=connect,type=pair" | \ + stage/bin/o2-testworkflows-simple-processor --exit-transition-timeout 20 --data-processing-timeout 10 --name reconstruction --processing-delay 5000 --eos-dataspec tst3:TST/C/0 --in-dataspec "tst2:TST/A/0" --out-dataspec "tst:TST/B/0" | \ + stage/bin/o2-testworkflows-simple-processor --exit-transition-timeout 20 --data-processing-timeout 10 --name calibration --processing-delay 1000 --in-dataspec "tst2:TST/C/0?lifetime=sporadic" --out-dataspec "tst:TCL/C/0?lifetime=sporadic" | \ + stage/bin/o2-testworkflows-simple-sink --exit-transition-timeout 20 --data-processing-timeout 10 --name calibration-publisher --dataspec "tst2:TCL/C/0?lifetime=sporadic" | \ + stage/bin/o2-testworkflows-simple-sink --exit-transition-timeout 20 --data-processing-timeout 10 --dataspec "tst:TST/B/0" diff --git a/Framework/TestWorkflows/scripts/mock-flp.sh b/Framework/TestWorkflows/scripts/mock-flp.sh new file mode 100755 index 0000000000000..c1ad7c2f0dbaf --- /dev/null +++ b/Framework/TestWorkflows/scripts/mock-flp.sh @@ -0,0 +1,3 @@ +#/bin/sh -ex +stage/bin/o2-testworkflows-simple-source --dataspec tst:TST/A/0 --delay 1000 | \ + stage/bin/o2-dpl-output-proxy --dataspec "tst:TST/A/0" --channel-config "downstream:address=tcp://0.0.0.0:4200,method=bind,type=pair" diff --git a/Framework/TestWorkflows/src/o2SimpleProcessor.cxx b/Framework/TestWorkflows/src/o2SimpleProcessor.cxx new file mode 100644 index 0000000000000..078500a886ada --- /dev/null +++ b/Framework/TestWorkflows/src/o2SimpleProcessor.cxx @@ -0,0 +1,99 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. +#include "Framework/ConfigParamSpec.h" +#include "Framework/RawDeviceService.h" + +#include +#include +#include +#include + +using namespace o2::framework; + +void customize(std::vector& workflowOptions) +{ + workflowOptions.emplace_back( + ConfigParamSpec{"in-dataspec", VariantType::String, "", {"DataSpec for the outputs"}}); + workflowOptions.emplace_back( + ConfigParamSpec{"out-dataspec", VariantType::String, "", {"DataSpec for the outputs"}}); + workflowOptions.emplace_back( + ConfigParamSpec{"eos-dataspec", VariantType::String, "", {"DataSpec for the outputs during EoS"}}); + workflowOptions.emplace_back( + ConfigParamSpec{"processing-delay", VariantType::Int, 0, {"How long the processing takes"}}); + workflowOptions.emplace_back( + ConfigParamSpec{"eos-delay", VariantType::Int, 0, {"How long the takes to do eos"}}); + workflowOptions.emplace_back( + ConfigParamSpec{"name", VariantType::String, "test-processor", {"Name of the processor"}}); +} +#include "Framework/runDataProcessing.h" + +// This is how you can define your processing in a declarative way +WorkflowSpec defineDataProcessing(ConfigContext const& ctx) +{ + // Get the dataspec option and creates OutputSpecs from it + auto inDataspec = ctx.options().get("in-dataspec"); + auto outDataspec = ctx.options().get("out-dataspec"); + // For data created at the End-Of-Stream + auto eosDataspec = ctx.options().get("eos-dataspec"); + + auto processingDelay = ctx.options().get("processing-delay"); + auto eosDelay = ctx.options().get("eos-delay"); + + std::vector inputs = select(inDataspec.c_str()); + + for (auto& input : inputs) { + LOGP(info, "{} : lifetime {}", DataSpecUtils::describe(input), (int)input.lifetime); + } + + std::vector matchers = select(outDataspec.c_str()); + std::vector outputRefs; + 
std::vector outputs; + + for (auto const& matcher : matchers) { + outputRefs.emplace_back(matcher.binding); + outputs.emplace_back(DataSpecUtils::asOutputSpec(matcher)); + } + + std::vector eosMatchers = select(eosDataspec.c_str()); + std::vector eosRefs; + std::vector eosOutputs; + + for (auto const& matcher : eosMatchers) { + eosRefs.emplace_back(matcher.binding); + auto eosOut = DataSpecUtils::asOutputSpec(matcher); + eosOut.lifetime = Lifetime::Sporadic; + outputs.emplace_back(eosOut); + } + + AlgorithmSpec algo = adaptStateful([outputRefs, eosRefs, processingDelay, eosDelay](CallbackService& service) { + service.set([eosRefs, eosDelay](EndOfStreamContext&) { + LOG(info) << "Creating objects on end of stream reception."; + std::this_thread::sleep_for(std::chrono::seconds(eosDelay)); + }); + + return adaptStateless( + [outputRefs, processingDelay](InputRecord& inputs, DataAllocator& outputs) { + LOG(info) << "Received " << inputs.size() << " messages. Converting."; + auto i = 0; + std::this_thread::sleep_for(std::chrono::milliseconds(processingDelay)); + for (auto& ref : outputRefs) { + LOGP(info, "Creating {}.", ref); + outputs.make(ref, ++i); + } + }); + }); + + return WorkflowSpec{ + {.name = ctx.options().get("name"), + .inputs = inputs, + .outputs = outputs, + .algorithm = algo}}; +} diff --git a/Framework/TestWorkflows/src/o2SimpleSource.cxx b/Framework/TestWorkflows/src/o2SimpleSource.cxx index d095b16065ebe..5f9193465834b 100644 --- a/Framework/TestWorkflows/src/o2SimpleSource.cxx +++ b/Framework/TestWorkflows/src/o2SimpleSource.cxx @@ -29,6 +29,8 @@ void customize(std::vector& workflowOptions) ConfigParamSpec{"name", VariantType::String, "test-source", {"Name of the source"}}); workflowOptions.emplace_back( ConfigParamSpec{"timer", VariantType::String, "", {"What to use as timer intervals. 
Format is :[, ...]"}}); + workflowOptions.emplace_back( + ConfigParamSpec{"delay", VariantType::Int, 0, {"How long it takes to do the processing (in ms)"}}); } #include "Framework/runDataProcessing.h" @@ -39,6 +41,8 @@ WorkflowSpec defineDataProcessing(ConfigContext const& ctx) // Get the dataspec option and creates OutputSpecs from it auto dataspec = ctx.options().get("dataspec"); auto timer = ctx.options().get("timer"); + auto delay = ctx.options().get("delay"); + std::vector inputs; std::vector timers; if (timer.empty() == false) { @@ -74,13 +78,14 @@ WorkflowSpec defineDataProcessing(ConfigContext const& ctx) .inputs = inputs, .outputs = outputSpecs, .algorithm = AlgorithmSpec{adaptStateful( - [outputSpecs](ConfigParamRegistry const& options) { + [outputSpecs, delay](ConfigParamRegistry const& options) { // the size of the messages is also a workflow option auto dataSize = options.get("data-size"); return adaptStateless( - [outputSpecs, dataSize](DataAllocator& outputs, ProcessingContext& ctx) { + [outputSpecs, dataSize, delay](DataAllocator& outputs, ProcessingContext& ctx) { for (auto const& output : outputSpecs) { auto concrete = DataSpecUtils::asConcreteDataMatcher(output); + std::this_thread::sleep_for(std::chrono::milliseconds(delay)); outputs.make(Output{concrete.origin, concrete.description, concrete.subSpec}, dataSize); } }); diff --git a/Framework/Utils/src/raw-proxy.cxx b/Framework/Utils/src/raw-proxy.cxx index fe33b4b4c8ab8..76fb10aec963d 100644 --- a/Framework/Utils/src/raw-proxy.cxx +++ b/Framework/Utils/src/raw-proxy.cxx @@ -29,7 +29,7 @@ void customize(std::vector& workflowOptions) workflowOptions.push_back( ConfigParamSpec{ - "dataspec", VariantType::String, "A:FLP/RAWDATA;B:FLP/DISTSUBTIMEFRAME/0", {"selection string for the data to be proxied"}}); + "dataspec", VariantType::String, "tst:TST/A", {"selection string for the data to be proxied"}}); workflowOptions.push_back( ConfigParamSpec{ From 66e56fe0980a7b2513d4b813eccbc6dd400ae619 Mon Sep 
17 00:00:00 2001 From: czhang Date: Sat, 15 Feb 2025 23:07:25 +0100 Subject: [PATCH 0081/1914] MCH: add re-alignment option in workflow (#13969) * MCH: add re-alignment option in workflow * Move new geometry reading to init --- .../MUON/MCH/Align/src/AlignmentSpec.cxx | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx b/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx index 8a1df9c98bb9b..9d92f18024d88 100644 --- a/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx +++ b/Detectors/MUON/MCH/Align/src/AlignmentSpec.cxx @@ -159,9 +159,6 @@ class AlignmentTask } doReAlign = ic.options().get("do-realign"); - if (doReAlign) { - LOG(info) << "Re-alignment mode"; - } if (mCCDBRequest) { LOG(info) << "Loading magnetic field and reference geometry from CCDB"; @@ -181,9 +178,9 @@ class AlignmentTask LOG(fatal) << "No GRP file"; } - auto geoIdealFile = ic.options().get("geo-file-ideal"); - if (std::filesystem::exists(geoIdealFile)) { - base::GeometryManager::loadGeometry(geoIdealFile.c_str()); + IdealGeoFileName = ic.options().get("geo-file-ideal"); + if (std::filesystem::exists(IdealGeoFileName)) { + base::GeometryManager::loadGeometry(IdealGeoFileName.c_str()); transformation = geo::transformationFromTGeoManager(*gGeoManager); for (int i = 0; i < 156; i++) { int iDEN = GetDetElemId(i); @@ -193,9 +190,9 @@ class AlignmentTask LOG(fatal) << "No ideal geometry"; } - auto geoRefFile = ic.options().get("geo-file-ref"); - if (std::filesystem::exists(geoRefFile)) { - base::GeometryManager::loadGeometry(geoRefFile.c_str()); + RefGeoFileName = ic.options().get("geo-file-ref"); + if (std::filesystem::exists(RefGeoFileName)) { + base::GeometryManager::loadGeometry(RefGeoFileName.c_str()); transformation = geo::transformationFromTGeoManager(*gGeoManager); for (int i = 0; i < 156; i++) { int iDEN = GetDetElemId(i); @@ -204,6 +201,22 @@ class AlignmentTask } else { LOG(fatal) << "No reference geometry"; } + + if 
(doReAlign) { + LOG(info) << "Re-alignment mode"; + LOG(info) << "Loading re-alignment geometry"; + NewGeoFileName = ic.options().get("geo-file-new"); + if (std::filesystem::exists(NewGeoFileName)) { + base::GeometryManager::loadGeometry(NewGeoFileName.c_str()); + transformation = geo::transformationFromTGeoManager(*gGeoManager); + for (int i = 0; i < 156; i++) { + int iDEN = GetDetElemId(i); + transformNew[iDEN] = transformation(iDEN); + } + } else { + LOG(fatal) << "No re-alignment geometry"; + } + } } auto doEvaluation = ic.options().get("do-evaluation"); @@ -387,21 +400,6 @@ class AlignmentTask } } - // Load new geometry if we need to do re-align - if (doReAlign) { - if (NewGeoFileName != "") { - LOG(info) << "Loading re-alignment geometry"; - base::GeometryManager::loadGeometry(NewGeoFileName.c_str()); - transformation = geo::transformationFromTGeoManager(*gGeoManager); - for (int i = 0; i < 156; i++) { - int iDEN = GetDetElemId(i); - transformNew[iDEN] = transformation(iDEN); - } - } else { - LOG(fatal) << "No re-alignment geometry"; - } - } - if (!readFromRec) { // Loading input data LOG(info) << "Loading MCH tracks"; @@ -875,6 +873,7 @@ class AlignmentTask const string mchFileName{"mchtracks.root"}; const string muonFileName{"muontracks.root"}; string outFileName{"Alignment"}; + string IdealGeoFileName{""}; string RefGeoFileName{""}; string NewGeoFileName{""}; bool doAlign{false}; @@ -918,6 +917,7 @@ o2::framework::DataProcessorSpec getAlignmentSpec(bool disableCCDB) outputSpecs, AlgorithmSpec{o2::framework::adaptFromTask(ccdbRequest)}, Options{{"geo-file-ref", VariantType::String, o2::base::NameConf::getAlignedGeomFileName(), {"Name of the reference geometry file"}}, + {"geo-file-new", VariantType::String, "", {"Name of the new geometry file"}}, {"geo-file-ideal", VariantType::String, o2::base::NameConf::getGeomFileName(), {"Name of the ideal geometry file"}}, {"grp-file", VariantType::String, o2::base::NameConf::getGRPFileName(), {"Name of the grp 
file"}}, {"do-align", VariantType::Bool, false, {"Switch for alignment, otherwise only residuals will be stored"}}, From 337a7fc83944e0601b801cf6f55787ba53cef201 Mon Sep 17 00:00:00 2001 From: shahoian Date: Sun, 16 Feb 2025 17:30:01 +0100 Subject: [PATCH 0082/1914] PVfinder can accept AB tracks --- Detectors/GlobalTrackingWorkflow/src/PrimaryVertexingSpec.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Detectors/GlobalTrackingWorkflow/src/PrimaryVertexingSpec.cxx b/Detectors/GlobalTrackingWorkflow/src/PrimaryVertexingSpec.cxx index f810e1921e46c..d71a4fad7ab78 100644 --- a/Detectors/GlobalTrackingWorkflow/src/PrimaryVertexingSpec.cxx +++ b/Detectors/GlobalTrackingWorkflow/src/PrimaryVertexingSpec.cxx @@ -15,6 +15,7 @@ #include #include "DataFormatsGlobalTracking/RecoContainer.h" #include "DataFormatsGlobalTracking/RecoContainerCreateTracksVariadic.h" +#include "DataFormatsITSMFT/TrkClusRef.h" #include "DataFormatsCalibration/MeanVertexObject.h" #include "ReconstructionDataFormats/TrackTPCITS.h" #include "ReconstructionDataFormats/GlobalTrackID.h" @@ -113,7 +114,8 @@ void PrimaryVertexingSpec::run(ProcessingContext& pc) return true; // just in case this selection was not done on RecoContainer filling level } auto itsID = recoData.getITSContributorGID(_origID); - if (!itsID.isSourceSet() || o2::math_utils::numberOfBitsSet(recoData.getITSTrack(itsID).getPattern() & 7) < minIBHits) { + if ((itsID.getSource() == GTrackID::ITS && o2::math_utils::numberOfBitsSet(recoData.getITSTrack(itsID).getPattern() & 7) < minIBHits) || + (itsID.getSource() == GTrackID::ITSAB && o2::math_utils::numberOfBitsSet(recoData.getITSABRef(itsID).pattern & 7) < minIBHits)) { // do not accept ITSAB tracklets return true; } if constexpr (isITSTrack()) { From 8069cf620f3806fc8108e5fc802dee57eb1f72a0 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 17 Feb 2025 11:08:42 +0100 Subject: [PATCH 0083/1914] DPL Analysis: fix corner case in index builder algorithm (#13961) 
--- Framework/Core/src/IndexBuilderHelpers.cxx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Framework/Core/src/IndexBuilderHelpers.cxx b/Framework/Core/src/IndexBuilderHelpers.cxx index 05aedca9d92d4..52d6080690fe1 100644 --- a/Framework/Core/src/IndexBuilderHelpers.cxx +++ b/Framework/Core/src/IndexBuilderHelpers.cxx @@ -159,6 +159,10 @@ bool IndexColumnBuilder::findSingle(int idx) } } + if (mPosition < mSourceSize && valueAt(mPosition) < idx) { + ++mPosition; + } + return (mPosition < mSourceSize && valueAt(mPosition) == idx); } @@ -176,6 +180,10 @@ bool IndexColumnBuilder::findSlice(int idx) } } + if (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) <= idx) { + ++mPosition; + } + return (mValuePos < mValuesArrow->length() && mValuesArrow->Value(mValuePos) == idx); } From 2b55eb10a379af76813946ec40846b8c951a4493 Mon Sep 17 00:00:00 2001 From: swenzel Date: Mon, 17 Feb 2025 17:58:57 +0100 Subject: [PATCH 0084/1914] Fix mother and daughter indices in generator cocktails Particles in a generated event carry indices to refer to mother and dauther particles. These indices need to be adjusted when we combine multiple events into a cocktail. --- Generators/src/GeneratorHybrid.cxx | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Generators/src/GeneratorHybrid.cxx b/Generators/src/GeneratorHybrid.cxx index f968a9c4b3513..729d69527c384 100644 --- a/Generators/src/GeneratorHybrid.cxx +++ b/Generators/src/GeneratorHybrid.cxx @@ -390,6 +390,26 @@ bool GeneratorHybrid::importParticles() for (auto subIndex : subGenIndex) { LOG(info) << "Importing particles for task " << subIndex; auto subParticles = gens[subIndex]->getParticles(); + + // The particles carry mother and daughter indices, which are relative + // to the sub-generator. We need to adjust these indices to reflect that particles + // are now embedded into a cocktail. 
+ auto offset = mParticles.size(); + for (auto& p : subParticles) { + for (int i = 0; i < 2; ++i) { + if (p.GetMother(i) != -1) { + const auto newindex = p.GetMother(i) + offset; + p.SetMother(i, newindex); + } + } + if (p.GetNDaughters() > 0) { + for (int i = 0; i < 2; ++i) { + const auto newindex = p.GetDaughter(i) + offset; + p.SetDaughter(i, newindex); + } + } + } + mParticles.insert(mParticles.end(), subParticles.begin(), subParticles.end()); // fetch the event Header information from the underlying generator gens[subIndex]->updateHeader(&mMCEventHeader); From 539933960ddc70b03b00e254e4250c1c21e70cc6 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 14 Feb 2025 19:33:04 +0100 Subject: [PATCH 0085/1914] Modify CA async opts, possible set from config.param Print selected setthigs only once from the 1st pipeline. Rescale pT cutoffs by actual/nominal B-field. For this reason CA ITSTrackingInterface::initialise is moved inside ITSTrackingInterface::updateTimeDependentParams 1st call (during 1st TF processing). 
--- .../include/ITStracking/Configuration.h | 1 + .../include/ITStracking/TrackingConfigParam.h | 5 ++ .../include/ITStracking/TrackingInterface.h | 3 + .../ITSMFT/ITS/tracking/src/Configuration.cxx | 24 +++++++- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 1 - .../ITS/tracking/src/TrackingInterface.cxx | 57 +++++++++++++++++-- .../ITSMFT/ITS/tracking/src/Vertexer.cxx | 1 - .../ITSMFT/ITS/workflow/src/TrackerSpec.cxx | 2 +- 8 files changed, 85 insertions(+), 9 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h index e99f0c1f4d13f..b63a01cb8cd29 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Configuration.h @@ -63,6 +63,7 @@ struct TrackingParameters { int CellMinimumLevel(); int CellsPerRoad() const { return NLayers - 2; } int TrackletsPerRoad() const { return NLayers - 1; } + std::string asString() const; int NLayers = 7; int DeltaROF = 0; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index 20e9a4362b77c..0cf44d08cac19 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -55,9 +55,14 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper { // Use TGeo for mat. budget + static const int MaxIter = 4; + static const int MinTrackLenght = 4; + static const int MaxTrackLenght = 7; bool useMatCorrTGeo = false; // use full geometry to corect for material budget accounting in the fits. Default is to use the material budget LUT. bool useFastMaterial = false; // use faster material approximation for material budget accounting in the fits. int deltaRof = 0; // configure the width of the window in ROFs to be considered for the tracking. 
+ int minTrackLgtIter[MaxIter] = {}; // minimum track length at each iteration, used only if >0, otherwise use code defaults + float minPtIterLgt[MaxIter * (MaxTrackLenght - MinTrackLenght + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults float sysErrY2[7] = {0}; // systematic error^2 in Y per layer float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer float maxChi2ClusterAttachment = -1.f; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index 717ee892816ee..b584bf6b8008b 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -70,6 +70,9 @@ class ITSTrackingInterface mMode = mode; } + auto getTracker() const { return mTracker.get(); } + auto getVertexer() const { return mVertexer.get(); } + TimeFrame* mTimeFrame = nullptr; protected: diff --git a/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx b/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx index 13cf6b472db9a..7d348505c61d3 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Configuration.cxx @@ -28,9 +28,31 @@ std::string asString(TrackingMode mode) return "unknown"; } +std::string TrackingParameters::asString() const +{ + std::string str = fmt::format("NZb:{} NPhB:{} NROFIt:{} PerVtx:{} DropFail:{} ClSh:{} TtklMinPt:{:.2f} MinCl:{}", + ZBins, PhiBins, nROFsPerIterations, PerPrimaryVertexProcessing, DropTFUponFailure, ClusterSharing, TrackletMinPt, MinTrackLength); + bool first = true; + for (int il = NLayers; il >= MinTrackLength; il--) { + int slot = NLayers - il; + if (slot < (int)MinPt.size() && MinPt[slot] > 0) { + if (first) { + first = false; + str += " MinPt: "; + } + str += fmt::format("L{}:{:.2f} ", il, MinPt[slot]); + } + } + str += " SystErrY/Z:"; + for (size_t i 
= 0; i < SystErrorY2.size(); i++) { + str += fmt::format("{:.2e}/{:.2e} ", SystErrorY2[i], SystErrorZ2[i]); + } + return str; +} + std::ostream& operator<<(std::ostream& os, TrackingMode v) { os << asString(v); return os; } -} // namespace o2::its \ No newline at end of file +} // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index bc642015b1fcd..50dc1f5dfd039 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -481,7 +481,6 @@ void Tracker::rectifyClusterIndices() void Tracker::getGlobalConfiguration() { auto& tc = o2::its::TrackerParamConfig::Instance(); - tc.printKeyValues(true, true); if (tc.useMatCorrTGeo) { mTraits->setCorrType(o2::base::PropagatorImpl::MatCorrType::USEMatCorrTGeo); } else if (tc.useFastMaterial) { diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 7305e205ecb3a..f625b77a013b0 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -23,6 +23,7 @@ #include "CommonDataFormat/IRFrame.h" #include "DetectorsBase/GRPGeomHelper.h" #include "ITStracking/TrackingConfigParam.h" +#include "Framework/DeviceSpec.h" namespace o2 { @@ -35,31 +36,55 @@ void ITSTrackingInterface::initialise() mCosmicsProcessing = false; std::vector vertParams; std::vector trackParams; + const auto& trackConf = o2::its::TrackerParamConfig::Instance(); + float bFactor = std::abs(o2::base::Propagator::Instance()->getNominalBz()) / 5.0066791; if (mMode == TrackingMode::Unset) { - mMode = (TrackingMode)(o2::its::TrackerParamConfig::Instance().trackingMode); + mMode = (TrackingMode)(trackConf.trackingMode); LOGP(info, "Tracking mode not set, trying to fetch it from configurable params to: {}", asString(mMode)); } if (mMode == TrackingMode::Async) { - 
trackParams.resize(o2::its::TrackerParamConfig::Instance().doUPCIteration ? 4 : 3); + trackParams.resize(trackConf.doUPCIteration ? 4 : 3); vertParams.resize(2); // The number of actual iterations will be set as a configKeyVal to allow for pp/PbPb choice trackParams[1].TrackletMinPt = 0.2f; trackParams[1].CellDeltaTanLambdaSigma *= 2.; trackParams[2].TrackletMinPt = 0.1f; trackParams[2].CellDeltaTanLambdaSigma *= 4.; + + trackParams[0].MinPt[0] = 1.f / 12; // 7cl + + trackParams[1].MinPt[0] = 1.f / 12; // 7cl + trackParams[2].MinTrackLength = 4; - trackParams[2].MinPt[3] = 0.2f; + trackParams[2].MinPt[0] = 1.f / 12; // 7cl + trackParams[2].MinPt[1] = 1.f / 5; // 6cl + trackParams[2].MinPt[2] = 1.f / 1; // 5cl + trackParams[2].MinPt[3] = 1.f / 6; // 4cl + trackParams[2].StartLayerMask = (1 << 6) + (1 << 3); if (o2::its::TrackerParamConfig::Instance().doUPCIteration) { + trackParams[3].MinTrackLength = 4; trackParams[3].TrackletMinPt = 0.1f; trackParams[3].CellDeltaTanLambdaSigma *= 4.; - trackParams[3].MinTrackLength = 4; trackParams[3].DeltaROF = 0; // UPC specific setting } - for (auto& param : trackParams) { + for (size_t ip = 0; ip < trackParams.size(); ip++) { + auto& param = trackParams[ip]; param.ZBins = 64; param.PhiBins = 32; param.CellsPerClusterLimit = 1.e3f; param.TrackletsPerClusterLimit = 1.e3f; + // check if something was overridden via configurable params + if (ip < trackConf.MaxIter) { + if (trackConf.minTrackLgtIter[ip] > 0) { + param.MinTrackLength = trackConf.minTrackLgtIter[ip]; + } + for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { + int lslot0 = (trackConf.MaxTrackLenght - ilg), lslot = lslot0 + ip * (trackConf.MaxTrackLenght - trackConf.MinTrackLenght + 1); + if (trackConf.minPtIterLgt[lslot] > 0.) { + param.MinPt[lslot0] = trackConf.minPtIterLgt[lslot]; + } + } + } } LOGP(info, "Initializing tracker in async. 
phase reconstruction with {} passes for tracking and {}/{} for vertexing", trackParams.size(), o2::its::VertexerParamConfig::Instance().nIterations, vertParams.size()); vertParams[1].phiCut = 0.015f; @@ -95,6 +120,17 @@ void ITSTrackingInterface::initialise() for (auto& params : trackParams) { params.CorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrLUT; } + + // adjust pT settings to actual mag. field + for (size_t ip = 0; ip < trackParams.size(); ip++) { + auto& param = trackParams[ip]; + for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { + int lslot = trackConf.MaxTrackLenght - ilg; + param.MinPt[lslot] *= bFactor; + param.TrackletMinPt *= bFactor; + } + } + mTracker->setParameters(trackParams); mVertexer->setParameters(vertParams); } @@ -345,7 +381,18 @@ void ITSTrackingInterface::updateTimeDependentParams(framework::ProcessingContex } GeometryTGeo* geom = GeometryTGeo::Instance(); geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::T2GRot, o2::math_utils::TransformType::T2G)); + initialise(); getConfiguration(pc); + // + if (pc.services().get().inputTimesliceId == 0) { // print settings only for the 1st pipeling + o2::its::VertexerParamConfig::Instance().printKeyValues(); + o2::its::TrackerParamConfig::Instance().printKeyValues(); + const auto& trParams = mTracker->getParameters(); + for (size_t it = 0; it < trParams.size(); it++) { + const auto& par = trParams[it]; + LOGP(info, "recoIter#{} : {}", it, par.asString()); + } + } } } diff --git a/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx b/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx index e87e2289b49e7..13ce03e9fba4f 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Vertexer.cxx @@ -103,7 +103,6 @@ float Vertexer::clustersToVerticesHybrid(std::function logg void Vertexer::getGlobalConfiguration() { auto& vc = o2::its::VertexerParamConfig::Instance(); - 
vc.printKeyValues(true, true); auto& grc = o2::its::ITSGpuTrackingParamConfig::Instance(); // This is odd: we override only the parameters for the first iteration. diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index ec0b0d26f873c..dd4c40a2141d9 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -44,7 +44,7 @@ void TrackerDPL::init(InitContext& ic) mITSTrackingInterface.setTraitsFromProvider(mChainITS->GetITSVertexerTraits(), mChainITS->GetITSTrackerTraits(), mChainITS->GetITSTimeframe()); - mITSTrackingInterface.initialise(); + // mITSTrackingInterface.initialise() will be called from the ITSTrackingInterface::updateTimeDependentParams at 1st initialization since it needs some run conditions } void TrackerDPL::stop() From 5835f44cc6d69e003347fd1c2b7f5fccfcdd944d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Feb 2025 13:51:02 +0100 Subject: [PATCH 0086/1914] GPU: Preparation to make memset on host multi-threaded --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 33 ++++++++++++------- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 1 + 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 944fcb32e4eda..1365429245fdc 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -60,6 +60,21 @@ GPUReconstructionCPU::~GPUReconstructionCPU() Exit(); // Needs to be identical to GPU backend bahavior in order to avoid calling abstract methods later in the destructor } +int32_t GPUReconstructionCPUBackend::getNOMPThreads() +{ + int32_t ompThreads = 0; + if (mProcessingSettings.ompKernels == 2) { + ompThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor; + if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) { + 
ompThreads++; + } + ompThreads = std::max(1, ompThreads); + } else { + ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1; + } + return ompThreads; +} + template inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { @@ -73,16 +88,7 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS } uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num; for (uint32_t k = 0; k < num; k++) { - int32_t ompThreads = 0; - if (mProcessingSettings.ompKernels == 2) { - ompThreads = mProcessingSettings.ompThreads / mNestedLoopOmpFactor; - if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) { - ompThreads++; - } - ompThreads = std::max(1, ompThreads); - } else { - ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1; - } + int32_t ompThreads = getNOMPThreads(); if (ompThreads > 1) { if (mProcessingSettings.debugLevel >= 5) { printf("Running %d ompThreads\n", ompThreads); @@ -105,7 +111,12 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS template <> inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - memset(ptr, 0, size); + int32_t ompThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNOMPThreads())); + if (ompThreads > 1) { + memset(ptr, 0, size); + } else { + memset(ptr, 0, size); + } return 0; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 27959382e7b67..7903be44907df 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -46,6 +46,7 @@ class GPUReconstructionCPUBackend : public GPUReconstruction uint32_t mNestedLoopOmpFactor = 1; static int32_t getOMPThreadNum(); static int32_t getOMPMaxThreads(); + int32_t getNOMPThreads(); }; class 
GPUReconstructionCPU : public GPUReconstructionKernels From 43d48b1a6b241344e67038bd406d09b98e040f1f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Feb 2025 13:51:33 +0100 Subject: [PATCH 0087/1914] GPU: Fix linker warning with GCC 14, no need to make stack executable for including the binary code --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- GPU/GPUTracking/cmake/helpers.cmake | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index bd6b3b6e51928..e4e336130afa0 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -160,7 +160,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel") add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o COMMAND cp -u $ ${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/ - COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> + COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/cuda_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> DEPENDS GPUTrackingCUDAKernels $ COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o" VERBATIM diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 727019fa13755..1952c7a0e3567 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -217,7 +217,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o COMMAND cp -u $ 
${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/ - COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> + COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o $>,PREPEND,${CMAKE_CURRENT_BINARY_DIR}/hip_kernel_module_fatbin/>,${CMAKE_CURRENT_BINARY_DIR}> DEPENDS GPUTrackingHIPKernels $ COMMENT "Compiling fatbin kernels ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o" VERBATIM diff --git a/GPU/GPUTracking/cmake/helpers.cmake b/GPU/GPUTracking/cmake/helpers.cmake index 8d8cf592d8295..f725b870040eb 100644 --- a/GPU/GPUTracking/cmake/helpers.cmake +++ b/GPU/GPUTracking/cmake/helpers.cmake @@ -17,7 +17,7 @@ function(create_binary_resource RESOURCE OUTPUTFILE) FILE(RELATIVE_PATH input-file-rel ${CMAKE_CURRENT_BINARY_DIR} ${input-file-abs}) add_custom_command( OUTPUT ${OUTPUTFILE} - COMMAND ${CMAKE_LINKER} --relocatable --format binary --output ${OUTPUTFILE} ${input-file-rel} + COMMAND ${CMAKE_LINKER} -z noexecstack --relocatable --format binary --output ${OUTPUTFILE} ${input-file-rel} DEPENDS ${input-file-rel} COMMENT "Adding binary resource ${input-file-rel}" VERBATIM From 3d967a15424e81883d9d694ee0626c552c32a507 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:19:33 +0100 Subject: [PATCH 0088/1914] DPL Analysis: workaround to publish histograms also with pipelining (#13980) For some reason if the histograms arrive all at once, they get dropped. Not yet sure why that happens. It clearly cannot merely be a matter of "older possible timeframe" being wrong, nor a problem with the order of the end of stream, because otherwise I would expect also this to fail. 
--- Framework/Core/include/Framework/AnalysisManagers.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 30ebf1799b227..ca42a0aca42db 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -287,6 +287,7 @@ struct OutputManager { auto& deviceSpec = context.services().get(); context.outputs().snapshot(what.ref(deviceSpec.inputTimesliceId, deviceSpec.maxInputTimeslices), *(what.getListOfHistograms())); what.clean(); + sleep(deviceSpec.inputTimesliceId); return true; } }; @@ -314,6 +315,7 @@ struct OutputManager> { { auto& deviceSpec = context.services().get(); context.outputs().snapshot(what.ref(deviceSpec.inputTimesliceId, deviceSpec.maxInputTimeslices), *what); + sleep(deviceSpec.inputTimesliceId); return true; } }; From a1faad5c76db252b9a867f1b4c29df584e85e446 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 19 Feb 2025 23:35:14 +0100 Subject: [PATCH 0089/1914] GPU CMake: Fix some add_custom_command COMMENTs after new warning in CMake 3.31 --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index e4e336130afa0..bab5ff912c575 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -78,7 +78,7 @@ create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain standard headers 1>&2 && exit 1" - COMMENT Checking CUDA RTC File ${GPU_RTC_BIN}.src + COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 1952c7a0e3567..d34777f0bef3e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -55,7 +55,7 @@ if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIPIFIED_CHK.done COMMAND diff -u ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPkernel.template.hip ${CMAKE_CURRENT_SOURCE_DIR}/GPUReconstructionHIPkernel.template.hip > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIPIFIED_CHK.done DEPENDS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPkernel.template.hip ${CMAKE_CURRENT_SOURCE_DIR}/GPUReconstructionHIPkernel.template.hip - COMMENT Checking HIPified file ${CMAKE_CURRENT_SOURCE_DIR}/GPUReconstructionHIPkernel.template.hip) + COMMENT "Checking HIPified file ${CMAKE_CURRENT_SOURCE_DIR}/GPUReconstructionHIPkernel.template.hip") add_custom_target(${MODULE}_HIPIFIED_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIPIFIED_CHK.done) else() get_filename_component(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ABSOLUTE) @@ -116,7 +116,7 @@ create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain standard headers 1>&2 && exit 1" - COMMENT Checking HIP RTC File ${GPU_RTC_BIN}.src + COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done) From 1ed6f518304e86f8b65cf9c63ed30e2bbe30c4cb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 20 Feb 2025 00:11:02 +0100 Subject: [PATCH 0090/1914] GPU Display: Adapt Vulkan Backend to Vulkan headers >= 1.4 --- .../backend/GPUDisplayBackendVulkan.cxx | 24 +++++++++++-------- .../display/backend/GPUDisplayBackendVulkan.h | 2 -- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx index 64167afaa536d..6f0ebb9baf945 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx @@ -12,15 +12,16 @@ /// \file GPUDisplayBackendVulkan.cxx /// \author David Rohr +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 #include -#include - VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE #include "GPUCommonDef.h" #include "GPUDisplayBackendVulkan.h" #include "GPUDisplay.h" +#include + using namespace o2::gpu; #include "utils/qGetLdBinarySymbols.h" @@ -337,6 +338,7 @@ double GPUDisplayBackendVulkan::checkDevice(vk::PhysicalDevice device, const std void GPUDisplayBackendVulkan::createDevice() { + VULKAN_HPP_DEFAULT_DISPATCHER.init(); vk::ApplicationInfo appInfo{}; appInfo.pApplicationName = "GPU CA Standalone display"; appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); @@ -353,28 +355,28 @@ void GPUDisplayBackendVulkan::createDevice() const std::vector reqValidationLayers = { "VK_LAYER_KHRONOS_validation"}; - auto 
debugCallback = [](VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, VkDebugUtilsMessageTypeFlagsEXT messageType, const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) -> VkBool32 { + auto debugCallback = [](vk::DebugUtilsMessageSeverityFlagBitsEXT messageSeverity, vk::DebugUtilsMessageTypeFlagsEXT messageType, const vk::DebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) -> VkBool32 { static int32_t throwOnError = getenv("GPUCA_VULKAN_VALIDATION_THROW") ? atoi(getenv("GPUCA_VULKAN_VALIDATION_THROW")) : 0; static bool showVulkanValidationInfo = getenv("GPUCA_VULKAN_VALIDATION_INFO") && atoi(getenv("GPUCA_VULKAN_VALIDATION_INFO")); switch (messageSeverity) { - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT: + case vk::DebugUtilsMessageSeverityFlagBitsEXT::eVerbose: if (showVulkanValidationInfo) { GPUInfo("%s", pCallbackData->pMessage); } break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT: + case vk::DebugUtilsMessageSeverityFlagBitsEXT::eWarning: GPUWarning("%s", pCallbackData->pMessage); if (throwOnError > 1) { throw std::logic_error("break_on_validation_warning"); } break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT: + case vk::DebugUtilsMessageSeverityFlagBitsEXT::eError: GPUError("%s", pCallbackData->pMessage); if (throwOnError) { throw std::logic_error("break_on_validation_error"); } break; - case VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT: + case vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo: default: GPUInfo("%s", pCallbackData->pMessage); break; @@ -403,10 +405,11 @@ void GPUDisplayBackendVulkan::createDevice() instanceCreateInfo.ppEnabledExtensionNames = reqInstanceExtensions.data(); mInstance = vk::createInstance(instanceCreateInfo, nullptr); - mDLD = {mInstance, mDL.getProcAddress("vkGetInstanceProcAddr")}; + VULKAN_HPP_DEFAULT_DISPATCHER.init(mInstance); if (mEnableValidationLayers) { - mDebugMessenger = mInstance.createDebugUtilsMessengerEXT(debugCreateInfo, nullptr, mDLD); + 
GPUInfo("Enabling Vulkan Validation Layers"); + mDebugMessenger = mInstance.createDebugUtilsMessengerEXT(debugCreateInfo, nullptr); } std::vector extensions = vk::enumerateInstanceExtensionProperties(nullptr); if (mDisplay->param()->par.debugLevel >= 3) { @@ -480,6 +483,7 @@ void GPUDisplayBackendVulkan::createDevice() deviceCreateInfo.enabledLayerCount = instanceCreateInfo.enabledLayerCount; deviceCreateInfo.ppEnabledLayerNames = instanceCreateInfo.ppEnabledLayerNames; mDevice = mPhysicalDevice.createDevice(deviceCreateInfo, nullptr); + VULKAN_HPP_DEFAULT_DISPATCHER.init(mDevice); mGraphicsQueue = mDevice.getQueue(mGraphicsFamily, 0); vk::CommandPoolCreateInfo poolInfo{}; @@ -494,7 +498,7 @@ void GPUDisplayBackendVulkan::clearDevice() mDevice.destroy(nullptr); mInstance.destroySurfaceKHR(mSurface, nullptr); if (mEnableValidationLayers) { - mInstance.destroyDebugUtilsMessengerEXT(mDebugMessenger, nullptr, mDLD); + mInstance.destroyDebugUtilsMessengerEXT(mDebugMessenger, nullptr); } } diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h index 97c24084915f6..caefdd25388c1 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.h @@ -133,8 +133,6 @@ class GPUDisplayBackendVulkan : public GPUDisplayBackend bool mEnableValidationLayers = false; vk::Instance mInstance; - vk::DynamicLoader mDL; - vk::DispatchLoaderDynamic mDLD; vk::DebugUtilsMessengerEXT mDebugMessenger; vk::PhysicalDevice mPhysicalDevice; vk::Device mDevice; From 4ed4dde32893ed7d9cf3d4ad668b66767527ec88 Mon Sep 17 00:00:00 2001 From: pillot Date: Thu, 20 Feb 2025 10:56:48 +0100 Subject: [PATCH 0091/1914] fix wire position on station 1 (#13983) --- .../MUON/MCH/Simulation/include/MCHSimulation/Response.h | 1 + Detectors/MUON/MCH/Simulation/src/Response.cxx | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git 
a/Detectors/MUON/MCH/Simulation/include/MCHSimulation/Response.h b/Detectors/MUON/MCH/Simulation/include/MCHSimulation/Response.h index 09faf3af2e279..bb0ba0aa464e4 100644 --- a/Detectors/MUON/MCH/Simulation/include/MCHSimulation/Response.h +++ b/Detectors/MUON/MCH/Simulation/include/MCHSimulation/Response.h @@ -70,6 +70,7 @@ class Response float inclandbfield(float thetawire, float betagamma, float bx) const; private: + Station mStation{}; ///< Station type MathiesonOriginal mMathieson{}; ///< Mathieson function float mPitch = 0.f; ///< anode-cathode pitch (cm) float mChargeSlope = 0.f; ///< charge slope used in E to charge conversion diff --git a/Detectors/MUON/MCH/Simulation/src/Response.cxx b/Detectors/MUON/MCH/Simulation/src/Response.cxx index 9cc4956772edc..c280981f94a07 100644 --- a/Detectors/MUON/MCH/Simulation/src/Response.cxx +++ b/Detectors/MUON/MCH/Simulation/src/Response.cxx @@ -26,7 +26,7 @@ using namespace o2::mch; //_____________________________________________________________________ -Response::Response(Station station) +Response::Response(Station station) : mStation(station) { if (station == Station::Type1) { mMathieson.setPitch(ResponseParam::Instance().pitchSt1); @@ -68,9 +68,9 @@ float Response::etocharge(float edepos) const //_____________________________________________________________________ float Response::getAnod(float x) const { - int n = int(x / mPitch); - float wire = (x > 0) ? n + 0.5 : n - 0.5; - return wire * mPitch; + return (mStation == Station::Type1) + ? 
std::round(x / mPitch) * mPitch + : (std::floor(x / mPitch) + 0.5f) * mPitch; } //_____________________________________________________________________ From a2d7b83d6f184bccfe57f58daa3a3b92e023df58 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 19 Feb 2025 18:12:31 +0100 Subject: [PATCH 0092/1914] CTP RAW decoder always requests CCDB inputs --- Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 81a927b3caee1..753f88114a14b 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -201,9 +201,9 @@ o2::framework::DataProcessorSpec o2::ctp::reco_workflow::getRawDecoderSpec(bool } std::vector outputs; + inputs.emplace_back("ctpconfig", "CTP", "CTPCONFIG", 0, o2::framework::Lifetime::Condition, o2::framework::ccdbParamSpec("CTP/Config/Config", 1)); + inputs.emplace_back("trigoffset", "CTP", "Trig_Offset", 0, o2::framework::Lifetime::Condition, o2::framework::ccdbParamSpec("CTP/Config/TriggerOffsets")); if (digits) { - inputs.emplace_back("ctpconfig", "CTP", "CTPCONFIG", 0, o2::framework::Lifetime::Condition, o2::framework::ccdbParamSpec("CTP/Config/Config", 1)); - inputs.emplace_back("trigoffset", "CTP", "Trig_Offset", 0, o2::framework::Lifetime::Condition, o2::framework::ccdbParamSpec("CTP/Config/TriggerOffsets")); outputs.emplace_back("CTP", "DIGITS", 0, o2::framework::Lifetime::Timeframe); } if (lumi) { @@ -230,12 +230,10 @@ void RawDecoderSpec::updateTimeDependentParams(framework::ProcessingContext& pc) pc.inputs().get("trigoffset"); const auto& trigOffsParam = o2::ctp::TriggerOffsetsParam::Instance(); LOG(info) << "updateing TroggerOffsetsParam: inputs L0_L1:" << trigOffsParam.L0_L1 << " classes L0_L1:" << trigOffsParam.L0_L1_classes; - if (mDecodeinputs) { - const auto ctpcfg = pc.inputs().get("ctpconfig"); - if (ctpcfg != nullptr) 
{ - mDecoder.setCTPConfig(*ctpcfg); - LOG(info) << "ctpconfig for run done:" << mDecoder.getCTPConfig().getRunNumber(); - } + const auto ctpcfg = pc.inputs().get("ctpconfig"); + if (ctpcfg != nullptr) { + mDecoder.setCTPConfig(*ctpcfg); + LOG(info) << "ctpconfig for run done:" << mDecoder.getCTPConfig().getRunNumber(); } } } From adf1bdaeffa4feb44bddc1455358a3fe45d88e91 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 20 Feb 2025 10:27:08 +0100 Subject: [PATCH 0093/1914] GPU: Implement parallel memset for host code --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 22 +++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 270f092a1fd29..e3522d2d7242d 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -246,7 +246,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } if (mProcessingSettings.deterministicGPUReconstruction) { #ifndef GPUCA_NO_FAST_MATH - GPUError("Warning, deterministicGPUReconstruction needs GPUCA_NO_FAST_MATH, otherwise results will never be deterministic!"); + GPUError("Warning, deterministicGPUReconstruction needs GPUCA_NO_FAST_MATH for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 1365429245fdc..187792b3ba2e7 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -111,10 +111,24 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS template <> inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const 
krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - int32_t ompThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNOMPThreads())); - if (ompThreads > 1) { - memset(ptr, 0, size); - } else { +#ifdef WITH_OPENMP + int32_t nOMPThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNOMPThreads())); + if (nOMPThreads > 1) { + GPUCA_OPENMP(parallel num_threads(nOMPThreads)) + { + size_t threadSize = size / omp_get_num_threads(); + if (threadSize % 4096) { + threadSize += 4096 - threadSize % 4096; + } + size_t offset = threadSize * omp_get_thread_num(); + size_t mySize = std::min(threadSize, size - offset); + if (mySize) { + memset((char*)ptr + offset, 0, mySize); + } + } + } else +#endif + { memset(ptr, 0, size); } return 0; From 5e43c1751bd7e81101cfd8e23275493cbdeabb77 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 20 Feb 2025 12:55:19 +0100 Subject: [PATCH 0094/1914] Fix for PV contributor being AB track --- Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx index 871fdc00555e8..4d20654d07c83 100644 --- a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx +++ b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx @@ -1288,7 +1288,7 @@ class TPCTimeSeries : public Task // make cut around DCA to vertex due to gammas if ((std::abs(dcaITSTPCTmp[0]) < maxITSTPCDCAr_comb) && (std::abs(dcaITSTPCTmp[1]) < maxITSTPCDCAz_comb)) { // propagate TPC track to ITS track and store delta track parameters - if (track.rotate(tracksITS[idxITSTrack].getAlpha()) && propagator->propagateTo(track, trackITSTPCTmp.getX(), false, mMaxSnp, mFineStep, mMatType)) { + if (idxITSTrack >= 0 && track.rotate(tracksITS[idxITSTrack].getAlpha()) && propagator->propagateTo(track, trackITSTPCTmp.getX(), false, mMaxSnp, mFineStep, mMatType)) { o2::track::TrackPar trackITS(tracksITS[idxITSTrack]); const bool propITSOk = 
propagator->propagateTo(trackITS, trackITSTPCTmp.getX(), false, mMaxSnp, mFineStep, mMatType); if (propITSOk) { From 4717d6b96d5c5cd30dad3f39d3d66ac9685bd18e Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 20 Feb 2025 18:01:49 +0100 Subject: [PATCH 0095/1914] DPL Analysis: replace SFINAE with overloaded restricted templates (#13947) --- Framework/Core/include/Framework/ASoA.h | 10 + .../Core/include/Framework/AnalysisHelpers.h | 45 + .../Core/include/Framework/AnalysisManagers.h | 1031 ++++++++--------- .../Core/include/Framework/AnalysisTask.h | 103 +- Framework/Core/include/Framework/Condition.h | 10 + .../Core/include/Framework/Configurable.h | 19 +- .../Core/include/Framework/Expressions.h | 3 + .../include/Framework/GroupedCombinations.h | 17 + .../include/Framework/HistogramRegistry.h | 3 + Framework/Core/include/Framework/SliceCache.h | 5 +- 10 files changed, 617 insertions(+), 629 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index f72d26e84d82f..ca9c49e8bc6c8 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1416,6 +1416,7 @@ template struct PresliceBase : public Policy { constexpr static bool optional = OPT; using target_t = T; + using policy_t = Policy; const std::string binding; PresliceBase(expressions::BindingNode index_) @@ -1453,6 +1454,15 @@ using Preslice = PresliceBase; template using PresliceOptional = PresliceBase; +template +concept is_preslice = requires(T t) { + requires std::same_as; + requires std::same_as; + &T::isMising; + &T::updateSliceInfo; + &T::getSliceFor; +}; + } // namespace o2::framework namespace o2::soa diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index bb7e5e14aaa75..59bade6d43cd0 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -190,6 +190,9 @@ template struct 
Produces : WritingCursor { }; +template +concept is_produces = requires(T t) { typename T::cursor_t; typename T::persistent_table_t; &T::cursor; }; + /// Use this to group together produces. Useful to separate them logically /// or simply to stay within the 100 elements per Task limit. /// Use as: @@ -201,6 +204,9 @@ struct Produces : WritingCursor { struct ProducesGroup { }; +template +concept is_produces_group = std::derived_from; + /// Helper template for table transformations template struct TableTransform { @@ -250,6 +256,7 @@ constexpr auto transformBase() template struct Spawns : decltype(transformBase()) { + using spawnable_t = T; using metadata = decltype(transformBase())::metadata; using extension_t = typename metadata::extension_table_t; using base_table_t = typename metadata::base_table_t; @@ -277,6 +284,12 @@ struct Spawns : decltype(transformBase()) { std::shared_ptr extension = nullptr; }; +template +concept is_spawns = requires(T t) { + typename T::metadata; + requires std::same_as; +}; + /// Policy to control index building /// Exclusive index: each entry in a row has a valid index /// Sparse index: values in a row can be (-1), index table is isomorphic (joinable) @@ -420,6 +433,7 @@ constexpr auto transformBase() template struct Builds : decltype(transformBase()) { + using buildable_t = T; using metadata = decltype(transformBase())::metadata; using IP = std::conditional_t, IndexBuilder>; using Key = metadata::Key; @@ -455,6 +469,13 @@ struct Builds : decltype(transformBase()) { } }; +template +concept is_builds = requires(T t) { + typename T::metadata; + typename T::Key; + requires std::same_as; +}; + /// This helper class allows you to declare things which will be created by a /// given analysis task. Currently wrapped objects are limited to be TNamed /// descendants. 
Objects will be written to a ROOT file at the end of the @@ -550,11 +571,21 @@ struct OutputObj { uint32_t mTaskHash; }; +template +concept is_outputobj = requires(T t) { + &T::setHash; + &T::spec; + &T::ref; + requires std::same_as()), typename T::obj_t*>; + requires std::same_as>; +}; + /// This helper allows you to fetch a Sevice from the context or /// by using some singleton. This hopefully will hide the Singleton and /// We will be able to retrieve it in a more thread safe manner later on. template struct Service { + using service_t = T; T* service; decltype(auto) operator->() const @@ -567,6 +598,12 @@ struct Service { } }; +template +concept is_service = requires(T t) { + requires std::same_as; + &T::operator->; +}; + auto getTableFromFilter(soa::is_filtered_table auto const& table, soa::SelectionVector&& selection) { return std::make_unique>>(std::vector{table}, std::forward(selection)); @@ -581,6 +618,7 @@ void initializePartitionCaches(std::set const& hashes, std::shared_ptr template struct Partition { + using content_t = T; Partition(expressions::Node&& filter_) : filter{std::forward(filter_)} { } @@ -690,6 +728,13 @@ struct Partition { return mFiltered->size(); } }; + +template +concept is_partition = requires(T t) { + &T::updatePlaceholders; + requires std::same_as; + requires std::same_as>>; +}; } // namespace o2::framework namespace o2::soa diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index ca42a0aca42db..1d326e338099f 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -31,678 +31,573 @@ namespace o2::framework { -template -struct GroupedCombinationManager { - template - static void setGroupedCombination(ANY&, TG&, T2s&...) 
- { - } -}; - -template -struct GroupedCombinationManager> { - template - static void setGroupedCombination(GroupedCombinationsGenerator& comb, TG& grouping, std::tuple& associated) - { - static_assert(sizeof...(T2s) > 0, "There must be associated tables in process() for a correct pair"); - if constexpr (std::same_as) { - static_assert((framework::has_type(pack{}) && ...), "You didn't subscribed to all tables requested for mixing"); - comb.setTables(grouping, associated); - } - } -}; +namespace +{ +template +static inline auto extractOriginal(ProcessingContext& pc) +{ + return pc.inputs().get(aod::MetadataTrait::metadata::tableLabel())->asArrowTable(); +} -template -struct PartitionManager { - template - static void setPartition(ANY&, T2s&...) - { - } +template +static inline std::vector> extractOriginals(framework::pack, ProcessingContext& pc) +{ + return {extractOriginal(pc)...}; +} - template - static void bindExternalIndices(ANY&, Ts*...) - { - } +template refs> +static inline auto extractOriginals(ProcessingContext& pc) +{ + return [&](std::index_sequence) -> std::vector> { + return {pc.inputs().get(o2::aod::label())->asArrowTable()...}; + }(std::make_index_sequence()); +} +} // namespace - template - static void bindInternalIndices(ANY&, E*) - { - } +namespace analysis_task_parsers +{ - template - static void getBoundToExternalIndices(ANY&, Ts&...) 
- { - } +/// Options handling +template +bool appendOption(std::vector&, O&) +{ + return false; +} + +template +bool appendOption(std::vector& options, O& option) +{ + return ConfigurableHelpers::appendOption(options, option); +} - static void updatePlaceholders(ANY&, InitContext&) - { +template +bool appendOption(std::vector& options, O& optionGroup) +{ + if constexpr (requires { optionGroup.prefix; }) { + homogeneous_apply_refs([prefix = optionGroup.prefix](C& option) { // apend group prefix if set + if constexpr (requires { option.name; }) { + option.name.insert(0, 1, '.'); + option.name.insert(0, prefix); + } + return true; + }, + optionGroup); } + homogeneous_apply_refs([&options](auto& option) { return appendOption(options, option); }, optionGroup); + return true; +} + +template +bool prepareOption(InitContext&, O&) +{ + return false; +} - static bool newDataframe(ANY&) - { - return false; +template +bool prepareOption(InitContext& context, O& configurable) +{ + if constexpr (variant_trait_v != VariantType::Unknown) { + configurable.value = context.options().get(configurable.name.c_str()); + } else { + auto pt = context.options().get(configurable.name.c_str()); + configurable.value = RootConfigParamHelpers::as(pt); } -}; + return true; +} + +template +bool prepareOption(InitContext& context, O& configurableGroup) +{ + homogeneous_apply_refs([&context](auto&& configurable) { return prepareOption(context, configurable); }, configurableGroup); + return true; +} + +/// Conditions handling +template +bool appendCondition(std::vector&, C&) +{ + return false; +} + +template +bool appendCondition(std::vector& inputs, C& condition) +{ + inputs.emplace_back(InputSpec{condition.path, "AODC", runtime_hash(condition.path.c_str()), Lifetime::Condition, ccdbParamSpec(condition.path)}); + return true; +} + +template +bool appendCondition(std::vector& inputs, C& conditionGroup) +{ + homogeneous_apply_refs([&inputs](auto& condition) { return appendCondition(inputs, condition); 
}, conditionGroup); + return true; +} +/// Table auto-creation handling template -struct PartitionManager> { - template - static void doSetPartition(Partition& partition, T2& table) - { - if constexpr (std::same_as) { - partition.bindTable(table); - } - } +bool requestInputs(std::vector&, T const&) +{ + return false; +} - template - static void setPartition(Partition& partition, T2s&... tables) - { - (doSetPartition(partition, tables), ...); +template +bool requestInputs(std::vector& inputs, T const& spawns) +{ + auto base_specs = spawns.base_specs(); + for (auto base_spec : base_specs) { + base_spec.metadata.push_back(ConfigParamSpec{std::string{"control:spawn"}, VariantType::Bool, true, {"\"\""}}); + DataSpecUtils::updateInputList(inputs, std::forward(base_spec)); } + return true; +} - template - static void bindExternalIndices(Partition& partition, Ts*... tables) - { - partition.bindExternalIndices(tables...); +template +bool requestInputs(std::vector& inputs, T const& builds) +{ + auto base_specs = builds.base_specs(); + for (auto base_spec : base_specs) { + base_spec.metadata.push_back(ConfigParamSpec{std::string{"control:build"}, VariantType::Bool, true, {"\"\""}}); + DataSpecUtils::updateInputList(inputs, std::forward(base_spec)); } + return true; +} - template - static void bindInternalIndices(Partition& partition, E* table) - { - if constexpr (o2::soa::is_binding_compatible_v>()) { - partition.bindInternalIndicesTo(table); - } - } +template +bool newDataframeCondition(InputRecord&, C&) +{ + return false; +} - static void updatePlaceholders(Partition& partition, InitContext& context) - { - partition.updatePlaceholders(context); - } +template +bool newDataframeCondition(InputRecord& record, C& condition) +{ + condition.instance = (typename C::type*)record.get(condition.path).get(); + return true; +} - static bool newDataframe(Partition& partition) - { - partition.dataframeChanged = true; - return true; - } -}; +template +bool 
newDataframeCondition(InputRecord& record, C& conditionGroup) +{ + homogeneous_apply_refs([&record](auto&& condition) { return newDataframeCondition(record, condition); }, conditionGroup); + return true; +} -template -struct FilterManager { - static bool createExpressionTrees(ANY&, std::vector&) - { - return false; - } +/// Outputs handling +template +bool appendOutput(std::vector&, T&, uint32_t) +{ + return false; +} - static bool updatePlaceholders(ANY&, InitContext&) - { - return false; - } -}; +template +bool appendOutput(std::vector& outputs, T&, uint32_t) +{ + outputs.emplace_back(OutputForTable::spec()); + return true; +} -template <> -struct FilterManager { - static bool createExpressionTrees(expressions::Filter const& filter, std::vector& expressionInfos) - { - expressions::updateExpressionInfos(filter, expressionInfos); - return true; - } +template +bool appendOutput(std::vector& outputs, T& producesGroup, uint32_t hash) +{ + homogeneous_apply_refs([&outputs, hash](auto& produces) { return appendOutput(outputs, produces, hash); }, producesGroup); + return true; +} - static bool updatePlaceholders(expressions::Filter& filter, InitContext& ctx) - { - expressions::updatePlaceholders(filter, ctx); - return true; - } -}; +template +bool appendOutput(std::vector& outputs, T& hr, uint32_t hash) +{ + hr.setHash(hash); + outputs.emplace_back(hr.spec()); + return true; +} + +template +bool appendOutput(std::vector& outputs, T& obj, uint32_t hash) +{ + obj.setHash(hash); + outputs.emplace_back(obj.spec()); + return true; +} + +template +bool appendOutput(std::vector& outputs, T& spawns, uint32_t) +{ + outputs.emplace_back(spawns.spec()); + return true; +} + +template +bool appendOutput(std::vector& outputs, T& builds, uint32_t) +{ + outputs.emplace_back(builds.spec()); + return true; +} -/// A manager which takes care of condition objects template -struct ConditionManager { - template - static bool appendCondition(std::vector& inputs, ANY& x) - { - if constexpr 
(std::derived_from) { - homogeneous_apply_refs([&inputs](auto& y) { return ConditionManager>::appendCondition(inputs, y); }, x); - return true; - } else { - return false; - } - } +bool postRunOutput(EndOfStreamContext&, T&) +{ + return false; +} - template - static bool newDataframe(InputRecord& record, ANY& x) - { - if constexpr (std::derived_from) { - homogeneous_apply_refs([&record](auto&& y) { return ConditionManager>::newDataframe(record, y); }, x); - return true; - } else { - return false; - } - } -}; +template +bool postRunOutput(EndOfStreamContext& context, T& hr) +{ + auto& deviceSpec = context.services().get(); + context.outputs().snapshot(hr.ref(deviceSpec.inputTimesliceId, deviceSpec.maxInputTimeslices), *(hr.getListOfHistograms())); + hr.clean(); + return true; +} -template -struct ConditionManager> { - static bool appendCondition(std::vector& inputs, Condition& what) - { - inputs.emplace_back(InputSpec{what.path, "AODC", runtime_hash(what.path.c_str()), Lifetime::Condition, ccdbParamSpec(what.path)}); - return true; - } - static bool newDataframe(InputRecord& inputs, Condition& what) - { - what.instance = (OBJ*)inputs.get(what.path).get(); - return true; - } -}; +template +bool postRunOutput(EndOfStreamContext& context, T& obj) +{ + auto& deviceSpec = context.services().get(); + context.outputs().snapshot(obj.ref(deviceSpec.inputTimesliceId, deviceSpec.maxInputTimeslices), *obj); + return true; +} -/// SFINAE placeholder, also handles recursion in ProcessGroup template -struct OutputManager { - template - static bool appendOutput(std::vector& outputs, ANY& what, uint32_t v) - { - if constexpr (std::derived_from) { - homogeneous_apply_refs([&outputs, v](auto& p) { return OutputManager>::appendOutput(outputs, p, v); }, what); - return true; - } - return false; - } +bool prepareOutput(ProcessingContext&, T&) +{ + return false; +} - template - static bool prepare(ProcessingContext& context, ANY& what) - { - if constexpr (std::derived_from) { - 
homogeneous_apply_refs([&context](auto& p) { return OutputManager>::prepare(context, p); }, what); - return true; - } - return false; - } +template +bool prepareOutput(ProcessingContext& context, T& produces) +{ + produces.resetCursor(std::move(context.outputs().make(OutputForTable::ref()))); + return true; +} - template - static bool postRun(EndOfStreamContext& context, ANY& what) - { - if constexpr (std::derived_from) { - homogeneous_apply_refs([&context](auto& p) { return OutputManager>::postRun(context, p); }, what); - return true; - } - return true; - } +template +bool prepareOutput(ProcessingContext& context, T& producesGroup) +{ + homogeneous_apply_refs([&context](auto& produces) { return prepareOutput(context, produces); }, producesGroup); + return true; +} - template - static bool finalize(ProcessingContext& context, ANY& what) - { - if constexpr (std::derived_from) { - homogeneous_apply_refs([&context](auto& p) { return OutputManager>::finalize(context, p); }, what); - return true; - } - return true; - } -}; - -/// Produces specialization -template -struct OutputManager> { - static bool appendOutput(std::vector& outputs, Produces& /*what*/, uint32_t) - { - outputs.emplace_back(OutputForTable::persistent_table_t>::spec()); - return true; - } - static bool prepare(ProcessingContext& context, Produces& what) - { - what.resetCursor(std::move(context.outputs().make(OutputForTable::persistent_table_t>::ref()))); - return true; - } - static bool finalize(ProcessingContext&, Produces& what) - { - what.setLabel(o2::aod::label::persistent_table_t::ref>()); - what.release(); - return true; - } - static bool postRun(EndOfStreamContext&, Produces&) - { - return true; - } -}; - -/// HistogramRegistry specialization -template <> -struct OutputManager { - static bool appendOutput(std::vector& outputs, HistogramRegistry& what, uint32_t hash) - { - what.setHash(hash); - outputs.emplace_back(what.spec()); - return true; - } - static bool prepare(ProcessingContext&, 
HistogramRegistry&) - { - return true; +template +bool prepareOutput(ProcessingContext& context, T& spawns) +{ + using metadata = o2::aod::MetadataTrait>::metadata; + auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context)); + if (originalTable->schema()->fields().empty() == true) { + using base_table_t = typename T::base_table_t::table_t; + originalTable = makeEmptyTable(o2::aod::label()); } - static bool finalize(ProcessingContext&, HistogramRegistry&) - { - return true; - } + spawns.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label())); + spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable})); + return true; +} - static bool postRun(EndOfStreamContext& context, HistogramRegistry& what) - { - auto& deviceSpec = context.services().get(); - context.outputs().snapshot(what.ref(deviceSpec.inputTimesliceId, deviceSpec.maxInputTimeslices), *(what.getListOfHistograms())); - what.clean(); - sleep(deviceSpec.inputTimesliceId); - return true; - } -}; +template +bool prepareOuput(ProcessingContext& context, T& builds) +{ + using metadata = o2::aod::MetadataTrait>::metadata; + return builds.template build(builds.pack(), extractOriginals(context)); +} -/// OutputObj specialization template -struct OutputManager> { - static bool appendOutput(std::vector& outputs, OutputObj& what, uint32_t hash) - { - what.setHash(hash); - outputs.emplace_back(what.spec()); - return true; - } - static bool prepare(ProcessingContext&, OutputObj&) - { - return true; - } +bool finalizeOutput(ProcessingContext&, T&) +{ + return false; +} - static bool finalize(ProcessingContext&, OutputObj&) - { - return true; - } +template +bool finalizeOutput(ProcessingContext&, T& produces) +{ + produces.setLabel(o2::aod::label()); + produces.release(); + return true; +} - static bool postRun(EndOfStreamContext& context, OutputObj& what) - { - auto& deviceSpec = context.services().get(); - 
context.outputs().snapshot(what.ref(deviceSpec.inputTimesliceId, deviceSpec.maxInputTimeslices), *what); - sleep(deviceSpec.inputTimesliceId); - return true; - } -}; +template +bool finalizeOutput(ProcessingContext& context, T& producesGroup) +{ + homogeneous_apply_refs([&context](auto& produces) { return finalizeOutput(context, produces); }, producesGroup); + return true; +} -/// Spawns specializations -template -static inline auto extractOriginal(ProcessingContext& pc) +template +bool finalizeOutput(ProcessingContext& context, T& spawns) { - return pc.inputs().get(aod::MetadataTrait::metadata::tableLabel())->asArrowTable(); + context.outputs().adopt(spawns.output(), spawns.asArrowTable()); + return true; } -template -static inline std::vector> extractOriginals(framework::pack, ProcessingContext& pc) +template +bool finalizeOutput(ProcessingContext& context, T& builds) { - return {extractOriginal(pc)...}; + context.outputs().adopt(builds.output(), builds.asArrowTable()); + return true; } -template refs> -static inline auto extractOriginals(ProcessingContext& pc) +/// Service handling +template +bool addService(std::vector&, T&) { - return [&](std::index_sequence) -> std::vector> { - return {pc.inputs().get(o2::aod::label())->asArrowTable()...}; - }(std::make_index_sequence()); + return false; } -template -struct OutputManager> { - static bool appendOutput(std::vector& outputs, Spawns& what, uint32_t) - { - outputs.emplace_back(what.spec()); - return true; +template +bool addService(std::vector& specs, T&) +{ + if constexpr (o2::framework::base_of_template) { + auto p = typename T::service_t{}; + auto loadableServices = PluginManager::parsePluginSpecString(p.loadSpec.c_str()); + PluginManager::loadFromPlugin(loadableServices, specs); } + return true; +} - static bool prepare(ProcessingContext& pc, Spawns& what) - { - using metadata = o2::aod::MetadataTrait>::metadata; - auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(pc)); - if 
(originalTable->schema()->fields().empty() == true) { - using base_table_t = typename Spawns::base_table_t::table_t; - originalTable = makeEmptyTable(o2::aod::label()); - } +template +bool prepareService(InitContext&, T&) +{ + return false; +} - what.extension = std::make_shared::extension_t>(o2::framework::spawner>(originalTable, o2::aod::label())); - what.table = std::make_shared(soa::ArrowHelpers::joinTables({what.extension->asArrowTable(), originalTable})); +template +bool prepareService(InitContext& context, T& service) +{ + using S = typename T::service_t; + if constexpr (requires { &S::instance; }) { + service.service = &(S::instance()); // Sigh... return true; - } - - static bool finalize(ProcessingContext& pc, Spawns& what) - { - pc.outputs().adopt(what.output(), what.asArrowTable()); + } else { + service.service = &(context.services().get()); return true; } + return false; +} - static bool postRun(EndOfStreamContext&, Spawns&) - { - return true; - } -}; +template +bool postRunService(EndOfStreamContext&, T&) +{ + return false; +} -/// Builds specialization -template -static inline auto doExtractOriginal(framework::pack, ProcessingContext& pc) +template +bool postRunService(EndOfStreamContext&, T& service) { - if constexpr (sizeof...(Ts) == 1) { - return pc.inputs().get(aod::MetadataTrait>>::metadata::tableLabel())->asArrowTable(); - } else { - return std::vector{pc.inputs().get(aod::MetadataTrait::metadata::tableLabel())->asArrowTable()...}; + // FIXME: for the moment we only need endOfStream to be + // stateless. 
In the future we might want to pass it EndOfStreamContext + if constexpr (requires { &T::service_t::endOfStream; }) { + service.service->endOfStream(); + return true; } + return false; } -template -static inline auto extractOriginalsVector(framework::pack, ProcessingContext& pc) +/// Filter handling +template +bool updatePlaceholders(InitContext&, T&) { - return std::vector{extractOriginalJoined(pc)...}; + return false; } -template -struct OutputManager> { - static bool appendOutput(std::vector& outputs, Builds& what, uint32_t) - { - outputs.emplace_back(what.spec()); - return true; - } +template +bool updatePlaceholders(InitContext& context, T& filter) +{ + expressions::updatePlaceholders(filter, context); + return true; +} - static bool prepare(ProcessingContext& pc, Builds& what) - { - using metadata = o2::aod::MetadataTrait>::metadata; - return what.template build(what.pack(), extractOriginals(pc)); - } +template +bool updatePlaceholders(InitContext& context, T& partition) +{ + partition.updatePlaceholders(context); + return true; +} - static bool finalize(ProcessingContext& pc, Builds& what) - { - pc.outputs().adopt(what.output(), what.asArrowTable()); - return true; - } +template +bool createExpressionTrees(std::vector&, T&) +{ + return false; +} - static bool postRun(EndOfStreamContext&, Builds&) - { - return true; - } -}; +template +bool createExpressionTrees(std::vector& expressionInfos, T& filter) +{ + expressions::updateExpressionInfos(filter, expressionInfos); + return true; +} template -struct ServiceManager { - template - static bool add(std::vector& /*specs*/, ANY& /*any*/) - { - return false; - } +bool newDataframePartition(T&) +{ + return false; +} - template - static bool prepare(InitContext&, ANY&) - { - return false; - } +template +bool newDataframePartition(T& partition) +{ + partition.dataframeChanged = true; + return true; +} - template - static bool postRun(EndOfStreamContext&, ANY&) - { - return false; - } -}; +template +void 
setPartition(P&, T&...) +{ +} -template -struct ServiceManager> { - static bool add(std::vector& specs, Service& /*service*/) - { - if constexpr (o2::framework::base_of_template) { - T p = T{}; - auto loadableServices = PluginManager::parsePluginSpecString(p.loadSpec.c_str()); - PluginManager::loadFromPlugin(loadableServices, specs); - } - return true; - } +template +void setPartition(P& partition, T&... tables) +{ + ([&]() { if constexpr (std::same_as) {partition.bindTable(tables);} }(), ...); +} - static bool prepare(InitContext& context, Service& service) - { - if constexpr (requires { &T::instance; }) { - service.service = &(T::instance()); // Sigh... - return true; - } else { - service.service = &(context.services().get()); - return true; - } - return false; - } +template +void bindInternalIndicesPartition(P&, T*) +{ +} - /// If a service has a method endOfStream, it is called at the end of the stream. - static bool postRun(EndOfStreamContext& /*context*/, Service& service) - { - // FIXME: for the moment we only need endOfStream to be - // stateless. In the future we might want to pass it EndOfStreamContext - if constexpr (requires { &T::endOfStream; }) { - service.service->endOfStream(); - return true; - } - return false; +template +void bindInternalIndicesPartition(P& partition, T* table) +{ + if constexpr (o2::soa::is_binding_compatible_v>()) { + partition.bindInternalIndicesTo(table); } -}; +} -template -struct CacheManager { - template - static bool initialize(InitContext&, ANY&) - { - return false; - } - template - static bool initialize(ProcessingContext&, ANY&) - { - return false; - } -}; +template +void bindExternalIndicesPartition(P&, T*...) 
+{ +} -template <> -struct CacheManager { - static bool initialize(InitContext&, SliceCache&) - { - return false; - } - static bool initialize(ProcessingContext& pc, SliceCache& cache) - { - if (cache.ptr == nullptr) { - cache.ptr = &pc.services().get(); - } - return true; - } -}; +template +void bindExternalIndicesPartition(P& partition, T*... tables) +{ + partition.bindExternalIndices(tables...); +} +/// Cache handling template -struct OptionManager { - template - static bool appendOption(std::vector& options, ANY& x) - { - /// Recurse, in case we are brace constructible - if constexpr (std::derived_from) { - if constexpr (requires { x.prefix; }) { - homogeneous_apply_refs([prefix = x.prefix](C& y) { // apend group prefix if set - if constexpr (requires { y.name; }) { - y.name.insert(0, 1, '.'); - y.name.insert(0, prefix); - } - return true; - }, - x); - } - homogeneous_apply_refs([&options](auto& y) { return OptionManager>::appendOption(options, y); }, x); - return true; - } else { - return false; - } - } +bool preInitializeCache(InitContext&, T&) +{ + return false; +} - template - static bool prepare(InitContext& ic, ANY& x) - { - if constexpr (std::derived_from) { - homogeneous_apply_refs([&ic](auto&& y) { return OptionManager>::prepare(ic, y); }, x); - return true; - } else { - return false; - } - } -}; +template +bool initializeCache(ProcessingContext&, T&) +{ + return false; +} -template -struct OptionManager> { - static bool appendOption(std::vector& options, Configurable& what) - { - return ConfigurableHelpers::appendOption(options, what); +template +bool initializeCache(ProcessingContext& context, T& cache) +{ + if (cache.ptr == nullptr) { + cache.ptr = &context.services().get(); } + return true; +} - static bool prepare(InitContext& context, Configurable& what) - { - if constexpr (variant_trait_v::type> != VariantType::Unknown) { - what.value = context.options().get(what.name.c_str()); - } else { - auto pt = context.options().get(what.name.c_str()); - 
what.value = RootConfigParamHelpers::as(pt); - } - return true; - } -}; +/// Combinations handling +template + requires(!is_combinations_generator) +void setGroupedCombination(C&, TG&, Ts&...) +{ +} -template -struct OptionManager> { - static bool appendOption(std::vector& options, ProcessConfigurable& what) - { - options.emplace_back(ConfigParamSpec{what.name, variant_trait_v>, what.value, {what.help}, what.kind}); - return true; +template + requires((sizeof...(Ts) > 0) && (C::compatible(framework::pack{}))) +static void setGroupedCombination(C& comb, TG& grouping, std::tuple& associated) +{ + if constexpr (std::same_as) { + comb.setTables(grouping, associated); } +} - static bool prepare(InitContext& context, ProcessConfigurable& what) - { - what.value = context.options().get(what.name.c_str()); - return true; - } -}; +/// Preslice handling +template +bool registerCache(T&, std::vector&, std::vector&) +{ + return false; +} -template -struct UpdateProcessSwitches { - static bool set(std::pair, ANY&) - { - return false; - } -}; - -template -struct UpdateProcessSwitches> { - static bool set(std::pair setting, ProcessConfigurable& what) - { - if (what.name == setting.first) { - what.value = setting.second; +template + requires std::same_as +bool registerCache(T& preslice, std::vector& bsks, std::vector&) +{ + if constexpr (T::optional) { + if (preslice.binding == "[MISSING]") { return true; } - return false; } -}; - -/// Manager template to facilitate extended tables spawning -template -struct SpawnManager { - static bool requestInputs(std::vector&, T const&) { return false; } -}; - -template -struct SpawnManager> { - static bool requestInputs(std::vector& inputs, Spawns& spawns) - { - auto base_specs = spawns.base_specs(); - for (auto base_spec : base_specs) { - base_spec.metadata.push_back(ConfigParamSpec{std::string{"control:spawn"}, VariantType::Bool, true, {"\"\""}}); - DataSpecUtils::updateInputList(inputs, std::forward(base_spec)); - } - return true; + auto 
locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + if (locate == bsks.end()) { + bsks.emplace_back(preslice.getBindingKey()); } -}; + return true; +} -/// Manager template for building index tables -template -struct IndexManager { - static bool requestInputs(std::vector&, T const&) { return false; }; -}; - -template -struct IndexManager> { - static bool requestInputs(std::vector& inputs, Builds& builds) - { - auto base_specs = builds.base_specs(); - for (auto base_spec : base_specs) { - base_spec.metadata.push_back(ConfigParamSpec{std::string{"control:build"}, VariantType::Bool, true, {"\"\""}}); - DataSpecUtils::updateInputList(inputs, std::forward(base_spec)); +template + requires std::same_as +bool registerCache(T& preslice, std::vector&, std::vector& bsksU) +{ + if constexpr (T::optional) { + if (preslice.binding == "[MISSING]") { + return true; } - return true; } -}; + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + if (locate == bsksU.end()) { + bsksU.emplace_back(preslice.getBindingKey()); + } + return true; +} -/// Manager template to handle slice caching template -struct PresliceManager { - static bool registerCache(T&, std::vector&, std::vector&) - { - return false; - } +bool updateSliceInfo(T&, ArrowTableSlicingCache&) +{ + return false; +} - static bool updateSliceInfo(T&, ArrowTableSlicingCache&) - { - return false; - } -}; - -template -struct PresliceManager> { - static bool registerCache(PresliceBase& container, std::vector& bsks, std::vector&) - requires std::same_as - { - if constexpr (OPT) { - if (container.binding == "[MISSING]") { - return true; - } - } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == container.bindingKey.first) && 
(entry.second == container.bindingKey.second); }); - if (locate == bsks.end()) { - bsks.emplace_back(container.getBindingKey()); +template +static bool updateSliceInfo(T& preslice, ArrowTableSlicingCache& cache) + requires std::same_as +{ + if constexpr (T::optional) { + if (preslice.binding == "[MISSING]") { + return true; } - return true; } + preslice.updateSliceInfo(cache.getCacheFor(preslice.getBindingKey())); + return true; +} - static bool registerCache(PresliceBase& container, std::vector&, std::vector& bsksU) - requires std::same_as - { - if constexpr (OPT) { - if (container.binding == "[MISSING]") { - return true; - } - } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == container.bindingKey.first) && (entry.second == container.bindingKey.second); }); - if (locate == bsksU.end()) { - bsksU.emplace_back(container.getBindingKey()); +template +static bool updateSliceInfo(T& preslice, ArrowTableSlicingCache& cache) + requires std::same_as +{ + if constexpr (T::optional) { + if (preslice.binding == "[MISSING]") { + return true; } - return true; } + preslice.updateSliceInfo(cache.getCacheUnsortedFor(preslice.getBindingKey())); + return true; +} - static bool updateSliceInfo(PresliceBase& container, ArrowTableSlicingCache& cache) - requires std::same_as - { - if constexpr (OPT) { - if (container.binding == "[MISSING]") { - return true; - } - } - container.updateSliceInfo(cache.getCacheFor(container.getBindingKey())); - return true; - } +/// Process switches handling +template +static bool setProcessSwitch(std::pair, T&) +{ + return false; +} - static bool updateSliceInfo(PresliceBase& container, ArrowTableSlicingCache& cache) - requires std::same_as - { - if constexpr (OPT) { - if (container.binding == "[MISSING]") { - return true; - } - } - container.updateSliceInfo(cache.getCacheUnsortedFor(container.getBindingKey())); +template +static bool setProcessSwitch(std::pair setting, T& pc) +{ + if (pc.name == 
setting.first) { + pc.value = setting.second; return true; } -}; + return false; +} + +} // namespace analysis_task_parsers } // namespace o2::framework #endif // ANALYSISMANAGERS_H diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index 5d18a31dcb955..b78bf61aea558 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -319,18 +319,18 @@ struct AnalysisDataProcessorBuilder { auto groupingTable = AnalysisDataProcessorBuilder::bindGroupingTable(inputs, processingFunction, infos); // set filtered tables for partitions with grouping - homogeneous_apply_refs([&groupingTable](auto& x) { - PartitionManager>::setPartition(x, groupingTable); - PartitionManager>::bindInternalIndices(x, &groupingTable); + homogeneous_apply_refs([&groupingTable](auto& element) { + analysis_task_parsers::setPartition(element, groupingTable); + analysis_task_parsers::bindInternalIndicesPartition(element, &groupingTable); return true; }, task); if constexpr (sizeof...(Associated) == 0) { // single argument to process - homogeneous_apply_refs([&groupingTable](auto& x) { - PartitionManager>::bindExternalIndices(x, &groupingTable); - GroupedCombinationManager>::setGroupedCombination(x, groupingTable); + homogeneous_apply_refs([&groupingTable](auto& element) { + analysis_task_parsers::bindExternalIndicesPartition(element, &groupingTable); + analysis_task_parsers::setGroupedCombination(element, groupingTable); return true; }, task); @@ -353,7 +353,7 @@ struct AnalysisDataProcessorBuilder { [&task](auto&... 
t) mutable { (homogeneous_apply_refs( [&t](auto& p) { - PartitionManager>::bindInternalIndices(p, &t); + analysis_task_parsers::bindInternalIndicesPartition(p, &t); return true; }, task), @@ -364,8 +364,8 @@ struct AnalysisDataProcessorBuilder { auto binder = [&task, &groupingTable, &associatedTables](auto& x) mutable { x.bindExternalIndices(&groupingTable, &std::get>(associatedTables)...); homogeneous_apply_refs([&x](auto& t) mutable { - PartitionManager>::setPartition(t, x); - PartitionManager>::bindExternalIndices(t, &x); + analysis_task_parsers::setPartition(t, x); + analysis_task_parsers::bindExternalIndicesPartition(t, &x); return true; }, task); @@ -381,7 +381,7 @@ struct AnalysisDataProcessorBuilder { // GroupedCombinations bound separately, as they should be set once for all associated tables homogeneous_apply_refs([&groupingTable, &associatedTables](auto& t) { - GroupedCombinationManager>::setGroupedCombination(t, groupingTable, associatedTables); + analysis_task_parsers::setGroupedCombination(t, groupingTable, associatedTables); return true; }, task); @@ -399,7 +399,7 @@ struct AnalysisDataProcessorBuilder { // bind partitions and grouping table homogeneous_apply_refs([&groupingTable](auto& x) { - PartitionManager>::bindExternalIndices(x, &groupingTable); + analysis_task_parsers::bindExternalIndicesPartition(x, &groupingTable); return true; }, task); @@ -409,7 +409,7 @@ struct AnalysisDataProcessorBuilder { } else { // bind partitions and grouping table homogeneous_apply_refs([&groupingTable](auto& x) { - PartitionManager>::bindExternalIndices(x, &groupingTable); + analysis_task_parsers::bindExternalIndicesPartition(x, &groupingTable); return true; }, task); @@ -444,8 +444,8 @@ auto getTaskNameSetProcesses(std::string& outputName, TaskName first, SetDefault auto task = std::make_shared(std::forward(args)...); for (auto& setting : second.map) { homogeneous_apply_refs( - [&](auto& x) { - return UpdateProcessSwitches>::set(setting, x); + [&](auto& element) 
{ + return analysis_task_parsers::setProcessSwitch(setting, element); }, *task.get()); } @@ -459,8 +459,8 @@ auto getTaskNameSetProcesses(std::string& outputName, SetDefaultProcesses first, auto task = std::make_shared(std::forward(args)...); for (auto& setting : first.map) { homogeneous_apply_refs( - [&](auto& x) { - return UpdateProcessSwitches>::set(setting, x); + [&](auto& element) { + return analysis_task_parsers::setProcessSwitch(setting, element); }, *task.get()); } @@ -474,8 +474,8 @@ auto getTaskNameSetProcesses(std::string& outputName, SetDefaultProcesses first, auto task = std::make_shared(std::forward(args)...); for (auto& setting : first.map) { homogeneous_apply_refs( - [&](auto& x) { - return UpdateProcessSwitches>::set(setting, x); + [&](auto& element) { + return analysis_task_parsers::setProcessSwitch(setting, element); }, *task.get()); } @@ -529,9 +529,9 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) std::vector bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created - homogeneous_apply_refs([&options, &hash](auto& x) { return OptionManager>::appendOption(options, x); }, *task.get()); + homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); /// extract conditions and append them as inputs - homogeneous_apply_refs([&inputs](auto& x) { return ConditionManager>::appendCondition(inputs, x); }, *task.get()); + homogeneous_apply_refs([&inputs](auto& element) { return analysis_task_parsers::appendCondition(inputs, element); }, *task.get()); /// parse process functions defined by corresponding configurables if constexpr (requires { &T::process; }) { @@ -550,18 +550,12 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) *task.get()); // add preslice declarations to slicing cache definition - homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& x) { return PresliceManager>::registerCache(x, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); + homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& element) { return analysis_task_parsers::registerCache(element, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); - // request base tables for spawnable extended tables + // request base tables for spawnable extended tables and indices to be built // this checks for duplications - homogeneous_apply_refs([&inputs](auto& x) { - return SpawnManager>::requestInputs(inputs, x); - }, - *task.get()); - - // request base tables for indices to be built - homogeneous_apply_refs([&inputs](auto& x) { - return IndexManager>::requestInputs(inputs, x); + homogeneous_apply_refs([&inputs](auto& element) { + return analysis_task_parsers::requestInputs(inputs, element); }, *task.get()); @@ -570,40 +564,36 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) LOG(warn) << "Task " << name_str << " has no inputs"; } - homogeneous_apply_refs([&outputs, &hash](auto& x) { return OutputManager>::appendOutput(outputs, x, hash); }, *task.get()); + homogeneous_apply_refs([&outputs, &hash](auto& element) { return analysis_task_parsers::appendOutput(outputs, element, hash); }, *task.get()); auto requiredServices = CommonServices::defaultServices(); auto arrowServices = CommonServices::arrowServices(); requiredServices.insert(requiredServices.end(), arrowServices.begin(), arrowServices.end()); - homogeneous_apply_refs([&requiredServices](auto& x) { return ServiceManager>::add(requiredServices, x); }, *task.get()); + homogeneous_apply_refs([&requiredServices](auto& element) { return analysis_task_parsers::addService(requiredServices, element); }, *task.get()); auto algo = AlgorithmSpec::InitCallback{[task = task, expressionInfos, bindingsKeys, bindingsKeysUnsorted](InitContext& ic) mutable { - homogeneous_apply_refs([&ic](auto&& x) { return OptionManager>::prepare(ic, x); }, *task.get()); - homogeneous_apply_refs([&ic](auto&& x) { return ServiceManager>::prepare(ic, x); }, *task.get()); + homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareOption(ic, element); }, *task.get()); + homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareService(ic, element); }, *task.get()); auto& callbacks = ic.services().get(); auto endofdatacb = [task](EndOfStreamContext& eosContext) { - homogeneous_apply_refs([&eosContext](auto&& x) { - using X = std::decay_t; - ServiceManager::postRun(eosContext, x); - return OutputManager::postRun(eosContext, x); }, + homogeneous_apply_refs([&eosContext](auto& element) { + analysis_task_parsers::postRunService(eosContext, element); + analysis_task_parsers::postRunOutput(eosContext, element); + return true; }, *task.get()); eosContext.services().get().readyToQuit(QuitRequest::Me); }; callbacks.set(endofdatacb); - /// update configurables in filters 
+ /// update configurables in filters and partitions homogeneous_apply_refs( - [&ic](auto& x) -> bool { return FilterManager>::updatePlaceholders(x, ic); }, - *task.get()); - /// update configurables in partitions - homogeneous_apply_refs( - [&ic](auto& x) -> bool { PartitionManager>::updatePlaceholders(x, ic); return true; }, + [&ic](auto& element) -> bool { return analysis_task_parsers::updatePlaceholders(ic, element); }, *task.get()); /// create for filters gandiva trees matched to schemas and store the pointers into expressionInfos - homogeneous_apply_refs([&expressionInfos](auto& x) { - return FilterManager>::createExpressionTrees(x, expressionInfos); + homogeneous_apply_refs([&expressionInfos](auto& element) { + return analysis_task_parsers::createExpressionTrees(expressionInfos, element); }, *task.get()); @@ -614,33 +604,30 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) ic.services().get().setCaches(std::move(bindingsKeys)); ic.services().get().setCachesUnsorted(std::move(bindingsKeysUnsorted)); // initialize global caches - homogeneous_apply_refs([&ic](auto& x) { - return CacheManager>::initialize(ic, x); + homogeneous_apply_refs([&ic](auto& element) { + return analysis_task_parsers::preInitializeCache(ic, element); }, *(task.get())); return [task, expressionInfos](ProcessingContext& pc) mutable { // load the ccdb object from their cache - homogeneous_apply_refs([&pc](auto&& x) { return ConditionManager>::newDataframe(pc.inputs(), x); }, *task.get()); + homogeneous_apply_refs([&pc](auto& element) { return analysis_task_parsers::newDataframeCondition(pc.inputs(), element); }, *task.get()); // reset partitions once per dataframe - homogeneous_apply_refs([](auto&& x) { return PartitionManager>::newDataframe(x); }, *task.get()); + homogeneous_apply_refs([](auto& element) { return analysis_task_parsers::newDataframePartition(element); }, *task.get()); // reset selections for the next dataframe for (auto& info : expressionInfos) 
{ info.resetSelection = true; } // reset pre-slice for the next dataframe auto slices = pc.services().get(); - homogeneous_apply_refs([&pc, &slices](auto& x) { - return PresliceManager>::updateSliceInfo(x, slices); + homogeneous_apply_refs([&pc, &slices](auto& element) { + return analysis_task_parsers::updateSliceInfo(element, slices); }, *(task.get())); // initialize local caches - homogeneous_apply_refs([&pc](auto& x) { - return CacheManager>::initialize(pc, x); - }, - *(task.get())); + homogeneous_apply_refs([&pc](auto& element) { return analysis_task_parsers::initializeCache(pc, element); }, *(task.get())); // prepare outputs - homogeneous_apply_refs([&pc](auto&& x) { return OutputManager>::prepare(pc, x); }, *task.get()); + homogeneous_apply_refs([&pc](auto& element) { return analysis_task_parsers::prepareOutput(pc, element); }, *task.get()); // execute run() if constexpr (requires { task->run(pc); }) { task->run(pc); @@ -662,7 +649,7 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) }, *task.get()); // finalize outputs - homogeneous_apply_refs([&pc](auto&& x) { return OutputManager>::finalize(pc, x); }, *task.get()); + homogeneous_apply_refs([&pc](auto& element) { return analysis_task_parsers::finalizeOutput(pc, element); }, *task.get()); }; }}; diff --git a/Framework/Core/include/Framework/Condition.h b/Framework/Core/include/Framework/Condition.h index 92b2f9e03a23b..3f2edaf286626 100644 --- a/Framework/Core/include/Framework/Condition.h +++ b/Framework/Core/include/Framework/Condition.h @@ -42,6 +42,13 @@ struct Condition { } }; +template +concept is_condition = requires(T t) { + typename T::type; + requires std::same_as; + requires std::same_as; +}; + /// Can be used to group together a number of Configurables /// to overcome the limit of 100 Configurables per task. 
/// In order to do so you can do: @@ -58,5 +65,8 @@ struct Condition { struct ConditionGroup { }; +template +concept is_condition_group = std::derived_from; + } // namespace o2::framework #endif // O2_FRAMEWORK_CONDITION_H_ diff --git a/Framework/Core/include/Framework/Configurable.h b/Framework/Core/include/Framework/Configurable.h index 88e50cf3c7c26..f72d2f3a2a7d6 100644 --- a/Framework/Core/include/Framework/Configurable.h +++ b/Framework/Core/include/Framework/Configurable.h @@ -83,8 +83,22 @@ struct Configurable : IP { template using MutableConfigurable = Configurable>; +template +concept is_configurable = requires(T& t) { + typename T::type; + requires std::same_as; + &T::operator typename T::type; +}; + using ConfigurableAxis = Configurable, ConfigParamKind::kAxisSpec, ConfigurablePolicyConst, ConfigParamKind::kAxisSpec>>; +template +concept is_configurable_axis = is_configurable&& + requires() +{ + T::kind == ConfigParamKind::kAxisSpec; +}; + template struct ProcessConfigurable : Configurable { ProcessConfigurable(R (T::*process_)(As...), std::string const& name_, bool&& value_, std::string const& help_) @@ -97,7 +111,7 @@ struct ProcessConfigurable : Configurable { }; template -concept is_process_configurable = base_of_template; +concept is_process_configurable = is_configurable && requires(T& t) { t.process; }; #define PROCESS_SWITCH(_Class_, _Name_, _Help_, _Default_) \ decltype(ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}) do##_Name_ = ProcessConfigurable{&_Class_ ::_Name_, #_Name_, _Default_, _Help_}; @@ -128,5 +142,8 @@ std::ostream& operator<<(std::ostream& os, Configurable const& c) struct ConfigurableGroup { }; +template +concept is_configurable_group = std::derived_from; + } // namespace o2::framework #endif // O2_FRAMEWORK_CONFIGURABLE_H_ diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index b9775f031c65c..ff22a35a00a23 100644 --- 
a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -418,6 +418,9 @@ struct Filter { size_t designateSubtrees(Node* node, size_t index = 0); }; +template +concept is_filter = std::same_as; + using Projector = Filter; /// Function for creating gandiva selection from our internal filter tree diff --git a/Framework/Core/include/Framework/GroupedCombinations.h b/Framework/Core/include/Framework/GroupedCombinations.h index 21d8384e3aa6e..bdbddee871baa 100644 --- a/Framework/Core/include/Framework/GroupedCombinations.h +++ b/Framework/Core/include/Framework/GroupedCombinations.h @@ -49,6 +49,16 @@ expressions::BindingNode getMatchingIndexNode() template struct GroupedCombinationsGenerator { + using grouping_policy_t = GroupingPolicy; + using g_t = G; + using associated_pack_t = framework::pack; + + template + static consteval bool compatible(framework::pack p) + { + return (framework::has_type(p) && ...); + } + using GroupedIteratorType = pack_to_tuple_t, pack>>; struct GroupedIterator : public GroupingPolicy { @@ -230,6 +240,13 @@ struct GroupedCombinationsGenerator { iterator mEnd; }; +template +concept is_combinations_generator = requires(T t) { + typename T::GroupedIterator; + &T::begin; + &T::end; +}; + // Aliases for 2-particle correlations // 'Pair' and 'Triple' can be used for same kind pair/triple, too, just specify the same type twice template > diff --git a/Framework/Core/include/Framework/HistogramRegistry.h b/Framework/Core/include/Framework/HistogramRegistry.h index 9f272be38da0c..6db4bd0a2d0e2 100644 --- a/Framework/Core/include/Framework/HistogramRegistry.h +++ b/Framework/Core/include/Framework/HistogramRegistry.h @@ -252,6 +252,9 @@ class HistogramRegistry std::array mRegistryValue{}; }; +template +concept is_histogram_registry = std::same_as; + //-------------------------------------------------------------------------------------------------- 
//-------------------------------------------------------------------------------------------------- // Implementation of HistFiller template functions. diff --git a/Framework/Core/include/Framework/SliceCache.h b/Framework/Core/include/Framework/SliceCache.h index f7312b364b630..db5af94cd5967 100644 --- a/Framework/Core/include/Framework/SliceCache.h +++ b/Framework/Core/include/Framework/SliceCache.h @@ -12,10 +12,8 @@ #ifndef SLICECACHE_H #define SLICECACHE_H -#include "Framework/ServiceHandle.h" #include "Framework/ArrowTableSlicingCache.h" #include -#include #include namespace o2::framework @@ -23,6 +21,9 @@ namespace o2::framework struct SliceCache { ArrowTableSlicingCache* ptr = nullptr; }; + +template +concept is_slice_cache = std::same_as; } // namespace o2::framework #endif // SLICECACHE_H From a88d10cb8517efd570db61bb0d948bc1dc9bd1a6 Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Tue, 18 Feb 2025 11:20:29 +0100 Subject: [PATCH 0096/1914] [MUON] added option for storing nCandidates in the MFT-MCH matching --- prodtests/full-system-test/dpl-workflow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 77f16c2ea51fd..ef09bb76c9658 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -332,6 +332,7 @@ fi ( workflow_has_parameter AOD || [[ -z "$DISABLE_ROOT_OUTPUT" ]] || needs_root_output o2-emcal-cell-writer-workflow ) && has_detector EMC && RAW_EMC_SUBSPEC=" --subspecification 1 " has_detector_reco MID && has_detector_matching MCHMID && MFTMCHConf="FwdMatching.useMIDMatch=true;" || MFTMCHConf="FwdMatching.useMIDMatch=false;" +[[ ! 
-z ${MFTMCH_NCANDIDATES_OPT:-} ]] && MFTMCHConf+="${MFTMCH_NCANDIDATES_OPT}" [[ $IS_SIMULATED_DATA == "1" ]] && EMCRAW2C_CONFIG+=" --no-checkactivelinks" From 769e674d76262ad59e96f7c504be01e9952116d9 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 21 Feb 2025 10:14:14 +0100 Subject: [PATCH 0097/1914] DPL Analysis: fix `is_preslice` concept (#13990) --- Framework/Core/include/Framework/ASoA.h | 8 +------- Framework/Core/include/Framework/AnalysisManagers.h | 2 ++ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index ca9c49e8bc6c8..cb2ff11a8e901 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1455,13 +1455,7 @@ template using PresliceOptional = PresliceBase; template -concept is_preslice = requires(T t) { - requires std::same_as; - requires std::same_as; - &T::isMising; - &T::updateSliceInfo; - &T::getSliceFor; -}; +concept is_preslice = std::derived_from; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 1d326e338099f..ccabbd7a0a197 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -511,6 +511,7 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template + requires(!is_preslice) bool registerCache(T&, std::vector&, std::vector&) { return false; @@ -549,6 +550,7 @@ bool registerCache(T& preslice, std::vector&, std::vector + requires(!is_preslice) bool updateSliceInfo(T&, ArrowTableSlicingCache&) { return false; From ad63c3218542ffccea487a81d93197e0eacd09bf Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Fri, 21 Feb 2025 11:46:15 +0100 Subject: [PATCH 0098/1914] Ctpdev: readout consistency (#13989) * fix: adding return in orbit reset code * dev: adding consistency check classes vs inputs 
to readout * clang --- .../include/DataFormatsCTP/Configuration.h | 1 + .../Detectors/CTP/src/Configuration.cxx | 11 ++++ .../CTPReconstruction/RawDataDecoder.h | 3 + .../CTP/reconstruction/src/RawDataDecoder.cxx | 60 +++++++++++++++++-- .../workflowScalers/src/ctp-ccdb-orbit.cxx | 9 +-- 5 files changed, 74 insertions(+), 10 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h index e06c77ad3dfc2..4ff0256f33827 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h @@ -162,6 +162,7 @@ class CTPConfiguration int getInputIndex(const std::string& name) const; std::string getClassNameFromIndex(int index) { return mCTPClasses[index].name; }; std::string getClassNameFromHWIndex(int index); + const CTPClass* getCTPClassFromHWIndex(const int index) const; bool isMaskInInputs(const uint64_t& mask) const; bool isBCMaskInConfig(const std::string maskname) const; const BCMask* isBCMaskInConfigP(const std::string bcmask) const; diff --git a/DataFormats/Detectors/CTP/src/Configuration.cxx b/DataFormats/Detectors/CTP/src/Configuration.cxx index 2ae8fc1b03ffc..03f9b38db0e9f 100644 --- a/DataFormats/Detectors/CTP/src/Configuration.cxx +++ b/DataFormats/Detectors/CTP/src/Configuration.cxx @@ -790,6 +790,17 @@ std::string CTPConfiguration::getClassNameFromHWIndex(int index) std::string ret = "not found"; return ret; } +const CTPClass* CTPConfiguration::getCTPClassFromHWIndex(int index) const +{ + const CTPClass* clsfound = nullptr; + for (auto const& cls : mCTPClasses) { + if (index == cls.getIndex()) { + clsfound = &cls; + break; + } + } + return clsfound; +} bool CTPConfiguration::isMaskInInputs(const uint64_t& mask) const { for (auto const& inp : mInputs) { diff --git a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h 
b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h index 16a8ec6a6bef1..7579e9dc1d6f5 100644 --- a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h +++ b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h @@ -45,6 +45,7 @@ class RawDataDecoder void setMAXErrors(int m) { mErrorMax = m; } int setLumiInp(int lumiinp, std::string inp); void setCTPConfig(CTPConfiguration cfg) { mCTPConfig = std::move(cfg); }; + void setCheckConsistency(bool check) { mCheckConsistency = check; } uint32_t getIRRejected() const { return mIRRejected; } uint32_t getTCRRejected() const { return mTCRRejected; } std::vector& getTFOrbits() { return mTFOrbits; } @@ -54,12 +55,14 @@ class RawDataDecoder int init(); static int shiftNew(const o2::InteractionRecord& irin, uint32_t TFOrbit, std::bitset<48>& inpmask, int64_t shift, int level, std::map& digmap); static int shiftInputs(std::map& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask = 0xffffffffffffffff); + int checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask = 0xffffffffffffffff); private: static constexpr uint32_t TF_TRIGGERTYPE_MASK = 0x800; static constexpr uint32_t HB_TRIGGERTYPE_MASK = 0x2; // true: full inps decoding includine latency shifts here; false: latency shifts in CTF decoder bool mDecodeInps = false; + bool mCheckConsistency = false; // for digits bool mDoDigits = true; std::vector mOutputDigits; diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 74e5b7481163d..faa3dbac3e934 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -79,13 +79,13 @@ int RawDataDecoder::addCTPDigit(uint32_t linkCRU, uint32_t orbit, gbtword80_t& d if (mErrorIR < mErrorMax) { LOG(error) << "Two CTP IRs with the same timestamp:" << ir.bc << " " << ir.orbit << " pld:" << pld << " dig:" << 
digits[ir]; } - ret = 2; + ret = 4; mErrorIR++; mStickyError = true; } } else { LOG(error) << "Two digits with the same timestamp:" << ir.bc << " " << ir.orbit; - ret = 2; + ret = 8; } } else if (linkCRU == o2::ctp::GBTLinkIDClassRec) { int32_t BCShiftCorrection = -o2::ctp::TriggerOffsetsParam::Instance().customOffset[o2::detectors::DetID::CTP]; @@ -113,11 +113,11 @@ int RawDataDecoder::addCTPDigit(uint32_t linkCRU, uint32_t orbit, gbtword80_t& d mStickyError = true; } mErrorTCR++; - ret = 3; + ret = 16; } } else { LOG(error) << "Two digits with the same timestamp:" << ir.bc << " " << ir.orbit; - ret = 3; + ret = 32; } } else { LOG(error) << "Unxpected CTP CRU link:" << linkCRU; @@ -298,7 +298,10 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector& digit } digits.push_back(dig.second); } + int ret = 0; if (nTwoI) { // Trigger class wo Input LOG(error) << "LM:" << nLM << " L0:" << nL0 << " L1:" << nL1 << " TwI:" << nTwI << " Trigger classes wo input:" << nTwoI; + ret = 64; } if (nTwoIlost) { LOG(warn) << " Trigger classes wo input from diff latency 1:" << nTwoIlost; } - return 0; + return ret; +} +// +int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask) +{ + int ret = 0; + int lost = 0; + for (auto const& digit : digits) { + // if class mask => inps + for (int i = 0; i < digit.CTPClassMask.size(); i++) { + if (digit.CTPClassMask[i]) { + const CTPClass* cls = mCTPConfig.getCTPClassFromHWIndex(i); + uint64_t clsinpmask = cls->descriptor->getInputsMask(); + uint64_t diginpmask = digit.CTPInputMask.to_ullong(); + if (!((clsinpmask & diginpmask) == clsinpmask)) { + LOG(error) << "CTP class:" << cls->name << " inpmask:" << clsinpmask << " not compatible with inputs mask:" << diginpmask; + ret = 128; + } + } + } + // if inps => class mask + for (auto const& cls : mCTPConfig.getCTPClasses()) { + uint64_t clsinpmask = cls.descriptor->getInputsMask(); + uint64_t diginpmask = digit.CTPInputMask.to_ullong(); 
+ uint64_t digclsmask = digit.CTPClassMask.to_ullong(); + if ((clsinpmask & diginpmask) == clsinpmask) { + if ((cls.classMask & digclsmask) == 0) { + int32_t BCShiftCorrection = -o2::ctp::TriggerOffsetsParam::Instance().customOffset[o2::detectors::DetID::CTP]; + int32_t offset = BCShiftCorrection + o2::ctp::TriggerOffsetsParam::Instance().LM_L0 + o2::ctp::TriggerOffsetsParam::Instance().L0_L1_classes - 1; + offset = o2::constants::lhc::LHCMaxBunches - offset; + if (digit.intRecord.bc < offset) { + LOG(error) << "CTP class:" << cls.name << " inpmask:" << clsinpmask << " cls mask:" << cls.classMask << " not found in digit:" << digit; + ret = 256; + } else { + lost++; + } + } + } + } + } + if (lost) { + LOG(info) << "LOST classes because of shift:" << lost; + } + return ret; } // int RawDataDecoder::setLumiInp(int lumiinp, std::string inp) diff --git a/Detectors/CTP/workflowScalers/src/ctp-ccdb-orbit.cxx b/Detectors/CTP/workflowScalers/src/ctp-ccdb-orbit.cxx index 0f4203d994402..7dedcacbf6047 100644 --- a/Detectors/CTP/workflowScalers/src/ctp-ccdb-orbit.cxx +++ b/Detectors/CTP/workflowScalers/src/ctp-ccdb-orbit.cxx @@ -76,6 +76,7 @@ int main(int argc, char** argv) std::cerr << e.what() << ", application will now exit" << std::endl; exit(2); } + int ret = 0; std::string action = vm["action"].as(); std::vector vect; std::string ccdbPath; @@ -120,10 +121,10 @@ int main(int argc, char** argv) int64_t runnum = vm["run-number"].as(); metadata["runNumber"] = std::to_string(runnum); std::cout << "Storing:" << ccdbPath << " " << metadata["runNumber"] << " tmin:" << tmin << " tmax:" << tmax << " ts:" << tt << std::endl; - api.storeAsTFileAny(&(vect), ccdbPath, metadata, tmin, tmax); + ret = api.storeAsTFileAny(&(vect), ccdbPath, metadata, tmin, tmax); } else { std::cout << "Storing:" << ccdbPath << " tmin:" << tmin << " tmax:" << tmax << " ts:" << tt << std::endl; - api.storeAsTFileAny(&(vect), ccdbPath, metadata, tmin, tmax); + ret = api.storeAsTFileAny(&(vect), ccdbPath, 
metadata, tmin, tmax); } } // @@ -132,7 +133,7 @@ int main(int argc, char** argv) TFile* f = TFile::Open(file.c_str(), "RECREATE"); if (f == nullptr) { std::cout << "Error: File" << file << " could not be open for writing !!!" << std::endl; - return 1; + ret++; } else { std::cout << "File" << file << " being writen." << std::endl; f->WriteObject(&vect, "ccdb_object"); @@ -141,5 +142,5 @@ int main(int argc, char** argv) } else { std::cout << "No file created" << std::endl; } - return 0; + return ret; } From ff802a40280896c1fb20ba2f2efa5794ab7f2ed4 Mon Sep 17 00:00:00 2001 From: Francesco Noferini Date: Fri, 21 Feb 2025 14:49:22 +0100 Subject: [PATCH 0099/1914] adding HV maps to TOF FEELIGHT (#13973) --- .../TOFCalibration/TOFFEElightConfig.h | 16 ++++++++++++-- .../calibration/src/TOFCalibrationLinkDef.h | 1 + .../TOF/calibration/src/TOFFEElightReader.cxx | 22 +++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/Detectors/TOF/calibration/include/TOFCalibration/TOFFEElightConfig.h b/Detectors/TOF/calibration/include/TOFCalibration/TOFFEElightConfig.h index 4706c29570288..49ed9e456f3e6 100644 --- a/Detectors/TOF/calibration/include/TOFCalibration/TOFFEElightConfig.h +++ b/Detectors/TOF/calibration/include/TOFCalibration/TOFFEElightConfig.h @@ -49,6 +49,16 @@ struct TOFFEEtriggerConfig { //_____________________________________________________________________________ +struct TOFFEEmapHVConfig { + + unsigned int mHVstat[Geo::NPLATES]; // 1 bit per strip status inside 5 modules + TOFFEEmapHVConfig() = default; + + ClassDefNV(TOFFEEmapHVConfig, 1); +}; + +//_____________________________________________________________________________ + struct TOFFEElightConfig { static constexpr int NCHANNELS = 172800; @@ -61,11 +71,13 @@ struct TOFFEElightConfig { // std::array mChannelConfig; TOFFEEchannelConfig mChannelConfig[Geo::kNCrate][Geo::kNTRM - 2][Geo::kNChain][Geo::kNTdc][Geo::kNCh]; // in O2, the number of TRMs is 12, but in the FEE world it is 10 
TOFFEEtriggerConfig mTriggerConfig[NTRIGGERMAPS]; + TOFFEEmapHVConfig mHVConfig[Geo::NSECTORS]; TOFFEElightConfig() = default; const TOFFEEchannelConfig* getChannelConfig(int icrate, int itrm, int ichain, int itdc, int ich) const; const TOFFEEtriggerConfig* getTriggerConfig(int idx) const { return idx < NTRIGGERMAPS ? &mTriggerConfig[idx] : nullptr; } - - ClassDefNV(TOFFEElightConfig, 1); + const TOFFEEmapHVConfig* getHVConfig(int isector) const { return (isector < Geo::NSECTORS) ? &mHVConfig[isector] : nullptr; } + unsigned int getHVConfig(int isector, int iplate) const { return (isector < Geo::NSECTORS && iplate < Geo::NPLATES) ? mHVConfig[isector].mHVstat[iplate] : 0; } + ClassDefNV(TOFFEElightConfig, 2); }; } // namespace tof diff --git a/Detectors/TOF/calibration/src/TOFCalibrationLinkDef.h b/Detectors/TOF/calibration/src/TOFCalibrationLinkDef.h index 68380fead30fc..27d24fd187ad2 100644 --- a/Detectors/TOF/calibration/src/TOFCalibrationLinkDef.h +++ b/Detectors/TOF/calibration/src/TOFCalibrationLinkDef.h @@ -44,6 +44,7 @@ #pragma link C++ struct TOFFEEchannelConfig + ; #pragma link C++ struct TOFFEEtriggerConfig + ; +#pragma link C++ struct TOFFEEmapHVConfig + ; #pragma link C++ struct TOFFEElightConfig + ; #pragma link C++ struct TOFFEElightReader + ; diff --git a/Detectors/TOF/calibration/src/TOFFEElightReader.cxx b/Detectors/TOF/calibration/src/TOFFEElightReader.cxx index 4d7fa786e6e25..9f82d787a78f0 100644 --- a/Detectors/TOF/calibration/src/TOFFEElightReader.cxx +++ b/Detectors/TOF/calibration/src/TOFFEElightReader.cxx @@ -93,6 +93,28 @@ int TOFFEElightReader::parseFEElightConfig(bool verbose) } } + const int istripInPlate[Geo::NSECTORS] = {Geo::NSTRIPC, Geo::NSTRIPB, Geo::NSTRIPA, Geo::NSTRIPB, Geo::NSTRIPC}; + const int channelInSector = Geo::NPADS * Geo::NSTRIPXSECTOR; + for (int isector = 0; isector < Geo::NSECTORS; isector++) { + int nstripInPrevPlates = 0; + for (int iplate = 0; iplate < Geo::NPLATES; iplate++) { + unsigned int mask = 
mFEElightConfig->getHVConfig(isector, iplate); + for (int istrip = 0; istrip < istripInPlate[iplate]; istrip++) { + bool isActive = mask & 1; // check first bit/current_strip + mask /= 2; // move to the next bit/strip + + if (!isActive) { // switch off all channels in this strip + int index0 = isector * channelInSector + (nstripInPrevPlates + istrip) * Geo::NPADS; + int indexF = index0 + Geo::NPADS; + for (int index = index0; index < indexF; index++) { + mFEElightInfo.mChannelEnabled[index] = 0; + } + } + } + nstripInPrevPlates += istripInPlate[iplate]; + } + } + const TOFFEEtriggerConfig* triggerConfig = nullptr; for (Int_t iddl = 0; iddl < TOFFEElightConfig::NTRIGGERMAPS; iddl++) { triggerConfig = mFEElightConfig->getTriggerConfig(iddl); From 96d683bb132451d77366998d06591efd17a431f2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 21 Feb 2025 15:05:45 +0100 Subject: [PATCH 0100/1914] Framework: adapt deprecated boost function to new method --- Framework/DataInspector/src/DISocket.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/DataInspector/src/DISocket.cxx b/Framework/DataInspector/src/DISocket.cxx index 913e42aad8d9b..d257ea1cce956 100644 --- a/Framework/DataInspector/src/DISocket.cxx +++ b/Framework/DataInspector/src/DISocket.cxx @@ -73,7 +73,7 @@ DIMessage::~DIMessage() DISocket::DISocket(const std::string& address, int port) : ioContext(), socket(ioContext) { try { - auto ip_address = boost::asio::ip::address::from_string(address); + auto ip_address = boost::asio::ip::make_address(address); socket.connect(boost::asio::ip::tcp::endpoint(ip_address, port)); } ASIO_CATCH("DISocket::DISocket") From fb5baaeca9c4315682521e50e9527b3283cf67c6 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sat, 22 Feb 2025 16:28:04 +0100 Subject: [PATCH 0101/1914] DPL: add ability to create arrow::RecordBatches directly in shared memory without allocations (#13993) --- Framework/Core/CMakeLists.txt | 1 + 
.../Core/include/Framework/EmptyFragment.h | 116 ++++++++++++++ Framework/Core/src/EmptyFragment.cxx | 151 ++++++++++++++++++ 3 files changed, 268 insertions(+) create mode 100644 Framework/Core/include/Framework/EmptyFragment.h create mode 100644 Framework/Core/src/EmptyFragment.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 103b559f642e2..c006a4135557b 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -48,6 +48,7 @@ o2_add_library(Framework src/DataProcessingStates.cxx src/DefaultsHelpers.cxx src/DomainInfoHeader.cxx + src/EmptyFragment.cxx src/ProcessingPoliciesHelpers.cxx src/ConfigParamDiscovery.cxx src/ConfigParamStore.cxx diff --git a/Framework/Core/include/Framework/EmptyFragment.h b/Framework/Core/include/Framework/EmptyFragment.h new file mode 100644 index 0000000000000..d0e86ab8e23c0 --- /dev/null +++ b/Framework/Core/include/Framework/EmptyFragment.h @@ -0,0 +1,116 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. +#ifndef O2_FRAMEWORK_DEFERREDFRAGMENT_H +#define O2_FRAMEWORK_DEFERREDFRAGMENT_H + +#include + +namespace o2::framework +{ + +// A Fragment which will create a preallocated batch in shared memory +// and fill it directly in place. +class EmptyFragment : public arrow::dataset::Fragment +{ + public: + // @a numRows is the number of rows in the final result. + // @a physical_schema the schema of the resulting batch + // @a fillers helper functions to fill the given buffer. 
+ EmptyFragment(size_t rows, + arrow::compute::Expression partition_expression, + std::shared_ptr physical_schema) + : Fragment(std::move(partition_expression), physical_schema) + { + } + + // Scanner function which returns a batch where the space is not actually used. + arrow::Result ScanBatchesAsync( + const std::shared_ptr& options) override; + + private: + /// The pointer to each allocation is an incremental number, indexing a collection to track + /// the size of each allocation. + std::shared_ptr GetPlaceholderForOp(size_t size) + { + mSizes.push_back(size); + return std::make_shared((uint8_t*)(mSizes.size() - 1), size); + } + std::vector mSizes; + size_t mRows; +}; + +/// An OutputStream which does the reading of the input buffers directly +/// on writing, if needed. Each deferred operation is encoded in the source +/// buffer by an incremental number which can be used to lookup in the @a ops +/// vector the operation to perform. +class PreallocatedOutputStream : public arrow::io::OutputStream +{ + public: + explicit PreallocatedOutputStream(std::vector& sizes, + const std::shared_ptr& buffer); + + /// \brief Create in-memory output stream with indicated capacity using a + /// memory pool + /// \param[in] initial_capacity the initial allocated internal capacity of + /// the OutputStream + /// \param[in,out] pool a MemoryPool to use for allocations + /// \return the created stream + static arrow::Result> Create( + std::vector& sizes, + int64_t initial_capacity = 4096, + arrow::MemoryPool* pool = arrow::default_memory_pool()); + + // By the time we call the destructor, the contents + // of the buffer are already moved to fairmq + // for being sent. + ~PreallocatedOutputStream() override = default; + + // Implement the OutputStream interface + + /// Close the stream, preserving the buffer (retrieve it with Finish()). 
+ arrow::Status Close() override; + [[nodiscard]] bool closed() const override; + [[nodiscard]] arrow::Result Tell() const override; + arrow::Status Write(const void* data, int64_t nbytes) override; + + /// \cond FALSE + using OutputStream::Write; + /// \endcond + + /// Close the stream and return the buffer + arrow::Result> Finish(); + + /// \brief Initialize state of OutputStream with newly allocated memory and + /// set position to 0 + /// \param[in] initial_capacity the starting allocated capacity + /// \param[in,out] pool the memory pool to use for allocations + /// \return Status + arrow::Status Reset(std::vector sizes, + int64_t initial_capacity, arrow::MemoryPool* pool); + + [[nodiscard]] int64_t capacity() const { return capacity_; } + + private: + std::vector sizes_; + PreallocatedOutputStream(); + + // Ensures there is sufficient space available to write nbytes + arrow::Status Reserve(int64_t nbytes); + + std::shared_ptr buffer_; + bool is_open_; + int64_t capacity_; + int64_t position_; + uint8_t* mutable_data_; +}; +} // namespace o2::framework + +#endif diff --git a/Framework/Core/src/EmptyFragment.cxx b/Framework/Core/src/EmptyFragment.cxx new file mode 100644 index 0000000000000..588f605fb429e --- /dev/null +++ b/Framework/Core/src/EmptyFragment.cxx @@ -0,0 +1,151 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+#include "Framework/EmptyFragment.h" +#include +#include +#include +#include + +static constexpr int64_t kBufferMinimumSize = 256; + +namespace o2::framework +{ + +// Scanner function which returns a batch where the space is not actually used. +arrow::Result EmptyFragment::ScanBatchesAsync( + const std::shared_ptr& options) +{ + auto generator = [this]() -> arrow::Future> { + std::vector> columns; + columns.reserve(this->physical_schema_->fields().size()); + + for (auto& field : this->physical_schema_->fields()) { + if (auto listType = std::dynamic_pointer_cast(field->type())) { + size_t size = mRows * listType->list_size(); + if (field->type()->field(0)->type()->byte_width() == 0) { + size /= 8; + } else { + size *= field->type()->field(0)->type()->byte_width(); + } + auto varray = std::make_shared(field->type()->field(0)->type(), mRows * listType->list_size(), GetPlaceholderForOp(size)); + columns.push_back(std::make_shared(field->type(), (int32_t)mRows, varray)); + } else { + size_t size = mRows; + if (field->type()->byte_width() == 0) { + size /= 8; + } else { + size *= field->type()->byte_width(); + } + columns.push_back(std::make_shared(field->type(), mRows, GetPlaceholderForOp(size))); + } + } + return arrow::RecordBatch::Make(physical_schema_, mRows, columns); + }; + return generator; +} + +PreallocatedOutputStream::PreallocatedOutputStream() + : is_open_(false), capacity_(0), position_(0), mutable_data_(nullptr) {} + +PreallocatedOutputStream::PreallocatedOutputStream(std::vector& sizes, + const std::shared_ptr& buffer) + : sizes_(sizes), + buffer_(buffer), + is_open_(true), + capacity_(buffer->size()), + position_(0), + mutable_data_(buffer->mutable_data()) {} + +arrow::Result> PreallocatedOutputStream::Create( + std::vector& ops, + int64_t initial_capacity, arrow::MemoryPool* pool) +{ + // ctor is private, so cannot use make_shared + auto ptr = std::shared_ptr(new PreallocatedOutputStream); + RETURN_NOT_OK(ptr->Reset(ops, initial_capacity, pool)); + 
return ptr; +} + +arrow::Status PreallocatedOutputStream::Reset(std::vector sizes, + int64_t initial_capacity, arrow::MemoryPool* pool) +{ + ARROW_ASSIGN_OR_RAISE(buffer_, AllocateResizableBuffer(initial_capacity, pool)); + sizes_ = sizes; + is_open_ = true; + capacity_ = initial_capacity; + position_ = 0; + mutable_data_ = buffer_->mutable_data(); + return arrow::Status::OK(); +} + +arrow::Status PreallocatedOutputStream::Close() +{ + if (is_open_) { + is_open_ = false; + if (position_ < capacity_) { + RETURN_NOT_OK(buffer_->Resize(position_, false)); + } + } + return arrow::Status::OK(); +} + +bool PreallocatedOutputStream::closed() const { return !is_open_; } + +arrow::Result> PreallocatedOutputStream::Finish() +{ + RETURN_NOT_OK(Close()); + buffer_->ZeroPadding(); + is_open_ = false; + return std::move(buffer_); +} + +arrow::Result PreallocatedOutputStream::Tell() const { return position_; } + +arrow::Status PreallocatedOutputStream::Write(const void* data, int64_t nbytes) +{ + if (ARROW_PREDICT_FALSE(!is_open_)) { + return arrow::Status::IOError("OutputStream is closed"); + } + if (ARROW_PREDICT_TRUE(nbytes == 0)) { + return arrow::Status::OK(); + } + if (ARROW_PREDICT_FALSE(position_ + nbytes >= capacity_)) { + RETURN_NOT_OK(Reserve(nbytes)); + } + // This is a real address which needs to be copied. Do it! + auto ref = (int64_t)data; + if (ref >= sizes_.size()) { + memcpy(mutable_data_ + position_, data, nbytes); + position_ += nbytes; + return arrow::Status::OK(); + } + + position_ += nbytes; + return arrow::Status::OK(); +} + +arrow::Status PreallocatedOutputStream::Reserve(int64_t nbytes) +{ + // Always overallocate by doubling. It seems that it is a better growth + // strategy, at least for memory_benchmark.cc. + // This may be because it helps match the allocator's allocation buckets + // more exactly. Or perhaps it hits a sweet spot in jemalloc. 
+ int64_t new_capacity = std::max(kBufferMinimumSize, capacity_); + new_capacity = position_ + nbytes; + if (new_capacity > capacity_) { + RETURN_NOT_OK(buffer_->Resize(new_capacity)); + capacity_ = new_capacity; + mutable_data_ = buffer_->mutable_data(); + } + return arrow::Status::OK(); +} + +} // namespace o2::framework From 709601af0ed2280a59d21949e793dc3f901c8c21 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 21 Feb 2025 15:06:02 +0100 Subject: [PATCH 0102/1914] GPU: Replace OpenMP parallization with TBB --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 79 ++-- GPU/GPUTracking/Base/GPUReconstruction.h | 19 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 93 ++--- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 18 +- .../Base/GPUReconstructionConvert.cxx | 122 +++--- .../Base/GPUReconstructionLibrary.cxx | 4 - .../Base/GPUReconstructionThreading.h | 60 +++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 7 +- .../Base/cuda/GPUReconstructionCUDA.cu | 4 +- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 13 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 6 +- .../Base/opencl/GPUReconstructionOCL.cxx | 4 +- GPU/GPUTracking/CMakeLists.txt | 8 +- .../GPUTPCClusterStatistics.cxx | 4 +- .../TPCClusterDecompressor.cxx | 34 +- ...andalone-cluster-dump-entropy-analysed.cxx | 4 +- GPU/GPUTracking/Definitions/GPUDefMacros.h | 6 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 7 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 12 +- .../Global/GPUChainTrackingClusterizer.cxx | 395 +++++++++--------- .../Global/GPUChainTrackingCompression.cxx | 2 +- .../Global/GPUChainTrackingSliceTracker.cxx | 194 ++++----- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 14 +- GPU/GPUTracking/SliceTracker/GPUTPCDef.h | 2 +- .../SliceTracker/GPUTPCSliceData.cxx | 4 +- .../SliceTracker/GPUTPCTracker.cxx | 2 +- .../Standalone/Benchmark/standalone.cxx | 8 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 17 +- GPU/GPUTracking/Standalone/cmake/config.cmake | 1 - 
GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 11 +- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 2 +- .../TRDTracking/GPUTRDTrackerKernels.cxx | 10 +- GPU/GPUTracking/display/CMakeLists.txt | 5 +- GPU/GPUTracking/display/GPUDisplay.cxx | 3 - GPU/GPUTracking/display/GPUDisplay.h | 2 +- .../display/helpers/GPUDisplayHelpers.cxx | 13 +- .../display/render/GPUDisplayDraw.cxx | 110 ++--- .../display/render/GPUDisplayImportEvent.cxx | 178 ++++---- .../display/shaders/GPUDisplayShaders.h | 2 +- GPU/GPUTracking/qa/GPUQA.cxx | 80 ++-- 40 files changed, 789 insertions(+), 770 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionThreading.h diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index e3522d2d7242d..481494f268494 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -23,12 +23,9 @@ #include #include -#ifdef WITH_OPENMP -#include -#endif - #include "GPUReconstruction.h" #include "GPUReconstructionIncludes.h" +#include "GPUReconstructionThreading.h" #include "GPUROOTDumpCore.h" #include "GPUConfigDump.h" #include "GPUChainTracking.h" @@ -121,17 +118,18 @@ void GPUReconstruction::GetITSTraits(std::unique_ptr* tr } } -int32_t GPUReconstruction::SetNOMPThreads(int32_t n) +void GPUReconstruction::SetNActiveThreads(int32_t n) { -#ifdef WITH_OPENMP - omp_set_num_threads(mProcessingSettings.ompThreads = std::max(1, n < 0 ? mMaxOMPThreads : std::min(n, mMaxOMPThreads))); + mActiveHostKernelThreads = std::max(1, n < 0 ? 
mMaxHostThreads : std::min(n, mMaxHostThreads)); + mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Set number of OpenMP threads to %d (%d requested)", mProcessingSettings.ompThreads, n); + GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n); } - return n > mMaxOMPThreads; -#else - return 1; -#endif +} + +int32_t GPUReconstruction::getHostThreadIndex() +{ + return std::max(0, tbb::this_task_arena::current_thread_index()); } int32_t GPUReconstruction::Init() @@ -197,6 +195,24 @@ int32_t GPUReconstruction::Init() return 0; } +namespace o2::gpu::internal +{ +static uint32_t getDefaultNThreads() +{ + const char* tbbEnv = getenv("TBB_NUM_THREADS"); + uint32_t tbbNum = tbbEnv ? atoi(tbbEnv) : 0; + if (tbbNum) { + return tbbNum; + } + const char* ompEnv = getenv("OMP_NUM_THREADS"); + uint32_t ompNum = ompEnv ? atoi(ompEnv) : 0; + if (ompNum) { + return tbbNum; + } + return tbb::info::default_concurrency(); +} +} // namespace o2::gpu::internal + int32_t GPUReconstruction::InitPhaseBeforeDevice() { if (mProcessingSettings.printSettings) { @@ -299,32 +315,37 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mMemoryScalers->rescaleMaxMem(mProcessingSettings.forceMaxMemScalers); } -#ifdef WITH_OPENMP - if (mProcessingSettings.ompThreads <= 0) { - mProcessingSettings.ompThreads = omp_get_max_threads(); - } else { - mProcessingSettings.ompAutoNThreads = false; - omp_set_num_threads(mProcessingSettings.ompThreads); + if (mProcessingSettings.nHostThreads != -1 && mProcessingSettings.ompThreads != -1) { + GPUFatal("Must not use both nHostThreads and ompThreads at the same time!"); + } else if (mProcessingSettings.ompThreads != -1) { + mProcessingSettings.nHostThreads = mProcessingSettings.ompThreads; + GPUWarning("You are using the deprecated ompThreads option, please switch to nHostThreads!"); } - if (mProcessingSettings.ompKernels) { - if 
(omp_get_max_active_levels() < 2) { - omp_set_max_active_levels(2); - } + + if (mProcessingSettings.nHostThreads <= 0) { + mProcessingSettings.nHostThreads = internal::getDefaultNThreads(); + } else { + mProcessingSettings.autoAdjustHostThreads = false; + } + mMaxHostThreads = mActiveHostKernelThreads = mProcessingSettings.nHostThreads; + if (mMaster == nullptr) { + mThreading = std::make_shared(); + mThreading->control = std::make_unique(tbb::global_control::max_allowed_parallelism, mMaxHostThreads); + mThreading->allThreads = std::make_unique(mMaxHostThreads); + mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); + } else { + mThreading = mMaster->mThreading; } -#else - mProcessingSettings.ompThreads = 1; -#endif - mMaxOMPThreads = mProcessingSettings.ompThreads; - mMaxThreads = std::max(mMaxThreads, mProcessingSettings.ompThreads); + mMaxBackendThreads = std::max(mMaxBackendThreads, mMaxHostThreads); if (IsGPU()) { mNStreams = std::max(mProcessingSettings.nStreams, 3); } if (mProcessingSettings.nTPCClustererLanes == -1) { - mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSLICES, mProcessingSettings.ompKernels ? (mProcessingSettings.ompThreads >= 4 ? std::min(mProcessingSettings.ompThreads / 2, mProcessingSettings.ompThreads >= 32 ? GPUCA_NSLICES : 4) : 1) : mProcessingSettings.ompThreads)); + mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSLICES, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSLICES : 4) : 1) : mMaxHostThreads)); } if (mProcessingSettings.overrideClusterizerFragmentLen == -1) { - mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mProcessingSettings.ompThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? 
TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; + mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; } if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSLICES) { GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 6fd00e1fda207..1fdfabb11211a 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -51,6 +51,7 @@ namespace gpu class GPUChain; struct GPUMemorySizeScalers; struct GPUReconstructionPipelineContext; +struct GPUReconstructionThreading; class GPUROOTDumpCore; namespace gpu_reconstruction_kernels @@ -206,8 +207,8 @@ class GPUReconstruction void SetOutputControl(void* ptr, size_t size); void SetInputControl(void* ptr, size_t size); GPUOutputControl& OutputControl() { return mOutputControl; } - int32_t GetMaxThreads() const { return mMaxThreads; } - int32_t SetNOMPThreads(int32_t n); + int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; } + void SetNActiveThreads(int32_t n); int32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } @@ -234,6 +235,9 @@ class GPUReconstruction double GetStatKernelTime() { return mStatKernelTime; } double GetStatWallTime() { return mStatWallTime; } + std::shared_ptr mThreading; + static int32_t getHostThreadIndex(); + protected: void AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool); void FreeRegisteredMemory(GPUMemoryResource* res); @@ -343,11 +347,12 @@ class GPUReconstruction std::shared_ptr mROOTDump; std::vector>* mOutputErrorCodes = nullptr; - int32_t mMaxThreads = 0; // Maximum number of threads that may be running, on 
CPU or GPU - int32_t mThreadId = -1; // Thread ID that is valid for the local CUDA context - int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events - int32_t mNStreams = 1; // Number of parallel GPU streams - int32_t mMaxOMPThreads = 0; // Maximum number of OMP threads + int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU + int32_t mThreadId = -1; // Thread ID that is valid for the local CUDA context + int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events + int32_t mNStreams = 1; // Number of parallel GPU streams + int32_t mMaxHostThreads = 0; // Maximum number of OMP threads + int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels // Management for GPUProcessors struct ProcessorData { diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 187792b3ba2e7..b5f9d591fd9a6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -14,6 +14,7 @@ #include "GPUReconstructionCPU.h" #include "GPUReconstructionIncludes.h" +#include "GPUReconstructionThreading.h" #include "GPUChain.h" #include "GPUTPCClusterData.h" @@ -40,13 +41,6 @@ #include #endif -#if defined(WITH_OPENMP) || defined(_OPENMP) -#include -#else -static inline int32_t omp_get_thread_num() { return 0; } -static inline int32_t omp_get_max_threads() { return 1; } -#endif - using namespace o2::gpu; using namespace o2::gpu::gpu_reconstruction_kernels; @@ -60,19 +54,21 @@ GPUReconstructionCPU::~GPUReconstructionCPU() Exit(); // Needs to be identical to GPU backend bahavior in order to avoid calling abstract methods later in the destructor } -int32_t GPUReconstructionCPUBackend::getNOMPThreads() +int32_t GPUReconstructionCPUBackend::getNKernelHostThreads(bool splitCores) { - int32_t ompThreads = 0; - if (mProcessingSettings.ompKernels == 2) { - ompThreads = 
mProcessingSettings.ompThreads / mNestedLoopOmpFactor; - if ((uint32_t)getOMPThreadNum() < mProcessingSettings.ompThreads % mNestedLoopOmpFactor) { - ompThreads++; + int32_t nThreads = 0; + if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) { + if (splitCores) { + nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; + nThreads += (uint32_t)getHostThreadIndex() < mMaxHostThreads % mNActiveThreadsOuterLoop; + } else { + nThreads = mMaxHostThreads; } - ompThreads = std::max(1, ompThreads); + nThreads = std::max(1, nThreads); } else { - ompThreads = mProcessingSettings.ompKernels ? mProcessingSettings.ompThreads : 1; + nThreads = mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1; } - return ompThreads; + return nThreads; } template @@ -88,16 +84,19 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS } uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num; for (uint32_t k = 0; k < num; k++) { - int32_t ompThreads = getNOMPThreads(); - if (ompThreads > 1) { + int32_t nThreads = getNKernelHostThreads(false); + if (nThreads > 1) { if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d ompThreads\n", ompThreads); - } - GPUCA_OPENMP(parallel for num_threads(ompThreads)) - for (uint32_t iB = 0; iB < x.nBlocks; iB++) { - typename T::GPUSharedMemory smem; - T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); + printf("Running %d Threads\n", nThreads); } + mThreading->activeThreads->execute([&] { + tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { + typename T::GPUSharedMemory smem; + for (uint32_t iB = r.begin(); iB < r.end(); iB++) { + T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); + } + }); + }); } else { for (uint32_t iB = 0; iB < x.nBlocks; iB++) { typename T::GPUSharedMemory smem; @@ -111,24 +110,20 @@ inline int32_t 
GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS template <> inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { -#ifdef WITH_OPENMP - int32_t nOMPThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNOMPThreads())); - if (nOMPThreads > 1) { - GPUCA_OPENMP(parallel num_threads(nOMPThreads)) - { - size_t threadSize = size / omp_get_num_threads(); + int32_t nnThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); + if (nnThreads > 1) { + tbb::parallel_for(0, nnThreads, [&](int iThread) { + size_t threadSize = size / nnThreads; if (threadSize % 4096) { threadSize += 4096 - threadSize % 4096; } - size_t offset = threadSize * omp_get_thread_num(); + size_t offset = threadSize * iThread; size_t mySize = std::min(threadSize, size - offset); if (mySize) { memset((char*)ptr + offset, 0, mySize); - } - } - } else -#endif - { + } // clang-format off + }, tbb::static_partitioner()); // clang-format on + } else { memset(ptr, 0, size); } return 0; @@ -213,8 +208,8 @@ int32_t GPUReconstructionCPU::InitDevice() mHostMemoryPermanent = mHostMemoryBase; ClearAllocatedMemory(); } - if (mProcessingSettings.ompKernels) { - mBlockCount = getOMPMaxThreads(); + if (mProcessingSettings.inKernelParallel) { + mBlockCount = mMaxHostThreads; } mThreadId = GetThread(); mProcShadow.mProcessorsProc = processors(); @@ -351,16 +346,6 @@ void GPUReconstructionCPU::ResetDeviceProcessorTypes() } } -int32_t GPUReconstructionCPUBackend::getOMPThreadNum() -{ - return omp_get_thread_num(); -} - -int32_t GPUReconstructionCPUBackend::getOMPMaxThreads() -{ - return omp_get_max_threads(); -} - static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, 
int32_t type, RecoStep step) @@ -402,17 +387,17 @@ uint32_t GPUReconstructionCPU::getNextTimerId() return id.fetch_add(1); } -uint32_t GPUReconstructionCPU::SetAndGetNestedLoopOmpFactor(bool condition, uint32_t max) +uint32_t GPUReconstructionCPU::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) { - if (condition && mProcessingSettings.ompKernels != 1) { - mNestedLoopOmpFactor = mProcessingSettings.ompKernels == 2 ? std::min(max, mProcessingSettings.ompThreads) : mProcessingSettings.ompThreads; + if (condition && mProcessingSettings.inKernelParallel != 1) { + mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; } else { - mNestedLoopOmpFactor = 1; + mNActiveThreadsOuterLoop = 1; } if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d OMP threads in outer loop\n", mNestedLoopOmpFactor); + printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); } - return mNestedLoopOmpFactor; + return mNActiveThreadsOuterLoop; } void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 7903be44907df..f82f481df6a63 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -43,10 +43,8 @@ class GPUReconstructionCPUBackend : public GPUReconstruction int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... 
args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); - uint32_t mNestedLoopOmpFactor = 1; - static int32_t getOMPThreadNum(); - static int32_t getOMPMaxThreads(); - int32_t getNOMPThreads(); + uint32_t mNActiveThreadsOuterLoop = 1; + int32_t getNKernelHostThreads(bool splitCores); }; class GPUReconstructionCPU : public GPUReconstructionKernels @@ -81,8 +79,8 @@ class GPUReconstructionCPU : public GPUReconstructionKernels= 1) { - t = &getKernelTimer(myStep, !IsGPU() || cpuFallback ? getOMPThreadNum() : stream); - if ((!mProcessingSettings.deviceTimers || !IsGPU() || cpuFallback) && (mNestedLoopOmpFactor < 2 || getOMPThreadNum() == 0)) { + t = &getKernelTimer(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream); + if ((!mProcessingSettings.deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) { t->Start(); } } @@ -287,11 +285,11 @@ HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num) static int32_t id = getNextTimerId(); timerMeta* timer = getTimerById(id); if (timer == nullptr) { - int32_t max = std::max({getOMPMaxThreads(), mProcessingSettings.nStreams}); + int32_t max = std::max({mMaxHostThreads, mProcessingSettings.nStreams}); timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); } if (num == -1) { - num = getOMPThreadNum(); + num = getHostThreadIndex(); } if (num < 0 || num >= timer->num) { throw std::runtime_error("Invalid timer requested"); diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index ca1c46766b9da..629d23075d9bc 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -40,6 +40,8 @@ #include "TPCBase/CRU.h" #include "DetectorsRaw/RDHUtils.h" +#include + using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; @@ -1306,6 +1308,17 @@ size_t 
zsEncoderRun::compare(std::vector* buffer, std::vector void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr* outBuffer, uint32_t* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int32_t version, bool verify, float threshold, bool padding, std::function&)> digitsFilter) { @@ -1316,67 +1329,68 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr buffer[NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - uint32_t totalPages = 0; - size_t totalSize = 0; - size_t nErrors = 0; - size_t digitsInput = 0; - size_t digitsEncoded = 0; - // clang-format off - GPUCA_OPENMP(parallel for reduction(+ : totalPages, nErrors, totalSize, digitsInput, digitsEncoded)) - // clang-format on - for (uint32_t i = 0; i < NSLICES; i++) { - std::vector tmpBuffer; - digitsInput += ZSEncoderGetNDigits(in, i); - tmpBuffer.resize(ZSEncoderGetNDigits(in, i)); - if (threshold > 0.f && !digitsFilter) { - auto it = std::copy_if(ZSEncoderGetDigits(in, i), ZSEncoderGetDigits(in, i) + ZSEncoderGetNDigits(in, i), tmpBuffer.begin(), [threshold](auto& v) { return v.getChargeFloat() >= threshold; }); - tmpBuffer.resize(std::distance(tmpBuffer.begin(), it)); - } else { - std::copy(ZSEncoderGetDigits(in, i), ZSEncoderGetDigits(in, i) + ZSEncoderGetNDigits(in, i), tmpBuffer.begin()); - } - - if (digitsFilter) { - digitsFilter(tmpBuffer); - if (threshold > 0.f) { - std::vector tmpBuffer2 = std::move(tmpBuffer); - tmpBuffer = std::vector(tmpBuffer2.size()); - auto it = std::copy_if(tmpBuffer2.begin(), tmpBuffer2.end(), tmpBuffer.begin(), [threshold](auto& v) { return v.getChargeFloat() >= threshold; }); + auto reduced = tbb::parallel_reduce(tbb::blocked_range(0, NSLICES), o2::gpu::internal::tmpReductionResult(), [&](const auto range, auto red) { + for (uint32_t i = range.begin(); i < range.end(); i++) { + std::vector tmpBuffer; + red.digitsInput += ZSEncoderGetNDigits(in, i); + tmpBuffer.resize(ZSEncoderGetNDigits(in, i)); + if 
(threshold > 0.f && !digitsFilter) { + auto it = std::copy_if(ZSEncoderGetDigits(in, i), ZSEncoderGetDigits(in, i) + ZSEncoderGetNDigits(in, i), tmpBuffer.begin(), [threshold](auto& v) { return v.getChargeFloat() >= threshold; }); tmpBuffer.resize(std::distance(tmpBuffer.begin(), it)); + } else { + std::copy(ZSEncoderGetDigits(in, i), ZSEncoderGetDigits(in, i) + ZSEncoderGetNDigits(in, i), tmpBuffer.begin()); } - } - digitsEncoded += tmpBuffer.size(); - - auto runZS = [&](auto& encoder) { - encoder.zsVersion = version; - encoder.init(); - totalPages += encoder.run(buffer[i], tmpBuffer, &totalSize); - if (verify) { - nErrors += encoder.compare(buffer[i], tmpBuffer); // Verification + + if (digitsFilter) { + digitsFilter(tmpBuffer); + if (threshold > 0.f) { + std::vector tmpBuffer2 = std::move(tmpBuffer); + tmpBuffer = std::vector(tmpBuffer2.size()); + auto it = std::copy_if(tmpBuffer2.begin(), tmpBuffer2.end(), tmpBuffer.begin(), [threshold](auto& v) { return v.getChargeFloat() >= threshold; }); + tmpBuffer.resize(std::distance(tmpBuffer.begin(), it)); + } } - }; + red.digitsEncoded += tmpBuffer.size(); + + auto runZS = [&](auto& encoder) { + encoder.zsVersion = version; + encoder.init(); + red.totalPages += encoder.run(buffer[i], tmpBuffer, &red.totalSize); + if (verify) { + red.nErrors += encoder.compare(buffer[i], tmpBuffer); // Verification + } + }; - if (version >= ZSVersion::ZSVersionRowBased10BitADC && version <= ZSVersion::ZSVersionRowBased12BitADC) { - zsEncoderRun enc{{{.iSector = i, .raw = raw, .ir = ir, .param = ¶m, .padding = padding}}}; - runZS(enc); - } else if (version >= ZSVersion::ZSVersionLinkBasedWithMeta && version <= ZSVersion::ZSVersionDenseLinkBasedV2) { -#ifdef GPUCA_O2_LIB - if (version == ZSVersion::ZSVersionLinkBasedWithMeta) { - zsEncoderRun enc{{{{.iSector = i, .raw = raw, .ir = ir, .param = ¶m, .padding = padding}}}}; + if (version >= ZSVersion::ZSVersionRowBased10BitADC && version <= ZSVersion::ZSVersionRowBased12BitADC) { + 
zsEncoderRun enc{{{.iSector = i, .raw = raw, .ir = ir, .param = ¶m, .padding = padding}}}; runZS(enc); - } else if (version >= ZSVersion::ZSVersionDenseLinkBased && version <= ZSVersion::ZSVersionDenseLinkBasedV2) { - zsEncoderRun enc{{{{.iSector = i, .raw = raw, .ir = ir, .param = ¶m, .padding = padding}}}}; - runZS(enc); - } + } else if (version >= ZSVersion::ZSVersionLinkBasedWithMeta && version <= ZSVersion::ZSVersionDenseLinkBasedV2) { +#ifdef GPUCA_O2_LIB + if (version == ZSVersion::ZSVersionLinkBasedWithMeta) { + zsEncoderRun enc{{{{.iSector = i, .raw = raw, .ir = ir, .param = ¶m, .padding = padding}}}}; + runZS(enc); + } else if (version >= ZSVersion::ZSVersionDenseLinkBased && version <= ZSVersion::ZSVersionDenseLinkBasedV2) { + zsEncoderRun enc{{{{.iSector = i, .raw = raw, .ir = ir, .param = ¶m, .padding = padding}}}}; + runZS(enc); + } #else - throw std::runtime_error("Link based ZS encoding not supported in standalone build"); + throw std::runtime_error("Link based ZS encoding not supported in standalone build"); #endif - } else { - throw std::runtime_error("Invalid ZS version "s + std::to_string(version) + ", cannot decode"s); + } else { + throw std::runtime_error("Invalid ZS version "s + std::to_string(version) + ", cannot decode"s); + } } - } + return red; }, [&](const auto& red1, const auto& red2) { + auto red = red1; + red.totalPages += red2.totalPages; + red.totalSize += red2.totalSize; + red.nErrors += red2.nErrors; + red.digitsInput += red2.digitsInput; + red.digitsEncoded += red2.digitsEncoded; + return red; }); if (outBuffer) { - outBuffer->reset(new uint64_t[totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(uint64_t)]); + outBuffer->reset(new uint64_t[reduced.totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(uint64_t)]); uint64_t offset = 0; for (uint32_t i = 0; i < NSLICES; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { @@ -1386,12 +1400,12 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr 
#endif -#ifdef WITH_OPENMP -#include -#endif - #include "GPUReconstruction.h" #include "GPUReconstructionAvailableBackends.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionThreading.h b/GPU/GPUTracking/Base/GPUReconstructionThreading.h new file mode 100644 index 0000000000000..374c7545e65da --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionThreading.h @@ -0,0 +1,60 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionThreading.h +/// \author David Rohr + +#if !defined(GPURECONSTRUCTIONTHREADING_H) +#define GPURECONSTRUCTIONTHREADING_H + +#if !defined(GPUCA_GPUCODE) +#include "GPUReconstruction.h" + +#include +#include + +namespace o2::gpu +{ + +struct GPUReconstructionThreading { + std::unique_ptr control; + std::unique_ptr allThreads; + std::unique_ptr activeThreads; + std::unique_ptr outerThreads; +}; + +} // namespace o2::gpu + +#endif + +#define GPUCA_TBB_KERNEL_LOOP_HOST(rec, vartype, varname, iEnd, code) \ + for (vartype varname = get_global_id(0); varname < iEnd; varname += get_global_size(0)) { \ + code \ + } + +#ifdef GPUCA_GPUCODE +#define GPUCA_TBB_KERNEL_LOOP GPUCA_TBB_KERNEL_LOOP_HOST +#else +#define GPUCA_TBB_KERNEL_LOOP(rec, vartype, varname, iEnd, code) \ + if (!rec.GetProcessingSettings().inKernelParallel) { \ + rec.mThreading->activeThreads->execute([&] { \ + tbb::parallel_for(tbb::blocked_range(get_global_id(0), iEnd, get_global_size(0)), [&](const tbb::blocked_range& _r_internal) { \ + for (vartype varname = 
_r_internal.begin(); varname < _r_internal.end(); varname += get_global_size(0)) { \ + code \ + } \ + }); \ + }); \ + } else { \ + GPUCA_TBB_KERNEL_LOOP_HOST(rec, vartype, varname, iEnd, code) \ + } +#endif + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index bab5ff912c575..5bc1e6e4e6783 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -136,12 +136,7 @@ set_target_cuda_arch(${targetName}) #target_link_options(${targetName} PRIVATE "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/version_script.ld") #set_target_properties(${targetName} PROPERTIES LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/version_script.ld) -if(OpenMP_CXX_FOUND) - # Must be private, depending libraries might be compiled by compiler not understanding -fopenmp - target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) - target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fopenmp") -endif() +target_link_libraries(${targetName} PRIVATE TBB::tbb) # Special handling of GPU kernels in case of per-kernel compilation / RDC if(NOT DEFINED GPUCA_CUDA_COMPILE_MODE) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 26cbc282b6fc2..20ce23b578d84 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -265,7 +265,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() throw std::runtime_error("Invalid warp size on GPU"); } mBlockCount = deviceProp.multiProcessorCount; - mMaxThreads = std::max(mMaxThreads, deviceProp.maxThreadsPerBlock * mBlockCount); + mMaxBackendThreads = std::max(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount); #ifndef __HIPCC__ // CUDA mWarpSize = 32; #else // HIP @@ -409,7 +409,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mDeviceId = 
master->mDeviceId; mBlockCount = master->mBlockCount; mWarpSize = master->mWarpSize; - mMaxThreads = master->mMaxThreads; + mMaxBackendThreads = master->mMaxBackendThreads; mDeviceName = master->mDeviceName; mDeviceConstantMem = master->mDeviceConstantMem; mDeviceConstantMemList.resize(master->mDeviceConstantMemList.size()); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 62ad57ae3497a..3bd3afc0ffc23 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -13,9 +13,6 @@ /// \author David Rohr #define GPUCA_GPUCODE_HOSTONLY -#ifdef WITH_OPENMP -#include -#endif #include "GPUReconstructionCUDA.h" #include "GPUParamRTC.h" #include "GPUDefMacros.h" @@ -25,6 +22,7 @@ #include #include +#include using namespace o2::gpu; #include "utils/qGetLdBinarySymbols.h" @@ -153,10 +151,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } HighResTimer rtcTimer; rtcTimer.ResetStart(); -#ifdef WITH_OPENMP -#pragma omp parallel for schedule(dynamic, 1) -#endif - for (uint32_t i = 0; i < nCompile; i++) { + tbb::parallel_for(0, nCompile, [&](auto i) { if (mProcessingSettings.debugLevel >= 3) { printf("Compiling %s\n", (filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); } @@ -190,8 +185,8 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) printf("Source code file: %s", filename.c_str()); } throw std::runtime_error("Error during CUDA compilation"); - } - } + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on if (mProcessingSettings.debugLevel >= 0) { GPUInfo("RTC Compilation finished (%f seconds)", rtcTimer.GetCurrentElapsedTime()); } diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index d34777f0bef3e..10fbfa8d21ddf 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ 
b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -189,11 +189,7 @@ endif() target_link_libraries(${targetName} PRIVATE hip::host hip::device hip::hipcub roc::rocthrust) set_target_hip_arch(${targetName}) -if(OpenMP_CXX_FOUND) - # Must be private, depending libraries might be compiled by compiler not understanding -fopenmp - target_compile_definitions(${MODULE}_CXX PRIVATE WITH_OPENMP) - target_link_libraries(${MODULE}_CXX PRIVATE OpenMP::OpenMP_CXX) -endif() +target_link_libraries(${MODULE}_CXX PRIVATE TBB::tbb) # Special handling of GPU kernels in case of per-kernel compilation / RDC if(NOT DEFINED GPUCA_HIP_COMPILE_MODE) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index ed985e31ab1b0..30a8fc193774b 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -267,7 +267,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mDeviceName += " (OpenCL)"; mBlockCount = shaders; mWarpSize = 32; - mMaxThreads = std::max(mMaxThreads, maxWorkGroup * mBlockCount); + mMaxBackendThreads = std::max(mMaxBackendThreads, maxWorkGroup * mBlockCount); mInternals->context = clCreateContext(nullptr, ContextForAllPlatforms() ? count : 1, ContextForAllPlatforms() ? 
mInternals->devices.get() : &mInternals->device, nullptr, nullptr, &ocl_error); if (GPUFailedMsgI(ocl_error)) { @@ -380,7 +380,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUReconstructionOCL* master = dynamic_cast(mMaster); mBlockCount = master->mBlockCount; mWarpSize = master->mWarpSize; - mMaxThreads = master->mMaxThreads; + mMaxBackendThreads = master->mMaxBackendThreads; mDeviceName = master->mDeviceName; mDeviceConstantMem = master->mDeviceConstantMem; mInternals = master->mInternals; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index a5d335931af37..3e738fb6df5cb 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -105,6 +105,7 @@ set(HDRS_INSTALL Base/GPUParam.inc Base/GPUParamRTC.h Base/GPUReconstructionIncludes.h + Base/GPUReconstructionThreading.h Base/GPUReconstructionIncludesITS.h Base/GPUReconstructionKernelMacros.h Base/GPUReconstructionKernels.h @@ -378,12 +379,7 @@ if(GPUCA_QA) target_compile_definitions(${targetName} PRIVATE GPUCA_BUILD_QA) endif() -if(OpenMP_CXX_FOUND) - message(STATUS "GPU: Using OpenMP: ${OpenMP_CXX_SPEC_DATE}") - # Must be private, depending libraries might be compiled by compiler not understanding -fopenmp - target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) - target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) -endif() +target_link_libraries(${targetName} PRIVATE TBB::tbb) target_compile_options(${targetName} PRIVATE -Wno-instantiation-after-specialization) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx index e8f8de7658b28..794f4cb485f14 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx @@ -69,7 +69,7 @@ INode* BuildTree(const double* frequencies, uint32_t UniqueSymbols) { std::priority_queue, NodeCmp> trees; - for (uint32_t i = 0; i < 
UniqueSymbols; ++i) { + for (uint32_t i = 0; i < UniqueSymbols; i++) { if (frequencies[i] != 0) { trees.push(new LeafNode(frequencies[i], i)); } @@ -256,7 +256,7 @@ float GPUTPCClusterStatistics::Analyze(std::vector& p, const char* name GenerateCodes(root, HuffCode(), codes); delete root; - for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); ++it) { + for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); it++) { huffmanSize += it->second.size() * prob[it->first]; } diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx index 22641774cd9ee..e3b8965c3e27b 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx @@ -22,6 +22,8 @@ #include #include "TPCClusterDecompressionCore.inc" +#include + using namespace o2::gpu; using namespace o2::tpc; @@ -51,23 +53,24 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom for (uint32_t i = 0; i < NSLICES * GPUCA_ROW_COUNT; i++) { (&locks[0][0])[i].clear(); } - uint32_t offset = 0, lasti = 0; const uint32_t maxTime = param.continuousMaxTimeBin > 0 ? 
((param.continuousMaxTimeBin + 1) * ClusterNative::scaleTimePacked - 1) : TPC_MAX_TIME_BIN_TRIGGERED; - GPUCA_OPENMP(parallel for firstprivate(offset, lasti)) - for (uint32_t i = 0; i < clustersCompressed->nTracks; i++) { - if (i < lasti) { - offset = lasti = 0; // dynamic OMP scheduling, need to reinitialize offset - } - while (lasti < i) { - offset += clustersCompressed->nTrackClusters[lasti++]; + tbb::parallel_for(tbb::blocked_range(0, clustersCompressed->nTracks), [&](const tbb::blocked_range& range) { + uint32_t offset = 0, lasti = 0; + for (uint32_t i = range.begin(); i < range.end(); i++) { + if (i < lasti) { + offset = lasti = 0; // dynamic scheduling order, need to reinitialize offset + } + while (lasti < i) { + offset += clustersCompressed->nTrackClusters[lasti++]; + } + lasti++; + TPCClusterDecompressionCore::decompressTrack(*clustersCompressed, param, maxTime, i, offset, clusters, locks); } - lasti++; - TPCClusterDecompressionCore::decompressTrack(*clustersCompressed, param, maxTime, i, offset, clusters, locks); - } + }); size_t nTotalClusters = clustersCompressed->nAttachedClusters + clustersCompressed->nUnattachedClusters; ClusterNative* clusterBuffer = allocator(nTotalClusters); uint32_t offsets[NSLICES][GPUCA_ROW_COUNT]; - offset = 0; + uint32_t offset = 0; uint32_t decodedAttachedClusters = 0; for (uint32_t i = 0; i < NSLICES; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { @@ -82,8 +85,7 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom } clustersNative.clustersLinear = clusterBuffer; clustersNative.setOffsetPtrs(); - GPUCA_OPENMP(parallel for) - for (uint32_t i = 0; i < NSLICES; i++) { + tbb::parallel_for(0, NSLICES, [&](auto i) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { ClusterNative* buffer = &clusterBuffer[clustersNative.clusterOffset[i][j]]; if (clusters[i][j].size()) { @@ -108,7 +110,7 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom if (deterministicRec) { 
std::sort(buffer, buffer + clustersNative.nClusters[i][j]); } - } - } + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on return 0; } diff --git a/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx b/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx index 0d7ca5c6209a4..9cb49bf4c7ef5 100644 --- a/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx +++ b/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx @@ -166,7 +166,7 @@ INode* BuildTree(const double* frequencies, uint32_t UniqueSymbols) { std::priority_queue, NodeCmp> trees; - for (int32_t i = 0; i < UniqueSymbols; ++i) { + for (int32_t i = 0; i < UniqueSymbols; i++) { if (frequencies[i] != 0) { trees.push(new LeafNode(frequencies[i], i)); } @@ -621,7 +621,7 @@ int32_t main(int argc, char** argv) GenerateCodes(root, HuffCode(), codes); delete root; - for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); ++it) { + for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); it++) { huffmanSize += it->second.size() * probabilities[i][it->first]; } } diff --git a/GPU/GPUTracking/Definitions/GPUDefMacros.h b/GPU/GPUTracking/Definitions/GPUDefMacros.h index b47401c9f05aa..caf2d1670f84e 100644 --- a/GPU/GPUTracking/Definitions/GPUDefMacros.h +++ b/GPU/GPUTracking/Definitions/GPUDefMacros.h @@ -50,11 +50,5 @@ #define GPUCA_UNROLL(...) #endif -#if !defined(WITH_OPENMP) || defined(GPUCA_GPUCODE_DEVICE) -#define GPUCA_OPENMP(...) -#else -#define GPUCA_OPENMP(...) 
_Pragma(GPUCA_M_STR(omp __VA_ARGS__)) -#endif - #endif // clang-format on diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 10bb4797a1c15..905622de26ba9 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -250,9 +250,10 @@ AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conserv AddOption(tpcInputWithClusterRejection, uint8_t, 0, "", 0, "Indicate whether the TPC input is CTF data with cluster rejection, to tune buffer estimations") AddOption(forceMaxMemScalers, uint64_t, 0, "", 0, "Force using the maximum values for all buffers, Set a value n > 1 to rescale all maximums to a memory size of n") AddOption(registerStandaloneInputMemory, bool, false, "registerInputMemory", 0, "Automatically register input memory buffers for the GPU") -AddOption(ompThreads, int32_t, -1, "omp", 't', "Number of OMP threads to run (-1: all)", min(-1), message("Using %s OMP threads")) -AddOption(ompKernels, uint8_t, 2, "", 0, "Parallelize with OMP inside kernels instead of over slices, 2 for nested parallelization over TPC sectors and inside kernels") -AddOption(ompAutoNThreads, bool, true, "", 0, "Auto-adjust number of OMP threads, decreasing the number for small input data") +AddOption(nHostThreads, int32_t, -1, "nThreads", 't', "Number of host threads to run (-1: all)", min(-1), message("Using %s CPU threads")) +AddOption(ompThreads, int32_t, -1, "", 0, "Deprecated synonym for nHostThreads") +AddOption(inKernelParallel, uint8_t, 2, "", 0, "Parallelize with multi-threading inside kernels on the host instead of over TPC sectors, 2 for nested parallelization over TPC sectors and inside kernels") +AddOption(autoAdjustHostThreads, bool, true, "", 0, "Auto-adjust number of OMP threads, decreasing the number for small input data") AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues") AddOption(nTPCClustererLanes, int8_t, 
-1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)") AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 889e12c258cb4..66f37e1122832 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -673,8 +673,8 @@ int32_t GPUChainTracking::RunChain() if ((((GetRecoSteps() & RecoStep::TRDTracking) && !GetProcessingSettings().trdTrackModelO2 && !GetProcessingSettings().willProvideO2PropagatorLate) || ((GetRecoSteps() & RecoStep::Refit) && !param().rec.trackingRefitGPUModel)) && processors()->calibObjects.o2Propagator == nullptr) { GPUFatal("Cannot run TRD tracking or refit with o2 track model without o2 propagator"); // This check must happen during run, since o2::Propagator cannot be available during init } - if (GetProcessingSettings().ompAutoNThreads && !mRec->IsGPU()) { - mRec->SetNOMPThreads(-1); + if (GetProcessingSettings().autoAdjustHostThreads && !mRec->IsGPU()) { + mRec->SetNActiveThreads(-1); } const auto threadContext = GetThreadContext(); if (GetProcessingSettings().runCompressionStatistics && mCompressionStatistics == nullptr) { @@ -717,8 +717,8 @@ int32_t GPUChainTracking::RunChain() } } - if (GetProcessingSettings().ompAutoNThreads && !mRec->IsGPU() && mIOPtrs.clustersNative) { - mRec->SetNOMPThreads(mIOPtrs.clustersNative->nClustersTotal / 5000); + if (GetProcessingSettings().autoAdjustHostThreads && !mRec->IsGPU() && mIOPtrs.clustersNative) { + mRec->SetNActiveThreads(mIOPtrs.clustersNative->nClustersTotal / 5000); } if (mIOPtrs.clustersNative && runRecoStep(RecoStep::TPCConversion, &GPUChainTracking::ConvertNativeToClusterData)) { @@ -768,8 +768,8 @@ int32_t GPUChainTracking::RunChain() SynchronizeStream(OutputStream()); } - if (GetProcessingSettings().ompAutoNThreads && !mRec->IsGPU()) 
{ - mRec->SetNOMPThreads(-1); + if (GetProcessingSettings().autoAdjustHostThreads && !mRec->IsGPU()) { + mRec->SetNActiveThreads(-1); } int32_t retVal = 0; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index bec61d6b76f1e..6ca645808c5bd 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -35,6 +35,8 @@ #include "utils/strtag.h" +#include + #ifndef GPUCA_NO_VC #include #endif @@ -576,8 +578,8 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (RunTPCClusterizer_prepare(mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer)) { return 1; } - if (GetProcessingSettings().ompAutoNThreads && !doGPU) { - mRec->SetNOMPThreads(mRec->MemoryScalers()->nTPCdigits / 20000); + if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) { + mRec->SetNActiveThreads(mRec->MemoryScalers()->nTPCdigits / 20000); } mRec->MemoryScalers()->nTPCHits = mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits); @@ -674,229 +676,232 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSliceBase, iSliceBase + GetProcessingSettings().nTPCClustererLanes - 1); } - GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, GetProcessingSettings().nTPCClustererLanes))) - for (int32_t lane = 0; lane < maxLane; lane++) { - if (doGPU && fragment.index != 0) { - SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished - } - - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; - clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0; - clusterer.mPmemory->fragment = fragment; - - if (mIOPtrs.tpcPackedDigits) { - bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS; - bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels; - auto* inDigits = mIOPtrs.tpcPackedDigits; - size_t numDigits = inDigits->nTPCDigits[iSlice]; - if (setDigitsOnGPU) { - GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSlice], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); - } - if (setDigitsOnHost) { - clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSlice]); // TODO: Needs fixing (invalid const cast) + tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { + tbb::parallel_for(0, maxLane, [&](auto lane) { + if (doGPU && fragment.index != 0) { + SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished } - clusterer.mPmemory->counters.nDigits = numDigits; - } - if (mIOPtrs.tpcZS) { - if (mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSlice].first; - clusterer.mPmemory->counters.nPagesSubslice = mCFContext->nextPos[iSlice].second; - } else { - clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubslice = 0; + uint32_t iSlice = iSliceBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; + clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0; + clusterer.mPmemory->fragment = fragment; + + if (mIOPtrs.tpcPackedDigits) { + bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS; + bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels; + auto* inDigits = mIOPtrs.tpcPackedDigits; + size_t numDigits = inDigits->nTPCDigits[iSlice]; + if (setDigitsOnGPU) { + GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSlice], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); + } + if (setDigitsOnHost) { + clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSlice]); // TODO: Needs fixing (invalid const cast) + } + clusterer.mPmemory->counters.nDigits = numDigits; } - } - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - - using ChargeMapType = decltype(*clustererShadow.mPchargeMap); - using PeakMapType = decltype(*clustererShadow.mPpeakMap); - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
- runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); - if (fragment.index == 0) { - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); - } - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); - if (doGPU) { - if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane); - SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane); + if (mIOPtrs.tpcZS) { + if (mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { + clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSlice].first; + clusterer.mPmemory->counters.nPagesSubslice = mCFContext->nextPos[iSlice].second; + } else { + clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubslice = 0; + } } - SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory - } + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + + using ChargeMapType = decltype(*clustererShadow.mPchargeMap); + using PeakMapType = decltype(*clustererShadow.mPpeakMap); + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
+ runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); + if (fragment.index == 0) { + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); + } + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); - if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSlice] || mCFContext->zsVersion == -1)) { - clusterer.mPmemory->counters.nPositions = 0; - continue; - } - if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice] == 0) { - clusterer.mPmemory->counters.nPositions = 0; - continue; - } + if (doGPU) { + if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane); + SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane); + } + SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory + } - if (propagateMCLabels && fragment.index == 0) { - clusterer.PrepareMC(); - clusterer.mPinputLabels = digitsMC->v[iSlice]; - if (clusterer.mPinputLabels == nullptr) { - GPUFatal("MC label container missing, sector %d", iSlice); + if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSlice] || mCFContext->zsVersion == -1)) { + clusterer.mPmemory->counters.nPositions = 0; + return; } - if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]) { - GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice], (int32_t)clusterer.mPinputLabels->getIndexedSize()); + if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice] == 0) { + 
clusterer.mPmemory->counters.nPositions = 0; + return; } - } - if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSlice) { - if (not mIOPtrs.tpcZS) { - runKernel({GetGrid(1, lane), {iSlice}}, mIOPtrs.tpcZS == nullptr); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } else if (propagateMCLabels) { - runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, mIOPtrs.tpcZS == nullptr); - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + if (propagateMCLabels && fragment.index == 0) { + clusterer.PrepareMC(); + clusterer.mPinputLabels = digitsMC->v[iSlice]; + if (clusterer.mPinputLabels == nullptr) { + GPUFatal("MC label container missing, sector %d", iSlice); + } + if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]) { + GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice], (int32_t)clusterer.mPinputLabels->getIndexedSize()); + } } - } - if (mIOPtrs.tpcZS) { - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) - : 0; - uint32_t nBlocks = doGPU ? 
clusterer.mPmemory->counters.nPagesSubslice : GPUTrackingInOutZS::NENDPOINTS; + if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSlice) { + if (not mIOPtrs.tpcZS) { + runKernel({GetGrid(1, lane), {iSlice}}, mIOPtrs.tpcZS == nullptr); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + } else if (propagateMCLabels) { + runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, mIOPtrs.tpcZS == nullptr); + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + } + } - (void)tpcTimeBinCut; // TODO: To be used in decoding kernels - switch (mCFContext->zsVersion) { - default: - GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion); - break; - case ZSVersionRowBased10BitADC: - case ZSVersionRowBased12BitADC: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - case ZSVersionLinkBasedWithMeta: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - case ZSVersionDenseLinkBased: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; + if (mIOPtrs.tpcZS) { + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) : 0); + uint32_t nBlocks = doGPU ? 
clusterer.mPmemory->counters.nPagesSubslice : GPUTrackingInOutZS::NENDPOINTS; + + (void)tpcTimeBinCut; // TODO: To be used in decoding kernels + switch (mCFContext->zsVersion) { + default: + GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion); + break; + case ZSVersionRowBased10BitADC: + case ZSVersionRowBased12BitADC: + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + break; + case ZSVersionLinkBasedWithMeta: + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + break; + case ZSVersionDenseLinkBased: + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + break; + } + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on + }); + tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { + tbb::parallel_for(0, maxLane, [&](auto lane) { + uint32_t iSlice = iSliceBase + lane; + if (doGPU) { + SynchronizeStream(lane); } - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } - } - GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, GetProcessingSettings().nTPCClustererLanes))) - for (int32_t lane = 0; lane < maxLane; lane++) { - uint32_t iSlice = iSliceBase + lane; - if (doGPU) { - SynchronizeStream(lane); - } - if (mIOPtrs.tpcZS) { - CfFragment f = fragment.next(); - int32_t nextSlice = iSlice; - if (f.isEnd()) { - nextSlice += GetProcessingSettings().nTPCClustererLanes; - f = mCFContext->fragmentFirst; + if (mIOPtrs.tpcZS) { + CfFragment f = fragment.next(); + int32_t nextSlice = iSlice; + if (f.isEnd()) { + nextSlice += GetProcessingSettings().nTPCClustererLanes; + f = mCFContext->fragmentFirst; + } + if (nextSlice < NSLICES && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSlice] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { + 
mCFContext->nextPos[nextSlice] = RunTPCClusterizer_transferZS(nextSlice, f, GetProcessingSettings().nTPCClustererLanes + lane); + } } - if (nextSlice < NSLICES && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSlice] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { - mCFContext->nextPos[nextSlice] = RunTPCClusterizer_transferZS(nextSlice, f, GetProcessingSettings().nTPCClustererLanes + lane); + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + if (clusterer.mPmemory->counters.nPositions == 0) { + return; + } + if (!mIOPtrs.tpcZS) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + } + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { + clusterer.DumpChargeMap(*mDebugFile, "Charges"); } - } - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (clusterer.mPmemory->counters.nPositions == 0) { - continue; - } - if (!mIOPtrs.tpcZS) { - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); - } - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { - clusterer.DumpChargeMap(*mDebugFile, "Charges"); - } - if (propagateMCLabels) { - runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); - } + if (propagateMCLabels) { + runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); + } - bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); - checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? fragment.index == 0 : true); - checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter; + bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); + checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? 
fragment.index == 0 : true); + checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter; - if (checkForNoisyPads) { - int32_t nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline; + if (checkForNoisyPads) { + int32_t nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline; - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}); - } + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}); + } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { - clusterer.DumpPeakMap(*mDebugFile, "Peaks"); - } + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { + clusterer.DumpPeakMap(*mDebugFile, "Peaks"); + } - RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); - } - GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, GetProcessingSettings().nTPCClustererLanes))) - for (int32_t lane = 0; lane < maxLane; lane++) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (doGPU) { - SynchronizeStream(lane); - } - if (clusterer.mPmemory->counters.nPeaks == 0) { - continue; - } - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { - clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); - } + RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off + }, tbb::simple_partitioner()); // clang-format on + }); + tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { + tbb::parallel_for(0, maxLane, [&](auto lane) { + uint32_t iSlice = iSliceBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; + if (doGPU) { + SynchronizeStream(lane); + } + if (clusterer.mPmemory->counters.nPeaks == 0) { + return; + } + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { + clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); + } - RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); - } - GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, GetProcessingSettings().nTPCClustererLanes))) - for (int32_t lane = 0; lane < maxLane; lane++) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (doGPU) { - SynchronizeStream(lane); - } + RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off + }, tbb::simple_partitioner()); // clang-format on + }); + tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { + tbb::parallel_for(0, maxLane, [&](auto lane) { + uint32_t iSlice = iSliceBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; + if (doGPU) { + SynchronizeStream(lane); + } - if (fragment.index == 0) { - deviceEvent* waitEvent = nullptr; - if (transferRunning[lane] == 1) { - waitEvent = &mEvents->stream[lane]; - transferRunning[lane] = 2; + if (fragment.index == 0) { + deviceEvent* waitEvent = nullptr; + if (transferRunning[lane] == 1) { + waitEvent = &mEvents->stream[lane]; + transferRunning[lane] = 2; + } + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow)); } - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow)); - } - if (clusterer.mPmemory->counters.nClusters == 0) { - continue; - } + if (clusterer.mPmemory->counters.nClusters == 0) { + return; + } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSlice}}, 0); - if (doGPU && propagateMCLabels) { - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); - if (doGPU) { - SynchronizeStream(lane); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSlice}}, 0); + if (doGPU && propagateMCLabels) { + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); + if (doGPU) { + SynchronizeStream(lane); + } + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, 
GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 1); + } + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSlice, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 1); - } - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSlice, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); - } - TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); - laneHasData[lane] = true; - // Include clusters in default debug mask, exclude other debug output by default - DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); - } - mRec->SetNestedLoopOmpFactor(1); + TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); + laneHasData[lane] = true; + // Include clusters in default debug mask, exclude other debug output by default + DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off + }, tbb::simple_partitioner()); // clang-format on + }); + mRec->SetNActiveThreadsOuterLoop(1); } size_t nClsFirst = nClsTotal; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index f3f3627573339..4ea7094416d5e 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -268,7 +268,7 @@ int32_t GPUChainTracking::RunTPCDecompression() int32_t 
nStreams = doGPU ? mRec->NStreams() - 1 : 1; if (cmprClsHost.nAttachedClusters != 0) { std::exclusive_scan(cmprClsHost.nTrackClusters, cmprClsHost.nTrackClusters + cmprClsHost.nTracks, Decompressor.mAttachedClustersOffsets, 0u); // computing clusters offsets for first kernel - for (int32_t iStream = 0; iStream < nStreams; ++iStream) { + for (int32_t iStream = 0; iStream < nStreams; iStream++) { uint32_t startTrack = cmprClsHost.nTracks / nStreams * iStream; uint32_t endTrack = cmprClsHost.nTracks / nStreams * (iStream + 1) + (iStream < nStreams - 1 ? 0 : cmprClsHost.nTracks % nStreams); // index of last track (excluded from computation) uint32_t numTracks = endTrack - startTrack; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index b68f0797f425f..cab025b03e8b6 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -22,6 +22,8 @@ #include "utils/strtag.h" #include +#include + using namespace o2::gpu; int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput) @@ -154,110 +156,110 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() int32_t streamMap[NSLICES]; bool error = false; - GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, NSLICES))) - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - GPUTPCTracker& trk = processors()->tpcTrackers[iSlice]; - GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSlice] : trk; - int32_t useStream = (iSlice % mRec->NStreams()); + tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, NSLICES)).execute([&] { + tbb::parallel_for(0, NSLICES, [&](auto iSlice) { + GPUTPCTracker& trk = processors()->tpcTrackers[iSlice]; + GPUTPCTracker& trkShadow = doGPU ? 
processorsShadow()->tpcTrackers[iSlice] : trk; + int32_t useStream = (iSlice % mRec->NStreams()); - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Creating Slice Data (Slice %d)", iSlice); - } - if (doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); - streamInit[useStream] = true; - } else { - if (ReadEvent(iSlice, 0)) { - GPUError("Error reading event"); - error = 1; - continue; + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Creating Slice Data (Slice %d)", iSlice); + } + if (doGPU) { + TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); + streamInit[useStream] = true; + } else { + if (ReadEvent(iSlice, 0)) { + GPUError("Error reading event"); + error = 1; + return; + } + } + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); + } + if (!doGPU && trk.CheckEmptySlice() && GetProcessingSettings().debugLevel == 0) { + return; } - } - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); - } - if (!doGPU && trk.CheckEmptySlice() && GetProcessingSettings().debugLevel == 0) { - continue; - } - if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Slice " << iSlice << "/" << NSLICES << std::endl; - if (GetProcessingSettings().debugMask & 1) { - if (doGPU) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); + if (GetProcessingSettings().debugLevel >= 6) { + *mDebugFile << "\n\nReconstruction: Slice " << iSlice << "/" << NSLICES << std::endl; + if (GetProcessingSettings().debugMask & 1) { + if (doGPU) { + 
TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); + } + trk.DumpSliceData(*mDebugFile); } - trk.DumpSliceData(*mDebugFile); } - } - // Initialize temporary memory where needed - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Slice Data to GPU and initializing temporary memory"); - } - runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); + // Initialize temporary memory where needed + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Copying Slice Data to GPU and initializing temporary memory"); + } + runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - if (!doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); // Copy Data to GPU Global Memory - } - if (GPUDebug("Initialization (3)", useStream)) { - throw std::runtime_error("memcpy failure"); - } + if (!doGPU) { + TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); // Copy Data to GPU Global Memory + } + if (GPUDebug("Initialization (3)", useStream)) { + throw std::runtime_error("memcpy failure"); + } - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); - streamInit[useStream] = true; + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); + streamInit[useStream] = true; - if (GetProcessingSettings().keepDisplayMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); - memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); - if (GetProcessingSettings().debugMask & 2) { - trk.DumpLinks(*mDebugFile, 0); + if (GetProcessingSettings().keepDisplayMemory) { + TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); + memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); + if (GetProcessingSettings().debugMask & 2) { + trk.DumpLinks(*mDebugFile, 0); + } } - } - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSlice}}); - DoDebugAndDump(RecoStep::TPCSliceTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSlice}}); + DoDebugAndDump(RecoStep::TPCSliceTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSlice}}); + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSlice}}); #ifdef GPUCA_SORT_STARTHITS_GPU - if (doGPU) { - runKernel({GetGridAuto(useStream), {iSlice}}); - } + if (doGPU) { + runKernel({GetGridAuto(useStream), {iSlice}}); + } #endif - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); - } - DoDebugAndDump(RecoStep::TPCSliceTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); - - if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - trk.UpdateMaxData(); - AllocateRegisteredMemory(trk.MemoryResTracklets()); - AllocateRegisteredMemory(trk.MemoryResOutput()); - } - - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - 
DoDebugAndDump(RecoStep::TPCSliceTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - trk.DumpHitWeights(*mDebugFile); + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSlice}}); } - } + DoDebugAndDump(RecoStep::TPCSliceTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + trk.UpdateMaxData(); + AllocateRegisteredMemory(trk.MemoryResTracklets()); + AllocateRegisteredMemory(trk.MemoryResOutput()); } - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, trk.MemoryResCommon(), useStream, &mEvents->slice[iSlice]); - streamMap[iSlice] = useStream; - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Slice %u, Number of tracks: %d", iSlice, *trk.NTracks()); + + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { + runKernel({GetGridAuto(useStream), {iSlice}}); + DoDebugAndDump(RecoStep::TPCSliceTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + trk.DumpHitWeights(*mDebugFile); + } } - DoDebugAndDump(RecoStep::TPCSliceTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); - } - } - mRec->SetNestedLoopOmpFactor(1); + + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { + 
runKernel({GetGridAuto(useStream), {iSlice}}); + runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSlice}}); + } + TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, trk.MemoryResCommon(), useStream, &mEvents->slice[iSlice]); + streamMap[iSlice] = useStream; + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Slice %u, Number of tracks: %d", iSlice, *trk.NTracks()); + } + DoDebugAndDump(RecoStep::TPCSliceTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + } }, tbb::simple_partitioner()); + }); + mRec->SetNActiveThreadsOuterLoop(1); if (error) { return (3); } @@ -419,16 +421,16 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } } else { mSliceSelectorReady = NSLICES; - GPUCA_OPENMP(parallel for if(!doGPU && GetProcessingSettings().ompKernels != 1) num_threads(mRec->SetAndGetNestedLoopOmpFactor(!doGPU, NSLICES))) - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - if (param().rec.tpc.extrapolationTracking) { - ExtrapolationTracking(iSlice, 0); - } - if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { - WriteOutput(iSlice, 0); - } - } - mRec->SetNestedLoopOmpFactor(1); + tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, NSLICES)).execute([&] { + tbb::parallel_for(0, NSLICES, [&](auto iSlice) { + if (param().rec.tpc.extrapolationTracking) { + ExtrapolationTracking(iSlice, 0); + } + if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { + WriteOutput(iSlice, 0); + } }, tbb::simple_partitioner()); + }); + mRec->SetNActiveThreadsOuterLoop(1); } if (param().rec.tpc.extrapolationTracking && GetProcessingSettings().debugLevel >= 3) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index b6f11375328d0..4f654c0fa7beb 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ 
b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -14,9 +14,7 @@ #include "GPUTPCGMMergerGPU.h" #include "GPUCommonAlgorithm.h" -#if defined(WITH_OPENMP) && !defined(GPUCA_GPUCODE) -#include "GPUReconstruction.h" -#endif +#include "GPUReconstructionThreading.h" using namespace o2::gpu; @@ -24,20 +22,18 @@ template <> GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode) { const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NOutputTracks(); - GPUCA_OPENMP(parallel for if(!merger.GetRec().GetProcessingSettings().ompKernels) num_threads(merger.GetRec().GetProcessingSettings().ompThreads)) - for (int32_t ii = get_global_id(0); ii < iEnd; ii += get_global_size(0)) { + GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? merger.TrackOrderProcess()[ii] : ii; GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); - } + }); } template <> GPUdii() void GPUTPCGMMergerFollowLoopers::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - GPUCA_OPENMP(parallel for if(!merger.GetRec().GetProcessingSettings().ompKernels) num_threads(merger.GetRec().GetProcessingSettings().ompThreads)) - for (uint32_t i = get_global_id(0); i < merger.Memory()->nLoopData; i += get_global_size(0)) { + GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), uint32_t, i, merger.Memory()->nLoopData, { GPUTPCGMTrackParam::RefitLoop(&merger, i); - } + }); } template <> diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCDef.h b/GPU/GPUTracking/SliceTracker/GPUTPCDef.h index 4b4f130faed65..3b53c3e66875a 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCDef.h +++ b/GPU/GPUTracking/SliceTracker/GPUTPCDef.h @@ -47,4 +47,4 @@ struct cahit2 { cahit x, y; }; #endif 
#endif //GPUDTPCEF_H -// clang format on +// clang-format on diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx index 8a727dc2da930..3cc3e3805dce8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx @@ -35,10 +35,10 @@ using namespace o2::gpu; void GPUTPCSliceData::InitializeRows(const GPUParam& p) { // initialisation of rows - for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; ++i) { + for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; i++) { new (&mRows[i]) GPUTPCRow; } - for (int32_t i = 0; i < GPUCA_ROW_COUNT; ++i) { + for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { mRows[i].mX = p.tpcGeometry.Row2X(i); mRows[i].mMaxY = CAMath::Tan(p.par.dAlpha / 2.f) * mRows[i].mX; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx index df0c7813fa0db..cece49073f11b 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx @@ -98,7 +98,7 @@ void* GPUTPCTracker::SetPointersCommon(void* mem) void GPUTPCTracker::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || mRec->GetProcessingSettings().ompKernels == 1 || mRec->GetProcessingSettings().ompThreads == 1); + bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; mMemoryResLinks 
= mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSliceLinks", reLinks); mMemoryResSliceScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSliceScratch"); diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index e6017788144e0..53ed77fe62d8c 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -165,7 +165,7 @@ int32_t ReadConfiguration(int argc, char** argv) #endif #ifndef GPUCA_TPC_GEOMETRY_O2 #error Why was configStandalone.rec.tpc.mergerReadFromTrackerDirectly = 0 needed? - configStandalone.proc.ompKernels = false; + configStandalone.proc.inKernelParallel = false; configStandalone.proc.createO2Output = 0; if (configStandalone.rundEdx == -1) { configStandalone.rundEdx = 0; @@ -216,10 +216,10 @@ int32_t ReadConfiguration(int argc, char** argv) configStandalone.noprompt = 1; } if (configStandalone.proc.debugLevel >= 4) { - if (configStandalone.proc.ompKernels) { - configStandalone.proc.ompKernels = 1; + if (configStandalone.proc.inKernelParallel) { + configStandalone.proc.inKernelParallel = 1; } else { - configStandalone.proc.ompThreads = 1; + configStandalone.proc.nHostThreads = 1; } } if (configStandalone.setO2Settings) { diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 1f11f0bacffac..32cdb246cf417 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -70,13 +70,8 @@ endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") 
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") -# Check and set settings for optional packages -if(CONFIG_OPENMP) - find_package(OpenMP REQUIRED) -else() - set(OpenMP_FOUND OFF) - set(OpenMP_CXX_FOUND OFF) -endif() +# Find mandatory packages +find_package(TBB REQUIRED) if(GPUCA_CONFIG_VC) find_package(Vc REQUIRED) @@ -252,14 +247,6 @@ if(GPUCA_CONFIG_ROOT) endif() target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) -if(OpenMP_CXX_FOUND) - target_link_libraries(ca PUBLIC OpenMP::OpenMP_CXX) - if (CMAKE_CXX_COMPILER STREQUAL "clang++") - target_link_libraries(ca PUBLIC -fopenmp) - target_link_libraries(GPUTracking PUBLIC -fopenmp) - endif() -endif() - # Installation install(TARGETS ca TPCFastTransformation standalone_support) install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}") diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 97091d833efd8..87716d700abc8 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -15,7 +15,6 @@ set(ENABLE_CUDA AUTO) set(ENABLE_HIP AUTO) set(ENABLE_OPENCL AUTO) -set(CONFIG_OPENMP 1) set(GPUCA_CONFIG_VC 1) set(GPUCA_CONFIG_FMT 1) set(GPUCA_CONFIG_ROOT 1) diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index c44b4c09a3d7a..4e8fcd13e0801 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -30,9 +30,6 @@ class GPUTPCGMPolynomialField; #ifndef GPUCA_GPUCODE #include "GPUMemoryResource.h" #include "GPUReconstruction.h" -#ifdef WITH_OPENMP -#include -#endif // WITH_OPENMP #include #include @@ -61,10 +58,10 @@ void* GPUTRDTracker_t::SetPointersBase(void* base) //-------------------------------------------------------------------- // Allocate memory for fixed size objects (needs to be done only once) 
//-------------------------------------------------------------------- - mMaxThreads = mRec->GetMaxThreads(); + mMaxBackendThreads = mRec->GetMaxBackendThreads(); computePointerWithAlignment(base, mR, kNChambers); - computePointerWithAlignment(base, mHypothesis, mNCandidates * mMaxThreads); - computePointerWithAlignment(base, mCandidates, mNCandidates * 2 * mMaxThreads); + computePointerWithAlignment(base, mHypothesis, mNCandidates * mMaxBackendThreads); + computePointerWithAlignment(base, mCandidates, mNCandidates * 2 * mMaxBackendThreads); return base; } @@ -94,7 +91,7 @@ void* GPUTRDTracker_t::SetPointersTracks(void* base) } template -GPUTRDTracker_t::GPUTRDTracker_t() : mR(nullptr), mIsInitialized(false), mGenerateSpacePoints(false), mProcessPerTimeFrame(false), mNAngleHistogramBins(25), mAngleHistogramRange(50), mMemoryPermanent(-1), mMemoryTracklets(-1), mMemoryTracks(-1), mNMaxCollisions(0), mNMaxTracks(0), mNMaxSpacePoints(0), mTracks(nullptr), mTrackAttribs(nullptr), mNCandidates(1), mNTracks(0), mNEvents(0), mMaxThreads(100), mTrackletIndexArray(nullptr), mHypothesis(nullptr), mCandidates(nullptr), mSpacePoints(nullptr), mGeo(nullptr), mRPhiA2(0), mRPhiB(0), mRPhiC2(0), mDyA2(0), mDyB(0), mDyC2(0), mAngleToDyA(0), mAngleToDyB(0), mAngleToDyC(0), mDebugOutput(false), mMaxEta(0.84f), mRoadZ(18.f), mZCorrCoefNRC(1.4f), mTPCVdrift(2.58f), mTPCTDriftOffset(0.f), mDebug(new GPUTRDTrackerDebug()) +GPUTRDTracker_t::GPUTRDTracker_t() : mR(nullptr), mIsInitialized(false), mGenerateSpacePoints(false), mProcessPerTimeFrame(false), mNAngleHistogramBins(25), mAngleHistogramRange(50), mMemoryPermanent(-1), mMemoryTracklets(-1), mMemoryTracks(-1), mNMaxCollisions(0), mNMaxTracks(0), mNMaxSpacePoints(0), mTracks(nullptr), mTrackAttribs(nullptr), mNCandidates(1), mNTracks(0), mNEvents(0), mMaxBackendThreads(100), mTrackletIndexArray(nullptr), mHypothesis(nullptr), mCandidates(nullptr), mSpacePoints(nullptr), mGeo(nullptr), mRPhiA2(0), mRPhiB(0), mRPhiC2(0), mDyA2(0), 
mDyB(0), mDyC2(0), mAngleToDyA(0), mAngleToDyB(0), mAngleToDyC(0), mDebugOutput(false), mMaxEta(0.84f), mRoadZ(18.f), mZCorrCoefNRC(1.4f), mTPCVdrift(2.58f), mTPCTDriftOffset(0.f), mDebug(new GPUTRDTrackerDebug()) { //-------------------------------------------------------------------- // Default constructor diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 59e753e239cf9..274dfd6668eaf 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -167,7 +167,7 @@ class GPUTRDTracker_t : public GPUProcessor int32_t mNCandidates; // max. track hypothesis per layer int32_t mNTracks; // number of TPC tracks to be matched int32_t mNEvents; // number of processed events - int32_t mMaxThreads; // maximum number of supported threads + int32_t mMaxBackendThreads; // maximum number of supported threads // index of first tracklet for each chamber within tracklets array, last entry is total number of tracklets for given collision // the array has (kNChambers + 1) * numberOfCollisions entries // note, that for collision iColl one has to add an offset corresponding to the index of the first tracklet of iColl to the index stored in mTrackletIndexArray diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx index eb9eecfe6e846..d18f04e554043 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx @@ -16,9 +16,8 @@ #include "GPUTRDGeometry.h" #include "GPUConstantMem.h" #include "GPUCommonTypeTraits.h" -#if defined(WITH_OPENMP) && !defined(GPUCA_GPUCODE) -#include "GPUReconstruction.h" -#endif + +#include "GPUReconstructionThreading.h" using namespace o2::gpu; @@ -33,10 +32,9 @@ GPUdii() void GPUTRDTrackerKernels::Thread(int32_t nBlocks, int32_t nThreads, in } } #endif - GPUCA_OPENMP(parallel for 
if(!trdTracker->GetRec().GetProcessingSettings().ompKernels) num_threads(trdTracker->GetRec().GetProcessingSettings().ompThreads)) - for (int32_t i = get_global_id(0); i < trdTracker->NTracks(); i += get_global_size(0)) { + GPUCA_TBB_KERNEL_LOOP(trdTracker->GetRec(), int32_t, i, trdTracker->NTracks(), { trdTracker->DoTrackingThread(i, get_global_id(0)); - } + }); } #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code. diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index 2c1814a1a26a0..68385d7916234 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -219,7 +219,4 @@ if(GPUCA_EVENT_DISPLAY_QT) target_link_libraries(${targetName} PRIVATE Qt5::Widgets) endif() -if(OpenMP_CXX_FOUND) - target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) - target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) -endif() +target_link_libraries(${targetName} PRIVATE TBB::tbb) diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 918011b85ea04..e42a4fa3e4bf1 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -24,9 +24,6 @@ #ifndef _WIN32 #include "../utils/linux_helpers.h" #endif -#ifdef WITH_OPENMP -#include -#endif #include "GPUChainTracking.h" #include "GPUQA.h" diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index d6a65f212ecf3..1c4b751bbf85b 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -207,7 +207,7 @@ class GPUDisplay : public GPUDisplayInterface void DrawTrackITS(int32_t trackId, int32_t iSlice); GPUDisplay::vboList DrawFinalITS(); template - void DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer); + void DrawFinal(int32_t 
iSlice, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer); vboList DrawGrid(const GPUTPCTracker& tracker); vboList DrawGridTRD(int32_t sector); void DoScreenshot(const char* filename, std::vector& pixels, float animateTime = -1.f); diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx index 764f659d07e64..ca9fd6be01703 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx @@ -14,25 +14,20 @@ #include "GPUDisplay.h" -#ifdef WITH_OPENMP -#include -#endif #ifndef _WIN32 #include "bitmapfile.h" #endif +#include "oneapi/tbb.h" + using namespace o2::gpu; int32_t GPUDisplay::getNumThreads() { if (mChain) { - return mChain->GetProcessingSettings().ompThreads; + return mChain->GetProcessingSettings().nHostThreads; } else { -#ifdef WITH_OPENMP - return omp_get_max_threads(); -#else - return 1; -#endif + return tbb::info::default_concurrency(); } } diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 5d4628cf0eb3f..8c42cfa46abb9 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -36,9 +36,7 @@ #include "SimulationDataFormat/ConstMCTruthContainer.h" #include "GPUTrackParamConvert.h" -#ifdef WITH_OPENMP -#include -#endif +#include using namespace o2::gpu; @@ -325,7 +323,7 @@ GPUDisplay::vboList GPUDisplay::DrawFinalITS() } template -void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer) +void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer) { auto& vBuf = threadBuffer.vBuf; auto& buffer = threadBuffer.buffer; @@ -698,15 +696,15 @@ GPUDisplay::vboList 
GPUDisplay::DrawGridTRD(int32_t sector) if (trdsector >= 9) { alpha -= 2 * CAMath::Pi(); } - for (int32_t iLy = 0; iLy < GPUTRDTracker::EGPUTRDTracker::kNLayers; ++iLy) { - for (int32_t iStack = 0; iStack < GPUTRDTracker::EGPUTRDTracker::kNStacks; ++iStack) { + for (int32_t iLy = 0; iLy < GPUTRDTracker::EGPUTRDTracker::kNLayers; iLy++) { + for (int32_t iStack = 0; iStack < GPUTRDTracker::EGPUTRDTracker::kNStacks; iStack++) { int32_t iDet = geo->GetDetector(iLy, iStack, trdsector); auto matrix = geo->GetClusterMatrix(iDet); if (!matrix) { continue; } auto pp = geo->GetPadPlane(iDet); - for (int32_t i = 0; i < pp->GetNrows(); ++i) { + for (int32_t i = 0; i < pp->GetNrows(); i++) { float xyzLoc1[3]; float xyzLoc2[3]; float xyzGlb1[3]; @@ -776,26 +774,17 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlDLFinal[iSlice].resize(mNCollissions); } } - GPUCA_OPENMP(parallel num_threads(getNumThreads())) - { -#ifdef WITH_OPENMP - int32_t numThread = omp_get_thread_num(); - int32_t numThreads = omp_get_num_threads(); -#else - int32_t numThread = 0, numThreads = 1; -#endif + int32_t numThreads = getNumThreads(); + tbb::task_arena(numThreads).execute([&] { if (mChain && (mChain->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { - GPUCA_OPENMP(for) - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { GPUTPCTracker& tracker = (GPUTPCTracker&)sliceTracker(iSlice); tracker.SetPointersDataLinks(tracker.LinkTmpMemory()); mGlDLLines[iSlice][tINITLINK] = DrawLinks(tracker, tINITLINK, true); - tracker.SetPointersDataLinks(mChain->rec()->Res(tracker.MemoryResLinks()).Ptr()); - } - GPUCA_OPENMP(barrier) + tracker.SetPointersDataLinks(mChain->rec()->Res(tracker.MemoryResLinks()).Ptr()); // clang-format off + }, tbb::simple_partitioner()); // clang-format on - GPUCA_OPENMP(for) - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { const GPUTPCTracker& tracker 
= sliceTracker(iSlice); mGlDLLines[iSlice][tLINK] = DrawLinks(tracker, tLINK); @@ -805,30 +794,28 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlDLGrid[iSlice] = DrawGrid(tracker); if (iSlice < NSLICES / 2) { mGlDLGridTRD[iSlice] = DrawGridTRD(iSlice); - } - } - GPUCA_OPENMP(barrier) + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on - GPUCA_OPENMP(for) - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { const GPUTPCTracker& tracker = sliceTracker(iSlice); - mGlDLLines[iSlice][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); - } - GPUCA_OPENMP(barrier) + mGlDLLines[iSlice][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); // clang-format off + }, tbb::simple_partitioner()); // clang-format on } - mThreadTracks[numThread].resize(mNCollissions); - for (int32_t i = 0; i < mNCollissions; i++) { - for (int32_t j = 0; j < NSLICES; j++) { - for (int32_t k = 0; k < 2; k++) { - mThreadTracks[numThread][i][j][k].clear(); + tbb::parallel_for(0, numThreads, [&](int32_t iThread) { + mThreadTracks[iThread].resize(mNCollissions); + for (int32_t i = 0; i < mNCollissions; i++) { + for (int32_t j = 0; j < NSLICES; j++) { + for (int32_t k = 0; k < 2; k++) { + mThreadTracks[iThread][i][j][k].clear(); + } } - } - } + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on if (mConfig.showTPCTracksFromO2Format) { #ifdef GPUCA_TPC_GEOMETRY_O2 uint32_t col = 0; - GPUCA_OPENMP(for) - for (uint32_t i = 0; i < mIOPtrs->nOutputTracksTPCO2; i++) { + tbb::parallel_for(0, mIOPtrs->nOutputTracksTPCO2, [&](auto i) { uint8_t sector, row; if (mIOPtrs->clustersNative) { mIOPtrs->outputTracksTPCO2[i].getCluster(mIOPtrs->outputClusRefsTPCO2, 0, *mIOPtrs->clustersNative, sector, row); @@ -838,18 +825,17 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if (mQA && mIOPtrs->outputTracksTPCO2MC) { col = mQA->GetMCLabelCol(mIOPtrs->outputTracksTPCO2MC[i]); } - 
mThreadTracks[numThread][col][sector][0].emplace_back(i); - } + mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][sector][0].emplace_back(i); + }); #endif } else { - GPUCA_OPENMP(for) - for (uint32_t i = 0; i < mIOPtrs->nMergedTracks; i++) { + tbb::parallel_for(0, mIOPtrs->nMergedTracks, [&](auto i) { const GPUTPCGMMergedTrack* track = &mIOPtrs->mergedTracks[i]; if (track->NClusters() == 0) { - continue; + return; } if (mCfgH.hideRejectedTracks && !track->OK()) { - continue; + return; } int32_t slice = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + track->NClusters() - 1].slice; uint32_t col = 0; @@ -863,18 +849,17 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() } #endif } - mThreadTracks[numThread][col][slice][0].emplace_back(i); - } + mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][slice][0].emplace_back(i); + }); } for (uint32_t col = 0; col < mIOPtrs->nMCInfosTPCCol; col++) { - GPUCA_OPENMP(for) - for (uint32_t i = mIOPtrs->mcInfosTPCCol[col].first; i < mIOPtrs->mcInfosTPCCol[col].first + mIOPtrs->mcInfosTPCCol[col].num; i++) { + tbb::parallel_for(mIOPtrs->mcInfosTPCCol[col].first, mIOPtrs->mcInfosTPCCol[col].first + mIOPtrs->mcInfosTPCCol[col].num, [&](uint32_t i) { const GPUTPCMCInfo& mc = mIOPtrs->mcInfosTPC[i]; if (mc.charge == 0.f) { - continue; + return; } if (mc.pid < 0) { - continue; + return; } float alpha = atan2f(mc.y, mc.x); @@ -885,18 +870,17 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if (mc.z < 0) { slice += 18; } - mThreadTracks[numThread][col][slice][1].emplace_back(i); - } + mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][slice][1].emplace_back(i); + }); } - GPUCA_OPENMP(barrier) GPUTPCGMPropagator prop; prop.SetMaxSinPhi(.999); prop.SetMaterialTPC(); prop.SetPolynomialField(&mParam->polynomialField); - GPUCA_OPENMP(for) - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { + int32_t numThread = GPUReconstruction::getHostThreadIndex(); 
for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { mThreadBuffers[numThread].clear(); for (int32_t iSet = 0; iSet < numThreads; iSet++) { @@ -915,19 +899,17 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() } list[i] = vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice); } - } - } + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on - GPUCA_OPENMP(barrier) - GPUCA_OPENMP(for) - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { mGlDLPoints[iSlice][i][iCol] = DrawClusters(iSlice, i, iCol); } - } - } - } + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on + }); // End omp parallel mGlDLFinalITS = DrawFinalITS(); diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index aaa03b8a24d18..f53fa185029f8 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -33,6 +33,8 @@ #include "ITSMFTBase/DPLAlpideParam.h" #endif +#include + using namespace o2::gpu; void GPUDisplay::DrawGLScene_updateEventData() @@ -126,103 +128,107 @@ void GPUDisplay::DrawGLScene_updateEventData() } mUpdateTrackFilter = false; - mMaxClusterZ = 0; - GPUCA_OPENMP(parallel for num_threads(getNumThreads()) reduction(max : mMaxClusterZ)) - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - int32_t row = 0; - uint32_t nCls = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSlice] : mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSlice] - : 0; - for (uint32_t i = 0; i < nCls; i++) { - int32_t cid; - if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSlice][i]; - cid = cl.id; - row = cl.row; - } else { - cid = mIOPtrs->clustersNative->clusterOffset[iSlice][0] + i; - while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSlice][row + 1] <= (uint32_t)cid) { - row++; + mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, NSLICES, 1), float(0.f), [&](const tbb::blocked_range& r, float maxClusterZ) { + for (int32_t iSlice = r.begin(); iSlice < r.end(); iSlice++) { + int32_t row = 0; + uint32_t nCls = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSlice] : (mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSlice] : 0); + for (uint32_t i = 0; i < nCls; i++) { + int32_t cid; + if (mParam->par.earlyTpcTransform) { + const auto& cl = mIOPtrs->clusterData[iSlice][i]; + cid = cl.id; + row = cl.row; + } else { + cid = mIOPtrs->clustersNative->clusterOffset[iSlice][0] + i; + while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSlice][row + 1] <= (uint32_t)cid) { + row++; + } } - } - if (cid >= mNMaxClusters) { - throw std::runtime_error("Cluster Buffer Size exceeded"); - } - float4* ptr = &mGlobalPos[cid]; - if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSlice][i]; - mParam->Slice2Global(iSlice, (mCfgH.clustersOnNominalRow ? 
mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); - } else { - float x, y, z; - const auto& cln = mIOPtrs->clustersNative->clusters[iSlice][0][i]; - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSlice, row, cln.getPad(), cln.getTime(), x, y, z); - if (mCfgH.clustersOnNominalRow) { - x = mParam->tpcGeometry.Row2X(row); + if (cid >= mNMaxClusters) { + throw std::runtime_error("Cluster Buffer Size exceeded"); + } + float4* ptr = &mGlobalPos[cid]; + if (mParam->par.earlyTpcTransform) { + const auto& cl = mIOPtrs->clusterData[iSlice][i]; + mParam->Slice2Global(iSlice, (mCfgH.clustersOnNominalRow ? mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); + } else { + float x, y, z; + const auto& cln = mIOPtrs->clustersNative->clusters[iSlice][0][i]; + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSlice, row, cln.getPad(), cln.getTime(), x, y, z); + if (mCfgH.clustersOnNominalRow) { + x = mParam->tpcGeometry.Row2X(row); + } + mParam->Slice2Global(iSlice, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); } - mParam->Slice2Global(iSlice, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); + + if (fabsf(ptr->z) > maxClusterZ) { + maxClusterZ = fabsf(ptr->z); + } + ptr->z += iSlice < 18 ? 
mCfgH.zAdd : -mCfgH.zAdd; + ptr->x *= GL_SCALE_FACTOR; + ptr->y *= GL_SCALE_FACTOR; + ptr->z *= GL_SCALE_FACTOR; + ptr->w = tCLUSTER; } + } + return maxClusterZ; // clang-format off + }, [](const float a, const float b) { return std::max(a, b); }, tbb::simple_partitioner()); // clang-format on - if (fabsf(ptr->z) > mMaxClusterZ) { - mMaxClusterZ = fabsf(ptr->z); + mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, mCurrentSpacePointsTRD, 32), float(mMaxClusterZ), [&](const tbb::blocked_range& r, float maxClusterZ) { + int32_t trdTriggerRecord = -1; + float trdZoffset = 0; + for (int i = r.begin(); i < r.end(); i++) { + while (mParam->par.continuousTracking && trdTriggerRecord < (int32_t)mIOPtrs->nTRDTriggerRecords - 1 && mIOPtrs->trdTrackletIdxFirst[trdTriggerRecord + 1] <= i) { + trdTriggerRecord++; // This requires to go through the data in order I believe + float trdTime = mIOPtrs->trdTriggerTimes[trdTriggerRecord] * 1e3 / o2::constants::lhc::LHCBunchSpacingNS / o2::tpc::constants::LHCBCPERTIMEBIN; + trdZoffset = fabsf(mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(0, trdTime, mParam->continuousMaxTimeBin)); + } + const auto& sp = mIOPtrs->trdSpacePoints[i]; + int32_t iSec = trdGeometry()->GetSector(mIOPtrs->trdTracklets[i].GetDetector()); + float4* ptr = &mGlobalPosTRD[i]; + mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd, sp.getY(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); + ptr->z += ptr->z > 0 ? trdZoffset : -trdZoffset; + if (fabsf(ptr->z) > maxClusterZ) { + maxClusterZ = fabsf(ptr->z); } - ptr->z += iSlice < 18 ? 
mCfgH.zAdd : -mCfgH.zAdd; ptr->x *= GL_SCALE_FACTOR; ptr->y *= GL_SCALE_FACTOR; ptr->z *= GL_SCALE_FACTOR; - ptr->w = tCLUSTER; - } - } - - int32_t trdTriggerRecord = -1; - float trdZoffset = 0; - GPUCA_OPENMP(parallel for num_threads(getNumThreads()) reduction(max : mMaxClusterZ) firstprivate(trdTriggerRecord, trdZoffset)) - for (int32_t i = 0; i < mCurrentSpacePointsTRD; i++) { - while (mParam->par.continuousTracking && trdTriggerRecord < (int32_t)mIOPtrs->nTRDTriggerRecords - 1 && mIOPtrs->trdTrackletIdxFirst[trdTriggerRecord + 1] <= i) { - trdTriggerRecord++; - float trdTime = mIOPtrs->trdTriggerTimes[trdTriggerRecord] * 1e3 / o2::constants::lhc::LHCBunchSpacingNS / o2::tpc::constants::LHCBCPERTIMEBIN; - trdZoffset = fabsf(mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(0, trdTime, mParam->continuousMaxTimeBin)); - } - const auto& sp = mIOPtrs->trdSpacePoints[i]; - int32_t iSec = trdGeometry()->GetSector(mIOPtrs->trdTracklets[i].GetDetector()); - float4* ptr = &mGlobalPosTRD[i]; - mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd, sp.getY(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); - ptr->z += ptr->z > 0 ? trdZoffset : -trdZoffset; - if (fabsf(ptr->z) > mMaxClusterZ) { - mMaxClusterZ = fabsf(ptr->z); - } - ptr->x *= GL_SCALE_FACTOR; - ptr->y *= GL_SCALE_FACTOR; - ptr->z *= GL_SCALE_FACTOR; - ptr->w = tTRDCLUSTER; - ptr = &mGlobalPosTRD2[i]; - mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd + 4.5f, sp.getY() + 1.5f * sp.getDy(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); - ptr->z += ptr->z > 0 ? trdZoffset : -trdZoffset; - if (fabsf(ptr->z) > mMaxClusterZ) { - mMaxClusterZ = fabsf(ptr->z); + ptr->w = tTRDCLUSTER; + ptr = &mGlobalPosTRD2[i]; + mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd + 4.5f, sp.getY() + 1.5f * sp.getDy(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); + ptr->z += ptr->z > 0 ? 
trdZoffset : -trdZoffset; + if (fabsf(ptr->z) > maxClusterZ) { + maxClusterZ = fabsf(ptr->z); + } + ptr->x *= GL_SCALE_FACTOR; + ptr->y *= GL_SCALE_FACTOR; + ptr->z *= GL_SCALE_FACTOR; + ptr->w = tTRDCLUSTER; } - ptr->x *= GL_SCALE_FACTOR; - ptr->y *= GL_SCALE_FACTOR; - ptr->z *= GL_SCALE_FACTOR; - ptr->w = tTRDCLUSTER; - } + return maxClusterZ; // clang-format off + }, [](const float a, const float b) { return std::max(a, b); }, tbb::static_partitioner()); // clang-format on - GPUCA_OPENMP(parallel for num_threads(getNumThreads()) reduction(max : mMaxClusterZ)) - for (int32_t i = 0; i < mCurrentClustersTOF; i++) { - float4* ptr = &mGlobalPosTOF[i]; - mParam->Slice2Global(mIOPtrs->tofClusters[i].getSector(), mIOPtrs->tofClusters[i].getX() + mCfgH.xAdd, mIOPtrs->tofClusters[i].getY(), mIOPtrs->tofClusters[i].getZ(), &ptr->x, &ptr->y, &ptr->z); - float ZOffset = 0; - if (mParam->par.continuousTracking) { - float tofTime = mIOPtrs->tofClusters[i].getTime() * 1e-3 / o2::constants::lhc::LHCBunchSpacingNS / o2::tpc::constants::LHCBCPERTIMEBIN; - ZOffset = fabsf(mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(0, tofTime, mParam->continuousMaxTimeBin)); - ptr->z += ptr->z > 0 ? 
ZOffset : -ZOffset; - } - if (fabsf(ptr->z) > mMaxClusterZ) { - mMaxClusterZ = fabsf(ptr->z); + mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, mCurrentClustersTOF, 32), float(mMaxClusterZ), [&](const tbb::blocked_range& r, float maxClusterZ) { + for (int32_t i = r.begin(); i < r.end(); i++) { + float4* ptr = &mGlobalPosTOF[i]; + mParam->Slice2Global(mIOPtrs->tofClusters[i].getSector(), mIOPtrs->tofClusters[i].getX() + mCfgH.xAdd, mIOPtrs->tofClusters[i].getY(), mIOPtrs->tofClusters[i].getZ(), &ptr->x, &ptr->y, &ptr->z); + float ZOffset = 0; + if (mParam->par.continuousTracking) { + float tofTime = mIOPtrs->tofClusters[i].getTime() * 1e-3 / o2::constants::lhc::LHCBunchSpacingNS / o2::tpc::constants::LHCBCPERTIMEBIN; + ZOffset = fabsf(mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(0, tofTime, mParam->continuousMaxTimeBin)); + ptr->z += ptr->z > 0 ? ZOffset : -ZOffset; + } + if (fabsf(ptr->z) > maxClusterZ) { + maxClusterZ = fabsf(ptr->z); + } + ptr->x *= GL_SCALE_FACTOR; + ptr->y *= GL_SCALE_FACTOR; + ptr->z *= GL_SCALE_FACTOR; + ptr->w = tTOFCLUSTER; } - ptr->x *= GL_SCALE_FACTOR; - ptr->y *= GL_SCALE_FACTOR; - ptr->z *= GL_SCALE_FACTOR; - ptr->w = tTOFCLUSTER; - } + return maxClusterZ; // clang-format off + }, [](const float a, const float b) { return std::max(a, b); }); // clang-format on if (mCurrentClustersITS) { float itsROFhalfLen = 0; diff --git a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h index 63673505f4732..23d382466ba22 100644 --- a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h +++ b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h @@ -458,7 +458,7 @@ const float positionScale = 100.0f; void main() { vec3 position = gl_in[0].gl_Position.xyz; - for(uint32_t i = 0; i < field_config.StepCount; ++i) { + for(uint32_t i = 0; i < field_config.StepCount; i++) { gl_Position = um.ModelViewProj * vec4(position/positionScale, 1.0f); EmitVertex(); const vec3 b_vec = 
Field(position); diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 70a093c7f1de7..015159fee24d7 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -76,6 +76,8 @@ #include "utils/qconfig.h" #include "utils/timer.h" +#include + using namespace o2::gpu; #ifdef GPUCA_MERGER_BY_MC_LABEL @@ -919,49 +921,48 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } #endif } else { - auto acc = GPUTPCTrkLbl(GetClusterLabels(), 1.f - mConfig.recThreshold); -#if QA_DEBUG == 0 - GPUCA_OPENMP(parallel for firstprivate(acc)) -#endif - for (uint32_t i = 0; i < nReconstructedTracks; i++) { - acc.reset(); - int32_t nClusters = 0; - const GPUTPCGMMergedTrack& track = mTracking->mIOPtrs.mergedTracks[i]; - std::vector labels; - for (uint32_t k = 0; k < track.NClusters(); k++) { - if (mTracking->mIOPtrs.mergedTrackHits[track.FirstClusterRef() + k].state & GPUTPCGMMergedTrackHit::flagReject) { - continue; - } - nClusters++; - uint32_t hitId = mTracking->mIOPtrs.mergedTrackHits[track.FirstClusterRef() + k].num; - if (hitId >= GetNMCLabels()) { - GPUError("Invalid hit id %u > %d (nClusters %d)", hitId, GetNMCLabels(), mTracking->mIOPtrs.clustersNative ? mTracking->mIOPtrs.clustersNative->nClustersTotal : 0); - throw std::runtime_error("qa error"); - } - acc.addLabel(hitId); - for (int32_t j = 0; j < GetMCLabelNID(hitId); j++) { - if (GetMCLabelID(hitId, j) >= (int32_t)GetNMCTracks(GetMCLabelCol(hitId, j))) { - GPUError("Invalid label %d > %d (hit %d, label %d, col %d)", GetMCLabelID(hitId, j), GetNMCTracks(GetMCLabelCol(hitId, j)), hitId, j, (int32_t)GetMCLabelCol(hitId, j)); + tbb::parallel_for(tbb::blocked_range(0, nReconstructedTracks, (QA_DEBUG == 0) ? 
32 : nReconstructedTracks), [&](const tbb::blocked_range& range) { + auto acc = GPUTPCTrkLbl(GetClusterLabels(), 1.f - mConfig.recThreshold); + for (auto i = range.begin(); i < range.end(); i++) { + acc.reset(); + int32_t nClusters = 0; + const GPUTPCGMMergedTrack& track = mTracking->mIOPtrs.mergedTracks[i]; + std::vector labels; + for (uint32_t k = 0; k < track.NClusters(); k++) { + if (mTracking->mIOPtrs.mergedTrackHits[track.FirstClusterRef() + k].state & GPUTPCGMMergedTrackHit::flagReject) { + continue; + } + nClusters++; + uint32_t hitId = mTracking->mIOPtrs.mergedTrackHits[track.FirstClusterRef() + k].num; + if (hitId >= GetNMCLabels()) { + GPUError("Invalid hit id %u > %d (nClusters %d)", hitId, GetNMCLabels(), mTracking->mIOPtrs.clustersNative ? mTracking->mIOPtrs.clustersNative->nClustersTotal : 0); throw std::runtime_error("qa error"); } - if (GetMCLabelID(hitId, j) >= 0) { - if (QA_DEBUG >= 3 && track.OK()) { - GPUInfo("Track %d Cluster %u Label %d: %d (%f)", i, k, j, GetMCLabelID(hitId, j), GetMCLabelWeight(hitId, j)); + acc.addLabel(hitId); + for (int32_t j = 0; j < GetMCLabelNID(hitId); j++) { + if (GetMCLabelID(hitId, j) >= (int32_t)GetNMCTracks(GetMCLabelCol(hitId, j))) { + GPUError("Invalid label %d > %d (hit %d, label %d, col %d)", GetMCLabelID(hitId, j), GetNMCTracks(GetMCLabelCol(hitId, j)), hitId, j, (int32_t)GetMCLabelCol(hitId, j)); + throw std::runtime_error("qa error"); + } + if (GetMCLabelID(hitId, j) >= 0) { + if (QA_DEBUG >= 3 && track.OK()) { + GPUInfo("Track %d Cluster %u Label %d: %d (%f)", i, k, j, GetMCLabelID(hitId, j), GetMCLabelWeight(hitId, j)); + } } } } - } - float maxweight, sumweight; - int32_t maxcount; - auto maxLabel = acc.computeLabel(&maxweight, &sumweight, &maxcount); - mTrackMCLabels[i] = maxLabel; - if (QA_DEBUG && track.OK() && GetNMCTracks(maxLabel) > (uint32_t)maxLabel.getTrackID()) { - const mcInfo_t& mc = GetMCTrack(maxLabel); - GPUInfo("Track %d label %d (fake %d) weight %f clusters %d (fitted %d) (%f%% %f%%) 
Pt %f", i, maxLabel.getTrackID(), (int32_t)(maxLabel.isFake()), maxweight, nClusters, track.NClustersFitted(), 100.f * maxweight / sumweight, 100.f * (float)maxcount / (float)nClusters, - std::sqrt(mc.pX * mc.pX + mc.pY * mc.pY)); + float maxweight, sumweight; + int32_t maxcount; + auto maxLabel = acc.computeLabel(&maxweight, &sumweight, &maxcount); + mTrackMCLabels[i] = maxLabel; + if (QA_DEBUG && track.OK() && GetNMCTracks(maxLabel) > (uint32_t)maxLabel.getTrackID()) { + const mcInfo_t& mc = GetMCTrack(maxLabel); + GPUInfo("Track %d label %d (fake %d) weight %f clusters %d (fitted %d) (%f%% %f%%) Pt %f", i, maxLabel.getTrackID(), (int32_t)(maxLabel.isFake()), maxweight, nClusters, track.NClustersFitted(), 100.f * maxweight / sumweight, 100.f * (float)maxcount / (float)nClusters, + std::sqrt(mc.pX * mc.pX + mc.pY * mc.pY)); + } } - } + }); } if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { GPUInfo("QA Time: Assign Track Labels:\t\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); @@ -1135,8 +1136,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } // Compute MC Track Parameters for MC Tracks - GPUCA_OPENMP(parallel for) - for (uint32_t iCol = 0; iCol < GetNMCCollissions(); iCol++) { + tbb::parallel_for(0, GetNMCCollissions(), [&](auto iCol) { for (uint32_t i = 0; i < GetNMCTracks(iCol); i++) { const mcInfo_t& info = GetMCTrack(i, iCol); additionalMCParameters& mc2 = mMCParam[iCol][i]; @@ -1153,8 +1153,8 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx std::vector& effBuffer = mcEffBuffer[mNEvents - 1]; effBuffer[i] = mRecTracks[iCol][i] * 1000 + mFakeTracks[iCol][i]; } - } - } + } // clang-format off + }, tbb::simple_partitioner()); // clang-format on if (QA_TIMING || (mTracking && mTracking->GetProcessingSettings().debugLevel >= 3)) { GPUInfo("QA Time: Compute track mc parameters:\t%6.0f us", timer.GetCurrentElapsedTime(true) * 1e6); } From 61e51e96bbc5970365684102fbc76ddefbb67f4f Mon Sep 17 
00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 12:56:01 +0100 Subject: [PATCH 0103/1914] GPU: Remove some unused code --- GPU/GPUTracking/Base/GPUReconstruction.h | 1 - GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 7 ------- GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx | 1 - 3 files changed, 9 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 1fdfabb11211a..26e9d61b65efc 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -348,7 +348,6 @@ class GPUReconstruction std::vector>* mOutputErrorCodes = nullptr; int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU - int32_t mThreadId = -1; // Thread ID that is valid for the local CUDA context int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events int32_t mNStreams = 1; // Number of parallel GPU streams int32_t mMaxHostThreads = 0; // Maximum number of OMP threads diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index b5f9d591fd9a6..4db49d62f21cc 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -211,7 +211,6 @@ int32_t GPUReconstructionCPU::InitDevice() if (mProcessingSettings.inKernelParallel) { mBlockCount = mMaxHostThreads; } - mThreadId = GetThread(); mProcShadow.mProcessorsProc = processors(); return 0; } @@ -241,12 +240,6 @@ int32_t GPUReconstructionCPU::RunChains() return retVal; } } else { - if (mThreadId != GetThread()) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Thread changed, migrating context, Previous Thread: %d, New Thread: %d", mThreadId, GetThread()); - } - mThreadId = GetThread(); - } if (mSlaves.size() || mMaster) { WriteConstantParams(); // Reinitialize // TODO: Get this in sync with GPUChainTracking::DoQueuedUpdates, and consider the doublePipeline } diff --git 
a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 91715fab4f668..661ff81fbbd07 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -102,7 +102,6 @@ int32_t GPUReconstructionDeviceBase::InitDevice() GPUError("Too many straems requested %d > %d\n", mProcessingSettings.nStreams, GPUCA_MAX_STREAMS); return (1); } - mThreadId = GetThread(); void* semLock = nullptr; if (mProcessingSettings.globalInitMutex && GetGlobalLock(semLock)) { From fe5d5e59a0b5b8457761744895b105818b145971 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 14:11:01 +0100 Subject: [PATCH 0104/1914] GPU: Refactor some code, and keep only CPU-kernel realted code in GPUReconstructionCPU --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 19 +- GPU/GPUTracking/Base/GPUReconstruction.h | 181 +-------- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 73 +--- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 106 +---- GPU/GPUTracking/Base/GPUReconstructionIO.h | 183 +++++++++ .../Base/GPUReconstructionKernels.h | 15 +- .../Base/GPUReconstructionProcessing.cxx | 113 ++++++ .../Base/GPUReconstructionProcessing.h | 187 +++++++++ .../Base/cuda/GPUReconstructionCUDA.cu | 4 +- .../Base/cuda/GPUReconstructionCUDA.h | 2 +- GPU/GPUTracking/CMakeLists.txt | 2 + GPU/GPUTracking/Global/GPUChain.h | 2 +- .../Global/GPUChainTrackingClusterizer.cxx | 378 +++++++++--------- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 1 + .../Global/GPUChainTrackingSliceTracker.cxx | 182 +++++---- 15 files changed, 780 insertions(+), 668 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionIO.h create mode 100644 GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx create mode 100644 GPU/GPUTracking/Base/GPUReconstructionProcessing.h diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 481494f268494..4650b795da172 100644 
--- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -26,6 +26,7 @@ #include "GPUReconstruction.h" #include "GPUReconstructionIncludes.h" #include "GPUReconstructionThreading.h" +#include "GPUReconstructionIO.h" #include "GPUROOTDumpCore.h" #include "GPUConfigDump.h" #include "GPUChainTracking.h" @@ -118,15 +119,6 @@ void GPUReconstruction::GetITSTraits(std::unique_ptr* tr } } -void GPUReconstruction::SetNActiveThreads(int32_t n) -{ - mActiveHostKernelThreads = std::max(1, n < 0 ? mMaxHostThreads : std::min(n, mMaxHostThreads)); - mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n); - } -} - int32_t GPUReconstruction::getHostThreadIndex() { return std::max(0, tbb::this_task_arena::current_thread_index()); @@ -327,12 +319,12 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } else { mProcessingSettings.autoAdjustHostThreads = false; } - mMaxHostThreads = mActiveHostKernelThreads = mProcessingSettings.nHostThreads; + mMaxHostThreads = mProcessingSettings.nHostThreads; if (mMaster == nullptr) { mThreading = std::make_shared(); mThreading->control = std::make_unique(tbb::global_control::max_allowed_parallelism, mMaxHostThreads); mThreading->allThreads = std::make_unique(mMaxHostThreads); - mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); + mThreading->activeThreads = std::make_unique(mMaxHostThreads); } else { mThreading = mMaster->mThreading; } @@ -1181,8 +1173,3 @@ void GPUReconstruction::SetInputControl(void* ptr, size_t size) { mInputControl.set(ptr, size); } - -GPUReconstruction::GPUThreadContext::GPUThreadContext() = default; -GPUReconstruction::GPUThreadContext::~GPUThreadContext() = default; - -std::unique_ptr GPUReconstruction::GetThreadContext() { return std::unique_ptr(new GPUThreadContext); } diff --git 
a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 26e9d61b65efc..2834e36c6a569 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -57,12 +57,11 @@ class GPUROOTDumpCore; namespace gpu_reconstruction_kernels { struct deviceEvent; +class threadContext; } class GPUReconstruction { - friend class GPUChain; - protected: class LibraryLoader; // These must be the first members to ensure correct destructor order! std::shared_ptr mMyLib = nullptr; @@ -207,8 +206,6 @@ class GPUReconstruction void SetOutputControl(void* ptr, size_t size); void SetInputControl(void* ptr, size_t size); GPUOutputControl& OutputControl() { return mOutputControl; } - int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; } - void SetNActiveThreads(int32_t n); int32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } @@ -235,8 +232,10 @@ class GPUReconstruction double GetStatKernelTime() { return mStatKernelTime; } double GetStatWallTime() { return mStatWallTime; } + // Threading std::shared_ptr mThreading; static int32_t getHostThreadIndex(); + int32_t GetMaxBackendThreads() const { return mMaxBackendThreads; } protected: void AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool); @@ -258,13 +257,7 @@ class GPUReconstruction virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; // Management for GPU thread contexts - class GPUThreadContext - { - public: - GPUThreadContext(); - virtual ~GPUThreadContext(); - }; - virtual std::unique_ptr GetThreadContext(); + virtual std::unique_ptr GetThreadContext() = 0; // Private helpers for library loading static std::shared_ptr* GetLibraryInstance(DeviceType type, bool verbose); @@ -347,11 +340,10 @@ class GPUReconstruction std::shared_ptr mROOTDump; std::vector>* mOutputErrorCodes = nullptr; - int32_t mMaxBackendThreads = 0; // 
Maximum number of threads that may be running, on CPU or GPU - int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events - int32_t mNStreams = 1; // Number of parallel GPU streams - int32_t mMaxHostThreads = 0; // Maximum number of OMP threads - int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels + int32_t mMaxBackendThreads = 0; // Maximum number of threads that may be running, on CPU or GPU + int32_t mGPUStuck = 0; // Marks that the GPU is stuck, skip future events + int32_t mNStreams = 1; // Number of parallel GPU streams + int32_t mMaxHostThreads = 0; // Maximum number of OMP threads // Management for GPUProcessors struct ProcessorData { @@ -491,163 +483,6 @@ inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate) } } -template -inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type) -{ - int32_t count = getNIOTypeMultiplicity(type); - uint32_t numTotal = 0; - for (int32_t i = 0; i < count; i++) { - numTotal += num[i]; - } - if (numTotal == 0) { - return 0; - } - fwrite(&type, sizeof(type), 1, fp); - for (int32_t i = 0; i < count; i++) { - fwrite(&num[i], sizeof(num[i]), 1, fp); - if (num[i]) { - fwrite(entries[i], sizeof(*entries[i]), num[i], fp); - } - } - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Dumped %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); - } - return numTotal; -} - -template -inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr* mem, InOutPointerType type, T** nonConstPtrs) -{ - if (feof(fp)) { - return 0; - } - InOutPointerType inType; - size_t r, pos = ftell(fp); - r = fread(&inType, sizeof(inType), 1, fp); - if (r != 1 || inType != type) { - fseek(fp, pos, SEEK_SET); - return 0; - } - - int32_t count = getNIOTypeMultiplicity(type); - size_t numTotal = 0; - for (int32_t i = 0; i < count; i++) { - r = fread(&num[i], sizeof(num[i]), 1, fp); - T* m = 
AllocateIOMemoryHelper(num[i], entries[i], mem[i]); - if (nonConstPtrs) { - nonConstPtrs[i] = m; - } - if (num[i]) { - r = fread(m, sizeof(*entries[i]), num[i], fp); - } - numTotal += num[i]; - } - (void)r; - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); - } - return numTotal; -} - -template -inline void GPUReconstruction::DumpFlatObjectToFile(const T* obj, const char* file) -{ - FILE* fp = fopen(file, "w+b"); - if (fp == nullptr) { - return; - } - size_t size[2] = {sizeof(*obj), obj->getFlatBufferSize()}; - fwrite(size, sizeof(size[0]), 2, fp); - fwrite(obj, 1, size[0], fp); - fwrite(obj->getFlatBufferPtr(), 1, size[1], fp); - fclose(fp); -} - -template -inline std::unique_ptr GPUReconstruction::ReadFlatObjectFromFile(const char* file) -{ - FILE* fp = fopen(file, "rb"); - if (fp == nullptr) { - return nullptr; - } - size_t size[2] = {0}, r; - r = fread(size, sizeof(size[0]), 2, fp); - if (r == 0 || size[0] != sizeof(T)) { - fclose(fp); - GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size[0], (int64_t)sizeof(T)); - throw std::runtime_error("invalid size"); - } - std::unique_ptr retVal(new T); - retVal->destroy(); - char* buf = new char[size[1]]; // Not deleted as ownership is transferred to FlatObject - r = fread((void*)retVal.get(), 1, size[0], fp); - r = fread(buf, 1, size[1], fp); - fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld bytes from %s", (int64_t)r, file); - } - retVal->clearInternalBufferPtr(); - retVal->setActualBufferAddress(buf); - retVal->adoptInternalBuffer(buf); - return retVal; -} - -template -inline void GPUReconstruction::DumpStructToFile(const T* obj, const char* file) -{ - FILE* fp = fopen(file, "w+b"); - if (fp == nullptr) { - return; - } - size_t size = sizeof(*obj); - fwrite(&size, sizeof(size), 1, fp); - fwrite(obj, 1, size, fp); - fclose(fp); -} - -template -inline std::unique_ptr 
GPUReconstruction::ReadStructFromFile(const char* file) -{ - FILE* fp = fopen(file, "rb"); - if (fp == nullptr) { - return nullptr; - } - size_t size, r; - r = fread(&size, sizeof(size), 1, fp); - if (r == 0 || size != sizeof(T)) { - fclose(fp); - GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size, (int64_t)sizeof(T)); - throw std::runtime_error("invalid size"); - } - std::unique_ptr newObj(new T); - r = fread(newObj.get(), 1, size, fp); - fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld bytes from %s", (int64_t)r, file); - } - return newObj; -} - -template -inline int32_t GPUReconstruction::ReadStructFromFile(const char* file, T* obj) -{ - FILE* fp = fopen(file, "rb"); - if (fp == nullptr) { - return 1; - } - size_t size, r; - r = fread(&size, sizeof(size), 1, fp); - if (r == 0) { - fclose(fp); - return 1; - } - r = fread(obj, 1, size, fp); - fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Read %ld bytes from %s", (int64_t)r, file); - } - return 0; -} } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 4db49d62f21cc..19b33f4b03a17 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -54,23 +54,6 @@ GPUReconstructionCPU::~GPUReconstructionCPU() Exit(); // Needs to be identical to GPU backend bahavior in order to avoid calling abstract methods later in the destructor } -int32_t GPUReconstructionCPUBackend::getNKernelHostThreads(bool splitCores) -{ - int32_t nThreads = 0; - if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) { - if (splitCores) { - nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; - nThreads += (uint32_t)getHostThreadIndex() < mMaxHostThreads % mNActiveThreadsOuterLoop; - } else { - nThreads = mMaxHostThreads; - } - nThreads = std::max(1, nThreads); - } else { - nThreads = 
mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1; - } - return nThreads; -} - template inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { @@ -198,6 +181,8 @@ int32_t GPUReconstructionCPU::GetThread() int32_t GPUReconstructionCPU::InitDevice() { + mActiveHostKernelThreads = mMaxHostThreads; + mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { if (mDeviceMemorySize > mHostMemorySize) { @@ -339,60 +324,6 @@ void GPUReconstructionCPU::ResetDeviceProcessorTypes() } } -static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation - -GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step) -{ - while (timerFlag.test_and_set()) { - } - if (mTimers.size() <= id) { - mTimers.resize(id + 1); - } - if (mTimers[id] == nullptr) { - if (J >= 0) { - name += std::to_string(J); - } - mTimers[id].reset(new timerMeta{std::unique_ptr{new HighResTimer[num]}, name, num, type, 1u, step, (size_t)0}); - } else { - mTimers[id]->count++; - } - timerMeta* retVal = mTimers[id].get(); - timerFlag.clear(); - return retVal; -} - -GPUReconstructionCPU::timerMeta* GPUReconstructionCPU::getTimerById(uint32_t id, bool increment) -{ - timerMeta* retVal = nullptr; - while (timerFlag.test_and_set()) { - } - if (mTimers.size() > id && mTimers[id]) { - retVal = mTimers[id].get(); - retVal->count += increment; - } - timerFlag.clear(); - return retVal; -} - -uint32_t GPUReconstructionCPU::getNextTimerId() -{ - static std::atomic id{0}; - return id.fetch_add(1); -} - -uint32_t GPUReconstructionCPU::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) -{ - if (condition && 
mProcessingSettings.inKernelParallel != 1) { - mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; - } else { - mNActiveThreadsOuterLoop = 1; - } - if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); - } - return mNActiveThreadsOuterLoop; -} - void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream) { param().occupancyMap = mapHost; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index f82f481df6a63..7d50a564fedf8 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -15,10 +15,9 @@ #ifndef GPURECONSTRUCTIONICPU_H #define GPURECONSTRUCTIONICPU_H -#include "GPUReconstruction.h" +#include "GPUReconstructionProcessing.h" #include "GPUConstantMem.h" #include -#include "utils/timer.h" #include #include "GPUGeneralKernels.h" @@ -30,21 +29,19 @@ namespace o2 namespace gpu { -class GPUReconstructionCPUBackend : public GPUReconstruction +class GPUReconstructionCPUBackend : public GPUReconstructionProcessing { public: ~GPUReconstructionCPUBackend() override = default; protected: - GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} + GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing(cfg) {} template int32_t runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); template int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... 
args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); - uint32_t mNActiveThreadsOuterLoop = 1; - int32_t getNKernelHostThreads(bool splitCores); }; class GPUReconstructionCPU : public GPUReconstructionKernels @@ -65,23 +62,12 @@ class GPUReconstructionCPU : public GPUReconstructionKernels()); } - template - constexpr static const char* GetKernelName(); - virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } void ResetDeviceProcessorTypes(); - template - void AddGPUEvents(T*& events); int32_t RunChains() override; - HighResTimer& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)].timerTotal; } - HighResTimer& getGeneralStepTimer(GeneralStep step) { return mTimersGeneralSteps[getGeneralStepNum(step)]; } - - void SetNActiveThreadsOuterLoop(uint32_t f) { mNActiveThreadsOuterLoop = f; } - uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max); - void UpdateParamOccupancyMap(const uint32_t* mapHost, const uint32_t* mapGPU, uint32_t occupancyTotal, int32_t stream = -1); protected: @@ -142,43 +128,8 @@ class GPUReconstructionCPU : public GPUReconstructionKernels timer; - std::string name; - int32_t num; // How many parallel instances to sum up (CPU threads / GPU streams) - int32_t type; // 0 = kernel, 1 = CPU step, 2 = DMA transfer - uint32_t count; // How often was the timer queried - RecoStep step; // Which RecoStep is this - size_t memSize; // Memory size for memory bandwidth computation - }; - - struct RecoStepTimerMeta { - HighResTimer timerToGPU; - HighResTimer timerToHost; - HighResTimer timerTotal; - size_t bytesToGPU = 0; - size_t bytesToHost = 0; - uint32_t countToGPU = 0; - uint32_t countToHost = 0; - }; - - HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]; - - std::vector> mTimers; - RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]; - HighResTimer timerTotal; - template 
- HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true); - template - HighResTimer& getTimer(const char* name, int32_t num = -1); - - std::vector> mEvents; - private: size_t TransferMemoryResourcesHelper(GPUProcessor* proc, int32_t stream, bool all, bool toGPU); - uint32_t getNextTimerId(); - timerMeta* getTimerById(uint32_t id, bool increment = true); - timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step); }; template @@ -246,57 +197,6 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args return retVal; } -#define GPUCA_KRNL(x_class, ...) \ - template <> \ - constexpr const char* GPUReconstructionCPU::GetKernelName() \ - { \ - return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - -template -inline void GPUReconstructionCPU::AddGPUEvents(T*& events) -{ - mEvents.emplace_back(std::vector(sizeof(T) / sizeof(deviceEvent))); - events = (T*)mEvents.back().data(); -} - -template -HighResTimer& GPUReconstructionCPU::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize, bool increment) -{ - static int32_t id = getNextTimerId(); - timerMeta* timer = getTimerById(id, increment); - if (timer == nullptr) { - timer = insertTimer(id, GetKernelName(), -1, NSLICES, 0, step); - } - if (addMemorySize) { - timer->memSize += addMemorySize; - } - if (num < 0 || num >= timer->num) { - throw std::runtime_error("Invalid timer requested"); - } - return timer->timer[num]; -} - -template -HighResTimer& GPUReconstructionCPU::getTimer(const char* name, int32_t num) -{ - static int32_t id = getNextTimerId(); - timerMeta* timer = getTimerById(id); - if (timer == nullptr) { - int32_t max = std::max({mMaxHostThreads, mProcessingSettings.nStreams}); - timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); - } - if (num == -1) { - num = getHostThreadIndex(); - } - if (num < 0 || 
num >= timer->num) { - throw std::runtime_error("Invalid timer requested"); - } - return timer->timer[num]; -} - } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/Base/GPUReconstructionIO.h b/GPU/GPUTracking/Base/GPUReconstructionIO.h new file mode 100644 index 0000000000000..2208c15846e09 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionIO.h @@ -0,0 +1,183 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionIO.h +/// \author David Rohr + +#if !defined(GPURECONSTRUCTIONIO_H) +#define GPURECONSTRUCTIONIO_H + +#include "GPUReconstruction.h" + +namespace o2::gpu +{ + +template +inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type) +{ + int32_t count = getNIOTypeMultiplicity(type); + uint32_t numTotal = 0; + for (int32_t i = 0; i < count; i++) { + numTotal += num[i]; + } + if (numTotal == 0) { + return 0; + } + fwrite(&type, sizeof(type), 1, fp); + for (int32_t i = 0; i < count; i++) { + fwrite(&num[i], sizeof(num[i]), 1, fp); + if (num[i]) { + fwrite(entries[i], sizeof(*entries[i]), num[i], fp); + } + } + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Dumped %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); + } + return numTotal; +} + +template +inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr* mem, InOutPointerType type, T** nonConstPtrs) +{ + if (feof(fp)) { + return 0; + } + InOutPointerType inType; + 
size_t r, pos = ftell(fp); + r = fread(&inType, sizeof(inType), 1, fp); + if (r != 1 || inType != type) { + fseek(fp, pos, SEEK_SET); + return 0; + } + + int32_t count = getNIOTypeMultiplicity(type); + size_t numTotal = 0; + for (int32_t i = 0; i < count; i++) { + r = fread(&num[i], sizeof(num[i]), 1, fp); + T* m = AllocateIOMemoryHelper(num[i], entries[i], mem[i]); + if (nonConstPtrs) { + nonConstPtrs[i] = m; + } + if (num[i]) { + r = fread(m, sizeof(*entries[i]), num[i], fp); + } + numTotal += num[i]; + } + (void)r; + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); + } + return numTotal; +} + +template +inline void GPUReconstruction::DumpFlatObjectToFile(const T* obj, const char* file) +{ + FILE* fp = fopen(file, "w+b"); + if (fp == nullptr) { + return; + } + size_t size[2] = {sizeof(*obj), obj->getFlatBufferSize()}; + fwrite(size, sizeof(size[0]), 2, fp); + fwrite(obj, 1, size[0], fp); + fwrite(obj->getFlatBufferPtr(), 1, size[1], fp); + fclose(fp); +} + +template +inline std::unique_ptr GPUReconstruction::ReadFlatObjectFromFile(const char* file) +{ + FILE* fp = fopen(file, "rb"); + if (fp == nullptr) { + return nullptr; + } + size_t size[2] = {0}, r; + r = fread(size, sizeof(size[0]), 2, fp); + if (r == 0 || size[0] != sizeof(T)) { + fclose(fp); + GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size[0], (int64_t)sizeof(T)); + throw std::runtime_error("invalid size"); + } + std::unique_ptr retVal(new T); + retVal->destroy(); + char* buf = new char[size[1]]; // Not deleted as ownership is transferred to FlatObject + r = fread((void*)retVal.get(), 1, size[0], fp); + r = fread(buf, 1, size[1], fp); + fclose(fp); + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld bytes from %s", (int64_t)r, file); + } + retVal->clearInternalBufferPtr(); + retVal->setActualBufferAddress(buf); + retVal->adoptInternalBuffer(buf); + return retVal; +} + +template +inline void 
GPUReconstruction::DumpStructToFile(const T* obj, const char* file) +{ + FILE* fp = fopen(file, "w+b"); + if (fp == nullptr) { + return; + } + size_t size = sizeof(*obj); + fwrite(&size, sizeof(size), 1, fp); + fwrite(obj, 1, size, fp); + fclose(fp); +} + +template +inline std::unique_ptr GPUReconstruction::ReadStructFromFile(const char* file) +{ + FILE* fp = fopen(file, "rb"); + if (fp == nullptr) { + return nullptr; + } + size_t size, r; + r = fread(&size, sizeof(size), 1, fp); + if (r == 0 || size != sizeof(T)) { + fclose(fp); + GPUError("ERROR reading %s, invalid size: %ld (%ld expected)", file, (int64_t)size, (int64_t)sizeof(T)); + throw std::runtime_error("invalid size"); + } + std::unique_ptr newObj(new T); + r = fread(newObj.get(), 1, size, fp); + fclose(fp); + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld bytes from %s", (int64_t)r, file); + } + return newObj; +} + +template +inline int32_t GPUReconstruction::ReadStructFromFile(const char* file, T* obj) +{ + FILE* fp = fopen(file, "rb"); + if (fp == nullptr) { + return 1; + } + size_t size, r; + r = fread(&size, sizeof(size), 1, fp); + if (r == 0) { + fclose(fp); + return 1; + } + r = fread(obj, 1, size, fp); + fclose(fp); + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("Read %ld bytes from %s", (int64_t)r, file); + } + return 0; +} + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index d5d329d55ad4a..3ed2ef1a95109 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -24,20 +24,6 @@ namespace gpu namespace gpu_reconstruction_kernels { -struct deviceEvent { - constexpr deviceEvent() = default; - constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; - template - void set(T val) { v = reinterpret_cast(val); } - template - T& get() { return reinterpret_cast(v); } - template - T* getEventList() { return 
reinterpret_cast(this); } - bool isSet() const { return v; } - - private: - void* v = nullptr; // We use only pointers anyway, and since cl_event and cudaEvent_t and hipEvent_t are actually pointers, we can cast them to deviceEvent (void*) this way. -}; template struct classArgument { @@ -95,6 +81,7 @@ struct krnlSetupArgs : public gpu_reconstruction_kernels::classArgument { const krnlSetupTime s; std::tuple sizeof(void*)), const Args&, const Args>::type...> v; }; + } // namespace gpu_reconstruction_kernels template diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx new file mode 100644 index 0000000000000..7909bc0720430 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -0,0 +1,113 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionProcessing.cxx +/// \author David Rohr + +#include "GPUReconstructionProcessing.h" +#include "GPUReconstructionThreading.h" + +using namespace o2::gpu; + +int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) +{ + int32_t nThreads = 0; + if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) { + if (splitCores) { + nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; + nThreads += (uint32_t)getHostThreadIndex() < mMaxHostThreads % mNActiveThreadsOuterLoop; + } else { + nThreads = mMaxHostThreads; + } + nThreads = std::max(1, nThreads); + } else { + nThreads = mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1; + } + return nThreads; +} + +void GPUReconstructionProcessing::SetNActiveThreads(int32_t n) +{ + mActiveHostKernelThreads = std::max(1, n < 0 ? mMaxHostThreads : std::min(n, mMaxHostThreads)); + mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); + if (mProcessingSettings.debugLevel >= 3) { + GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n); + } +} + +void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function lambda) +{ + tbb::task_arena(SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads)).execute([&] { + tbb::parallel_for(0, nThreads, lambda, tbb::simple_partitioner()); + }); +} + +static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation + +GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step) +{ + while (timerFlag.test_and_set()) { + } + if (mTimers.size() <= id) { + mTimers.resize(id + 1); + } + if (mTimers[id] == nullptr) { + if (J >= 0) { + name += std::to_string(J); + } + mTimers[id].reset(new timerMeta{std::unique_ptr{new HighResTimer[num]}, 
name, num, type, 1u, step, (size_t)0}); + } else { + mTimers[id]->count++; + } + timerMeta* retVal = mTimers[id].get(); + timerFlag.clear(); + return retVal; +} + +GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::getTimerById(uint32_t id, bool increment) +{ + timerMeta* retVal = nullptr; + while (timerFlag.test_and_set()) { + } + if (mTimers.size() > id && mTimers[id]) { + retVal = mTimers[id].get(); + retVal->count += increment; + } + timerFlag.clear(); + return retVal; +} + +uint32_t GPUReconstructionProcessing::getNextTimerId() +{ + static std::atomic id{0}; + return id.fetch_add(1); +} + +uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) +{ + if (condition && mProcessingSettings.inKernelParallel != 1) { + mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; + } else { + mNActiveThreadsOuterLoop = 1; + } + if (mProcessingSettings.debugLevel >= 5) { + printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); + } + return mNActiveThreadsOuterLoop; +} + +std::unique_ptr GPUReconstructionProcessing::GetThreadContext() +{ + return std::make_unique(); +} + +gpu_reconstruction_kernels::threadContext::threadContext() = default; +gpu_reconstruction_kernels::threadContext::~threadContext() = default; diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h new file mode 100644 index 0000000000000..b064a43697a67 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -0,0 +1,187 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionProcessing.h +/// \author David Rohr + +#if !defined(GPURECONSTRUCTIONPROCESSING_H) +#define GPURECONSTRUCTIONPROCESSING_H + +#include "GPUReconstruction.h" +#include "GPUReconstructionKernelIncludes.h" + +#include "utils/timer.h" +#include + +namespace o2::gpu +{ + +namespace gpu_reconstruction_kernels +{ +struct deviceEvent { + constexpr deviceEvent() = default; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; + template + void set(T val) + { + v = reinterpret_cast(val); + } + template + T& get() + { + return reinterpret_cast(v); + } + template + T* getEventList() + { + return reinterpret_cast(this); + } + bool isSet() const { return v; } + + private: + void* v = nullptr; // We use only pointers anyway, and since cl_event and cudaEvent_t and hipEvent_t are actually pointers, we can cast them to deviceEvent (void*) this way. 
+}; + +class threadContext +{ + public: + threadContext(); + virtual ~threadContext(); +}; + +} // namespace gpu_reconstruction_kernels + +class GPUReconstructionProcessing : public GPUReconstruction +{ + public: + ~GPUReconstructionProcessing() override = default; + + // Threading + int32_t getNKernelHostThreads(bool splitCores); + uint32_t getNActiveThreadsOuterLoop() const { return mNActiveThreadsOuterLoop; } + void SetNActiveThreadsOuterLoop(uint32_t f) { mNActiveThreadsOuterLoop = f; } + uint32_t SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max); + void runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function lambda); + void SetNActiveThreads(int32_t n); + + // Interface to query name of a kernel + template + constexpr static const char* GetKernelName(); + + // Public queries for timers + HighResTimer& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)].timerTotal; } + HighResTimer& getGeneralStepTimer(GeneralStep step) { return mTimersGeneralSteps[getGeneralStepNum(step)]; } + + template + void AddGPUEvents(T*& events); + + virtual std::unique_ptr GetThreadContext() override; + + protected: + GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} + using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + + int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels + uint32_t mNActiveThreadsOuterLoop = 1; // Number of threads currently running an outer loop + + std::vector> mEvents; + + // Timer related stuff + struct timerMeta { + std::unique_ptr timer; + std::string name; + int32_t num; // How many parallel instances to sum up (CPU threads / GPU streams) + int32_t type; // 0 = kernel, 1 = CPU step, 2 = DMA transfer + uint32_t count; // How often was the timer queried + RecoStep step; // Which RecoStep is this + size_t memSize; // Memory size for memory bandwidth computation + }; + + struct RecoStepTimerMeta { + HighResTimer 
timerToGPU; + HighResTimer timerToHost; + HighResTimer timerTotal; + size_t bytesToGPU = 0; + size_t bytesToHost = 0; + uint32_t countToGPU = 0; + uint32_t countToHost = 0; + }; + + HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]; + + std::vector> mTimers; + RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]; + HighResTimer timerTotal; + template + HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true); + template + HighResTimer& getTimer(const char* name, int32_t num = -1); + + private: + uint32_t getNextTimerId(); + timerMeta* getTimerById(uint32_t id, bool increment = true); + timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step); +}; + +template +inline void GPUReconstructionProcessing::AddGPUEvents(T*& events) +{ + mEvents.emplace_back(std::vector(sizeof(T) / sizeof(deviceEvent))); + events = (T*)mEvents.back().data(); +} + +template +HighResTimer& GPUReconstructionProcessing::getKernelTimer(RecoStep step, int32_t num, size_t addMemorySize, bool increment) +{ + static int32_t id = getNextTimerId(); + timerMeta* timer = getTimerById(id, increment); + if (timer == nullptr) { + timer = insertTimer(id, GetKernelName(), -1, NSLICES, 0, step); + } + if (addMemorySize) { + timer->memSize += addMemorySize; + } + if (num < 0 || num >= timer->num) { + throw std::runtime_error("Invalid timer requested"); + } + return timer->timer[num]; +} + +template +HighResTimer& GPUReconstructionProcessing::getTimer(const char* name, int32_t num) +{ + static int32_t id = getNextTimerId(); + timerMeta* timer = getTimerById(id); + if (timer == nullptr) { + int32_t max = std::max({mMaxHostThreads, mProcessingSettings.nStreams}); + timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); + } + if (num == -1) { + num = getHostThreadIndex(); + } + if (num < 0 || num >= timer->num) { + throw std::runtime_error("Invalid timer requested"); + } + 
return timer->timer[num]; +} + +#define GPUCA_KRNL(x_class, ...) \ + template <> \ + constexpr const char* GPUReconstructionProcessing::GetKernelName() \ + { \ + return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 20ce23b578d84..16a13f07a7bf9 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -548,10 +548,10 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } -std::unique_ptr GPUReconstructionCUDA::GetThreadContext() +std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { GPUFailedMsg(cudaSetDevice(mDeviceId)); - return std::unique_ptr(new GPUThreadContext); + return GPUReconstructionProcessing::GetThreadContext(); } void GPUReconstructionCUDA::SynchronizeGPU() { GPUFailedMsg(cudaDeviceSynchronize()); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 49142d409c5ae..4d1865c0fd0e7 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -71,7 +71,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels GetThreadContext() override; + std::unique_ptr GetThreadContext() override; void SynchronizeGPU() override; int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override; void SynchronizeStream(int32_t stream) override; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 3e738fb6df5cb..f9c3aacdb00cb 100644 --- 
a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -71,6 +71,7 @@ set(SRCS_NO_CINT DataTypes/GPUNewCalibValues.cxx DataTypes/GPUTPCClusterOccupancyMap.cxx Base/GPUReconstruction.cxx + Base/GPUReconstructionProcessing.cxx Base/GPUReconstructionCPU.cxx Base/GPUProcessor.cxx Base/GPUMemoryResource.cxx @@ -106,6 +107,7 @@ set(HDRS_INSTALL Base/GPUParamRTC.h Base/GPUReconstructionIncludes.h Base/GPUReconstructionThreading.h + Base/GPUReconstructionIO.h Base/GPUReconstructionIncludesITS.h Base/GPUReconstructionKernelMacros.h Base/GPUReconstructionKernels.h diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 0981fea43810a..4b2778735ce3d 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -101,7 +101,7 @@ class GPUChain } inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); } inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); } - virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } + virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } inline void SynchronizeGPU() { mRec->SynchronizeGPU(); } inline void ReleaseEvent(deviceEvent ev, bool doGPU = true) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 6ca645808c5bd..364fa4918257c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -35,8 +35,6 @@ #include "utils/strtag.h" -#include - #ifndef GPUCA_NO_VC #include #endif @@ -676,230 +674,222 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSliceBase, iSliceBase + 
GetProcessingSettings().nTPCClustererLanes - 1); } - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - if (doGPU && fragment.index != 0) { - SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished - } - - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; - clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0; - clusterer.mPmemory->fragment = fragment; - - if (mIOPtrs.tpcPackedDigits) { - bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS; - bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels; - auto* inDigits = mIOPtrs.tpcPackedDigits; - size_t numDigits = inDigits->nTPCDigits[iSlice]; - if (setDigitsOnGPU) { - GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSlice], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); - } - if (setDigitsOnHost) { - clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSlice]); // TODO: Needs fixing (invalid const cast) - } - clusterer.mPmemory->counters.nDigits = numDigits; - } + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + if (doGPU && fragment.index != 0) { + SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished + } - if (mIOPtrs.tpcZS) { - if (mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSlice].first; - clusterer.mPmemory->counters.nPagesSubslice = mCFContext->nextPos[iSlice].second; - } else { - clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubslice = 0; - } + uint32_t iSlice = iSliceBase + lane; + GPUTPCClusterFinder& clusterer = 
processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0; + clusterer.mPmemory->fragment = fragment; + + if (mIOPtrs.tpcPackedDigits) { + bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS; + bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels; + auto* inDigits = mIOPtrs.tpcPackedDigits; + size_t numDigits = inDigits->nTPCDigits[iSlice]; + if (setDigitsOnGPU) { + GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSlice], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); } - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - - using ChargeMapType = decltype(*clustererShadow.mPchargeMap); - using PeakMapType = decltype(*clustererShadow.mPpeakMap); - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
- runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); - if (fragment.index == 0) { - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); + if (setDigitsOnHost) { + clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSlice]); // TODO: Needs fixing (invalid const cast) } - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); + clusterer.mPmemory->counters.nDigits = numDigits; + } - if (doGPU) { - if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane); - SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane); - } - SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory + if (mIOPtrs.tpcZS) { + if (mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { + clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSlice].first; + clusterer.mPmemory->counters.nPagesSubslice = mCFContext->nextPos[iSlice].second; + } else { + clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubslice = 0; } + } + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + + using ChargeMapType = decltype(*clustererShadow.mPchargeMap); + using PeakMapType = decltype(*clustererShadow.mPpeakMap); + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
+ runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); + if (fragment.index == 0) { + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); + } + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); - if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSlice] || mCFContext->zsVersion == -1)) { - clusterer.mPmemory->counters.nPositions = 0; - return; - } - if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice] == 0) { - clusterer.mPmemory->counters.nPositions = 0; - return; + if (doGPU) { + if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane); + SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane); } + SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory + } - if (propagateMCLabels && fragment.index == 0) { - clusterer.PrepareMC(); - clusterer.mPinputLabels = digitsMC->v[iSlice]; - if (clusterer.mPinputLabels == nullptr) { - GPUFatal("MC label container missing, sector %d", iSlice); - } - if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]) { - GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice], (int32_t)clusterer.mPinputLabels->getIndexedSize()); - } - } + if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSlice] || mCFContext->zsVersion == -1)) { + clusterer.mPmemory->counters.nPositions = 0; + return; + } + if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice] == 0) { 
+ clusterer.mPmemory->counters.nPositions = 0; + return; + } - if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSlice) { - if (not mIOPtrs.tpcZS) { - runKernel({GetGrid(1, lane), {iSlice}}, mIOPtrs.tpcZS == nullptr); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } else if (propagateMCLabels) { - runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, mIOPtrs.tpcZS == nullptr); - TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } + if (propagateMCLabels && fragment.index == 0) { + clusterer.PrepareMC(); + clusterer.mPinputLabels = digitsMC->v[iSlice]; + if (clusterer.mPinputLabels == nullptr) { + GPUFatal("MC label container missing, sector %d", iSlice); } + if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]) { + GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice], (int32_t)clusterer.mPinputLabels->getIndexedSize()); + } + } - if (mIOPtrs.tpcZS) { - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) : 0); - uint32_t nBlocks = doGPU ? 
clusterer.mPmemory->counters.nPagesSubslice : GPUTrackingInOutZS::NENDPOINTS; - - (void)tpcTimeBinCut; // TODO: To be used in decoding kernels - switch (mCFContext->zsVersion) { - default: - GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion); - break; - case ZSVersionRowBased10BitADC: - case ZSVersionRowBased12BitADC: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - case ZSVersionLinkBasedWithMeta: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - case ZSVersionDenseLinkBased: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); - break; - } + if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSlice) { + if (not mIOPtrs.tpcZS) { + runKernel({GetGrid(1, lane), {iSlice}}, mIOPtrs.tpcZS == nullptr); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - } // clang-format off - }, tbb::simple_partitioner()); // clang-format on - }); - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - uint32_t iSlice = iSliceBase + lane; - if (doGPU) { - SynchronizeStream(lane); + } else if (propagateMCLabels) { + runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, mIOPtrs.tpcZS == nullptr); + TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); } - if (mIOPtrs.tpcZS) { - CfFragment f = fragment.next(); - int32_t nextSlice = iSlice; - if (f.isEnd()) { - nextSlice += GetProcessingSettings().nTPCClustererLanes; - f = mCFContext->fragmentFirst; - } - if (nextSlice < NSLICES && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSlice] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { - mCFContext->nextPos[nextSlice] = RunTPCClusterizer_transferZS(nextSlice, f, GetProcessingSettings().nTPCClustererLanes + lane); - } - } - GPUTPCClusterFinder& clusterer = 
processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (clusterer.mPmemory->counters.nPositions == 0) { - return; + } + + if (mIOPtrs.tpcZS) { + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) : 0); + uint32_t nBlocks = doGPU ? clusterer.mPmemory->counters.nPagesSubslice : GPUTrackingInOutZS::NENDPOINTS; + + (void)tpcTimeBinCut; // TODO: To be used in decoding kernels + switch (mCFContext->zsVersion) { + default: + GPUFatal("Data with invalid TPC ZS mode (%d) received", mCFContext->zsVersion); + break; + case ZSVersionRowBased10BitADC: + case ZSVersionRowBased12BitADC: + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + break; + case ZSVersionLinkBasedWithMeta: + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + break; + case ZSVersionDenseLinkBased: + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + break; } - if (!mIOPtrs.tpcZS) { - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + } // clang-format off + }); + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + uint32_t iSlice = iSliceBase + lane; + if (doGPU) { + SynchronizeStream(lane); + } + if (mIOPtrs.tpcZS) { + CfFragment f = fragment.next(); + int32_t nextSlice = iSlice; + if (f.isEnd()) { + nextSlice += GetProcessingSettings().nTPCClustererLanes; + f = mCFContext->fragmentFirst; } - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { - clusterer.DumpChargeMap(*mDebugFile, "Charges"); + if (nextSlice < NSLICES && 
mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSlice] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { + mCFContext->nextPos[nextSlice] = RunTPCClusterizer_transferZS(nextSlice, f, GetProcessingSettings().nTPCClustererLanes + lane); } + } + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + if (clusterer.mPmemory->counters.nPositions == 0) { + return; + } + if (!mIOPtrs.tpcZS) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + } + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { + clusterer.DumpChargeMap(*mDebugFile, "Charges"); + } - if (propagateMCLabels) { - runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); - } + if (propagateMCLabels) { + runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); + } - bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); - checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? fragment.index == 0 : true); - checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter; + bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); + checkForNoisyPads &= (rec()->GetParam().rec.tpc.noisyPadsQuickCheck ? 
fragment.index == 0 : true); + checkForNoisyPads &= !GetProcessingSettings().disableTPCNoisyPadFilter; - if (checkForNoisyPads) { - int32_t nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline; + if (checkForNoisyPads) { + int32_t nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline; - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}); - } + runKernel({GetGridBlk(nBlocks, lane), {iSlice}}); + } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { - clusterer.DumpPeakMap(*mDebugFile, "Peaks"); - } + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { + clusterer.DumpPeakMap(*mDebugFile, "Peaks"); + } - RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off - }, tbb::simple_partitioner()); // clang-format on + RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off }); - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (doGPU) { - SynchronizeStream(lane); - } - if (clusterer.mPmemory->counters.nPeaks == 0) { - return; - } - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { - clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); - } + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + uint32_t iSlice = iSliceBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + if (doGPU) { + SynchronizeStream(lane); + } + if (clusterer.mPmemory->counters.nPeaks == 0) { + return; + } + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); + if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { + clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); + } - RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off - }, tbb::simple_partitioner()); // clang-format on + RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off }); - 
tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, maxLane)).execute([&] { - tbb::parallel_for(0, maxLane, [&](auto lane) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; - if (doGPU) { - SynchronizeStream(lane); - } + mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { + uint32_t iSlice = iSliceBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + if (doGPU) { + SynchronizeStream(lane); + } - if (fragment.index == 0) { - deviceEvent* waitEvent = nullptr; - if (transferRunning[lane] == 1) { - waitEvent = &mEvents->stream[lane]; - transferRunning[lane] = 2; - } - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow)); + if (fragment.index == 0) { + deviceEvent* waitEvent = nullptr; + if (transferRunning[lane] == 1) { + waitEvent = &mEvents->stream[lane]; + transferRunning[lane] = 2; } + runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding), krnlRunRangeNone, {nullptr, waitEvent}}, clustererShadow.mPclusterInRow, GPUCA_ROW_COUNT * sizeof(*clustererShadow.mPclusterInRow)); + } - if (clusterer.mPmemory->counters.nClusters == 0) { - return; - } + if (clusterer.mPmemory->counters.nClusters == 0) { + return; + } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, 
&GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSlice}}, 0); - if (doGPU && propagateMCLabels) { - TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); - if (doGPU) { - SynchronizeStream(lane); - } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 1); - } - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSlice, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSlice}}, 0); + if (doGPU && propagateMCLabels) { + TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); + if (doGPU) { + SynchronizeStream(lane); } + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 1); + } + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSlice, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); + } - TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); - laneHasData[lane] = true; - // Include clusters in default debug mask, exclude other debug output by default - DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off - }, tbb::simple_partitioner()); // clang-format on + TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); + laneHasData[lane] = true; + // Include clusters in default 
debug mask, exclude other debug output by default + DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off }); mRec->SetNActiveThreadsOuterLoop(1); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 229469af801f6..c159e333a3b18 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUReconstructionIO.h" #include "GPUTPCClusterData.h" #include "GPUTPCSliceOutput.h" #include "GPUTPCSliceOutCluster.h" diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx index cab025b03e8b6..760d2cf2c5f40 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx @@ -22,8 +22,6 @@ #include "utils/strtag.h" #include -#include - using namespace o2::gpu; int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput) @@ -156,108 +154,107 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() int32_t streamMap[NSLICES]; bool error = false; - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, NSLICES)).execute([&] { - tbb::parallel_for(0, NSLICES, [&](auto iSlice) { - GPUTPCTracker& trk = processors()->tpcTrackers[iSlice]; - GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSlice] : trk; - int32_t useStream = (iSlice % mRec->NStreams()); + mRec->runParallelOuterLoop(doGPU, NSLICES, [&](uint32_t iSlice) { + GPUTPCTracker& trk = processors()->tpcTrackers[iSlice]; + GPUTPCTracker& trkShadow = doGPU ? 
processorsShadow()->tpcTrackers[iSlice] : trk; + int32_t useStream = (iSlice % mRec->NStreams()); - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Creating Slice Data (Slice %d)", iSlice); - } - if (doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); - streamInit[useStream] = true; - } else { - if (ReadEvent(iSlice, 0)) { - GPUError("Error reading event"); - error = 1; - return; - } - } - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); - } - if (!doGPU && trk.CheckEmptySlice() && GetProcessingSettings().debugLevel == 0) { + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Creating Slice Data (Slice %d)", iSlice); + } + if (doGPU) { + TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); + streamInit[useStream] = true; + } else { + if (ReadEvent(iSlice, 0)) { + GPUError("Error reading event"); + error = 1; return; } + } + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); + } + if (!doGPU && trk.CheckEmptySlice() && GetProcessingSettings().debugLevel == 0) { + return; + } - if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Slice " << iSlice << "/" << NSLICES << std::endl; - if (GetProcessingSettings().debugMask & 1) { - if (doGPU) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); - } - trk.DumpSliceData(*mDebugFile); + if (GetProcessingSettings().debugLevel >= 6) { + *mDebugFile << "\n\nReconstruction: Slice " << iSlice << "/" << NSLICES << std::endl; + if (GetProcessingSettings().debugMask & 1) { + if (doGPU) { + TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); } + trk.DumpSliceData(*mDebugFile); } + } - // Initialize temporary memory where needed - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Slice Data to GPU and initializing temporary memory"); - } - runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); + // Initialize temporary memory where needed + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Copying Slice Data to GPU and initializing temporary memory"); + } + runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - if (!doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); // Copy Data to GPU Global Memory - } - if (GPUDebug("Initialization (3)", useStream)) { - throw std::runtime_error("memcpy failure"); - } + if (!doGPU) { + 
TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); // Copy Data to GPU Global Memory + } + if (GPUDebug("Initialization (3)", useStream)) { + throw std::runtime_error("memcpy failure"); + } - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); - streamInit[useStream] = true; + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); + streamInit[useStream] = true; - if (GetProcessingSettings().keepDisplayMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); - memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); - if (GetProcessingSettings().debugMask & 2) { - trk.DumpLinks(*mDebugFile, 0); - } + if (GetProcessingSettings().keepDisplayMemory) { + TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); + memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); + if (GetProcessingSettings().debugMask & 2) { + trk.DumpLinks(*mDebugFile, 0); } + } - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSlice}}); - DoDebugAndDump(RecoStep::TPCSliceTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSlice}}); + DoDebugAndDump(RecoStep::TPCSliceTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSlice}}); + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSlice}}); #ifdef GPUCA_SORT_STARTHITS_GPU - if (doGPU) { - runKernel({GetGridAuto(useStream), {iSlice}}); - } + if (doGPU) { + runKernel({GetGridAuto(useStream), {iSlice}}); + } #endif - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); - } - DoDebugAndDump(RecoStep::TPCSliceTracking, 32, trk, 
&GPUTPCTracker::DumpStartHits, *mDebugFile); + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSlice}}); + } + DoDebugAndDump(RecoStep::TPCSliceTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); - if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - trk.UpdateMaxData(); - AllocateRegisteredMemory(trk.MemoryResTracklets()); - AllocateRegisteredMemory(trk.MemoryResOutput()); - } + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + trk.UpdateMaxData(); + AllocateRegisteredMemory(trk.MemoryResTracklets()); + AllocateRegisteredMemory(trk.MemoryResOutput()); + } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - DoDebugAndDump(RecoStep::TPCSliceTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - trk.DumpHitWeights(*mDebugFile); - } + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { + runKernel({GetGridAuto(useStream), {iSlice}}); + DoDebugAndDump(RecoStep::TPCSliceTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + trk.DumpHitWeights(*mDebugFile); } + } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); - } - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, 
trk.MemoryResCommon(), useStream, &mEvents->slice[iSlice]); - streamMap[iSlice] = useStream; - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Slice %u, Number of tracks: %d", iSlice, *trk.NTracks()); - } - DoDebugAndDump(RecoStep::TPCSliceTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); - } }, tbb::simple_partitioner()); + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { + runKernel({GetGridAuto(useStream), {iSlice}}); + runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSlice}}); + } + TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, trk.MemoryResCommon(), useStream, &mEvents->slice[iSlice]); + streamMap[iSlice] = useStream; + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Slice %u, Number of tracks: %d", iSlice, *trk.NTracks()); + } + DoDebugAndDump(RecoStep::TPCSliceTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + } }); mRec->SetNActiveThreadsOuterLoop(1); if (error) { @@ -421,14 +418,13 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } } else { mSliceSelectorReady = NSLICES; - tbb::task_arena(mRec->SetAndGetNActiveThreadsOuterLoop(!doGPU, NSLICES)).execute([&] { - tbb::parallel_for(0, NSLICES, [&](auto iSlice) { - if (param().rec.tpc.extrapolationTracking) { - ExtrapolationTracking(iSlice, 0); - } - if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { - WriteOutput(iSlice, 0); - } }, tbb::simple_partitioner()); + mRec->runParallelOuterLoop(doGPU, NSLICES, [&](uint32_t iSlice) { + if (param().rec.tpc.extrapolationTracking) { + ExtrapolationTracking(iSlice, 0); + } + if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { + WriteOutput(iSlice, 0); + } }); mRec->SetNActiveThreadsOuterLoop(1); } From 38810787ecc4eb9010dabdd15d2677b05b9af9af Mon Sep 17 00:00:00 2001 From: David 
Rohr Date: Sat, 22 Feb 2025 14:21:59 +0100 Subject: [PATCH 0105/1914] GPU TPC: Rename slice to sector --- .../TPC/monitor/src/SimpleEventDisplayGUI.cxx | 2 +- .../reconstruction/test/testGPUCATracking.cxx | 2 +- .../TPC/workflow/src/EntropyEncoderSpec.cxx | 8 +- GPU/GPUTracking/Base/GPUConstantMem.h | 4 +- GPU/GPUTracking/Base/GPUParam.cxx | 26 +- GPU/GPUTracking/Base/GPUParam.h | 26 +- GPU/GPUTracking/Base/GPUParam.inc | 14 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 16 +- GPU/GPUTracking/Base/GPUReconstruction.h | 12 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 4 +- .../Base/GPUReconstructionConvert.cxx | 52 +- .../Base/GPUReconstructionConvert.h | 2 +- .../Base/GPUReconstructionDeviceBase.cxx | 2 +- .../Base/GPUReconstructionDeviceBase.h | 2 +- .../Base/GPUReconstructionIncludes.h | 4 +- .../Base/GPUReconstructionKernelMacros.h | 16 +- .../Base/GPUReconstructionProcessing.h | 2 +- .../Base/GPUReconstructionTimeframe.cxx | 68 +-- .../Base/GPUReconstructionTimeframe.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 8 +- GPU/GPUTracking/CMakeLists.txt | 62 +-- .../GPUTPCClusterStatistics.cxx | 26 +- .../DataCompression/GPUTPCClusterStatistics.h | 10 +- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../DataCompression/GPUTPCCompression.h | 2 +- .../GPUTPCCompressionKernels.cxx | 122 ++-- .../GPUTPCCompressionTrackModel.cxx | 2 +- .../GPUTPCCompressionTrackModel.h | 16 +- .../DataCompression/GPUTPCDecompression.cxx | 10 +- .../DataCompression/GPUTPCDecompression.h | 2 +- .../GPUTPCDecompressionKernels.cxx | 40 +- .../GPUTPCDecompressionKernels.h | 4 +- .../TPCClusterDecompressionCore.inc | 14 +- .../TPCClusterDecompressor.cxx | 12 +- .../DataCompression/TPCClusterDecompressor.h | 2 +- ...andalone-cluster-dump-entropy-analysed.cxx | 66 +-- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 40 +- .../DataTypes/GPUMemorySizeScalers.h | 10 +- .../DataTypes/GPUTPCClusterOccupancyMap.h | 2 +- .../DataTypes/GPUTPCGMMergedTrackHit.h | 2 +- 
GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 20 +- GPU/GPUTracking/DataTypes/TPCPadBitMap.h | 2 +- GPU/GPUTracking/DataTypes/TPCPadGainCalib.h | 6 +- .../Definitions/GPUDefConstantsAndSettings.h | 8 +- .../Definitions/GPUDefGPUParameters.h | 56 +- GPU/GPUTracking/Definitions/GPUSettingsList.h | 12 +- GPU/GPUTracking/GPUTrackingLinkDef_O2.h | 4 +- GPU/GPUTracking/Global/GPUChain.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 46 +- GPU/GPUTracking/Global/GPUChainTracking.h | 44 +- .../Global/GPUChainTrackingClusterizer.cxx | 368 ++++++------- .../Global/GPUChainTrackingCompression.cxx | 30 +- .../GPUChainTrackingDebugAndProfiling.cxx | 10 +- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 60 +- .../Global/GPUChainTrackingMerger.cxx | 76 +-- ....cxx => GPUChainTrackingSectorTracker.cxx} | 300 +++++----- .../Global/GPUChainTrackingTransformation.cxx | 28 +- GPU/GPUTracking/Global/GPUErrorCodes.h | 8 +- .../Global/GPUTrackingInputProvider.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 520 +++++++++--------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 94 ++-- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 94 ++-- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 28 +- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h | 8 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 6 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 4 +- ...SliceTrack.cxx => GPUTPCGMSectorTrack.cxx} | 54 +- ...PCGMSliceTrack.h => GPUTPCGMSectorTrack.h} | 38 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 146 ++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 20 +- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 2 +- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 32 +- .../Merger/macros/checkPropagation.C | 56 +- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- .../GPUTPCBaseTrackParam.h | 2 +- .../GPUTPCClusterData.h | 0 .../GPUTPCCreateOccupancyMap.cxx | 18 +- .../GPUTPCCreateOccupancyMap.h | 2 +- .../GPUTPCCreateTrackingData.cxx} | 8 +- 
.../GPUTPCCreateTrackingData.h} | 12 +- .../GPUTPCDef.h | 0 .../GPUTPCDefinitions.h | 0 .../GPUTPCExtrapolationTracking.cxx | 64 +-- .../GPUTPCExtrapolationTracking.h | 12 +- .../GPUTPCGrid.cxx | 0 .../GPUTPCGrid.h | 22 +- .../GPUTPCHit.h | 0 .../GPUTPCHitId.h | 0 .../GPUTPCMCInfo.h | 0 .../GPUTPCNeighboursCleaner.cxx | 0 .../GPUTPCNeighboursCleaner.h | 2 +- .../GPUTPCNeighboursFinder.cxx | 8 +- .../GPUTPCNeighboursFinder.h | 12 +- .../GPUTPCRow.cxx | 0 .../GPUTPCRow.h | 10 +- .../GPUTPCSectorDebugSortKernels.cxx | 4 +- .../GPUTPCSectorDebugSortKernels.h | 4 +- .../GPUTPCSectorOutCluster.h} | 28 +- .../GPUTPCSectorOutput.cxx} | 16 +- .../GPUTPCSectorOutput.h} | 28 +- .../GPUTPCStartHitsFinder.cxx | 6 +- .../GPUTPCStartHitsFinder.h | 2 +- .../GPUTPCStartHitsSorter.cxx | 0 .../GPUTPCStartHitsSorter.h | 2 +- .../GPUTPCTrack.cxx | 0 .../GPUTPCTrack.h | 18 +- .../GPUTPCTrackLinearisation.h | 0 .../GPUTPCTrackParam.cxx | 2 +- .../GPUTPCTrackParam.h | 4 +- .../GPUTPCTracker.cxx | 54 +- .../GPUTPCTracker.h | 94 ++-- .../GPUTPCTrackerDump.cxx | 20 +- .../GPUTPCTrackingData.cxx} | 44 +- .../GPUTPCTrackingData.h} | 104 ++-- .../GPUTPCTracklet.h | 10 +- .../GPUTPCTrackletConstructor.cxx | 62 +-- .../GPUTPCTrackletConstructor.h | 40 +- .../GPUTPCTrackletSelector.cxx | 4 +- .../GPUTPCTrackletSelector.h | 2 +- .../Standalone/Benchmark/standalone.cxx | 4 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- GPU/GPUTracking/Standalone/tools/createGeo.sh | 2 +- GPU/GPUTracking/TPCClusterFinder/CfFragment.h | 26 +- .../TPCClusterFinder/GPUTPCCFChainContext.h | 20 +- .../GPUTPCCFChargeMapFiller.cxx | 2 +- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 2 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 24 +- .../GPUTPCCFStreamCompaction.cxx | 2 +- .../TPCClusterFinder/GPUTPCClusterFinder.cxx | 4 +- .../TPCClusterFinder/GPUTPCClusterFinder.h | 4 +- .../GPUTPCClusterFinderDump.cxx | 16 +- GPU/GPUTracking/TPCConvert/GPUTPCConvert.h | 4 +- .../TPCConvert/GPUTPCConvertImpl.h | 12 +- 
.../TPCConvert/GPUTPCConvertKernel.cxx | 14 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 16 +- .../display/3rdparty/GL/glcorearb.h | 6 +- GPU/GPUTracking/display/GPUDisplay.cxx | 70 +-- GPU/GPUTracking/display/GPUDisplay.h | 42 +- GPU/GPUTracking/display/GPUDisplayInterface.h | 2 +- .../display/backend/GPUDisplayBackend.cxx | 10 +- .../display/backend/GPUDisplayBackend.h | 2 +- .../backend/GPUDisplayBackendOpenGL.cxx | 16 +- .../backend/GPUDisplayBackendVulkan.cxx | 6 +- .../display/frontend/GPUDisplayKeys.cxx | 30 +- .../display/render/GPUDisplayDraw.cxx | 344 ++++++------ .../display/render/GPUDisplayImportEvent.cxx | 34 +- GPU/GPUTracking/kernels.cmake | 28 +- GPU/GPUTracking/qa/GPUQA.cxx | 16 +- GPU/GPUTracking/qa/genEvents.cxx | 62 +-- GPU/GPUTracking/qa/genEvents.h | 16 +- GPU/Workflow/src/GPUWorkflowInternal.h | 8 +- GPU/Workflow/src/GPUWorkflowPipeline.cxx | 34 +- GPU/Workflow/src/GPUWorkflowSpec.cxx | 18 +- 154 files changed, 2263 insertions(+), 2263 deletions(-) rename GPU/GPUTracking/Global/{GPUChainTrackingSliceTracker.cxx => GPUChainTrackingSectorTracker.cxx} (50%) rename GPU/GPUTracking/Merger/{GPUTPCGMSliceTrack.cxx => GPUTPCGMSectorTrack.cxx} (85%) rename GPU/GPUTracking/Merger/{GPUTPCGMSliceTrack.h => GPUTPCGMSectorTrack.h} (84%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCBaseTrackParam.h (98%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCClusterData.h (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCCreateOccupancyMap.cxx (78%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCCreateOccupancyMap.h (95%) rename GPU/GPUTracking/{SliceTracker/GPUTPCCreateSliceData.cxx => SectorTracker/GPUTPCCreateTrackingData.cxx} (68%) rename GPU/GPUTracking/{SliceTracker/GPUTPCCreateSliceData.h => SectorTracker/GPUTPCCreateTrackingData.h} (83%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCDef.h (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCDefinitions.h (100%) rename 
GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCExtrapolationTracking.cxx (80%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCExtrapolationTracking.h (85%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCGrid.cxx (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCGrid.h (82%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCHit.h (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCHitId.h (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCMCInfo.h (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCNeighboursCleaner.cxx (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCNeighboursCleaner.h (97%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCNeighboursFinder.cxx (97%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCNeighboursFinder.h (86%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCRow.cxx (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCRow.h (90%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCSectorDebugSortKernels.cxx (95%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCSectorDebugSortKernels.h (94%) rename GPU/GPUTracking/{SliceTracker/GPUTPCSliceOutCluster.h => SectorTracker/GPUTPCSectorOutCluster.h} (70%) rename GPU/GPUTracking/{SliceTracker/GPUTPCSliceOutput.cxx => SectorTracker/GPUTPCSectorOutput.cxx} (71%) rename GPU/GPUTracking/{SliceTracker/GPUTPCSliceOutput.h => SectorTracker/GPUTPCSectorOutput.h} (68%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCStartHitsFinder.cxx (92%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCStartHitsFinder.h (97%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCStartHitsSorter.cxx (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCStartHitsSorter.h (97%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrack.cxx (100%) rename 
GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrack.h (71%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackLinearisation.h (100%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackParam.cxx (99%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackParam.h (98%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTracker.cxx (78%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTracker.h (74%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackerDump.cxx (90%) rename GPU/GPUTracking/{SliceTracker/GPUTPCSliceData.cxx => SectorTracker/GPUTPCTrackingData.cxx} (86%) rename GPU/GPUTracking/{SliceTracker/GPUTPCSliceData.h => SectorTracker/GPUTPCTrackingData.h} (58%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTracklet.h (86%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackletConstructor.cxx (91%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackletConstructor.h (76%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackletSelector.cxx (97%) rename GPU/GPUTracking/{SliceTracker => SectorTracker}/GPUTPCTrackletSelector.h (97%) diff --git a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx index 23afc85f94d64..8784f096e3202 100644 --- a/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx +++ b/Detectors/TPC/monitor/src/SimpleEventDisplayGUI.cxx @@ -1227,7 +1227,7 @@ void SimpleEventDisplayGUI::showClusters(int roc, int row) } if (fillSingleTB && std::abs(cl.getTime() - timeBin) < 2) { const auto ly = gpuGeom.LinearPad2Y(sector, irow, cl.getPad() + 0.5); - mClustersRowPad->SetNextPoint(gpuGeom.Row2X(irow), (sector >= GPUCA_NSLICES / 2) ? -ly : ly); + mClustersRowPad->SetNextPoint(gpuGeom.Row2X(irow), (sector >= GPUCA_NSECTORS / 2) ? 
-ly : ly); } } // fmt::print("\n"); diff --git a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx index 6c0ea8b265585..bdf9b95e94450 100644 --- a/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx +++ b/Detectors/TPC/reconstruction/test/testGPUCATracking.cxx @@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE(CATracking_test1) config.configReconstruction.tpc.searchWindowDZDR = 2.5f; //Should always be 2.5 for looper-finding and/or continuous tracking config.configReconstruction.tpc.trackReferenceX = refX; - config.configWorkflow.steps.set(GPUDataTypes::RecoStep::TPCConversion, GPUDataTypes::RecoStep::TPCSliceTracking, + config.configWorkflow.steps.set(GPUDataTypes::RecoStep::TPCConversion, GPUDataTypes::RecoStep::TPCSectorTracking, GPUDataTypes::RecoStep::TPCMerging, GPUDataTypes::RecoStep::TPCCompression, GPUDataTypes::RecoStep::TPCdEdx); config.configWorkflow.inputs.set(GPUDataTypes::InOutType::TPCClusters); config.configWorkflow.outputs.set(GPUDataTypes::InOutType::TPCMergedTracks); diff --git a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx index 294a93709e863..cc964ade0d87c 100644 --- a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx +++ b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx @@ -159,7 +159,7 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) const auto& tinfo = pc.services().get(); const auto firstIR = o2::InteractionRecord(0, tinfo.firstTForbit); - const float totalT = std::max(mFastTransform->getMaxDriftTime(0), mFastTransform->getMaxDriftTime(GPUCA_NSLICES / 2)); + const float totalT = std::max(mFastTransform->getMaxDriftTime(0), mFastTransform->getMaxDriftTime(GPUCA_NSECTORS / 2)); unsigned int offset = 0, lasti = 0; const unsigned int maxTime = (mParam->continuousMaxTimeBin + 1) * o2::tpc::ClusterNative::scaleTimePacked - 1; @@ -206,8 +206,8 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) } } offset = 0; - unsigned int 
offsets[GPUCA_NSLICES][GPUCA_ROW_COUNT]; - for (unsigned int i = 0; i < GPUCA_NSLICES; i++) { + unsigned int offsets[GPUCA_NSECTORS][GPUCA_ROW_COUNT]; + for (unsigned int i = 0; i < GPUCA_NSECTORS; i++) { for (unsigned int j = 0; j < GPUCA_ROW_COUNT; j++) { if (i * GPUCA_ROW_COUNT + j >= clusters.nSliceRows) { break; @@ -218,7 +218,7 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) } #ifdef WITH_OPENMP -#pragma omp parallel for num_threads(mNThreads) schedule(static, (GPUCA_NSLICES + mNThreads - 1) / mNThreads) // Static round-robin scheduling with one chunk per thread to ensure correct order of the final vector +#pragma omp parallel for num_threads(mNThreads) schedule(static, (GPUCA_NSECTORS + mNThreads - 1) / mNThreads) // Static round-robin scheduling with one chunk per thread to ensure correct order of the final vector #endif for (unsigned int ii = 0; ii < clusters.nSliceRows; ii++) { unsigned int i = ii / GPUCA_ROW_COUNT; diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 3396219634587..378b9a9be2ead 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -41,14 +41,14 @@ namespace gpu struct GPUConstantMem { GPUParam param; GPUTPCTracker - tpcTrackers[GPUCA_NSLICES]; + tpcTrackers[GPUCA_NSECTORS]; GPUTPCConvert tpcConverter; GPUTPCCompression tpcCompressor; GPUTPCDecompression tpcDecompressor; GPUTPCGMMerger tpcMerger; GPUTRDTrackerGPU trdTrackerGPU; GPUTRDTracker trdTrackerO2; - GPUTPCClusterFinder tpcClusterer[GPUCA_NSLICES]; + GPUTPCClusterFinder tpcClusterer[GPUCA_NSECTORS]; GPUITSFitter itsFitter; GPUTrackingRefitProcessor trackingRefit; GPUTrackingInOutPointers ioPtrs; diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index d5c1149b0ab29..192e46c36dc68 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -91,22 +91,22 @@ void GPUParam::SetDefaults(float solenoidBz) constexpr float plusZmax = 
249.778; constexpr float minusZmin = -249.645; constexpr float minusZmax = -0.0799937; - for (int32_t i = 0; i < GPUCA_NSLICES; i++) { - const bool zPlus = (i < GPUCA_NSLICES / 2); - SliceParam[i].ZMin = zPlus ? plusZmin : minusZmin; - SliceParam[i].ZMax = zPlus ? plusZmax : minusZmax; + for (int32_t i = 0; i < GPUCA_NSECTORS; i++) { + const bool zPlus = (i < GPUCA_NSECTORS / 2); + SectorParam[i].ZMin = zPlus ? plusZmin : minusZmin; + SectorParam[i].ZMax = zPlus ? plusZmax : minusZmax; int32_t tmp = i; - if (tmp >= GPUCA_NSLICES / 2) { - tmp -= GPUCA_NSLICES / 2; + if (tmp >= GPUCA_NSECTORS / 2) { + tmp -= GPUCA_NSECTORS / 2; } - if (tmp >= GPUCA_NSLICES / 4) { - tmp -= GPUCA_NSLICES / 2; + if (tmp >= GPUCA_NSECTORS / 4) { + tmp -= GPUCA_NSECTORS / 2; } - SliceParam[i].Alpha = 0.174533f + par.dAlpha * tmp; - SliceParam[i].CosAlpha = CAMath::Cos(SliceParam[i].Alpha); - SliceParam[i].SinAlpha = CAMath::Sin(SliceParam[i].Alpha); - SliceParam[i].AngleMin = SliceParam[i].Alpha - par.dAlpha / 2.f; - SliceParam[i].AngleMax = SliceParam[i].Alpha + par.dAlpha / 2.f; + SectorParam[i].Alpha = 0.174533f + par.dAlpha * tmp; + SectorParam[i].CosAlpha = CAMath::Cos(SectorParam[i].Alpha); + SectorParam[i].SinAlpha = CAMath::Sin(SectorParam[i].Alpha); + SectorParam[i].AngleMin = SectorParam[i].Alpha - par.dAlpha / 2.f; + SectorParam[i].AngleMax = SectorParam[i].Alpha + par.dAlpha / 2.f; } par.assumeConstantBz = false; diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 78c13d19be81d..279e1f9142231 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -39,11 +39,11 @@ struct GPUSettingsRec; struct GPUSettingsGTP; struct GPURecoStepConfiguration; -struct GPUParamSlice { - float Alpha; // slice angle - float CosAlpha, SinAlpha; // sign and cosine of the slice angle +struct GPUParamSector { + float Alpha; // sector angle + float CosAlpha, SinAlpha; // sign and cosine of the sector angle float AngleMin, AngleMax; // 
minimal and maximal angle - float ZMin, ZMax; // slice Z range + float ZMin, ZMax; // sector Z range }; namespace internal @@ -66,7 +66,7 @@ struct GPUParam_t { const uint32_t* occupancyMap; // Ptr to TPC occupancy map uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) - GPUParamSlice SliceParam[GPUCA_NSLICES]; + GPUParamSector SectorParam[GPUCA_NSECTORS]; protected: #ifdef GPUCA_TPC_GEOMETRY_O2 @@ -88,15 +88,15 @@ struct GPUParam : public internal::GPUParam_t void UpdateRun3ClusterErrors(const float* yErrorParam, const float* zErrorParam); #endif - GPUd() float Alpha(int32_t iSlice) const + GPUd() float Alpha(int32_t iSector) const { - if (iSlice >= GPUCA_NSLICES / 2) { - iSlice -= GPUCA_NSLICES / 2; + if (iSector >= GPUCA_NSECTORS / 2) { + iSector -= GPUCA_NSECTORS / 2; } - if (iSlice >= GPUCA_NSLICES / 4) { - iSlice -= GPUCA_NSLICES / 2; + if (iSector >= GPUCA_NSECTORS / 4) { + iSector -= GPUCA_NSECTORS / 2; } - return 0.174533f + par.dAlpha * iSlice; + return 0.174533f + par.dAlpha * iSector; } GPUd() float GetClusterErrorSeeding(int32_t yz, int32_t type, float zDiff, float angle2, float unscaledMult) const; GPUd() void GetClusterErrorsSeeding2(uint8_t sector, int32_t row, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const; @@ -108,8 +108,8 @@ struct GPUParam : public internal::GPUParam_t GPUd() void UpdateClusterError2ByState(int16_t clusterState, float& ErrY2, float& ErrZ2) const; GPUd() float GetUnscaledMult(float time) const; - GPUd() void Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const; - GPUd() void Global2Slice(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const; + GPUd() void Sector2Global(int32_t iSector, float x, float y, float z, float* X, float* Y, float* Z) const; + GPUd() void Global2Sector(int32_t iSector, float x, float y, float z, float* X, float* Y, float* Z) const; GPUd() bool rejectEdgeClusterByY(float uncorrectedY, int32_t 
iRow, float trackSigmaY) const; }; diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 87d7b2dc4c7cd..1c26a6d56664b 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -24,19 +24,19 @@ namespace o2 namespace gpu { -GPUdi() void GPUParam::Slice2Global(int32_t iSlice, float x, float y, float z, float* X, float* Y, float* Z) const +GPUdi() void GPUParam::Sector2Global(int32_t iSector, float x, float y, float z, float* X, float* Y, float* Z) const { // conversion of coordinates sector->global - *X = x * SliceParam[iSlice].CosAlpha - y * SliceParam[iSlice].SinAlpha; - *Y = y * SliceParam[iSlice].CosAlpha + x * SliceParam[iSlice].SinAlpha; + *X = x * SectorParam[iSector].CosAlpha - y * SectorParam[iSector].SinAlpha; + *Y = y * SectorParam[iSector].CosAlpha + x * SectorParam[iSector].SinAlpha; *Z = z; } -GPUdi() void GPUParam::Global2Slice(int32_t iSlice, float X, float Y, float Z, float* x, float* y, float* z) const +GPUdi() void GPUParam::Global2Sector(int32_t iSector, float X, float Y, float Z, float* x, float* y, float* z) const { // conversion of coordinates global->sector - *x = X * SliceParam[iSlice].CosAlpha + Y * SliceParam[iSlice].SinAlpha; - *y = Y * SliceParam[iSlice].CosAlpha - X * SliceParam[iSlice].SinAlpha; + *x = X * SectorParam[iSector].CosAlpha + Y * SectorParam[iSector].SinAlpha; + *y = Y * SectorParam[iSector].CosAlpha - X * SectorParam[iSector].SinAlpha; *z = Z; } @@ -117,7 +117,7 @@ GPUdi() float GPUParam::GetSystematicClusterErrorC122(float x, float y, uint8_t return 0.f; } constexpr float dEdgeInv = 18.f / CAMath::Pi(); - const float dy = (sector == (GPUCA_NSLICES / 2 + 1) ? 0.5f : -0.5f) * (y / x) * dEdgeInv + 0.5f; + const float dy = (sector == (GPUCA_NSECTORS / 2 + 1) ? 
0.5f : -0.5f) * (y / x) * dEdgeInv + 0.5f; const float errC12 = rec.tpc.sysClusErrorC12Norm * occupancyTotal * dy; return errC12 * errC12; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 4650b795da172..1bae5a12f2ee7 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -90,9 +90,9 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos new (&mGRPSettings) GPUSettingsGRP; param().SetDefaults(&mGRPSettings); mMemoryScalers.reset(new GPUMemorySizeScalers); - for (uint32_t i = 0; i < NSLICES; i++) { - processors()->tpcTrackers[i].SetSlice(i); // TODO: Move to a better place - processors()->tpcClusterer[i].mISlice = i; + for (uint32_t i = 0; i < NSECTORS; i++) { + processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place + processors()->tpcClusterer[i].mISector = i; } #ifndef GPUCA_NO_ROOT mROOTDump = GPUROOTDumpCore::getAndCreate(); @@ -273,8 +273,8 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.trackletSelectorInPipeline < 0) { mProcessingSettings.trackletSelectorInPipeline = 1; } - if (mProcessingSettings.trackletSelectorSlices < 0) { - mProcessingSettings.trackletSelectorSlices = 1; + if (mProcessingSettings.trackletSelectorSectors < 0) { + mProcessingSettings.trackletSelectorSectors = 1; } } if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) { @@ -334,14 +334,14 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } if (mProcessingSettings.nTPCClustererLanes == -1) { - mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSLICES, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? 
GPUCA_NSLICES : 4) : 1) : mMaxHostThreads)); + mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSECTORS, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads)); } if (mProcessingSettings.overrideClusterizerFragmentLen == -1) { mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; } - if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSLICES) { + if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSECTORS) { GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes); - mProcessingSettings.nTPCClustererLanes = GPUCA_NSLICES; + mProcessingSettings.nTPCClustererLanes = GPUCA_NSECTORS; } if (mProcessingSettings.doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || mProcessingSettings.memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 2834e36c6a569..f0526777a8bcb 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -31,7 +31,7 @@ #include "GPUOutputControl.h" #include "GPUMemoryResource.h" #include "GPUConstantMem.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPULogging.h" namespace o2 @@ -76,7 +76,7 @@ class GPUReconstruction GPUReconstruction& operator=(const GPUReconstruction&) = delete; // General definitions - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; using GeometryType = GPUDataTypes::GeometryType; using DeviceType = GPUDataTypes::DeviceType; @@ -94,8 +94,8 @@ class 
GPUReconstruction static DeviceType GetDeviceType(const char* type); enum InOutPointerType : uint32_t { CLUSTER_DATA = 0, - SLICE_OUT_TRACK = 1, - SLICE_OUT_CLUSTER = 2, + SECTOR_OUT_TRACK = 1, + SECTOR_OUT_CLUSTER = 2, MC_LABEL_TPC = 3, MC_INFO_TPC = 4, MERGED_TRACK = 5, @@ -113,10 +113,10 @@ class GPUReconstruction TRD_SPACEPOINT = 17, TRD_TRIGGERRECORDS = 18, TF_SETTINGS = 19 }; - static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Slice Tracks", "TPC Slice Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets", + static constexpr const char* const IOTYPENAMES[] = {"TPC HLT Clusters", "TPC Sector Tracks", "TPC Sector Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets", "TPC Raw Clusters", "TPC Native Clusters", "TRD Tracklet MC Labels", "TPC Compressed Clusters", "TPC Digit", "TPC ZS Page", "TPC Native Clusters MC Labels", "TPC Digit MC Labeels", "TRD Spacepoints", "TRD Triggerrecords", "TF Settings"}; - static uint32_t getNIOTypeMultiplicity(InOutPointerType type) { return (type == CLUSTER_DATA || type == SLICE_OUT_TRACK || type == SLICE_OUT_CLUSTER || type == RAW_CLUSTERS || type == TPC_DIGIT || type == TPC_DIGIT_MC) ? NSLICES : 1; } + static uint32_t getNIOTypeMultiplicity(InOutPointerType type) { return (type == CLUSTER_DATA || type == SECTOR_OUT_TRACK || type == SECTOR_OUT_CLUSTER || type == RAW_CLUSTERS || type == TPC_DIGIT || type == TPC_DIGIT_MC) ? 
NSECTORS : 1; } // Functionality to create an instance of GPUReconstruction for the desired device static GPUReconstruction* CreateInstance(const GPUSettingsDeviceBackend& cfg); diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 19b33f4b03a17..7d03b908b86de 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -18,8 +18,8 @@ #include "GPUChain.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutput.h" +#include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTRDTrackletWord.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index 629d23075d9bc..c1a0a78dce6fe 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -49,16 +49,16 @@ using namespace std::string_literals; void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr* clusters, uint32_t* nClusters, const TPCFastTransform* transform, int32_t continuousMaxTimeBin) { - memset(nClusters, 0, NSLICES * sizeof(nClusters[0])); + memset(nClusters, 0, NSECTORS * sizeof(nClusters[0])); uint32_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { - uint32_t nClSlice = 0; + for (uint32_t i = 0; i < NSECTORS; i++) { + uint32_t nClSector = 0; for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - nClSlice += native->nClusters[i][j]; + nClSector += native->nClusters[i][j]; } - nClusters[i] = nClSlice; - clusters[i].reset(new GPUTPCClusterData[nClSlice]); - nClSlice = 0; + nClusters[i] = nClSector; + clusters[i].reset(new GPUTPCClusterData[nClSector]); + nClSector = 0; for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for (uint32_t k = 0; k < native->nClusters[i][j]; k++) { const auto& clin 
= native->clusters[i][j][k]; @@ -68,7 +68,7 @@ void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNative } else { transform->TransformInTimeFrame(i, j, clin.getPad(), clin.getTime(), x, y, z, continuousMaxTimeBin); } - auto& clout = clusters[i].get()[nClSlice]; + auto& clout = clusters[i].get()[nClSector]; clout.x = x; clout.y = y; clout.z = z; @@ -76,7 +76,7 @@ void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNative clout.amp = clin.qTot; clout.flags = clin.getFlags(); clout.id = offset + k; - nClSlice++; + nClSector++; } native->clusterOffset[i][j] = offset; offset += native->nClusters[i][j]; @@ -87,7 +87,7 @@ void GPUReconstructionConvert::ConvertNativeToClusterData(o2::tpc::ClusterNative void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, uint32_t* nRawClusters) { memset((void*)&native, 0, sizeof(native)); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < nRawClusters[i]; j++) { native.nClusters[i][rawClusters[i][j].GetPadRow()]++; } @@ -96,7 +96,7 @@ void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAcce nativeBuffer.reset(new ClusterNative[native.nClustersTotal]); native.clustersLinear = nativeBuffer.get(); native.setOffsetPtrs(); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { native.nClusters[i][j] = 0; } @@ -117,7 +117,7 @@ void GPUReconstructionConvert::ConvertRun2RawToNative(o2::tpc::ClusterNativeAcce int32_t GPUReconstructionConvert::GetMaxTimeBin(const ClusterNativeAccess& native) { float retVal = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for (uint32_t k = 0; k < native.nClusters[i][j]; k++) { if 
(native.clusters[i][j][k].getTime() > retVal) { @@ -132,7 +132,7 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const ClusterNativeAccess& nativ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutDigits& digits) { float retVal = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t k = 0; k < digits.nTPCDigits[i]; k++) { if (digits.tpcDigits[i][k].getTimeStamp() > retVal) { retVal = digits.tpcDigits[i][k].getTimeStamp(); @@ -145,12 +145,12 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutDigits& di int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutZS& zspages) { float retVal = 0; - for (uint32_t i = 0; i < NSLICES; i++) { - int32_t firstHBF = zspages.slice[i].count[0] ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)zspages.slice[i].zsPtr[0][0]) : 0; + for (uint32_t i = 0; i < NSECTORS; i++) { + int32_t firstHBF = zspages.sector[i].count[0] ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)zspages.sector[i].zsPtr[0][0]) : 0; for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < zspages.slice[i].count[j]; k++) { - const char* page = (const char*)zspages.slice[i].zsPtr[j][k]; - for (uint32_t l = 0; l < zspages.slice[i].nZSPtr[j][k]; l++) { + for (uint32_t k = 0; k < zspages.sector[i].count[j]; k++) { + const char* page = (const char*)zspages.sector[i].zsPtr[j][k]; + for (uint32_t l = 0; l < zspages.sector[i].nZSPtr[j][k]; l++) { o2::header::RAWDataHeader* rdh = (o2::header::RAWDataHeader*)(page + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); TPCZSHDR* hdr = (TPCZSHDR*)(page + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); int32_t nTimeBinSpan = hdr->nTimeBinSpan; @@ -1328,8 +1328,8 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr buffer[NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - auto reduced = 
tbb::parallel_reduce(tbb::blocked_range(0, NSLICES), o2::gpu::internal::tmpReductionResult(), [&](const auto range, auto red) { + std::vector buffer[NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + auto reduced = tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS), o2::gpu::internal::tmpReductionResult(), [&](const auto range, auto red) { for (uint32_t i = range.begin(); i < range.end(); i++) { std::vector tmpBuffer; red.digitsInput += ZSEncoderGetNDigits(in, i); @@ -1392,7 +1392,7 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptrreset(new uint64_t[reduced.totalPages * TPCZSHDR::TPC_ZS_PAGE_SIZE / sizeof(uint64_t)]); uint64_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { memcpy((char*)outBuffer->get() + offset, buffer[i][j].data(), buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE); offset += buffer[i][j].size() * TPCZSHDR::TPC_ZS_PAGE_SIZE; @@ -1417,20 +1417,20 @@ template void GPUReconstructionConvert::RunZSEncoder(const DigitArra void GPUReconstructionConvert::RunZSEncoderCreateMeta(const uint64_t* buffer, const uint32_t* sizes, void** ptrs, GPUTrackingInOutZS* out) { uint64_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { ptrs[i * GPUTrackingInOutZS::NENDPOINTS + j] = (char*)buffer + offset; offset += sizes[i * GPUTrackingInOutZS::NENDPOINTS + j] * TPCZSHDR::TPC_ZS_PAGE_SIZE; - out->slice[i].zsPtr[j] = &ptrs[i * GPUTrackingInOutZS::NENDPOINTS + j]; - out->slice[i].nZSPtr[j] = &sizes[i * GPUTrackingInOutZS::NENDPOINTS + j]; - out->slice[i].count[j] = 1; + out->sector[i].zsPtr[j] = &ptrs[i * GPUTrackingInOutZS::NENDPOINTS + j]; + out->sector[i].nZSPtr[j] = &sizes[i * GPUTrackingInOutZS::NENDPOINTS + j]; + out->sector[i].count[j] = 1; } } } void GPUReconstructionConvert::RunZSFilter(std::unique_ptr* buffers, const 
o2::tpc::Digit* const* ptrs, size_t* nsb, const size_t* ns, const GPUParam& param, bool zs12bit, float threshold) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { if (buffers[i].get() != ptrs[i] || nsb != ns) { throw std::runtime_error("Not owning digits"); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.h b/GPU/GPUTracking/Base/GPUReconstructionConvert.h index b8aedbcde582b..28e4552ba3849 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.h +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.h @@ -50,7 +50,7 @@ struct GPUTrackingInOutZS; class GPUReconstructionConvert { public: - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; static void ConvertNativeToClusterData(o2::tpc::ClusterNativeAccess* native, std::unique_ptr* clusters, uint32_t* nClusters, const TPCFastTransform* transform, int32_t continuousMaxTimeBin = 0); static void ConvertRun2RawToNative(o2::tpc::ClusterNativeAccess& native, std::unique_ptr& nativeBuffer, const AliHLTTPCRawCluster** rawClusters, uint32_t* nRawClusters); template diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 661ff81fbbd07..64d9351b447e2 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -16,7 +16,7 @@ #include "GPUReconstructionIncludes.h" #include "GPUTPCTracker.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 1381fd0f76981..a279c6c0c2508 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -52,7 +52,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU int32_t unregisterMemoryForGPU_internal(const void* ptr) 
override; void unregisterRemainingRegisteredMemory(); - virtual const GPUTPCTracker* CPUTracker(int32_t iSlice) { return &processors()->tpcTrackers[iSlice]; } + virtual const GPUTPCTracker* CPUTracker(int32_t iSector) { return &processors()->tpcTrackers[iSector]; } int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override = 0; size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override; diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index d4502b978ef5b..b35613f3bec59 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -36,8 +36,8 @@ if (mProcessingSettings.trackletSelectorInPipeline < 0) { \ mProcessingSettings.trackletSelectorInPipeline = GPUCA_SELECTOR_IN_PIPELINE; \ } \ - if (mProcessingSettings.trackletSelectorSlices < 0) { \ - mProcessingSettings.trackletSelectorSlices = GPUCA_TRACKLET_SELECTOR_SLICE_COUNT; \ + if (mProcessingSettings.trackletSelectorSectors < 0) { \ + mProcessingSettings.trackletSelectorSectors = GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT; \ } \ if (mProcessingSettings.alternateBorderSort < 0) { \ mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \ diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 295e6e1a5d9b7..41abc8725c07b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -53,31 +53,31 @@ #define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) // GPU Kernel entry point for single sector #define GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, ...) 
\ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSlice_internal GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSector_internal GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU_SINGLE(...) GPUCA_KRNLGPU_SINGLE_DEF(__VA_ARGS__); #else #define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSlice_internal] GPUCA_M_STRIP(x_forward)); \ + GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSector_internal] GPUCA_M_STRIP(x_forward)); \ } #endif // GPU Kernel entry point for multiple sector #define GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, ...) \ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSlice, int32_t nSliceCount GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSector, int32_t nSectorCount GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU_MULTI(...) GPUCA_KRNLGPU_MULTI_DEF(__VA_ARGS__); #else #define GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward, ...) 
GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ - const int32_t iSlice_internal = nSliceCount * (get_group_id(0) + (get_num_groups(0) % nSliceCount != 0 && nSliceCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \ - const int32_t nSliceBlockOffset = get_num_groups(0) * iSlice_internal / nSliceCount; \ - const int32_t sliceBlockId = get_group_id(0) - nSliceBlockOffset; \ - const int32_t sliceGridDim = get_num_groups(0) * (iSlice_internal + 1) / nSliceCount - get_num_groups(0) * (iSlice_internal) / nSliceCount; \ + const int32_t iSector_internal = nSectorCount * (get_group_id(0) + (get_num_groups(0) % nSectorCount != 0 && nSectorCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \ + const int32_t nSectorBlockOffset = get_num_groups(0) * iSector_internal / nSectorCount; \ + const int32_t sectorBlockId = get_group_id(0) - nSectorBlockOffset; \ + const int32_t sectorGridDim = get_num_groups(0) * (iSector_internal + 1) / nSectorCount - get_num_groups(0) * (iSector_internal) / nSectorCount; \ GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(sliceGridDim, get_local_size(0), sliceBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSlice + iSlice_internal] GPUCA_M_STRIP(x_forward)); \ + GPUCA_M_STRIP_FIRST(x_class)::template Thread(sectorGridDim, get_local_size(0), sectorBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSector + iSector_internal] GPUCA_M_STRIP(x_forward)); \ } #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index b064a43697a67..62ad99f7fa606 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -144,7 +144,7 @@ HighResTimer& 
GPUReconstructionProcessing::getKernelTimer(RecoStep step, int32_t static int32_t id = getNextTimerId(); timerMeta* timer = getTimerById(id, increment); if (timer == nullptr) { - timer = insertTimer(id, GetKernelName(), -1, NSLICES, 0, step); + timer = insertTimer(id, GetKernelName(), -1, NSECTORS, 0, step); } if (addMemorySize) { timer->memSize += addMemorySize; diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index fbca43e03781a..4693a1eff24f2 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -72,23 +72,23 @@ int32_t GPUReconstructionTimeframe::ReadEventShifted(int32_t iEvent, float shift mReadEvent(iEvent); if (config.overlayRaw) { float shiftTTotal = (((double)config.timeFrameLen - DRIFT_TIME) * ((double)TPCZ / (double)DRIFT_TIME) - shiftZ) / mChain->GetTPCTransformHelper()->getCorrMap()->getVDrift(); - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (uint32_t j = 0; j < mChain->mIOPtrs.nRawClusters[iSlice]; j++) { - auto& tmp = mChain->mIOMem.rawClusters[iSlice][j]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (uint32_t j = 0; j < mChain->mIOPtrs.nRawClusters[iSector]; j++) { + auto& tmp = mChain->mIOMem.rawClusters[iSector][j]; tmp.fTime += shiftTTotal; } } } if (shiftZ != 0.f) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (uint32_t j = 0; j < mChain->mIOPtrs.nClusterData[iSlice]; j++) { - auto& tmp = mChain->mIOMem.clusterData[iSlice][j]; - tmp.z += iSlice < NSLICES / 2 ? shiftZ : -shiftZ; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (uint32_t j = 0; j < mChain->mIOPtrs.nClusterData[iSector]; j++) { + auto& tmp = mChain->mIOMem.clusterData[iSector][j]; + tmp.z += iSector < NSECTORS / 2 ? 
shiftZ : -shiftZ; } } for (uint32_t i = 0; i < mChain->mIOPtrs.nMCInfosTPC; i++) { auto& tmp = mChain->mIOMem.mcInfosTPC[i]; - tmp.z += i < NSLICES / 2 ? shiftZ : -shiftZ; + tmp.z += i < NSECTORS / 2 ? shiftZ : -shiftZ; } } @@ -97,40 +97,40 @@ int32_t GPUReconstructionTimeframe::ReadEventShifted(int32_t iEvent, float shift uint32_t removed = 0; if (minZ > -1e6 || maxZ > -1e6) { uint32_t currentClusterTotal = 0; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t currentClusterSlice = 0; - bool doRaw = config.overlayRaw && mChain->mIOPtrs.nClusterData[iSlice] == mChain->mIOPtrs.nRawClusters[iSlice]; - for (uint32_t i = 0; i < mChain->mIOPtrs.nClusterData[iSlice]; i++) { - float sign = iSlice < NSLICES / 2 ? 1 : -1; - if (sign * mChain->mIOMem.clusterData[iSlice][i].z >= minZ && sign * mChain->mIOMem.clusterData[iSlice][i].z <= maxZ) { - if (currentClusterSlice != i) { - mChain->mIOMem.clusterData[iSlice][currentClusterSlice] = mChain->mIOMem.clusterData[iSlice][i]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t currentClusterSector = 0; + bool doRaw = config.overlayRaw && mChain->mIOPtrs.nClusterData[iSector] == mChain->mIOPtrs.nRawClusters[iSector]; + for (uint32_t i = 0; i < mChain->mIOPtrs.nClusterData[iSector]; i++) { + float sign = iSector < NSECTORS / 2 ? 
1 : -1; + if (sign * mChain->mIOMem.clusterData[iSector][i].z >= minZ && sign * mChain->mIOMem.clusterData[iSector][i].z <= maxZ) { + if (currentClusterSector != i) { + mChain->mIOMem.clusterData[iSector][currentClusterSector] = mChain->mIOMem.clusterData[iSector][i]; if (doRaw) { - mChain->mIOMem.rawClusters[iSlice][currentClusterSlice] = mChain->mIOMem.rawClusters[iSlice][i]; + mChain->mIOMem.rawClusters[iSector][currentClusterSector] = mChain->mIOMem.rawClusters[iSector][i]; } } if (mChain->mIOPtrs.nMCLabelsTPC > currentClusterTotal && nClusters != currentClusterTotal) { mChain->mIOMem.mcLabelsTPC[nClusters] = mChain->mIOMem.mcLabelsTPC[currentClusterTotal]; } - // GPUInfo("Keeping Cluster ID %d (ID in slice %d) Z=%f (sector %d) --> %d (slice %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSlice][i].fZ, iSlice, nClusters, currentClusterSlice); - currentClusterSlice++; + // GPUInfo("Keeping Cluster ID %d (ID in sector %d) Z=%f (sector %d) --> %d (sector %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSector][i].fZ, iSector, nClusters, currentClusterSector); + currentClusterSector++; nClusters++; } else { - // GPUInfo("Removing Cluster ID %d (ID in slice %d) Z=%f (sector %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSlice][i].fZ, iSlice); + // GPUInfo("Removing Cluster ID %d (ID in sector %d) Z=%f (sector %d)", currentClusterTotal, i, mChain->mIOMem.clusterData[iSector][i].fZ, iSector); removed++; } currentClusterTotal++; } - mChain->mIOPtrs.nClusterData[iSlice] = currentClusterSlice; + mChain->mIOPtrs.nClusterData[iSector] = currentClusterSector; if (doRaw) { - mChain->mIOPtrs.nRawClusters[iSlice] = currentClusterSlice; + mChain->mIOPtrs.nRawClusters[iSector] = currentClusterSector; } } if (mChain->mIOPtrs.nMCLabelsTPC) { mChain->mIOPtrs.nMCLabelsTPC = nClusters; } } else { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { nClusters += mChain->mIOPtrs.nClusterData[i]; } } @@ -151,7 +151,7 @@ 
void GPUReconstructionTimeframe::MergeShiftedEvents() mChain->ClearIOPointers(); for (uint32_t i = 0; i < mShiftedEvents.size(); i++) { auto& ptr = std::get<0>(mShiftedEvents[i]); - for (uint32_t j = 0; j < NSLICES; j++) { + for (uint32_t j = 0; j < NSECTORS; j++) { mChain->mIOPtrs.nClusterData[j] += ptr.nClusterData[j]; if (config.overlayRaw) { mChain->mIOPtrs.nRawClusters[j] += ptr.nRawClusters[j]; @@ -164,9 +164,9 @@ void GPUReconstructionTimeframe::MergeShiftedEvents() } uint32_t nClustersTotal = 0; uint32_t nClustersTotalRaw = 0; - uint32_t nClustersSliceOffset[NSLICES] = {0}; - for (uint32_t i = 0; i < NSLICES; i++) { - nClustersSliceOffset[i] = nClustersTotal; + uint32_t nClustersSectorOffset[NSECTORS] = {0}; + for (uint32_t i = 0; i < NSECTORS; i++) { + nClustersSectorOffset[i] = nClustersTotal; nClustersTotal += mChain->mIOPtrs.nClusterData[i]; nClustersTotalRaw += mChain->mIOPtrs.nRawClusters[i]; } @@ -183,23 +183,23 @@ void GPUReconstructionTimeframe::MergeShiftedEvents() uint32_t nTrackOffset = 0; uint32_t nColOffset = 0; - uint32_t nClustersEventOffset[NSLICES] = {0}; + uint32_t nClustersEventOffset[NSECTORS] = {0}; for (uint32_t i = 0; i < mShiftedEvents.size(); i++) { auto& ptr = std::get<0>(mShiftedEvents[i]); uint32_t inEventOffset = 0; - for (uint32_t j = 0; j < NSLICES; j++) { + for (uint32_t j = 0; j < NSECTORS; j++) { memcpy((void*)&mChain->mIOMem.clusterData[j][nClustersEventOffset[j]], (void*)ptr.clusterData[j], ptr.nClusterData[j] * sizeof(ptr.clusterData[j][0])); if (nClustersTotalRaw) { memcpy((void*)&mChain->mIOMem.rawClusters[j][nClustersEventOffset[j]], (void*)ptr.rawClusters[j], ptr.nRawClusters[j] * sizeof(ptr.rawClusters[j][0])); } if (mChain->mIOPtrs.nMCLabelsTPC) { - memcpy((void*)&mChain->mIOMem.mcLabelsTPC[nClustersSliceOffset[j] + nClustersEventOffset[j]], (void*)&ptr.mcLabelsTPC[inEventOffset], ptr.nClusterData[j] * sizeof(ptr.mcLabelsTPC[0])); + memcpy((void*)&mChain->mIOMem.mcLabelsTPC[nClustersSectorOffset[j] + 
nClustersEventOffset[j]], (void*)&ptr.mcLabelsTPC[inEventOffset], ptr.nClusterData[j] * sizeof(ptr.mcLabelsTPC[0])); } for (uint32_t k = 0; k < ptr.nClusterData[j]; k++) { - mChain->mIOMem.clusterData[j][nClustersEventOffset[j] + k].id = nClustersSliceOffset[j] + nClustersEventOffset[j] + k; + mChain->mIOMem.clusterData[j][nClustersEventOffset[j] + k].id = nClustersSectorOffset[j] + nClustersEventOffset[j] + k; if (mChain->mIOPtrs.nMCLabelsTPC) { for (int32_t l = 0; l < 3; l++) { - auto& label = mChain->mIOMem.mcLabelsTPC[nClustersSliceOffset[j] + nClustersEventOffset[j] + k].fClusterID[l]; + auto& label = mChain->mIOMem.mcLabelsTPC[nClustersSectorOffset[j] + nClustersEventOffset[j] + k].fClusterID[l]; if (label.fMCID >= 0) { label.fMCID += nTrackOffset; } @@ -364,9 +364,9 @@ int32_t GPUReconstructionTimeframe::LoadMergedEvents(int32_t iEvent) void GPUReconstructionTimeframe::SetDisplayInformation(int32_t iCol) { if (mChain->GetEventDisplay()) { - for (uint32_t sl = 0; sl < NSLICES; sl++) { + for (uint32_t sl = 0; sl < NSECTORS; sl++) { mChain->GetEventDisplay()->SetCollisionFirstCluster(iCol, sl, mChain->mIOPtrs.nClusterData[sl]); } - mChain->GetEventDisplay()->SetCollisionFirstCluster(iCol, NSLICES, mChain->mIOPtrs.nMCInfosTPC); + mChain->GetEventDisplay()->SetCollisionFirstCluster(iCol, NSECTORS, mChain->mIOPtrs.nMCInfosTPC); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h index a4f2e055da2c3..9592f549bcc4b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h @@ -48,7 +48,7 @@ class GPUReconstructionTimeframe static constexpr int32_t TIME_ORBIT = 1000000000 / ORBIT_RATE; private: - constexpr static uint32_t NSLICES = GPUReconstruction::NSLICES; + constexpr static uint32_t NSECTORS = GPUReconstruction::NSECTORS; void SetDisplayInformation(int32_t iCol); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 16a13f07a7bf9..b195b375b4503 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -280,8 +280,8 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } #ifdef GPUCA_USE_TEXTURES - if (GPUCA_SLICE_DATA_MEMORY * NSLICES > (size_t)deviceProp.maxTexture1DLinear) { - GPUError("Invalid maximum texture size of device: %ld < %ld\n", (int64_t)deviceProp.maxTexture1DLinear, (int64_t)(GPUCA_SLICE_DATA_MEMORY * NSLICES)); + if (GPUCA_SECTOR_DATA_MEMORY * NSECTORS > (size_t)deviceProp.maxTexture1DLinear) { + GPUError("Invalid maximum texture size of device: %ld < %ld\n", (int64_t)deviceProp.maxTexture1DLinear, (int64_t)(GPUCA_SECTOR_DATA_MEMORY * NSECTORS)); return (1); } #endif @@ -671,9 +671,9 @@ int32_t GPUReconstructionCUDA::PrepareTextures() #ifdef GPUCA_USE_TEXTURES cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); size_t offset; - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSLICES * GPUCA_SLICE_DATA_MEMORY)); + GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSLICES * GPUCA_SLICE_DATA_MEMORY)); + GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); #endif return (0); } diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index f9c3aacdb00cb..308122a2c5a59 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -30,25 +30,25 @@ endif() # General sources set(SRCS - SliceTracker/GPUTPCTrack.cxx - 
SliceTracker/GPUTPCTrackParam.cxx - SliceTracker/GPUTPCStartHitsFinder.cxx - SliceTracker/GPUTPCStartHitsSorter.cxx - SliceTracker/GPUTPCNeighboursCleaner.cxx - SliceTracker/GPUTPCTracker.cxx - SliceTracker/GPUTPCSliceData.cxx - SliceTracker/GPUTPCSliceOutput.cxx - SliceTracker/GPUTPCTrackletConstructor.cxx - SliceTracker/GPUTPCSectorDebugSortKernels.cxx - SliceTracker/GPUTPCCreateOccupancyMap.cxx - SliceTracker/GPUTPCNeighboursFinder.cxx - SliceTracker/GPUTPCGrid.cxx - SliceTracker/GPUTPCTrackletSelector.cxx - SliceTracker/GPUTPCRow.cxx - SliceTracker/GPUTPCExtrapolationTracking.cxx - SliceTracker/GPUTPCCreateSliceData.cxx + SectorTracker/GPUTPCTrack.cxx + SectorTracker/GPUTPCTrackParam.cxx + SectorTracker/GPUTPCStartHitsFinder.cxx + SectorTracker/GPUTPCStartHitsSorter.cxx + SectorTracker/GPUTPCNeighboursCleaner.cxx + SectorTracker/GPUTPCTracker.cxx + SectorTracker/GPUTPCTrackingData.cxx + SectorTracker/GPUTPCSectorOutput.cxx + SectorTracker/GPUTPCTrackletConstructor.cxx + SectorTracker/GPUTPCSectorDebugSortKernels.cxx + SectorTracker/GPUTPCCreateOccupancyMap.cxx + SectorTracker/GPUTPCNeighboursFinder.cxx + SectorTracker/GPUTPCGrid.cxx + SectorTracker/GPUTPCTrackletSelector.cxx + SectorTracker/GPUTPCRow.cxx + SectorTracker/GPUTPCExtrapolationTracking.cxx + SectorTracker/GPUTPCCreateTrackingData.cxx Merger/GPUTPCGMMerger.cxx - Merger/GPUTPCGMSliceTrack.cxx + Merger/GPUTPCGMSectorTrack.cxx Merger/GPUTPCGMTrackParam.cxx Merger/GPUTPCGMPropagator.cxx Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -62,7 +62,7 @@ set(SRCS set(SRCS_DATATYPES DataTypes/GPUDataTypes.cxx DataTypes/GPUConfigDump.cxx DataTypes/GPUTPCGMPolynomialField.cxx) -set(HDRS_CINT_O2 Merger/GPUTPCGMTrackParam.h Merger/GPUTPCGMMergedTrack.h Merger/GPUTPCGMSliceTrack.h Merger/GPUTPCGMBorderTrack.h TRDTracking/GPUTRDInterfaces.h) +set(HDRS_CINT_O2 Merger/GPUTPCGMTrackParam.h Merger/GPUTPCGMMergedTrack.h Merger/GPUTPCGMSectorTrack.h Merger/GPUTPCGMBorderTrack.h TRDTracking/GPUTRDInterfaces.h) 
set(HDRS_CINT_DATATYPES DataTypes/GPUTPCGMMergedTrackHit.h) set(HDRS_CINT_O2_ADDITIONAL DataTypes/GPUSettings.h Definitions/GPUSettingsList.h DataTypes/GPUDataTypes.h DataTypes/GPUTRDTrack.h DataTypes/CalibdEdxTrackTopologyPol.h DataTypes/CalibdEdxTrackTopologySpline.h) # Manual dependencies for ROOT dictionary generation @@ -88,14 +88,14 @@ set(SRCS_NO_CINT Debug/GPUTPCClusterFilter.cxx utils/timer.cxx) -set(SRCS_NO_H SliceTracker/GPUTPCTrackerDump.cxx +set(SRCS_NO_H SectorTracker/GPUTPCTrackerDump.cxx Merger/GPUTPCGMMergerDump.cxx Base/GPUReconstructionLibrary.cxx Global/GPUChainTrackingClusterizer.cxx Global/GPUChainTrackingTransformation.cxx Global/GPUChainTrackingTRD.cxx Global/GPUChainTrackingRefit.cxx - Global/GPUChainTrackingSliceTracker.cxx + Global/GPUChainTrackingSectorTracker.cxx Global/GPUChainTrackingMerger.cxx Global/GPUChainTrackingCompression.cxx Global/GPUChainTrackingDebugAndProfiling.cxx @@ -139,15 +139,15 @@ set(HDRS_INSTALL qa/GPUQAHelper.h qconfigoptions.h Refit/GPUTrackParamConvert.h - SliceTracker/GPUTPCBaseTrackParam.h - SliceTracker/GPUTPCClusterData.h - SliceTracker/GPUTPCDef.h - SliceTracker/GPUTPCHit.h - SliceTracker/GPUTPCHitId.h - SliceTracker/GPUTPCMCInfo.h - SliceTracker/GPUTPCSliceOutCluster.h - SliceTracker/GPUTPCTracklet.h - SliceTracker/GPUTPCTrackLinearisation.h + SectorTracker/GPUTPCBaseTrackParam.h + SectorTracker/GPUTPCClusterData.h + SectorTracker/GPUTPCDef.h + SectorTracker/GPUTPCHit.h + SectorTracker/GPUTPCHitId.h + SectorTracker/GPUTPCMCInfo.h + SectorTracker/GPUTPCSectorOutCluster.h + SectorTracker/GPUTPCTracklet.h + SectorTracker/GPUTPCTrackLinearisation.h TPCConvert/GPUTPCConvertImpl.h TRDTracking/GPUTRDGeometry.h TRDTracking/GPUTRDInterfaces.h @@ -298,7 +298,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") Definitions DataTypes Base - SliceTracker + SectorTracker TPCConvert dEdx ITS diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx index 
794f4cb485f14..254bf1797bdd6 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx @@ -113,10 +113,10 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* mDecoder.decompress(clustersCompressed, clustersNativeDecoded, allocator, param, true); std::vector tmpClusters; if (param.rec.tpc.rejectionStrategy == GPUSettings::RejectionNone) { // verification does not make sense if we reject clusters during compression - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { if (clustersNative->nClusters[i][j] != clustersNativeDecoded.nClusters[i][j]) { - GPUError("Number of clusters mismatch slice %u row %u: expected %d v.s. decoded %d", i, j, clustersNative->nClusters[i][j], clustersNativeDecoded.nClusters[i][j]); + GPUError("Number of clusters mismatch sector %u row %u: expected %d v.s. decoded %d", i, j, clustersNative->nClusters[i][j], clustersNativeDecoded.nClusters[i][j]); decodingErrors++; continue; } @@ -136,7 +136,7 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* const o2::tpc::ClusterNative& c2 = clustersNativeDecoded.clusters[i][j][k]; if (c1.timeFlagsPacked != c2.timeFlagsPacked || c1.padPacked != c2.padPacked || c1.sigmaTimePacked != c2.sigmaTimePacked || c1.sigmaPadPacked != c2.sigmaPadPacked || c1.qMax != c2.qMax || c1.qTot != c2.qTot) { if (decodingErrors++ < 100) { - GPUWarning("Cluster mismatch: slice %2u row %3u hit %5u: %6d %3d %4d %3d %3d %4d %4d", i, j, k, (int32_t)c1.getTimePacked(), (int32_t)c1.getFlags(), (int32_t)c1.padPacked, (int32_t)c1.sigmaTimePacked, (int32_t)c1.sigmaPadPacked, (int32_t)c1.qMax, (int32_t)c1.qTot); + GPUWarning("Cluster mismatch: sector %2u row %3u hit %5u: %6d %3d %4d %3d %3d %4d %4d", i, j, k, (int32_t)c1.getTimePacked(), (int32_t)c1.getFlags(), (int32_t)c1.padPacked, (int32_t)c1.sigmaTimePacked, 
(int32_t)c1.sigmaPadPacked, (int32_t)c1.qMax, (int32_t)c1.qTot); GPUWarning("%45s %6d %3d %4d %3d %3d %4d %4d", "", (int32_t)c2.getTimePacked(), (int32_t)c2.getFlags(), (int32_t)c2.padPacked, (int32_t)c2.sigmaTimePacked, (int32_t)c2.sigmaPadPacked, (int32_t)c2.qMax, (int32_t)c2.qTot); } } @@ -155,14 +155,14 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* FillStatistic(mPqMaxA, clustersCompressed->qMaxA, clustersCompressed->nAttachedClusters); FillStatistic(mPflagsA, clustersCompressed->flagsA, clustersCompressed->nAttachedClusters); FillStatistic(mProwDiffA, clustersCompressed->rowDiffA, clustersCompressed->nAttachedClustersReduced); - FillStatistic(mPsliceLegDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced); + FillStatistic(mPsectorLegDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced); FillStatistic(mPpadResA, clustersCompressed->padResA, clustersCompressed->nAttachedClustersReduced); FillStatistic(mPtimeResA, clustersCompressed->timeResA, clustersCompressed->nAttachedClustersReduced); FillStatistic(mPsigmaPadA, clustersCompressed->sigmaPadA, clustersCompressed->nAttachedClusters); FillStatistic(mPsigmaTimeA, clustersCompressed->sigmaTimeA, clustersCompressed->nAttachedClusters); FillStatistic(mPqPtA, clustersCompressed->qPtA, clustersCompressed->nTracks); FillStatistic(mProwA, clustersCompressed->rowA, clustersCompressed->nTracks); - FillStatistic(mPsliceA, clustersCompressed->sliceA, clustersCompressed->nTracks); + FillStatistic(mPsectorA, clustersCompressed->sliceA, clustersCompressed->nTracks); FillStatistic(mPtimeA, clustersCompressed->timeA, clustersCompressed->nTracks); FillStatistic(mPpadA, clustersCompressed->padA, clustersCompressed->nTracks); FillStatistic(mPqTotU, clustersCompressed->qTotU, clustersCompressed->nUnattachedClusters); @@ -173,12 +173,12 @@ void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* 
FillStatistic(mPsigmaPadU, clustersCompressed->sigmaPadU, clustersCompressed->nUnattachedClusters); FillStatistic(mPsigmaTimeU, clustersCompressed->sigmaTimeU, clustersCompressed->nUnattachedClusters); FillStatistic(mPnTrackClusters, clustersCompressed->nTrackClusters, clustersCompressed->nTracks); - FillStatistic(mPnSliceRowClusters, clustersCompressed->nSliceRowClusters, clustersCompressed->nSliceRows); + FillStatistic(mPnSectorRowClusters, clustersCompressed->nSliceRowClusters, clustersCompressed->nSliceRows); FillStatisticCombined(mPsigmaA, clustersCompressed->sigmaPadA, clustersCompressed->sigmaTimeA, clustersCompressed->nAttachedClusters, P_MAX_SIGMA); FillStatisticCombined(mPsigmaU, clustersCompressed->sigmaPadU, clustersCompressed->sigmaTimeU, clustersCompressed->nUnattachedClusters, P_MAX_SIGMA); FillStatisticCombined(mPQA, clustersCompressed->qMaxA, clustersCompressed->qTotA, clustersCompressed->nAttachedClusters, P_MAX_QMAX); FillStatisticCombined(mPQU, clustersCompressed->qMaxU, clustersCompressed->qTotU, clustersCompressed->nUnattachedClusters, P_MAX_QMAX); - FillStatisticCombined(mProwSliceA, clustersCompressed->rowDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced, GPUCA_ROW_COUNT); + FillStatisticCombined(mProwSectorA, clustersCompressed->rowDiffA, clustersCompressed->sliceLegDiffA, clustersCompressed->nAttachedClustersReduced, GPUCA_ROW_COUNT); mNTotalClusters += clustersCompressed->nAttachedClusters + clustersCompressed->nUnattachedClusters; } @@ -195,15 +195,15 @@ void GPUTPCClusterStatistics::Finish() double eQ = Analyze(mPqTotA, "qTot Attached", false); eQ += Analyze(mPqMaxA, "qMax Attached", false); Analyze(mPflagsA, "flags Attached"); - double eRowSlice = Analyze(mProwDiffA, "rowDiff Attached", false); - eRowSlice += Analyze(mPsliceLegDiffA, "sliceDiff Attached", false); + double eRowSector = Analyze(mProwDiffA, "rowDiff Attached", false); + eRowSector += Analyze(mPsectorLegDiffA, "sectorDiff Attached", 
false); Analyze(mPpadResA, "padRes Attached"); Analyze(mPtimeResA, "timeRes Attached"); double eSigma = Analyze(mPsigmaPadA, "sigmaPad Attached", false); eSigma += Analyze(mPsigmaTimeA, "sigmaTime Attached", false); Analyze(mPqPtA, "qPt Attached"); Analyze(mProwA, "row Attached"); - Analyze(mPsliceA, "slice Attached"); + Analyze(mPsectorA, "sector Attached"); Analyze(mPtimeA, "time Attached"); Analyze(mPpadA, "pad Attached"); eQ += Analyze(mPqTotU, "qTot Unattached", false); @@ -214,14 +214,14 @@ void GPUTPCClusterStatistics::Finish() eSigma += Analyze(mPsigmaPadU, "sigmaPad Unattached", false); eSigma += Analyze(mPsigmaTimeU, "sigmaTime Unattached", false); Analyze(mPnTrackClusters, "nClusters in Track"); - Analyze(mPnSliceRowClusters, "nClusters in Row"); + Analyze(mPnSectorRowClusters, "nClusters in Row"); double eSigmaCombined = Analyze(mPsigmaA, "combined sigma Attached"); eSigmaCombined += Analyze(mPsigmaU, "combined sigma Unattached"); double eQCombined = Analyze(mPQA, "combined Q Attached"); eQCombined += Analyze(mPQU, "combined Q Unattached"); - double eRowSliceCombined = Analyze(mProwSliceA, "combined row/slice Attached"); + double eRowSectorCombined = Analyze(mProwSectorA, "combined row/sector Attached"); - GPUInfo("Combined Row/Slice: %6.4f --> %6.4f (%6.4f%%)", eRowSlice, eRowSliceCombined, eRowSlice > 1e-1 ? (100. * (eRowSlice - eRowSliceCombined) / eRowSlice) : 0.f); + GPUInfo("Combined Row/Sector: %6.4f --> %6.4f (%6.4f%%)", eRowSector, eRowSectorCombined, eRowSector > 1e-1 ? (100. * (eRowSector - eRowSectorCombined) / eRowSector) : 0.f); GPUInfo("Combined Sigma: %6.4f --> %6.4f (%6.4f%%)", eSigma, eSigmaCombined, eSigma > 1e-3 ? (100. * (eSigma - eSigmaCombined) / eSigma) : 0.f); GPUInfo("Combined Q: %6.4f --> %6.4f (%6.4f%%)", eQ, eQCombined, eQ > 1e-3 ? (100. 
* (eQ - eQCombined) / eQ) : 0.f); diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h index 7c873fa67f522..1dfb958750bef 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.h @@ -29,7 +29,7 @@ namespace o2::gpu class GPUTPCClusterStatistics { public: - static constexpr uint32_t NSLICES = GPUCA_NSLICES; + static constexpr uint32_t NSECTORS = GPUCA_NSECTORS; void RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param); void Finish(); @@ -55,14 +55,14 @@ class GPUTPCClusterStatistics std::vector mPqMaxA = std::vector(P_MAX_QMAX, 0); std::vector mPflagsA = std::vector(P_MAX_FLAGS, 0); std::vector mProwDiffA = std::vector(GPUCA_ROW_COUNT, 0); - std::vector mPsliceLegDiffA = std::vector(GPUCA_NSLICES * 2, 0); + std::vector mPsectorLegDiffA = std::vector(GPUCA_NSECTORS * 2, 0); std::vector mPpadResA = std::vector(P_MAX_PAD, 0); std::vector mPtimeResA = std::vector(P_MAX_TIME, 0); std::vector mPsigmaPadA = std::vector(P_MAX_SIGMA, 0); std::vector mPsigmaTimeA = std::vector(P_MAX_SIGMA, 0); std::vector mPqPtA = std::vector(P_MAX_QPT, 0); std::vector mProwA = std::vector(GPUCA_ROW_COUNT, 0); - std::vector mPsliceA = std::vector(GPUCA_NSLICES, 0); + std::vector mPsectorA = std::vector(GPUCA_NSECTORS, 0); std::vector mPtimeA = std::vector(P_MAX_TIME, 0); std::vector mPpadA = std::vector(P_MAX_PAD, 0); std::vector mPqTotU = std::vector(P_MAX_QTOT, 0); @@ -73,12 +73,12 @@ class GPUTPCClusterStatistics std::vector mPsigmaPadU = std::vector(P_MAX_SIGMA, 0); std::vector mPsigmaTimeU = std::vector(P_MAX_SIGMA, 0); std::vector mPnTrackClusters; - std::vector mPnSliceRowClusters; + std::vector mPnSectorRowClusters; std::vector mPsigmaU = std::vector(P_MAX_SIGMA * P_MAX_SIGMA, 0); std::vector mPsigmaA = std::vector(P_MAX_SIGMA * P_MAX_SIGMA, 
0); std::vector mPQU = std::vector(P_MAX_QMAX * P_MAX_QTOT, 0); std::vector mPQA = std::vector(P_MAX_QMAX * P_MAX_QTOT, 0); - std::vector mProwSliceA = std::vector(GPUCA_ROW_COUNT * GPUCA_NSLICES * 2, 0); + std::vector mProwSectorA = std::vector(GPUCA_ROW_COUNT * GPUCA_NSECTORS * 2, 0); double mEntropy = 0; double mHuffman = 0; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 8f8137a6307b0..335b201d11d07 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -65,7 +65,7 @@ void GPUTPCCompression::SetPointersCompressedClusters(void*& mem, T& c, uint32_t computePointerWithAlignment(mem, c.timeDiffU, nClU); computePointerWithAlignment(mem, c.sigmaPadU, nClU); computePointerWithAlignment(mem, c.sigmaTimeU, nClU); - computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSLICES); + computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSECTORS); uint32_t nClAreduced = reducedClA ? 
nClA - nTr : nClA; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index 9a5d6436f06af..c1d9fe283fbea 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -64,7 +64,7 @@ class GPUTPCCompression : public GPUProcessor uint32_t nStoredUnattachedClusters = 0; }; - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; o2::tpc::CompressedClustersPtrs mPtrs; o2::tpc::CompressedClusters* mOutput = nullptr; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 0f5936095fdc3..966bffa963c7e 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -44,7 +44,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread processors.param.rec.tpc.rejectQPtB5 || trk.MergedLooper(); uint32_t nClustersStored = 0; CompressedClustersPtrs& GPUrestrict() c = compressor.mPtrs; - uint8_t lastRow = 0, lastSlice = 0; + uint8_t lastRow = 0, lastSector = 0; GPUTPCCompressionTrackModel track; float zOffset = 0; for (int32_t k = trk.NClusters() - 1; k >= 0; k--) { @@ -67,18 +67,18 @@ GPUdii() void GPUTPCCompressionKernels::Threadclusters[hit.slice][hit.row][hit.num - clusters->clusterOffset[hit.slice][hit.row]]; + const ClusterNative& GPUrestrict() orgCl = clusters -> clusters[hit.sector][hit.row][hit.num - clusters->clusterOffset[hit.sector][hit.row]]; float x = param.tpcGeometry.Row2X(hit.row); - float y = track.LinearPad2Y(hit.slice, orgCl.getPad(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)); - float z = param.tpcGeometry.LinearTime2Z(hit.slice, orgCl.getTime()); + float y = track.LinearPad2Y(hit.sector, orgCl.getPad(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)); + float z = 
param.tpcGeometry.LinearTime2Z(hit.sector, orgCl.getTime()); if (nClustersStored) { - if ((hit.slice < GPUCA_NSLICES) ^ (lastSlice < GPUCA_NSLICES)) { + if ((hit.sector < GPUCA_NSECTORS) ^ (lastSector < GPUCA_NSECTORS)) { break; } if (lastLeg != hit.leg && track.Mirror()) { break; } - if (track.Propagate(param.tpcGeometry.Row2X(hit.row), param.SliceParam[hit.slice].Alpha)) { + if (track.Propagate(param.tpcGeometry.Row2X(hit.row), param.SectorParam[hit.sector].Alpha)) { break; } } @@ -89,35 +89,35 @@ GPUdii() void GPUTPCCompressionKernels::Thread 0 ? 254 : 0); zOffset = z; - track.Init(x, y, z - zOffset, param.SliceParam[hit.slice].Alpha, qpt, param); + track.Init(x, y, z - zOffset, param.SectorParam[hit.sector].Alpha, qpt, param); myTrack = CAMath::AtomicAdd(&compressor.mMemory->nStoredTracks, 1u); compressor.mAttachedClusterFirstIndex[myTrack] = trk.FirstClusterRef(); lastLeg = hit.leg; c.qPtA[myTrack] = qpt; c.rowA[myTrack] = hit.row; - c.sliceA[myTrack] = hit.slice; + c.sliceA[myTrack] = hit.sector; c.timeA[myTrack] = orgCl.getTimePacked(); c.padA[myTrack] = orgCl.padPacked; } else { uint32_t row = hit.row; - uint32_t slice = hit.slice; + uint32_t sector = hit.sector; if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionDifferences) { if (lastRow > row) { row += GPUCA_ROW_COUNT; } row -= lastRow; - if (lastSlice > slice) { - slice += compressor.NSLICES; + if (lastSector > sector) { + sector += compressor.NSECTORS; } - slice -= lastSlice; + sector -= lastSector; } c.rowDiffA[cidx] = row; - c.sliceLegDiffA[cidx] = (hit.leg == lastLeg ? 0 : compressor.NSLICES) + slice; - float pad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(hit.slice, track.Y(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)))); + c.sliceLegDiffA[cidx] = (hit.leg == lastLeg ? 
0 : compressor.NSECTORS) + sector; + float pad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(hit.sector, track.Y(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)))); c.padResA[cidx] = orgCl.padPacked - orgCl.packPad(pad); - float time = CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(hit.slice, track.Z() + zOffset)); + float time = CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(hit.sector, track.Z() + zOffset)); c.timeResA[cidx] = (orgCl.getTimePacked() - orgCl.packTime(time)) & 0xFFFFFF; lastLeg = hit.leg; } @@ -138,7 +138,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadnStoredAttachedClusters, nClustersStored); @@ -185,12 +185,12 @@ GPUdii() void GPUTPCCompressionKernels::ThreadclusterOffset[iSlice][iRow]; - const uint32_t idOffsetOut = clusters->clusterOffset[iSlice][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; - const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSlice])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal + for (int32_t iSectorRow = iBlock; iSectorRow < GPUCA_NSECTORS * GPUCA_ROW_COUNT; iSectorRow += nBlocks) { + const uint32_t iSector = iSectorRow / GPUCA_ROW_COUNT; + const uint32_t iRow = iSectorRow % GPUCA_ROW_COUNT; + const uint32_t idOffset = clusters->clusterOffset[iSector][iRow]; + const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; + const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSector])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal if (iThread == nThreads - 1) { smem.nCount = 0; } @@ -199,12 +199,12 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSlice][iRow]); + const uint32_t nn = 
GPUCommonMath::nextMultipleOf(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; int32_t cidx = 0; do { - if (i >= clusters->nClusters[iSlice][iRow]) { + if (i >= clusters->nClusters[iSector][iRow]) { break; } if (compressor.mClusterStatus[idx]) { @@ -253,29 +253,29 @@ GPUdii() void GPUTPCCompressionKernels::Thread idOffsetOutMax) { if (iThread == nThreads - 1) { - compressor.raiseError(GPUErrors::ERROR_COMPRESSION_ROW_HIT_OVERFLOW, iSlice * 1000 + iRow, idOffsetOut + totalCount + count, idOffsetOutMax); + compressor.raiseError(GPUErrors::ERROR_COMPRESSION_ROW_HIT_OVERFLOW, iSector * 1000 + iRow, idOffsetOut + totalCount + count, idOffsetOutMax); } break; } if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionDifferences) { if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSlice][iRow])); + 
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); } GPUbarrier(); } for (uint32_t j = get_local_id(0); j < count; j += get_local_size(0)) { int32_t outidx = idOffsetOut + totalCount + j; - const ClusterNative& GPUrestrict() orgCl = clusters->clusters[iSlice][iRow][sortBuffer[j]]; + const ClusterNative& GPUrestrict() orgCl = clusters -> clusters[iSector][iRow][sortBuffer[j]]; int32_t preId = j != 0 ? (int32_t)sortBuffer[j - 1] : (totalCount != 0 ? (int32_t)smem.lastIndex : -1); - GPUTPCCompression_EncodeUnattached(param.rec.tpc.compressionTypeMask, orgCl, c.timeDiffU[outidx], c.padDiffU[outidx], preId == -1 ? nullptr : &clusters->clusters[iSlice][iRow][preId]); + GPUTPCCompression_EncodeUnattached(param.rec.tpc.compressionTypeMask, orgCl, c.timeDiffU[outidx], c.padDiffU[outidx], preId == -1 ? nullptr : &clusters->clusters[iSector][iRow][preId]); uint16_t qtot = orgCl.qTot, qmax = orgCl.qMax; uint8_t sigmapad = orgCl.sigmaPadPacked, sigmatime = orgCl.sigmaTimePacked; @@ -304,7 +304,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadnStoredUnattachedClusters, totalCount); } GPUbarrier(); @@ -530,7 +530,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::ThreadnSliceRowClusters, compressor.mPtrs.nSliceRowClusters, compressor.NSLICES * GPUCA_ROW_COUNT, nThreads, iThread); + compressorMemcpy(compressor.mOutput->nSliceRowClusters, compressor.mPtrs.nSliceRowClusters, compressor.NSECTORS * GPUCA_ROW_COUNT, nThreads, iThread); compressorMemcpy(compressor.mOutput->nTrackClusters, compressor.mPtrs.nTrackClusters, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpy(compressor.mOutput->qPtA, compressor.mPtrs.qPtA, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpy(compressor.mOutput->rowA, compressor.mPtrs.rowA, compressor.mMemory->nStoredTracks, nThreads, iThread); @@ -549,14 +549,14 @@ GPUdii() void GPUTPCCompressionGatherKernels::ThreadtimeA, compressor.mPtrs.timeA, 
compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpy(compressor.mOutput->padA, compressor.mPtrs.padA, compressor.mMemory->nStoredTracks, nThreads, iThread); - uint32_t sliceStart = rowStart / GPUCA_ROW_COUNT; - uint32_t sliceEnd = rowEnd / GPUCA_ROW_COUNT; + uint32_t sectorStart = rowStart / GPUCA_ROW_COUNT; + uint32_t sectorEnd = rowEnd / GPUCA_ROW_COUNT; - uint32_t sliceRowStart = rowStart % GPUCA_ROW_COUNT; - uint32_t sliceRowEnd = rowEnd % GPUCA_ROW_COUNT; + uint32_t sectorRowStart = rowStart % GPUCA_ROW_COUNT; + uint32_t sectorRowEnd = rowEnd % GPUCA_ROW_COUNT; - for (uint32_t i = sliceStart; i <= sliceEnd && i < compressor.NSLICES; i++) { - for (uint32_t j = ((i == sliceStart) ? sliceRowStart : 0); j < ((i == sliceEnd) ? sliceRowEnd : GPUCA_ROW_COUNT); j++) { + for (uint32_t i = sectorStart; i <= sectorEnd && i < compressor.NSECTORS; i++) { + for (uint32_t j = ((i == sectorStart) ? sectorRowStart : 0); j < ((i == sectorEnd) ? sectorRowEnd : GPUCA_ROW_COUNT); j++) { uint32_t nClusters = compressor.mPtrs.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; uint32_t clusterOffsetInCache = clusters->clusterOffset[i][j] * compressor.mMaxClusterFactorBase1024 / 1024; compressorMemcpy(compressor.mOutput->qTotU + rowsOffset, compressor.mPtrs.qTotU + clusterOffsetInCache, nClusters, nLanes, iLane); @@ -636,7 +636,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherBuffered(int32_t nBlocks, in auto& input = compressor.mPtrs; auto* output = compressor.mOutput; - uint32_t nRows = compressor.NSLICES * GPUCA_ROW_COUNT; + uint32_t nRows = compressor.NSECTORS * GPUCA_ROW_COUNT; uint32_t rowsPerWarp = (nRows + nGlobalWarps - 1) / nGlobalWarps; uint32_t rowStart = rowsPerWarp * iGlobalWarp; uint32_t rowEnd = CAMath::Min(nRows, rowStart + rowsPerWarp); @@ -661,7 +661,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherBuffered(int32_t nBlocks, in uint32_t tracksOffset = calculateWarpOffsets(smem, input.nTrackClusters, trackStart, trackEnd, nWarps, iWarp, 
nLanes, iLane); if (iBlock == 0) { - compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSLICES * GPUCA_ROW_COUNT, nThreads, iThread); + compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSECTORS * GPUCA_ROW_COUNT, nThreads, iThread); compressorMemcpyBasic(output->nTrackClusters, input.nTrackClusters, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->qPtA, input.qPtA, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->rowA, input.rowA, compressor.mMemory->nStoredTracks, nThreads, iThread); @@ -671,17 +671,17 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherBuffered(int32_t nBlocks, in } const uint32_t* clusterOffsets = &clusters->clusterOffset[0][0] + rowStart; - const uint32_t* nSliceRowClusters = input.nSliceRowClusters + rowStart; + const uint32_t* nSectorRowClusters = input.nSliceRowClusters + rowStart; auto* buf = smem.getBuffer(iWarp); - compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->padDiffU + rowsOffset, input.padDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, 
nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->padDiffU + rowsOffset, input.padDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); const uint16_t* nTrackClustersPtr = input.nTrackClusters + trackStart; const uint32_t* aClsFstIdx = compressor.mAttachedClusterFirstIndex + trackStart; @@ -714,7 +714,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherMulti(int32_t nBlocks, int32 auto* buf = smem.getBuffer(iWarp); if (iBlock == 0) { - compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSLICES * 
GPUCA_ROW_COUNT, nThreads, iThread); + compressorMemcpyBasic(output->nSliceRowClusters, input.nSliceRowClusters, compressor.NSECTORS * GPUCA_ROW_COUNT, nThreads, iThread); compressorMemcpyBasic(output->nTrackClusters, input.nTrackClusters, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->qPtA, input.qPtA, compressor.mMemory->nStoredTracks, nThreads, iThread); compressorMemcpyBasic(output->rowA, input.rowA, compressor.mMemory->nStoredTracks, nThreads, iThread); @@ -725,7 +725,7 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherMulti(int32_t nBlocks, int32 const uint32_t nGlobalWarps = nWarps * (nBlocks - 1) / 2; const uint32_t iGlobalWarp = nWarps * (iBlock - 1) / 2 + iWarp; - const uint32_t nRows = compressor.NSLICES * GPUCA_ROW_COUNT; + const uint32_t nRows = compressor.NSECTORS * GPUCA_ROW_COUNT; uint32_t rowsPerWarp = (nRows + nGlobalWarps - 1) / nGlobalWarps; uint32_t rowStart = rowsPerWarp * iGlobalWarp; uint32_t rowEnd = CAMath::Min(nRows, rowStart + rowsPerWarp); @@ -737,15 +737,15 @@ GPUdii() void GPUTPCCompressionGatherKernels::gatherMulti(int32_t nBlocks, int32 const uint32_t rowsOffset = calculateWarpOffsets(smem, input.nSliceRowClusters, rowStart, rowEnd, nWarps, iWarp, nLanes, iLane); const uint32_t* clusterOffsets = &clusters->clusterOffset[0][0] + rowStart; - const uint32_t* nSliceRowClusters = input.nSliceRowClusters + rowStart; - - compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->padDiffU + 
rowsOffset, input.padDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); - compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSliceRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + const uint32_t* nSectorRowClusters = input.nSliceRowClusters + rowStart; + + compressorMemcpyBuffered(buf, output->qTotU + rowsOffset, input.qTotU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->qMaxU + rowsOffset, input.qMaxU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->flagsU + rowsOffset, input.flagsU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->padDiffU + rowsOffset, input.padDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->timeDiffU + rowsOffset, input.timeDiffU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaPadU + rowsOffset, input.sigmaPadU, nSectorRowClusters, clusterOffsets, rowsPerWarp, nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); + compressorMemcpyBuffered(buf, output->sigmaTimeU + rowsOffset, input.sigmaTimeU, nSectorRowClusters, clusterOffsets, rowsPerWarp, 
nLanes, iLane, 0, compressor.mMaxClusterFactorBase1024); } else { const uint32_t nGlobalWarps = nWarps * (nBlocks - 1) / 2; const uint32_t iGlobalWarp = nWarps * (iBlock / 2 - 1) + iWarp; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx index 2d8b69a4be516..1f84aa4599a27 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.cxx @@ -66,7 +66,7 @@ GPUd() int32_t GPUTPCCompressionTrackModel::Mirror() return 0; } -#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) #include "GPUTPCTrackLinearisation.h" #include "GPUTPCTracker.h" diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h index b67f544f513bf..b3b4da27e625b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionTrackModel.h @@ -17,7 +17,7 @@ // For debugging purposes, we provide means to use other track models // #define GPUCA_COMPRESSION_TRACK_MODEL_MERGER -// #define GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER +// #define GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER #include "GPUDef.h" @@ -25,7 +25,7 @@ #include "GPUTPCGMPropagator.h" #include "GPUTPCGMTrackParam.h" -#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) #include "GPUTPCTrackParam.h" #else // Default internal track model for compression @@ -49,7 +49,7 @@ class GPUTPCCompressionTrackModel GPUd() int32_t Filter(float y, float z, int32_t iRow); GPUd() int32_t Mirror(); -#if defined(GPUCA_COMPRESSION_TRACK_MODEL_MERGER) || defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#if defined(GPUCA_COMPRESSION_TRACK_MODEL_MERGER) || defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) 
GPUd() float X() const { return mTrk.GetX(); @@ -100,15 +100,15 @@ class GPUTPCCompressionTrackModel GPUd() void getClusterErrors2(int32_t iRow, float z, float sinPhi, float DzDs, float& ErrY2, float& ErrZ2) const; GPUd() void resetCovariance(); - GPUd() float LinearPad2Y(int32_t slice, float pad, float padWidth, uint8_t npads) const + GPUd() float LinearPad2Y(int32_t sector, float pad, float padWidth, uint8_t npads) const { const float u = (pad - 0.5f * npads) * padWidth; - return (slice >= GPUCA_NSLICES / 2) ? -u : u; + return (sector >= GPUCA_NSECTORS / 2) ? -u : u; } - GPUd() float LinearY2Pad(int32_t slice, float y, float padWidth, uint8_t npads) const + GPUd() float LinearY2Pad(int32_t sector, float y, float padWidth, uint8_t npads) const { - const float u = (slice >= GPUCA_NSLICES / 2) ? -y : y; + const float u = (sector >= GPUCA_NSECTORS / 2) ? -y : y; return u / padWidth + 0.5f * npads; } @@ -120,7 +120,7 @@ class GPUTPCCompressionTrackModel GPUTPCGMTrackParam mTrk; const GPUParam* mParam; -#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SLICETRACKER) +#elif defined(GPUCA_COMPRESSION_TRACK_MODEL_SECTORTRACKER) GPUTPCTrackParam mTrk; float mAlpha; const GPUParam* mParam; diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx index 16c6cb4b8d61c..fd0c929dd2ba7 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.cxx @@ -39,7 +39,7 @@ void GPUTPCDecompression::SetPointersCompressedClusters(void*& mem, T& c, uint32 computePointerWithAlignment(mem, c.timeDiffU, nClU); computePointerWithAlignment(mem, c.sigmaPadU, nClU); computePointerWithAlignment(mem, c.sigmaTimeU, nClU); - computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSLICES); + computePointerWithAlignment(mem, c.nSliceRowClusters, GPUCA_ROW_COUNT * NSECTORS); uint32_t nClAreduced = reducedClA ? 
nClA - nTr : nClA; @@ -67,19 +67,19 @@ void GPUTPCDecompression::SetPointersCompressedClusters(void*& mem, T& c, uint32 void* GPUTPCDecompression::SetPointersTmpNativeBuffersGPU(void* mem) { - computePointerWithAlignment(mem, mTmpNativeClusters, NSLICES * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer); + computePointerWithAlignment(mem, mTmpNativeClusters, NSECTORS * GPUCA_ROW_COUNT * mMaxNativeClustersPerBuffer); return mem; } void* GPUTPCDecompression::SetPointersTmpNativeBuffersOutput(void* mem) { - computePointerWithAlignment(mem, mNativeClustersIndex, NSLICES * GPUCA_ROW_COUNT); + computePointerWithAlignment(mem, mNativeClustersIndex, NSECTORS * GPUCA_ROW_COUNT); return mem; } void* GPUTPCDecompression::SetPointersTmpNativeBuffersInput(void* mem) { - computePointerWithAlignment(mem, mUnattachedClustersOffsets, NSLICES * GPUCA_ROW_COUNT); + computePointerWithAlignment(mem, mUnattachedClustersOffsets, NSECTORS * GPUCA_ROW_COUNT); computePointerWithAlignment(mem, mAttachedClustersOffsets, mInputGPU.nTracks); return mem; } @@ -98,7 +98,7 @@ void* GPUTPCDecompression::SetPointersInputClusterNativeAccess(void* mem) void* GPUTPCDecompression::SetPointersNClusterPerSectorRow(void* mem) { - computePointerWithAlignment(mem, mNClusterPerSectorRow, NSLICES * GPUCA_ROW_COUNT); + computePointerWithAlignment(mem, mNClusterPerSectorRow, NSECTORS * GPUCA_ROW_COUNT); return mem; } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h index d39eba6a08e2d..e6f8377a246e2 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompression.h @@ -49,7 +49,7 @@ class GPUTPCDecompression : public GPUProcessor #endif protected: - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; o2::tpc::CompressedClusters mInputGPU; uint32_t mMaxNativeClustersPerBuffer; diff --git 
a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx index 2ba80bf4d3b21..ee1a9c97cc30b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx @@ -38,19 +38,19 @@ GPUdii() void GPUTPCDecompressionKernels::Thread -GPUdii() void GPUTPCDecompressionKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t sliceStart, int32_t nSlices) +GPUdii() void GPUTPCDecompressionKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t sectorStart, int32_t nSectors) { GPUTPCDecompression& GPUrestrict() decompressor = processors.tpcDecompressor; CompressedClusters& GPUrestrict() cmprClusters = decompressor.mInputGPU; ClusterNative* GPUrestrict() clusterBuffer = decompressor.mNativeClustersBuffer; const ClusterNativeAccess* outputAccess = decompressor.mClusterNativeAccess; uint32_t* offsets = decompressor.mUnattachedClustersOffsets; - for (int32_t i = get_global_id(0); i < GPUCA_ROW_COUNT * nSlices; i += get_global_size(0)) { + for (int32_t i = get_global_id(0); i < GPUCA_ROW_COUNT * nSectors; i += get_global_size(0)) { uint32_t iRow = i % GPUCA_ROW_COUNT; - uint32_t iSlice = sliceStart + (i / GPUCA_ROW_COUNT); - const uint32_t linearIndex = iSlice * GPUCA_ROW_COUNT + iRow; - uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(iSlice, iRow, decompressor.mMaxNativeClustersPerBuffer); - ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[iSlice][iRow]; + uint32_t iSector = sectorStart + (i / GPUCA_ROW_COUNT); + const uint32_t linearIndex = iSector * GPUCA_ROW_COUNT + iRow; + uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(iSector, iRow, decompressor.mMaxNativeClustersPerBuffer); + ClusterNative* buffer 
= clusterBuffer + outputAccess->clusterOffset[iSector][iRow]; if (decompressor.mNativeClustersIndex[linearIndex] != 0) { decompressorMemcpyBasic(buffer, decompressor.mTmpNativeClusters + tmpBufferIndex, decompressor.mNativeClustersIndex[linearIndex]); } @@ -58,7 +58,7 @@ GPUdii() void GPUTPCDecompressionKernels::Thread= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[linearIndex]); TPCClusterDecompressionCore::decompressHits(cmprClusters, offsets[linearIndex], end, clout); if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) { - for (uint32_t k = 0; k < outputAccess->nClusters[iSlice][iRow]; k++) { + for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) { auto& cl = buffer[k]; float t = cl.getTime() + processors.param.rec.tpc.clustersShiftTimebins; if (t < 0) { @@ -92,11 +92,11 @@ GPUdii() void GPUTPCDecompressionUtilKernels::ThreadnClusters[slice][row]; k++) { - ClusterNative cl = clusterAccess->clusters[slice][row][k]; + for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) { + ClusterNative cl = clusterAccess->clusters[sector][row][k]; if (isClusterKept(cl, param)) { decompressor.mNClusterPerSectorRow[i]++; } @@ -112,14 +112,14 @@ GPUdii() void GPUTPCDecompressionUtilKernels::ThreadnClusters[slice][row]; k++) { - const ClusterNative cl = clusterAccess->clusters[slice][row][k]; + for (uint32_t k = 0; k < clusterAccess->nClusters[sector][row]; k++) { + const ClusterNative cl = clusterAccess->clusters[sector][row][k]; if (isClusterKept(cl, param)) { - clusterBuffer[outputAccess->clusterOffset[slice][row] + count] = cl; + clusterBuffer[outputAccess->clusterOffset[sector][row] + count] = cl; count++; } } @@ -131,10 +131,10 @@ GPUdii() void GPUTPCDecompressionUtilKernels::ThreadclusterOffset[slice][row]; - GPUCommonAlgorithm::sort(buffer, buffer + outputAccess->nClusters[slice][row]); + ClusterNative* buffer = clusterBuffer + outputAccess->clusterOffset[sector][row]; + 
GPUCommonAlgorithm::sort(buffer, buffer + outputAccess->nClusters[sector][row]); } } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h index 0bd69653fdbd4..1ea93e4acb9d0 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h @@ -37,9 +37,9 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, Args... args); - GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t slice, uint32_t row, uint32_t maxClustersPerBuffer) + GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t sector, uint32_t row, uint32_t maxClustersPerBuffer) { - return slice * (GPUCA_ROW_COUNT * maxClustersPerBuffer) + row * maxClustersPerBuffer; + return sector * (GPUCA_ROW_COUNT * maxClustersPerBuffer) + row * maxClustersPerBuffer; } template diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc index 43ed260f461a4..6c4f70d7c6884 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc @@ -47,7 +47,7 @@ class TPCClusterDecompressionCore return clusterVector.back(); } - GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT]) + GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector 
(&clusters)[GPUCA_NSECTORS][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSECTORS][GPUCA_ROW_COUNT]) { std::vector& clusterVector = clusters[slice][row]; auto& lock = locks[slice][row]; @@ -85,14 +85,14 @@ class TPCClusterDecompressionCore uint32_t pad = 0, time = 0; if (clusterIndex != 0) { uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1]; - bool changeLeg = (tmpSlice >= GPUCA_NSLICES); + bool changeLeg = (tmpSlice >= GPUCA_NSECTORS); if (changeLeg) { - tmpSlice -= GPUCA_NSLICES; + tmpSlice -= GPUCA_NSECTORS; } if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) { slice += tmpSlice; - if (slice >= GPUCA_NSLICES) { - slice -= GPUCA_NSLICES; + if (slice >= GPUCA_NSECTORS) { + slice -= GPUCA_NSECTORS; } row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1]; if (row >= GPUCA_ROW_COUNT) { @@ -105,7 +105,7 @@ class TPCClusterDecompressionCore if (changeLeg && track.Mirror()) { break; } - if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) { + if (track.Propagate(param.tpcGeometry.Row2X(row), param.SectorParam[slice].Alpha)) { break; } uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1]; @@ -140,7 +140,7 @@ class TPCClusterDecompressionCore float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); if (clusterIndex == 0) { zOffset = z; - track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); + track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SectorParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); } if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) { break; diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx index e3b8965c3e27b..296a203cf070b 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx +++ 
b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx @@ -48,9 +48,9 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom if (clustersCompressed->nTracks && clustersCompressed->maxTimeBin != -1e6 && clustersCompressed->maxTimeBin != param.continuousMaxTimeBin) { throw std::runtime_error("Configured max time bin does not match value used for track model encoding"); } - std::vector clusters[NSLICES][GPUCA_ROW_COUNT]; - std::atomic_flag locks[NSLICES][GPUCA_ROW_COUNT]; - for (uint32_t i = 0; i < NSLICES * GPUCA_ROW_COUNT; i++) { + std::vector clusters[NSECTORS][GPUCA_ROW_COUNT]; + std::atomic_flag locks[NSECTORS][GPUCA_ROW_COUNT]; + for (uint32_t i = 0; i < NSECTORS * GPUCA_ROW_COUNT; i++) { (&locks[0][0])[i].clear(); } const uint32_t maxTime = param.continuousMaxTimeBin > 0 ? ((param.continuousMaxTimeBin + 1) * ClusterNative::scaleTimePacked - 1) : TPC_MAX_TIME_BIN_TRIGGERED; @@ -69,10 +69,10 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom }); size_t nTotalClusters = clustersCompressed->nAttachedClusters + clustersCompressed->nUnattachedClusters; ClusterNative* clusterBuffer = allocator(nTotalClusters); - uint32_t offsets[NSLICES][GPUCA_ROW_COUNT]; + uint32_t offsets[NSECTORS][GPUCA_ROW_COUNT]; uint32_t offset = 0; uint32_t decodedAttachedClusters = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { clustersNative.nClusters[i][j] = clusters[i][j].size() + ((i * GPUCA_ROW_COUNT + j >= clustersCompressed->nSliceRows) ? 
0 : clustersCompressed->nSliceRowClusters[i * GPUCA_ROW_COUNT + j]); offsets[i][j] = offset; @@ -85,7 +85,7 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom } clustersNative.clustersLinear = clusterBuffer; clustersNative.setOffsetPtrs(); - tbb::parallel_for(0, NSLICES, [&](auto i) { + tbb::parallel_for(0, NSECTORS, [&](auto i) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { ClusterNative* buffer = &clusterBuffer[clustersNative.clusterOffset[i][j]]; if (clusters[i][j].size()) { diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h index 4a40b20e8d4f5..0c54f34c0237a 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.h @@ -32,7 +32,7 @@ struct GPUParam; class TPCClusterDecompressor { public: - static constexpr uint32_t NSLICES = GPUCA_NSLICES; + static constexpr uint32_t NSECTORS = GPUCA_NSECTORS; static int32_t decompress(const o2::tpc::CompressedClustersFlat* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); static int32_t decompress(const o2::tpc::CompressedClusters* clustersCompressed, o2::tpc::ClusterNativeAccess& clustersNative, std::function allocator, const GPUParam& param, bool deterministicRec); }; diff --git a/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx b/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx index 9cb49bf4c7ef5..b23d19c3c9cd4 100644 --- a/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx +++ b/GPU/GPUTracking/DataCompression/standalone-cluster-dump-entropy-analysed.cxx @@ -23,7 +23,7 @@ #include const int32_t sort_method = 1; // 0 No sorting, 1 sort after pad, 2 sort after time, 3/4 mixed methods favoring pad / time -const int32_t slice_diff = 1; +const int32_t sector_diff = 1; const 
int32_t row_diff = 1; const int32_t pad_diff = 1; const int32_t time_diff = 1; @@ -40,7 +40,7 @@ const int32_t track_separate_q = track_based && 1; const int32_t track_diffsigma = track_based && 0; const int32_t track_separate_sigma = track_based && 1; const int32_t truncate_bits = 1; -const int32_t separate_slices = 0; +const int32_t separate_sectors = 0; const int32_t separate_patches = 0; const int32_t separate_sides = 0; const int32_t full_row_numbers = 1; @@ -59,7 +59,7 @@ const int32_t sort_pad_mixed_bins = 100; const int32_t sort_time_mixed_bins = 400; #define EVENT 0 -#define SLICE 1 +#define SECTOR 1 #define PATCH 2 #define ROW 3 #define PAD 4 @@ -95,14 +95,14 @@ const int32_t rr = optimized_negative_values && 0 ? 13 : 14; // We can make them const uint32_t field_bits[] = {0, 6, 0, 8, 14, 15, 8, 8, 10, 16, 2, 0, 14, 15, 16, 10, 26, 16, 8, 8, 16, 26, 8, 8, rr, rr, rr, rr, rr, 14}; const uint32_t significant_bits[] = {0, 6, 0, 8, 14, 15, truncate_sigma, truncate_sigma, truncate_charge, truncate_charge, 2, 0, 14, 15, truncate_charge, truncate_charge, 26, 16, truncate_sigma, truncate_sigma, 16, 26, 8, 8, rr, rr, rr, rr, rr, 14}; const int32_t nFields = sizeof(field_bits) / sizeof(field_bits[0]); -const char* field_names[] = {"event", "slice", "patch", "row", "pad", "time", "sigmaPad", "sigmaTime", "qmax", "qtot", "flagPadTime", "trackID", "resTrackPad", +const char* field_names[] = {"event", "sector", "patch", "row", "pad", "time", "sigmaPad", "sigmaTime", "qmax", "qtot", "flagPadTime", "trackID", "resTrackPad", "resTrackTime", "trackQTot", "trackQMax", "qmaxtot", "sigmapadtime", "diffsigmapad", "diffsigmatime", "diffsigmapadtime", "tracktotmax", "trackfirstrow", "trackrow", "pad_80", "pad_92", "pad_104", "pad_116", "pad_128", "pad_140"}; union cluster_struct { struct { - uint32_t event, slice, patch, row, pad, time, sigmaPad, sigmaTime, qmax, qtot, splitPadTime; + uint32_t event, sector, patch, row, pad, time, sigmaPad, sigmaTime, qmax, qtot, splitPadTime; 
int32_t trackID; uint32_t resPad, resTime, avgtot, avgmax; }; @@ -209,7 +209,7 @@ bool clustercompare_padtime_mixed(cluster_struct a, cluster_struct b) { return ( bool clustercompare_timepad_mixed(cluster_struct a, cluster_struct b) { return (a.time / sort_time_mixed_bins < b.time / sort_time_mixed_bins || (a.time / sort_time_mixed_bins == b.time / sort_time_mixed_bins && a.pad < b.pad)); } -bool clustercompare_inevent(cluster_struct a, cluster_struct b) { return (a.slice < b.slice || (a.slice == b.slice && a.patch < b.patch) || (a.slice == b.slice && a.patch == b.patch && a.row < b.row)); } +bool clustercompare_inevent(cluster_struct a, cluster_struct b) { return (a.sector < b.sector || (a.sector == b.sector && a.patch < b.patch) || (a.sector == b.sector && a.patch == b.patch && a.row < b.row)); } void do_diff(uint32_t& val, int32_t& last, uint32_t bits, uint32_t maxval = 0) { @@ -327,7 +327,7 @@ int32_t main(int argc, char** argv) double* probabilities[nFields]; int64_t counts[nFields]; int32_t used[nFields]; - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID) { continue; } @@ -337,18 +337,18 @@ int32_t main(int argc, char** argv) double rawtotalbytes = 0; double entrototalbytes = 0; - for (int32_t islice = 0; islice < 36; islice++) { + for (int32_t isector = 0; isector < 36; isector++) { for (int32_t ipatch = 0; ipatch < 6; ipatch++) { - if (separate_slices) { - printf("SLICE %d ", islice); + if (separate_sectors) { + printf("SECTOR %d ", isector); } if (separate_patches) { printf("PATCH %d", ipatch); } - if (separate_slices || separate_patches) { + if (separate_sectors || separate_patches) { printf("\n"); } - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } @@ -359,7 +359,7 @@ int32_t main(int argc, char** argv) size_t nClustersUsed = 0; - int32_t lastRow = 0, lastPad = 0, lastTime = 0, lastSlice = 0, 
lastResPad = 0, lastResTime = 0, lastQTot = 0, lastQMax = 0, lastSigmaPad = 0, lastSigmaTime = 0, lastTrack = -1, lastEvent = 0; + int32_t lastRow = 0, lastPad = 0, lastTime = 0, lastSector = 0, lastResPad = 0, lastResTime = 0, lastQTot = 0, lastQMax = 0, lastSigmaPad = 0, lastSigmaTime = 0, lastTrack = -1, lastEvent = 0; for (size_t i = 0; i < nClusters; i++) { const cluster_struct& cluster_org = clusters[i]; @@ -368,10 +368,10 @@ int32_t main(int argc, char** argv) printf("%d\n", cluster.pad); } - if ((separate_slices && cluster.slice != islice) || (separate_patches && cluster.patch != ipatch)) { + if ((separate_sectors && cluster.sector != isector) || (separate_patches && cluster.patch != ipatch)) { continue; } - if (separate_sides && !(cluster.slice < 18 ^ islice < 18)) { + if (separate_sides && !(cluster.sector < 18 ^ isector < 18)) { continue; } @@ -379,7 +379,7 @@ int32_t main(int argc, char** argv) uint32_t dSigmaPad, dSigmaTime; if (cluster.event != lastEvent) { - lastRow = lastPad = lastTime = lastSlice = 0; + lastRow = lastPad = lastTime = lastSector = 0; lastTrack = -1; } @@ -387,13 +387,13 @@ int32_t main(int argc, char** argv) cluster.row += fgRows[cluster.patch][0]; } - if ((slice_diff || res_diff || track_diffqtot || track_diffqmax) && cluster.trackID != -1 && track_based) { + if ((sector_diff || res_diff || track_diffqtot || track_diffqmax) && cluster.trackID != -1 && track_based) { if (lastTrack != cluster.trackID) { - lastSlice = lastResPad = lastResTime = lastQTot = lastQMax = lastSigmaPad = lastSigmaTime = 0; + lastSector = lastResPad = lastResTime = lastQTot = lastQMax = lastSigmaPad = lastSigmaTime = 0; } - if (slice_diff) { - do_diff(cluster.slice, lastSlice, field_bits[SLICE]); + if (sector_diff) { + do_diff(cluster.sector, lastSector, field_bits[SECTOR]); } if (res_diff) { @@ -483,17 +483,17 @@ int32_t main(int argc, char** argv) lastTrack = cluster.trackID; if (print_clusters > 0 || (print_clusters < 0 && i < -print_clusters)) { - 
printf("Event %u Track %d Slice %u Patch %u Row %u Pad %u Time %u sigmaPad %u sigmaTime %u qTot %u qMax %u Flag %u resPad %u resTime %u avgTot %u avgMax %u\n", cluster.event, cluster.trackID, cluster.slice, cluster.patch, cluster.row, cluster.pad, cluster.time, cluster.sigmaPad, + printf("Event %u Track %d Sector %u Patch %u Row %u Pad %u Time %u sigmaPad %u sigmaTime %u qTot %u qMax %u Flag %u resPad %u resTime %u avgTot %u avgMax %u\n", cluster.event, cluster.trackID, cluster.sector, cluster.patch, cluster.row, cluster.pad, cluster.time, cluster.sigmaPad, cluster.sigmaTime, cluster.qtot, cluster.qmax, cluster.splitPadTime, cluster.resPad, cluster.resTime, cluster.avgtot, cluster.avgmax); } - for (int32_t j = SLICE; j < nFields; j++) { + for (int32_t j = SECTOR; j < nFields; j++) { bool forceStore = false; if (j == CLUSTER_ID || j == PATCH) { continue; } - if (j == SLICE && (track_based == 0 || cluster.trackID == -1)) { + if (j == SECTOR && (track_based == 0 || cluster.trackID == -1)) { continue; } @@ -594,7 +594,7 @@ int32_t main(int argc, char** argv) double log2 = log(2.); double entropies[nFields]; double huffmanSizes[nFields]; - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } @@ -631,7 +631,7 @@ int32_t main(int argc, char** argv) int32_t rawBits = 0; double entroTotal = 0., huffmanTotal = 0.; - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } @@ -663,7 +663,7 @@ int32_t main(int argc, char** argv) used[i] = 1; } } - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (field_bits[i] == 0) { continue; } @@ -672,7 +672,7 @@ int32_t main(int argc, char** argv) } printf("Field %2d/%16s (count %10ld / used %1d) rawBits %2d huffman %9.6f entropy %9.6f\n", i, field_names[i], counts[i], used[i], field_bits[i], huffmanSizes[i], entropies[i]); } - 
rawBits = 79; // Override incorrect calculation: Row is only 6 bit in raw format, and slice is not needed! + rawBits = 79; // Override incorrect calculation: Row is only 6 bit in raw format, and sector is not needed! printf("Raw Bits: %d - Total Size %f MB Clusters %d\n", rawBits, (double)rawBits * (double)nClustersUsed / 8. / 1.e6, nClustersUsed); printf("Huffman Bits: %f - Total Size %f MB\n", huffmanTotal / (double)nClustersUsed, huffmanTotal / 8. / 1.e6); printf("Entropy Bits: %f - Total Size %f MB\n", entroTotal / (double)nClustersUsed, entroTotal / 8. / 1.e6); @@ -680,10 +680,10 @@ int32_t main(int argc, char** argv) entrototalbytes += entroTotal; rawtotalbytes += (double)rawBits * (double)nClustersUsed; - if (separate_sides && !separate_slices && islice == 0) { - islice = 17; - } else if (!separate_slices) { - islice = 9999999; + if (separate_sides && !separate_sectors && isector == 0) { + isector = 17; + } else if (!separate_sectors) { + isector = 9999999; } if (!separate_patches) { @@ -692,12 +692,12 @@ int32_t main(int argc, char** argv) } } - if (separate_slices || separate_patches || separate_sides) { + if (separate_sectors || separate_patches || separate_sides) { printf("Total Compression: %f\n", rawtotalbytes / entrototalbytes); } printf("Exiting\n"); - for (int32_t i = SLICE; i < nFields; i++) { + for (int32_t i = SECTOR; i < nFields; i++) { if (i == CLUSTER_ID || i == PATCH) { continue; } diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 4c275d6de1bf1..1faee5f28907d 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -138,7 +138,7 @@ class GPUDataTypes QA = 2 }; enum ENUM_CLASS RecoStep { TPCConversion = 1, - TPCSliceTracking = 2, + TPCSectorTracking = 2, TPCMerging = 4, TPCCompression = 8, TRDTracking = 16, @@ -167,7 +167,7 @@ class GPUDataTypes #endif typedef bitfield RecoStepField; typedef bitfield InOutTypeField; - static constexpr uint32_t 
NSLICES = 36; + static constexpr uint32_t NSECTORS = 36; static DeviceType GetDeviceType(const char* type); }; @@ -205,27 +205,27 @@ typedef GPUCalibObjectsTemplate GPUCalibObjects; // NOTE: These 2 mu typedef GPUCalibObjectsTemplate GPUCalibObjectsConst; struct GPUTrackingInOutZS { - static constexpr uint32_t NSLICES = GPUDataTypes::NSLICES; + static constexpr uint32_t NSECTORS = GPUDataTypes::NSECTORS; static constexpr uint32_t NENDPOINTS = 20; - struct GPUTrackingInOutZSSlice { + struct GPUTrackingInOutZSSector { const void* const* zsPtr[NENDPOINTS]; const uint32_t* nZSPtr[NENDPOINTS]; uint32_t count[NENDPOINTS]; }; struct GPUTrackingInOutZSCounts { - uint32_t count[NSLICES][NENDPOINTS] = {}; + uint32_t count[NSECTORS][NENDPOINTS] = {}; }; struct GPUTrackingInOutZSMeta { - void* ptr[NSLICES][NENDPOINTS]; - uint32_t n[NSLICES][NENDPOINTS]; + void* ptr[NSECTORS][NENDPOINTS]; + uint32_t n[NSECTORS][NENDPOINTS]; }; - GPUTrackingInOutZSSlice slice[NSLICES]; + GPUTrackingInOutZSSector sector[NSECTORS]; }; struct GPUTrackingInOutDigits { - static constexpr uint32_t NSLICES = GPUDataTypes::NSLICES; - const o2::tpc::Digit* tpcDigits[NSLICES] = {nullptr}; - size_t nTPCDigits[NSLICES] = {0}; + static constexpr uint32_t NSECTORS = GPUDataTypes::NSECTORS; + const o2::tpc::Digit* tpcDigits[NSECTORS] = {nullptr}; + size_t nTPCDigits[NSECTORS] = {0}; const GPUTPCDigitsMCInput* tpcDigitsMC = nullptr; }; @@ -233,18 +233,18 @@ struct GPUTrackingInOutPointers { GPUTrackingInOutPointers() = default; // TPC - static constexpr uint32_t NSLICES = GPUDataTypes::NSLICES; + static constexpr uint32_t NSECTORS = GPUDataTypes::NSECTORS; const GPUTrackingInOutZS* tpcZS = nullptr; const GPUTrackingInOutDigits* tpcPackedDigits = nullptr; - const GPUTPCClusterData* clusterData[NSLICES] = {nullptr}; - uint32_t nClusterData[NSLICES] = {0}; - const AliHLTTPCRawCluster* rawClusters[NSLICES] = {nullptr}; - uint32_t nRawClusters[NSLICES] = {0}; + const GPUTPCClusterData* clusterData[NSECTORS] = 
{nullptr}; + uint32_t nClusterData[NSECTORS] = {0}; + const AliHLTTPCRawCluster* rawClusters[NSECTORS] = {nullptr}; + uint32_t nRawClusters[NSECTORS] = {0}; const o2::tpc::ClusterNativeAccess* clustersNative = nullptr; - const GPUTPCTrack* sliceTracks[NSLICES] = {nullptr}; - uint32_t nSliceTracks[NSLICES] = {0}; - const GPUTPCHitId* sliceClusters[NSLICES] = {nullptr}; - uint32_t nSliceClusters[NSLICES] = {0}; + const GPUTPCTrack* sectorTracks[NSECTORS] = {nullptr}; + uint32_t nSectorTracks[NSECTORS] = {0}; + const GPUTPCHitId* sectorClusters[NSECTORS] = {nullptr}; + uint32_t nSectorClusters[NSECTORS] = {0}; const AliHLTTPCClusterMCLabel* mcLabelsTPC = nullptr; uint32_t nMCLabelsTPC = 0; const GPUTPCMCInfo* mcInfosTPC = nullptr; diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h index 2cec1775dd239..e5012d86742f8 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h @@ -45,8 +45,8 @@ struct GPUMemorySizeScalers { double tpcSectorTracksPerHit = 0.02; double tpcSectorTrackHitsPerHit = 0.8; double tpcSectorTrackHitsPerHitWithRejection = 1.0; - double tpcMergedTrackPerSliceTrack = 0.9; - double tpcMergedTrackHitPerSliceHit = 1.1; + double tpcMergedTrackPerSectorTrack = 0.9; + double tpcMergedTrackHitPerSectorHit = 1.1; size_t tpcCompressedUnattachedHitsBase1024[3] = {900, 900, 500}; // No ratio, but integer fraction of 1024 for exact computation // Upper limits @@ -71,7 +71,7 @@ struct GPUMemorySizeScalers { return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * factor * temporaryFactor); } - inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxPeaks : (GPUCA_NSLICES * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); } + inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? 
tpcMaxPeaks : (GPUCA_NSECTORS * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); } inline size_t NTPCClusters(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxSectorClusters : tpcMaxClusters, (conservative ? 1.0 : tpcClustersPerPeak) * NTPCPeaks(tpcDigits, perSector)); } inline size_t NTPCStartHits(size_t tpcHits) { return getValue(tpcMaxStartHits, tpcHits * tpcStartHitsPerHit); } inline size_t NTPCRowStartHits(size_t tpcHits) { return getValue(tpcMaxRowStartHits, std::max(NTPCStartHits(tpcHits) * (tpcHits < 30000000 ? 20 : 12) / GPUCA_ROW_COUNT, tpcMinRowStartHits)); } @@ -79,8 +79,8 @@ struct GPUMemorySizeScalers { inline size_t NTPCTrackletHits(size_t tpcHits) { return getValue(tpcMaxTrackletHits, hitOffset + tpcHits * tpcTrackletHitsPerHit); } inline size_t NTPCSectorTracks(size_t tpcHits) { return getValue(tpcMaxSectorTracks, tpcHits * tpcSectorTracksPerHit); } inline size_t NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection = 0) { return getValue(tpcMaxSectorTrackHits, tpcHits * (withRejection ? tpcSectorTrackHitsPerHitWithRejection : tpcSectorTrackHitsPerHit)); } - inline size_t NTPCMergedTracks(size_t tpcSliceTracks) { return getValue(tpcMaxMergedTracks, tpcSliceTracks * (conservative ? 1.0 : tpcMergedTrackPerSliceTrack)); } - inline size_t NTPCMergedTrackHits(size_t tpcSliceTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSliceTrackHitss * tpcMergedTrackHitPerSliceHit); } + inline size_t NTPCMergedTracks(size_t tpcSectorTracks) { return getValue(tpcMaxMergedTracks, tpcSectorTracks * (conservative ? 1.0 : tpcMergedTrackPerSectorTrack)); } + inline size_t NTPCMergedTrackHits(size_t tpcSectorTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit); } inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 
1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * factor * temporaryFactor); } }; diff --git a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h index 746fb1cf7d19f..a1dd54bbba02b 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCClusterOccupancyMap.h @@ -22,7 +22,7 @@ namespace o2::gpu { struct GPUParam; struct GPUTPCClusterOccupancyMapBin { - uint16_t bin[GPUCA_NSLICES][GPUCA_ROW_COUNT]; + uint16_t bin[GPUCA_NSECTORS][GPUCA_ROW_COUNT]; GPUd() static uint32_t getNBins(const GPUParam& param); GPUd() static uint32_t getTotalSize(const GPUParam& param); diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index fb1a12da994da..029d0bdea1397 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -23,7 +23,7 @@ namespace gpu { struct GPUTPCGMMergedTrackHit { uint32_t num; - uint8_t slice, row, leg, state; + uint8_t sector, row, leg, state; // NOTE: the lower states must match those from ClusterNative! // TODO: take them directly from clusterNative header. 
diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index da9a66fa57301..cb022bf891f4b 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -17,9 +17,9 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_NSLICES) && !defined(GPUCA_ROW_COUNT) +#if !defined(GPUCA_NSECTORS) && !defined(GPUCA_ROW_COUNT) #include "DataFormatsTPC/Constants.h" -#define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR +#define GPUCA_NSECTORS o2::tpc::constants::MAXSECTOR #define GPUCA_ROW_COUNT o2::tpc::constants::MAXGLOBALPADROW #ifndef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_TPC_GEOMETRY_O2 @@ -112,25 +112,25 @@ class GPUTPCGeometry // TODO: Make values constexpr GPUd() float PadWidth(int32_t row) const { return (mPadWidth[GetRegion(row)]); } GPUd() uint8_t NPads(int32_t row) const { return mNPads[row]; } - GPUd() float LinearPad2Y(int32_t slice, int32_t row, float pad) const + GPUd() float LinearPad2Y(int32_t sector, int32_t row, float pad) const { #ifdef GPUCA_TPC_GEOMETRY_O2 const float u = (pad - 0.5f * (mNPads[row] - 1)) * PadWidth(row); #else const float u = (pad - 0.5f * mNPads[row]) * PadWidth(row); #endif - return (slice >= GPUCA_NSLICES / 2) ? -u : u; + return (sector >= GPUCA_NSECTORS / 2) ? -u : u; } - GPUd() static float LinearTime2Z(int32_t slice, float time) + GPUd() static float LinearTime2Z(int32_t sector, float time) { const float v = 250.f - time * FACTOR_T2Z; // Used in compression, must remain constant at 250cm! - return (slice >= GPUCA_NSLICES / 2) ? -v : v; + return (sector >= GPUCA_NSECTORS / 2) ? -v : v; } - GPUd() float LinearY2Pad(int32_t slice, int32_t row, float y) const + GPUd() float LinearY2Pad(int32_t sector, int32_t row, float y) const { - const float u = (slice >= GPUCA_NSLICES / 2) ? -y : y; + const float u = (sector >= GPUCA_NSECTORS / 2) ? 
-y : y; #ifdef GPUCA_TPC_GEOMETRY_O2 return u / PadWidth(row) + 0.5f * (mNPads[row] - 1); #else @@ -138,9 +138,9 @@ class GPUTPCGeometry // TODO: Make values constexpr #endif } - GPUd() static float LinearZ2Time(int32_t slice, float z) + GPUd() static float LinearZ2Time(int32_t sector, float z) { - const float v = (slice >= GPUCA_NSLICES / 2) ? -z : z; + const float v = (sector >= GPUCA_NSECTORS / 2) ? -z : z; return (250.f - v) * FACTOR_Z2T; // Used in compression, must remain constant at 250cm } }; diff --git a/GPU/GPUTracking/DataTypes/TPCPadBitMap.h b/GPU/GPUTracking/DataTypes/TPCPadBitMap.h index 5a4beda82889e..6ddfac8c268ee 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadBitMap.h +++ b/GPU/GPUTracking/DataTypes/TPCPadBitMap.h @@ -102,7 +102,7 @@ struct TPCPadBitMap { }; uint16_t mPadOffsetPerRow[GPUCA_ROW_COUNT]; - SectorBitMap mBitMap[GPUCA_NSLICES]; + SectorBitMap mBitMap[GPUCA_NSECTORS]; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h index 35aa353a384e6..263956c8b5602 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h +++ b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.h @@ -81,14 +81,14 @@ struct TPCPadGainCalib { GPUdi() void setMinCorrectionFactor(const float minCorrectionFactor) { - for (int32_t sector = 0; sector < GPUCA_NSLICES; sector++) { + for (int32_t sector = 0; sector < GPUCA_NSECTORS; sector++) { mGainCorrection[sector].mMinCorrectionFactor = minCorrectionFactor; } } GPUdi() void setMaxCorrectionFactor(const float maxCorrectionFactor) { - for (int32_t sector = 0; sector < GPUCA_NSLICES; sector++) { + for (int32_t sector = 0; sector < GPUCA_NSECTORS; sector++) { mGainCorrection[sector].mMaxCorrectionFactor = maxCorrectionFactor; } } @@ -154,7 +154,7 @@ struct TPCPadGainCalib { }; uint16_t mPadOffsetPerRow[GPUCA_ROW_COUNT]; - SectorPadGainCorrection mGainCorrection[GPUCA_NSLICES]; + SectorPadGainCorrection mGainCorrection[GPUCA_NSECTORS]; }; } // namespace o2::gpu 
diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 75b0169a660a5..f18390629f2bc 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -48,17 +48,17 @@ #define TPC_MAX_TIME_BIN_TRIGGERED 600 -#if defined(GPUCA_NSLICES) || defined(GPUCA_ROW_COUNT) - #error GPUCA_NSLICES or GPUCA_ROW_COUNT already defined, do not include GPUTPCGeometry.h before! +#if defined(GPUCA_NSECTORS) || defined(GPUCA_ROW_COUNT) + #error GPUCA_NSECTORS or GPUCA_ROW_COUNT already defined, do not include GPUTPCGeometry.h before! #endif #if defined(GPUCA_TPC_GEOMETRY_O2) && !(defined(ROOT_VERSION_CODE) && ROOT_VERSION_CODE < 393216) //Use definitions from the O2 headers if available for nicer code and type safety #include "DataFormatsTPC/Constants.h" - #define GPUCA_NSLICES o2::tpc::constants::MAXSECTOR + #define GPUCA_NSECTORS o2::tpc::constants::MAXSECTOR #define GPUCA_ROW_COUNT o2::tpc::constants::MAXGLOBALPADROW #else //Define it manually, if O2 headers not available, ROOT5, and OpenCL 1.2, which do not know C++11. 
- #define GPUCA_NSLICES 36 + #define GPUCA_NSECTORS 36 #ifdef GPUCA_TPC_GEOMETRY_O2 #define GPUCA_ROW_COUNT 152 #else diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index cebc74fcc4a5b..c40659ecd2632 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -32,7 +32,7 @@ #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT 256 - #define GPUCA_LB_GPUTPCCreateSliceData 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 @@ -46,7 +46,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 256 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 @@ -56,7 +56,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 @@ -89,7 +89,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -98,7 +98,7 @@ #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 
#define GPUCA_THREAD_COUNT 256 - #define GPUCA_LB_GPUTPCCreateSliceData 128 + #define GPUCA_LB_GPUTPCCreateTrackingData 128 #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 @@ -112,7 +112,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 256 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 @@ -122,7 +122,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 @@ -155,7 +155,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -164,7 +164,7 @@ #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT 512 - #define GPUCA_LB_GPUTPCCreateSliceData 384 + #define GPUCA_LB_GPUTPCCreateTrackingData 384 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 @@ -178,7 +178,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 
12 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 32, 6 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 @@ -188,7 +188,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 @@ -221,7 +221,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -230,7 +230,7 @@ #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT 512 - #define GPUCA_LB_GPUTPCCreateSliceData 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 @@ -244,7 +244,7 @@ #define GPUCA_LB_GPUTPCCFGather 1024, 1 #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 64, 5 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 @@ -254,7 +254,7 @@ #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define 
GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 @@ -279,7 +279,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_COMP_GATHER_KERNEL 4 #define GPUCA_COMP_GATHER_MODE 3 @@ -297,8 +297,8 @@ #ifndef GPUCA_THREAD_COUNT #define GPUCA_THREAD_COUNT 256 #endif - #ifndef GPUCA_LB_GPUTPCCreateSliceData - #define GPUCA_LB_GPUTPCCreateSliceData 256 + #ifndef GPUCA_LB_GPUTPCCreateTrackingData + #define GPUCA_LB_GPUTPCCreateTrackingData 256 #endif #ifndef GPUCA_LB_GPUTPCTrackletConstructor #define GPUCA_LB_GPUTPCTrackletConstructor 256 @@ -372,8 +372,8 @@ #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerSliceRefit - #define GPUCA_LB_GPUTPCGMMergerSliceRefit 256 + #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 #endif #ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 @@ -402,8 +402,8 @@ #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 #endif - #ifndef GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare - #define GPUCA_LB_GPUTPCGMMergerMergeSlicesPrepare 256 + #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 #endif #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 @@ -512,8 +512,8 @@ #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN 
#define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCTrackletConstructor_singleSlice GPUCA_LB_GPUTPCTrackletConstructor -#define GPUCA_LB_GPUTPCTrackletConstructor_allSlices GPUCA_LB_GPUTPCTrackletConstructor +#define GPUCA_LB_GPUTPCTrackletConstructor_singleSector GPUCA_LB_GPUTPCTrackletConstructor +#define GPUCA_LB_GPUTPCTrackletConstructor_allSectors GPUCA_LB_GPUTPCTrackletConstructor #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER @@ -549,8 +549,8 @@ #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 #endif - #ifndef GPUCA_TRACKLET_SELECTOR_SLICE_COUNT - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 8 // Currently must be smaller than avaiable MultiProcessors on GPU or will result in wrong results + #ifndef GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 8 // Currently must be smaller than avaiable MultiProcessors on GPU or will result in wrong results #endif #ifndef GPUCA_COMP_GATHER_KERNEL #define GPUCA_COMP_GATHER_KERNEL 0 @@ -566,7 +566,7 @@ #define GPUCA_ALTERNATE_BORDER_SORT 0 #define GPUCA_SORT_BEFORE_FIT 0 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_TRACKLET_SELECTOR_SLICE_COUNT 1 + #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_THREAD_COUNT_FINDER 1 #define GPUCA_COMP_GATHER_KERNEL 0 #define GPUCA_COMP_GATHER_MODE 0 @@ -612,7 +612,7 @@ #define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread #define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread -#define GPUCA_MAX_SLICE_NTRACK (2 << 24) // Maximum number of tracks per slice (limited by track id format) +#define GPUCA_MAX_SECTOR_NTRACK (2 << 24) // Maximum number of tracks per sector 
(limited by track id format) // #define GPUCA_KERNEL_DEBUGGER_OUTPUT diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 905622de26ba9..d0447f189e40e 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -90,8 +90,8 @@ AddOptionRTC(extraClusterErrorFactorSplitPadSharedSingleY2, float, 3.0f, "", 0, AddOptionRTC(extraClusterErrorSplitTimeSharedSingleZ2, float, 0.03f, "", 0, "Additive extra cluster error for Z2 if splittime, shared, or single set") AddOptionRTC(extraClusterErrorFactorSplitTimeSharedSingleZ2, float, 3.0f, "", 0, "Multiplicative extra cluster error for Z2 if splittime, shared, or single set") AddOptionArray(errorsCECrossing, float, 5, (0.f, 0.f, 0.f, 0.f, 0.f), "", 0, "Extra errors to add to track when crossing CE, depending on addErrorsCECrossing") // BUG: CUDA cannot yet handle AddOptionArrayRTC -AddOptionRTC(extrapolationTrackingYRangeUpper, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for extrapolated track candidates") -AddOptionRTC(extrapolationTrackingYRangeLower, float, 0.85f, "", 0, "Inner portion of y-range in slice that is not used in searching for extrapolated track candidates") +AddOptionRTC(extrapolationTrackingYRangeUpper, float, 0.85f, "", 0, "Inner portion of y-range in sector that is not used in searching for extrapolated track candidates") +AddOptionRTC(extrapolationTrackingYRangeLower, float, 0.85f, "", 0, "Inner portion of y-range in sector that is not used in searching for extrapolated track candidates") AddOptionRTC(trackFollowingYFactor, float, 4.f, "", 0, "Weight of y residual vs z residual in tracklet constructor") AddOptionRTC(trackMergerFactor2YS, float, 1.5f * 1.5f, "", 0, "factor2YS for track merging") AddOptionRTC(trackMergerFactor2ZT, float, 1.5f * 1.5f, "", 0, "factor2ZT for track merging") @@ -112,7 +112,7 @@ AddOptionRTC(trackMergerMinPartHits, uint8_t, 10, 
"", 0, "Minimum hits of track AddOptionRTC(trackMergerMinTotalHits, uint8_t, 20, "", 0, "Minimum total of track part during track merging") AddOptionRTC(mergerCERowLimit, uint8_t, 5, "", 0, "Distance from first / last row in order to attempt merging accross CE") AddOptionRTC(mergerLooperQPtB5Limit, uint8_t, 4, "", 0, "Min Q/Pt (@B=0.5T) to run special looper merging procedure") -AddOptionRTC(mergerLooperSecondHorizontalQPtB5Limit, uint8_t, 2, "", 0, "Min Q/Pt (@B=0.5T) to attempt second horizontal merge between slices after a vertical merge was found") +AddOptionRTC(mergerLooperSecondHorizontalQPtB5Limit, uint8_t, 2, "", 0, "Min Q/Pt (@B=0.5T) to attempt second horizontal merge between sectors after a vertical merge was found") AddOptionRTC(trackFollowingMaxRowGap, uint8_t, 4, "", 0, "Maximum number of consecutive rows without hit in track following") AddOptionRTC(trackFollowingMaxRowGapSeed, uint8_t, 2, "", 0, "Maximum number of consecutive rows without hit in track following during fit of seed") AddOptionRTC(trackFitMaxRowMissedHard, uint8_t, 10, "", 0, "Hard limit for number of missed rows in fit / propagation") @@ -257,7 +257,7 @@ AddOption(autoAdjustHostThreads, bool, true, "", 0, "Auto-adjust number of OMP t AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues") AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)") AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") -AddOption(trackletSelectorSlices, int8_t, -1, "", 0, "Number of slices to processes in parallel at max") +AddOption(trackletSelectorSectors, int8_t, -1, "", 0, "Number of sectors to processes in parallel at max") AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline") AddOption(trackletSelectorInPipeline, int8_t, -1, "", 0, "Run tracklet selector in the pipeline") 
AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit") @@ -320,8 +320,8 @@ AddOption(drawTracks, bool, false, "", 0, "Highlight sector tracks") AddOption(drawExtrapolatedTracks, bool, false, "", 0, "Highlight global sector tracks prolonged into adjacent sector") AddOption(drawFinal, bool, false, "", 0, "Highlight final tracks") AddOption(excludeClusters, int32_t, 0, "", 0, "Exclude clusters from selected draw objects from display, (2 = exclude clusters but still show tracks)") -AddOption(drawSlice, int32_t, -1, "", 0, "Show individual slice") -AddOption(drawRelatedSlices, int32_t, 0, "", 0, "Show related slices (if drawSlice != -1)") +AddOption(drawSector, int32_t, -1, "", 0, "Show individual sector") +AddOption(drawRelatedSectors, int32_t, 0, "", 0, "Show related sectors (if drawSector != -1)") AddOption(drawGrid, int32_t, 0, "", 0, "Highlight grid") AddOption(propagateTracks, int32_t, 0, "", 0, "Propagate final tracks further (inward / outward / show MC tracks)") AddOption(showCollision, int32_t, -1, "", 0, "Show only individual collision") diff --git a/GPU/GPUTracking/GPUTrackingLinkDef_O2.h b/GPU/GPUTracking/GPUTrackingLinkDef_O2.h index d5a81797bf7fc..8e99514a817c5 100644 --- a/GPU/GPUTracking/GPUTrackingLinkDef_O2.h +++ b/GPU/GPUTracking/GPUTrackingLinkDef_O2.h @@ -19,12 +19,12 @@ #pragma link off all functions; #pragma link C++ class o2::gpu::GPUTPCGMMergedTrack + ; -#pragma link C++ class o2::gpu::GPUTPCGMSliceTrack + ; +#pragma link C++ class o2::gpu::GPUTPCGMSectorTrack + ; #pragma link C++ class o2::gpu::GPUTPCGMBorderTrack + ; #pragma link C++ class o2::gpu::GPUTPCGMTrackParam + ; #pragma link C++ class o2::gpu::GPUTPCTrack + ; #pragma link C++ struct o2::gpu::GPUTPCBaseTrackParam + ; -#pragma link C++ struct o2::gpu::GPUTPCGMSliceTrack::sliceTrackParam + ; +#pragma link C++ struct o2::gpu::GPUTPCGMSectorTrack::sectorTrackParam + ; #pragma link C++ class o2::gpu::trackInterface < o2::gpu::GPUTPCGMTrackParam> + ; #pragma 
link C++ class o2::gpu::GPUTRDTrack_t < o2::gpu::trackInterface < o2::gpu::GPUTPCGMTrackParam>> + ; #pragma link C++ class o2::gpu::gputpcgmmergertypes::GPUTPCOuterParam + ; diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 4b2778735ce3d..d899523217dbc 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -51,7 +51,7 @@ class GPUChain virtual bool SupportsDoublePipeline() { return false; } virtual int32_t FinalizePipelinedProcessing() { return 0; } - constexpr static int32_t NSLICES = GPUReconstruction::NSLICES; + constexpr static int32_t NSECTORS = GPUReconstruction::NSECTORS; virtual void DumpSettings(const char* dir = "") {} virtual void ReadSettings(const char* dir = "") {} diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 66f37e1122832..d80cf5c09f355 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -20,8 +20,8 @@ #include "GPUChainTracking.h" #include "GPUChainTrackingDefs.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutput.h" +#include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCTrack.h" @@ -78,9 +78,9 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() } mRec->RegisterGPUProcessor(mInputsHost.get(), mRec->IsGPU()); - if (GetRecoSteps() & RecoStep::TPCSliceTracking) { - for (uint32_t i = 0; i < NSLICES; i++) { - mRec->RegisterGPUProcessor(&processors()->tpcTrackers[i], GetRecoStepsGPU() & RecoStep::TPCSliceTracking); + if (GetRecoSteps() & RecoStep::TPCSectorTracking) { + for (uint32_t i = 0; i < NSECTORS; i++) { + mRec->RegisterGPUProcessor(&processors()->tpcTrackers[i], GetRecoStepsGPU() & RecoStep::TPCSectorTracking); } } if (GetRecoSteps() & RecoStep::TPCMerging) { @@ -102,7 +102,7 @@ void 
GPUChainTracking::RegisterPermanentMemoryAndProcessors() mRec->RegisterGPUProcessor(&processors()->tpcDecompressor, GetRecoStepsGPU() & RecoStep::TPCDecompression); } if (GetRecoSteps() & RecoStep::TPCClusterFinding) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUProcessor(&processors()->tpcClusterer[i], GetRecoStepsGPU() & RecoStep::TPCClusterFinding); } } @@ -121,8 +121,8 @@ void GPUChainTracking::RegisterGPUProcessors() mRec->RegisterGPUDeviceProcessor(mInputsShadow.get(), mInputsHost.get()); } memcpy((void*)&processorsShadow()->trdTrackerGPU, (const void*)&processors()->trdTrackerGPU, sizeof(processors()->trdTrackerGPU)); - if (GetRecoStepsGPU() & RecoStep::TPCSliceTracking) { - for (uint32_t i = 0; i < NSLICES; i++) { + if (GetRecoStepsGPU() & RecoStep::TPCSectorTracking) { + for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcTrackers[i], &processors()->tpcTrackers[i]); } } @@ -147,7 +147,7 @@ void GPUChainTracking::RegisterGPUProcessors() mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcDecompressor, &processors()->tpcDecompressor); } if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcClusterer[i], &processors()->tpcClusterer[i]); } } @@ -176,7 +176,7 @@ bool GPUChainTracking::ValidateSteps() return false; } if (!param().par.earlyTpcTransform) { - if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { + if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { GPUError("Invalid Reconstruction Step Setting: Tracking without early transform 
requires TPC Conversion to be active"); return false; } @@ -200,11 +200,11 @@ bool GPUChainTracking::ValidateSteps() return false; } #endif - if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCCompression) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCdEdx)) && !tpcClustersAvail) { + if (((GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCCompression) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCdEdx)) && !tpcClustersAvail) { GPUError("Missing input for TPC Cluster conversion / sector tracking / compression / dEdx: TPC Clusters required"); return false; } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking))) { + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking))) { GPUError("Input for TPC merger missing"); return false; } @@ -220,8 +220,8 @@ bool GPUChainTracking::ValidateSteps() GPUError("TPC Raw / TPC Clusters / TRD Tracklets cannot be output"); return false; } - if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { - GPUError("No TPC Slice Tracker Output available"); + if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { + GPUError("No TPC Sector Tracker Output available"); return false; } if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCMergedTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) { @@ -550,11 +550,11 @@ void 
GPUChainTracking::ClearIOPointers() void GPUChainTracking::AllocateIOMemory() { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { AllocateIOMemoryHelper(mIOPtrs.nClusterData[i], mIOPtrs.clusterData[i], mIOMem.clusterData[i]); AllocateIOMemoryHelper(mIOPtrs.nRawClusters[i], mIOPtrs.rawClusters[i], mIOMem.rawClusters[i]); - AllocateIOMemoryHelper(mIOPtrs.nSliceTracks[i], mIOPtrs.sliceTracks[i], mIOMem.sliceTracks[i]); - AllocateIOMemoryHelper(mIOPtrs.nSliceClusters[i], mIOPtrs.sliceClusters[i], mIOMem.sliceClusters[i]); + AllocateIOMemoryHelper(mIOPtrs.nSectorTracks[i], mIOPtrs.sectorTracks[i], mIOMem.sectorTracks[i]); + AllocateIOMemoryHelper(mIOPtrs.nSectorClusters[i], mIOPtrs.sectorClusters[i], mIOMem.sectorClusters[i]); } mIOMem.clusterNativeAccess.reset(new ClusterNativeAccess); std::memset(mIOMem.clusterNativeAccess.get(), 0, sizeof(ClusterNativeAccess)); // ClusterNativeAccess has no its own constructor @@ -725,18 +725,18 @@ int32_t GPUChainTracking::RunChain() return 1; } - mRec->PushNonPersistentMemory(qStr2Tag("TPCSLCD1")); // 1st stack level for TPC tracking slice data - mTPCSliceScratchOnStack = true; - if (runRecoStep(RecoStep::TPCSliceTracking, &GPUChainTracking::RunTPCTrackingSlices)) { + mRec->PushNonPersistentMemory(qStr2Tag("TPCSLCD1")); // 1st stack level for TPC tracking sector data + mTPCSectorScratchOnStack = true; + if (runRecoStep(RecoStep::TPCSectorTracking, &GPUChainTracking::RunTPCTrackingSectors)) { return 1; } if (runRecoStep(RecoStep::TPCMerging, &GPUChainTracking::RunTPCTrackingMerger, false)) { return 1; } - if (mTPCSliceScratchOnStack) { - mRec->PopNonPersistentMemory(RecoStep::TPCSliceTracking, qStr2Tag("TPCSLCD1")); // Release 1st stack level, TPC slice data not needed after merger - mTPCSliceScratchOnStack = false; + if (mTPCSectorScratchOnStack) { + mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Release 1st stack level, TPC sector data not needed after 
merger + mTPCSectorScratchOnStack = false; } if (mIOPtrs.clustersNative) { diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 6e5e0b3048140..e998baf8e45e8 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -101,14 +101,14 @@ class GPUChainTracking : public GPUChain std::unique_ptr tpcCompressedClusters; // TODO: Fix alignment std::unique_ptr tpcZSmeta; std::unique_ptr tpcZSmeta2; - std::unique_ptr tpcDigits[NSLICES]; + std::unique_ptr tpcDigits[NSECTORS]; std::unique_ptr digitMap; - std::unique_ptr clusterData[NSLICES]; - std::unique_ptr rawClusters[NSLICES]; + std::unique_ptr clusterData[NSECTORS]; + std::unique_ptr rawClusters[NSECTORS]; std::unique_ptr clustersNative; std::unique_ptr clusterNativeAccess; - std::unique_ptr sliceTracks[NSLICES]; - std::unique_ptr sliceClusters[NSLICES]; + std::unique_ptr sectorTracks[NSECTORS]; + std::unique_ptr sectorClusters[NSECTORS]; std::unique_ptr mcLabelsTPC; std::unique_ptr mcInfosTPC; std::unique_ptr mcInfosTPCCol; @@ -123,7 +123,7 @@ class GPUChainTracking : public GPUChain std::unique_ptr trdTracks; std::unique_ptr clusterNativeMC; std::unique_ptr> clusterNativeMCView; - std::unique_ptr tpcDigitsMC[NSLICES]; + std::unique_ptr tpcDigitsMC[NSECTORS]; std::unique_ptr[]> tpcDigitMCView; std::unique_ptr tpcDigitMCMap; std::unique_ptr> clusterNativeMCBuffer; @@ -149,8 +149,8 @@ class GPUChainTracking : public GPUChain // Getters for external usage of tracker classes GPUTRDTrackerGPU* GetTRDTrackerGPU() { return &processors()->trdTrackerGPU; } - GPUTPCTracker* GetTPCSliceTrackers() { return processors()->tpcTrackers; } - const GPUTPCTracker* GetTPCSliceTrackers() const { return processors()->tpcTrackers; } + GPUTPCTracker* GetTPCSectorTrackers() { return processors()->tpcTrackers; } + const GPUTPCTracker* GetTPCSectorTrackers() const { return processors()->tpcTrackers; } const GPUTPCGMMerger& GetTPCMerger() const { 
return processors()->tpcMerger; } GPUTPCGMMerger& GetTPCMerger() { return processors()->tpcMerger; } GPUDisplayInterface* GetEventDisplay() { return mEventDisplay.get(); } @@ -164,7 +164,7 @@ class GPUChainTracking : public GPUChain // Processing functions int32_t RunTPCClusterizer(bool synchronizeOutput = true); int32_t ForwardTPCDigits(); - int32_t RunTPCTrackingSlices(); + int32_t RunTPCTrackingSectors(); int32_t RunTPCTrackingMerger(bool synchronizeOutput = true); template int32_t RunTRDTracking(); @@ -216,7 +216,7 @@ class GPUChainTracking : public GPUChain struct eventStruct // Must consist only of void* ptr that will hold the GPU event ptrs! { - deviceEvent slice[NSLICES]; + deviceEvent sector[NSECTORS]; deviceEvent stream[GPUCA_MAX_STREAMS]; deviceEvent init; deviceEvent single; @@ -231,9 +231,9 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - int32_t ReadEvent(uint32_t iSlice, int32_t threadId); - void WriteOutput(int32_t iSlice, int32_t threadId); - int32_t ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput = true); + int32_t ReadEvent(uint32_t iSector, int32_t threadId); + void WriteOutput(int32_t iSector, int32_t threadId); + int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); int32_t PrepareProfile(); int32_t DoProfile(); @@ -277,7 +277,7 @@ class GPUChainTracking : public GPUChain // (Ptrs to) configuration objects std::unique_ptr mCFContext; - bool mTPCSliceScratchOnStack = false; + bool mTPCSectorScratchOnStack = false; std::unique_ptr mNewCalibObjects; bool mUpdateNewCalibObjects = false; std::unique_ptr mNewCalibValues; @@ -291,24 +291,24 @@ class GPUChainTracking : public GPUChain // Synchronization and Locks eventStruct* mEvents = nullptr; - volatile int32_t mSliceSelectorReady = 0; - std::array mWriteOutputDone; + volatile int32_t 
mSectorSelectorReady = 0; + std::array mWriteOutputDone; std::vector mOutputQueue; private: int32_t RunChainFinalize(); void SanityCheck(); - int32_t RunTPCTrackingSlices_internal(); + int32_t RunTPCTrackingSectors_internal(); int32_t RunTPCClusterizer_prepare(bool restorePointers); #ifdef GPUCA_TPC_GEOMETRY_O2 - std::pair RunTPCClusterizer_transferZS(int32_t iSlice, const CfFragment& fragment, int32_t lane); + std::pair RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane); void RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clusterer, GPUTPCClusterFinder& clustererShadow, int32_t stage, bool doGPU, int32_t lane); - std::pair TPCClusterizerDecodeZSCount(uint32_t iSlice, const CfFragment& fragment); - std::pair TPCClusterizerDecodeZSCountUpdate(uint32_t iSlice, const CfFragment& fragment); - void TPCClusterizerEnsureZSOffsets(uint32_t iSlice, const CfFragment& fragment); + std::pair TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment); + std::pair TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment); + void TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment); #endif - void RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType); + void RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType); void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType); void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 364fa4918257c..282a04c99c056 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -45,23 +45,23 @@ using namespace o2::tpc::constants; using namespace o2::dataformats; #ifdef GPUCA_TPC_GEOMETRY_O2 -std::pair GPUChainTracking::TPCClusterizerDecodeZSCountUpdate(uint32_t iSlice, const CfFragment& fragment) +std::pair GPUChainTracking::TPCClusterizerDecodeZSCountUpdate(uint32_t iSector, const CfFragment& fragment) { bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder::ZSOffset* o = processors()->tpcClusterer[iSlice].mPzsOffsets; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder::ZSOffset* o = processors()->tpcClusterer[iSector].mPzsOffsets; uint32_t digits = 0; uint32_t pages = 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - clusterer.mMinMaxCN[j] = mCFContext->fragmentData[fragment.index].minMaxCN[iSlice][j]; + clusterer.mMinMaxCN[j] = mCFContext->fragmentData[fragment.index].minMaxCN[iSector][j]; if (doGPU) { uint16_t posInEndpoint = 0; uint16_t pagesEndpoint = 0; for (uint32_t k = clusterer.mMinMaxCN[j].zsPtrFirst; k < clusterer.mMinMaxCN[j].zsPtrLast; k++) { const uint32_t pageFirst = (k == clusterer.mMinMaxCN[j].zsPtrFirst) ? clusterer.mMinMaxCN[j].zsPageFirst : 0; - const uint32_t pageLast = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; + const uint32_t pageLast = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? 
clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; for (uint32_t l = pageFirst; l < pageLast; l++) { - uint16_t pageDigits = mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j][posInEndpoint++]; + uint16_t pageDigits = mCFContext->fragmentData[fragment.index].pageDigits[iSector][j][posInEndpoint++]; if (pageDigits) { *(o++) = GPUTPCClusterFinder::ZSOffset{digits, j, pagesEndpoint}; digits += pageDigits; @@ -69,44 +69,44 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCountUpdat pagesEndpoint++; } } - if (pagesEndpoint != mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j].size()) { + if (pagesEndpoint != mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()) { if (GetProcessingSettings().ignoreNonFatalGPUErrors) { - GPUError("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j].size()); + GPUError("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()); return {0, 0}; } else { - GPUFatal("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSlice][j].size()); + GPUFatal("TPC raw page count mismatch in TPCClusterizerDecodeZSCountUpdate: expected %d / buffered %lu", pagesEndpoint, mCFContext->fragmentData[fragment.index].pageDigits[iSector][j].size()); } } } else { clusterer.mPzsOffsets[j] = GPUTPCClusterFinder::ZSOffset{digits, j, 0}; - digits += mCFContext->fragmentData[fragment.index].nDigits[iSlice][j]; - pages += mCFContext->fragmentData[fragment.index].nPages[iSlice][j]; + digits += mCFContext->fragmentData[fragment.index].nDigits[iSector][j]; + pages += mCFContext->fragmentData[fragment.index].nPages[iSector][j]; } } if (doGPU) { - pages 
= o - processors()->tpcClusterer[iSlice].mPzsOffsets; + pages = o - processors()->tpcClusterer[iSector].mPzsOffsets; } if (!doGPU && GetProcessingSettings().debugLevel >= 4 && mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) { - TPCClusterizerEnsureZSOffsets(iSlice, fragment); + TPCClusterizerEnsureZSOffsets(iSector, fragment); } return {digits, pages}; } -void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSlice, const CfFragment& fragment) +void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfFragment& fragment) { - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; uint32_t nAdcs = 0; for (uint16_t endpoint = 0; endpoint < GPUTrackingInOutZS::NENDPOINTS; endpoint++) { const auto& data = mCFContext->fragmentData[fragment.index]; uint32_t pagesEndpoint = 0; - const uint32_t nAdcsExpected = data.nDigits[iSlice][endpoint]; - const uint32_t nPagesExpected = data.nPages[iSlice][endpoint]; + const uint32_t nAdcsExpected = data.nDigits[iSector][endpoint]; + const uint32_t nPagesExpected = data.nPages[iSector][endpoint]; uint32_t nAdcDecoded = 0; - const auto& zs = mIOPtrs.tpcZS->slice[iSlice]; - for (uint32_t i = data.minMaxCN[iSlice][endpoint].zsPtrFirst; i < data.minMaxCN[iSlice][endpoint].zsPtrLast; i++) { - const uint32_t pageFirst = (i == data.minMaxCN[iSlice][endpoint].zsPtrFirst) ? data.minMaxCN[iSlice][endpoint].zsPageFirst : 0; - const uint32_t pageLast = (i + 1 == data.minMaxCN[iSlice][endpoint].zsPtrLast) ? data.minMaxCN[iSlice][endpoint].zsPageLast : zs.nZSPtr[endpoint][i]; + const auto& zs = mIOPtrs.tpcZS->sector[iSector]; + for (uint32_t i = data.minMaxCN[iSector][endpoint].zsPtrFirst; i < data.minMaxCN[iSector][endpoint].zsPtrLast; i++) { + const uint32_t pageFirst = (i == data.minMaxCN[iSector][endpoint].zsPtrFirst) ? 
data.minMaxCN[iSector][endpoint].zsPageFirst : 0; + const uint32_t pageLast = (i + 1 == data.minMaxCN[iSector][endpoint].zsPtrLast) ? data.minMaxCN[iSector][endpoint].zsPageLast : zs.nZSPtr[endpoint][i]; for (uint32_t j = pageFirst; j < pageLast; j++) { const uint8_t* page = static_cast(zs.zsPtr[endpoint][i]) + j * TPCZSHDR::TPC_ZS_PAGE_SIZE; const header::RAWDataHeader* rawDataHeader = reinterpret_cast(page); @@ -119,15 +119,15 @@ void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSlice, const CfFr } if (pagesEndpoint != nPagesExpected) { - GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %lu", iSlice, endpoint, fragment.index, pagesEndpoint, nPagesExpected); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %lu", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected); } if (nAdcDecoded != nAdcsExpected) { - GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC count mismatch: expected %u, buffered %u", iSlice, endpoint, fragment.index, nAdcsExpected, nAdcDecoded); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC count mismatch: expected %u, buffered %u", iSector, endpoint, fragment.index, nAdcsExpected, nAdcDecoded); } if (nAdcs != clusterer.mPzsOffsets[endpoint].offset) { - GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC offset mismatch: expected %u, buffered %u", iSlice, endpoint, fragment.index, nAdcs, clusterer.mPzsOffsets[endpoint].offset); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC ADC offset mismatch: expected %u, buffered %u", iSector, endpoint, fragment.index, nAdcs, clusterer.mPzsOffsets[endpoint].offset); } nAdcs += nAdcsExpected; @@ -141,7 +141,7 @@ struct TPCCFDecodeScanTmp { }; } // namespace -std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSlice, const CfFragment& fragment) +std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint32_t iSector, const CfFragment& fragment) { 
mRec->getGeneralStepTimer(GeneralStep::Prepare).Start(); uint32_t nDigits = 0; @@ -149,15 +149,15 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC if (GetProcessingSettings().prefetchTPCpageScan >= 3 && j < GPUTrackingInOutZS::NENDPOINTS - 1) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[iSlice].count[j + 1]; k++) { - for (uint32_t l = 0; l < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j + 1][k]; l++) { - Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); - Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j + 1]; k++) { + for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j + 1][k]; l++) { + Vc::Common::prefetchMid(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); + Vc::Common::prefetchMid(((const 
uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j + 1][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); } } } @@ -174,19 +174,19 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t firstPossibleFragment = 0; uint32_t pageCounter = 0; uint32_t emptyPages = 0; - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[iSlice].count[j]; k++) { - if (GetProcessingSettings().tpcSingleSector != -1 && GetProcessingSettings().tpcSingleSector != (int32_t)iSlice) { + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j]; k++) { + if (GetProcessingSettings().tpcSingleSector != -1 && GetProcessingSettings().tpcSingleSector != (int32_t)iSector) { break; } - nPages += mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; - for (uint32_t l = 0; l < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; l++) { + nPages += mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; + for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; l++) { #ifndef GPUCA_NO_VC - if (GetProcessingSettings().prefetchTPCpageScan >= 2 && l + 1 < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]) { - Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE); - Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); + if (GetProcessingSettings().prefetchTPCpageScan >= 2 && l + 1 < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]) { + Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE); + Vc::Common::prefetchForOneRead(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + (l + 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); } #endif - const uint8_t* const page = ((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE; + const uint8_t* const page = ((const 
uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE; const o2::header::RAWDataHeader* rdh = (const o2::header::RAWDataHeader*)page; if (o2::raw::RDHUtils::getMemorySize(*rdh) == sizeof(o2::header::RAWDataHeader)) { emptyPages++; @@ -251,7 +251,7 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint } bool extendsInNextPage = false; if (mCFContext->zsVersion >= ZSVersion::ZSVersionDenseLinkBased) { - if (l + 1 < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k] && o2::raw::RDHUtils::getMemorySize(*rdh) == TPCZSHDR::TPC_ZS_PAGE_SIZE) { + if (l + 1 < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k] && o2::raw::RDHUtils::getMemorySize(*rdh) == TPCZSHDR::TPC_ZS_PAGE_SIZE) { const o2::header::RAWDataHeader* nextrdh = (const o2::header::RAWDataHeader*)(page + TPCZSHDR::TPC_ZS_PAGE_SIZE); extendsInNextPage = o2::raw::RDHUtils::getHeartBeatOrbit(*nextrdh) == o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) && o2::raw::RDHUtils::getMemorySize(*nextrdh) > sizeof(o2::header::RAWDataHeader); } @@ -265,8 +265,8 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint // Only add extended page on GPU. On CPU the pages are in consecutive memory anyway. // Not adding the page prevents an issue where a page is decoded twice on CPU, when only the extend should be decoded. 
fragments[ff].second.zsPageLast++; - mCFContext->fragmentData[ff].nPages[iSlice][j]++; - mCFContext->fragmentData[ff].pageDigits[iSlice][j].emplace_back(0); + mCFContext->fragmentData[ff].nPages[iSector][j]++; + mCFContext->fragmentData[ff].pageDigits[iSector][j].emplace_back(0); } fragmentExtends[ff] = false; } @@ -284,27 +284,27 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint fragments[f].second.zsPageFirst = l; } else { if (pageCounter > (uint32_t)fragments[f].second.pageCounter + 1) { - mCFContext->fragmentData[f].nPages[iSlice][j] += emptyPages + pageCounter - fragments[f].second.pageCounter - 1; + mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages + pageCounter - fragments[f].second.pageCounter - 1; for (uint32_t k2 = fragments[f].second.zsPtrLast - 1; k2 <= k; k2++) { - for (uint32_t l2 = ((int32_t)k2 == fragments[f].second.zsPtrLast - 1) ? fragments[f].second.zsPageLast : 0; l2 < (k2 < k ? mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k2] : l); l2++) { + for (uint32_t l2 = ((int32_t)k2 == fragments[f].second.zsPtrLast - 1) ? fragments[f].second.zsPageLast : 0; l2 < (k2 < k ? mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k2] : l); l2++) { if (doGPU) { - mCFContext->fragmentData[f].pageDigits[iSlice][j].emplace_back(0); + mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0); } else { // CPU cannot skip unneeded pages, so we must keep space to store the invalid dummy clusters - const uint8_t* const pageTmp = ((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k2]) + l2 * TPCZSHDR::TPC_ZS_PAGE_SIZE; + const uint8_t* const pageTmp = ((const uint8_t*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k2]) + l2 * TPCZSHDR::TPC_ZS_PAGE_SIZE; const o2::header::RAWDataHeader* rdhTmp = (const o2::header::RAWDataHeader*)pageTmp; if (o2::raw::RDHUtils::getMemorySize(*rdhTmp) != sizeof(o2::header::RAWDataHeader)) { const TPCZSHDR* const hdrTmp = (const TPCZSHDR*)(rdh_utils::getLink(o2::raw::RDHUtils::getFEEID(*rdhTmp)) == rdh_utils::DLBZSLinkID ? 
(pageTmp + o2::raw::RDHUtils::getMemorySize(*rdhTmp) - sizeof(TPCZSHDRV2)) : (pageTmp + sizeof(o2::header::RAWDataHeader))); - mCFContext->fragmentData[f].nDigits[iSlice][j] += hdrTmp->nADCsamples; + mCFContext->fragmentData[f].nDigits[iSector][j] += hdrTmp->nADCsamples; } } } } } else if (emptyPages) { - mCFContext->fragmentData[f].nPages[iSlice][j] += emptyPages; + mCFContext->fragmentData[f].nPages[iSector][j] += emptyPages; if (doGPU) { for (uint32_t m = 0; m < emptyPages; m++) { - mCFContext->fragmentData[f].pageDigits[iSlice][j].emplace_back(0); + mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(0); } } } @@ -312,10 +312,10 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint fragments[f].second.zsPtrLast = k + 1; fragments[f].second.zsPageLast = l + 1; fragments[f].second.pageCounter = pageCounter; - mCFContext->fragmentData[f].nPages[iSlice][j]++; - mCFContext->fragmentData[f].nDigits[iSlice][j] += hdr->nADCsamples; + mCFContext->fragmentData[f].nPages[iSector][j]++; + mCFContext->fragmentData[f].nDigits[iSector][j] += hdr->nADCsamples; if (doGPU) { - mCFContext->fragmentData[f].pageDigits[iSlice][j].emplace_back(hdr->nADCsamples); + mCFContext->fragmentData[f].pageDigits[iSector][j].emplace_back(hdr->nADCsamples); } fragmentExtends[f] = extendsInNextPage; } else { @@ -336,19 +336,19 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint } } for (uint32_t f = 0; f < mCFContext->nFragments; f++) { - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPtrLast = fragments[f].second.zsPtrLast; - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPtrFirst = fragments[f].second.zsPtrFirst; - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPageLast = fragments[f].second.zsPageLast; - mCFContext->fragmentData[f].minMaxCN[iSlice][j].zsPageFirst = fragments[f].second.zsPageFirst; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrLast = fragments[f].second.zsPtrLast; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPtrFirst 
= fragments[f].second.zsPtrFirst; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageLast = fragments[f].second.zsPageLast; + mCFContext->fragmentData[f].minMaxCN[iSector][j].zsPageFirst = fragments[f].second.zsPageFirst; } } mCFContext->nPagesTotal += nPages; - mCFContext->nPagesSector[iSlice] = nPages; + mCFContext->nPagesSector[iSector] = nPages; - mCFContext->nDigitsEndpointMax[iSlice] = 0; + mCFContext->nDigitsEndpointMax[iSector] = 0; for (uint32_t i = 0; i < GPUTrackingInOutZS::NENDPOINTS; i++) { - if (endpointAdcSamples[i] > mCFContext->nDigitsEndpointMax[iSlice]) { - mCFContext->nDigitsEndpointMax[iSlice] = endpointAdcSamples[i]; + if (endpointAdcSamples[i] > mCFContext->nDigitsEndpointMax[iSector]) { + mCFContext->nDigitsEndpointMax[iSector] = endpointAdcSamples[i]; } } uint32_t nDigitsFragmentMax = 0; @@ -356,8 +356,8 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t pagesInFragment = 0; uint32_t digitsInFragment = 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - pagesInFragment += mCFContext->fragmentData[i].nPages[iSlice][j]; - digitsInFragment += mCFContext->fragmentData[i].nDigits[iSlice][j]; + pagesInFragment += mCFContext->fragmentData[i].nPages[iSector][j]; + digitsInFragment += mCFContext->fragmentData[i].nDigits[iSector][j]; } mCFContext->nPagesFragmentMax = std::max(mCFContext->nPagesFragmentMax, pagesInFragment); nDigitsFragmentMax = std::max(nDigitsFragmentMax, digitsInFragment); @@ -371,7 +371,7 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust auto& in = stage ? clustererShadow.mPpeakPositions : clustererShadow.mPpositions; auto& out = stage ? clustererShadow.mPfilteredPeakPositions : clustererShadow.mPpeakPositions; if (doGPU) { - const uint32_t iSlice = clusterer.mISlice; + const uint32_t iSector = clusterer.mISector; auto& count = stage ? 
clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions; std::vector counts; @@ -387,22 +387,22 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust for (uint32_t i = 1; i < nSteps; i++) { counts.push_back(tmpCount); if (i == 1) { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSlice}}, i, stage); + runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, stage); } else { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSlice}}, i, tmpCount); + runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, tmpCount); } tmpCount = (tmpCount + clusterer.mScanWorkGroupSize - 1) / clusterer.mScanWorkGroupSize; } - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSlice}}, nSteps, tmpCount); + runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, nSteps, tmpCount); for (uint32_t i = nSteps - 1; i > 1; i--) { tmpCount = counts[i - 1]; - runKernel({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSlice}}, i, clusterer.mScanWorkGroupSize, tmpCount); + runKernel({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, clusterer.mScanWorkGroupSize, tmpCount); } } - runKernel({GetGrid(count, clusterer.mScanWorkGroupSize, lane), {iSlice}}, 1, stage, in, out); + runKernel({GetGrid(count, clusterer.mScanWorkGroupSize, lane), {iSector}}, 1, stage, in, out); } else { auto& nOut = stage ? clusterer.mPmemory->counters.nClusters : clusterer.mPmemory->counters.nPeaks; auto& nIn = stage ? 
clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions; @@ -416,35 +416,35 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust } } -std::pair GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSlice, const CfFragment& fragment, int32_t lane) +std::pair GPUChainTracking::RunTPCClusterizer_transferZS(int32_t iSector, const CfFragment& fragment, int32_t lane) { bool doGPU = GetRecoStepsGPU() & RecoStep::TPCClusterFinding; if (mCFContext->abandonTimeframe) { return {0, 0}; } - const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSlice, fragment); + const auto& retVal = TPCClusterizerDecodeZSCountUpdate(iSector, fragment); if (doGPU) { - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; uint32_t nPagesSector = 0; for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { uint32_t nPages = 0; - mInputsHost->mPzsMeta->slice[iSlice].zsPtr[j] = &mInputsShadow->mPzsPtrs[iSlice * GPUTrackingInOutZS::NENDPOINTS + j]; - mInputsHost->mPzsPtrs[iSlice * GPUTrackingInOutZS::NENDPOINTS + j] = clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE; + mInputsHost->mPzsMeta->sector[iSector].zsPtr[j] = &mInputsShadow->mPzsPtrs[iSector * GPUTrackingInOutZS::NENDPOINTS + j]; + mInputsHost->mPzsPtrs[iSector * GPUTrackingInOutZS::NENDPOINTS + j] = clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE; for (uint32_t k = clusterer.mMinMaxCN[j].zsPtrFirst; k < clusterer.mMinMaxCN[j].zsPtrLast; k++) { const uint32_t min = (k == clusterer.mMinMaxCN[j].zsPtrFirst) ? clusterer.mMinMaxCN[j].zsPageFirst : 0; - const uint32_t max = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? 
clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; + const uint32_t max = (k + 1 == clusterer.mMinMaxCN[j].zsPtrLast) ? clusterer.mMinMaxCN[j].zsPageLast : mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; if (max > min) { - char* src = (char*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k] + min * TPCZSHDR::TPC_ZS_PAGE_SIZE; - char* ptrLast = (char*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[j][k] + (max - 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE; + char* src = (char*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k] + min * TPCZSHDR::TPC_ZS_PAGE_SIZE; + char* ptrLast = (char*)mIOPtrs.tpcZS->sector[iSector].zsPtr[j][k] + (max - 1) * TPCZSHDR::TPC_ZS_PAGE_SIZE; size_t size = (ptrLast - src) + o2::raw::RDHUtils::getMemorySize(*(const o2::header::RAWDataHeader*)ptrLast); GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPzs + (nPagesSector + nPages) * TPCZSHDR::TPC_ZS_PAGE_SIZE, src, size, lane, true); } nPages += max - min; } - mInputsHost->mPzsMeta->slice[iSlice].nZSPtr[j] = &mInputsShadow->mPzsSizes[iSlice * GPUTrackingInOutZS::NENDPOINTS + j]; - mInputsHost->mPzsSizes[iSlice * GPUTrackingInOutZS::NENDPOINTS + j] = nPages; - mInputsHost->mPzsMeta->slice[iSlice].count[j] = 1; + mInputsHost->mPzsMeta->sector[iSector].nZSPtr[j] = &mInputsShadow->mPzsSizes[iSector * GPUTrackingInOutZS::NENDPOINTS + j]; + mInputsHost->mPzsSizes[iSector * GPUTrackingInOutZS::NENDPOINTS + j] = nPages; + mInputsHost->mPzsMeta->sector[iSector].count[j] = 1; nPagesSector += nPages; } GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPzsOffsets, clusterer.mPzsOffsets, clusterer.mNMaxPages * sizeof(*clusterer.mPzsOffsets), lane, true); @@ -456,10 +456,10 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) { bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; if (restorePointers) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processors()->tpcClusterer[iSlice].mPzsOffsets = mCFContext->ptrSave[iSlice].zsOffsetHost; - 
processorsShadow()->tpcClusterer[iSlice].mPzsOffsets = mCFContext->ptrSave[iSlice].zsOffsetDevice; - processorsShadow()->tpcClusterer[iSlice].mPzs = mCFContext->ptrSave[iSlice].zsDevice; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processors()->tpcClusterer[iSector].mPzsOffsets = mCFContext->ptrSave[iSector].zsOffsetHost; + processorsShadow()->tpcClusterer[iSector].mPzsOffsets = mCFContext->ptrSave[iSector].zsOffsetDevice; + processorsShadow()->tpcClusterer[iSector].mPzs = mCFContext->ptrSave[iSector].zsDevice; } processorsShadow()->ioPtrs.clustersNative = mCFContext->ptrClusterNativeSave; return 0; @@ -478,54 +478,54 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) mTriggerBuffer->triggers.clear(); } if (mIOPtrs.tpcZS) { - uint32_t nDigitsFragmentMax[NSLICES]; + uint32_t nDigitsFragmentMax[NSECTORS]; mCFContext->zsVersion = -1; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - if (mIOPtrs.tpcZS->slice[iSlice].count[0]) { - const void* rdh = mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + if (mIOPtrs.tpcZS->sector[iSector].count[0]) { + const void* rdh = mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]; if (rdh && o2::raw::RDHUtils::getVersion() > o2::raw::RDHUtils::getVersion(rdh)) { GPUError("Data has invalid RDH version %d, %d required\n", o2::raw::RDHUtils::getVersion(rdh), o2::raw::RDHUtils::getVersion()); return 1; } } #ifndef GPUCA_NO_VC - if (GetProcessingSettings().prefetchTPCpageScan >= 1 && iSlice < NSLICES - 1) { + if (GetProcessingSettings().prefetchTPCpageScan >= 1 && iSector < NSECTORS - 1) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[iSlice].count[j]; k++) { - for (uint32_t l = 0; l < mIOPtrs.tpcZS->slice[iSlice].nZSPtr[j][k]; l++) { - Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); - 
Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->slice[iSlice + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[iSector].count[j]; k++) { + for (uint32_t l = 0; l < mIOPtrs.tpcZS->sector[iSector].nZSPtr[j][k]; l++) { + Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE); + Vc::Common::prefetchFar(((const uint8_t*)mIOPtrs.tpcZS->sector[iSector + 1].zsPtr[j][k]) + l * TPCZSHDR::TPC_ZS_PAGE_SIZE + sizeof(o2::header::RAWDataHeader)); } } } } #endif - const auto& x = TPCClusterizerDecodeZSCount(iSlice, fragmentMax); - nDigitsFragmentMax[iSlice] = x.first; - processors()->tpcClusterer[iSlice].mPmemory->counters.nDigits = x.first; + const auto& x = TPCClusterizerDecodeZSCount(iSector, fragmentMax); + nDigitsFragmentMax[iSector] = x.first; + processors()->tpcClusterer[iSector].mPmemory->counters.nDigits = x.first; mRec->MemoryScalers()->nTPCdigits += x.first; } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t nDigitsBase = nDigitsFragmentMax[iSlice]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t nDigitsBase = nDigitsFragmentMax[iSector]; uint32_t threshold = 40000000; uint32_t nDigitsScaled = nDigitsBase > threshold ? 
nDigitsBase : std::min((threshold + nDigitsBase) / 2, 2 * nDigitsBase); - processors()->tpcClusterer[iSlice].SetNMaxDigits(processors()->tpcClusterer[iSlice].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSlice]); + processors()->tpcClusterer[iSector].SetNMaxDigits(processors()->tpcClusterer[iSector].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSector]); if (doGPU) { - processorsShadow()->tpcClusterer[iSlice].SetNMaxDigits(processors()->tpcClusterer[iSlice].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSlice]); + processorsShadow()->tpcClusterer[iSector].SetNMaxDigits(processors()->tpcClusterer[iSector].mPmemory->counters.nDigits, mCFContext->nPagesFragmentMax, nDigitsScaled, mCFContext->nDigitsEndpointMax[iSector]); } if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) { - mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSlice].mZSOffsetId, mRec); - mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSlice].mZSId, mRec); + mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSector].mZSOffsetId, mRec); + mPipelineNotifyCtx->rec->AllocateRegisteredForeignMemory(processors()->tpcClusterer[iSector].mZSId, mRec); } else { - AllocateRegisteredMemory(processors()->tpcClusterer[iSlice].mZSOffsetId); - AllocateRegisteredMemory(processors()->tpcClusterer[iSlice].mZSId); + AllocateRegisteredMemory(processors()->tpcClusterer[iSector].mZSOffsetId); + AllocateRegisteredMemory(processors()->tpcClusterer[iSector].mZSId); } } } else { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t nDigits = mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t nDigits = 
mIOPtrs.tpcPackedDigits->nTPCDigits[iSector]; mRec->MemoryScalers()->nTPCdigits += nDigits; - processors()->tpcClusterer[iSlice].SetNMaxDigits(nDigits, mCFContext->nPagesFragmentMax, nDigits, 0); + processors()->tpcClusterer[iSector].SetNMaxDigits(nDigits, mCFContext->nPagesFragmentMax, nDigits, 0); } } @@ -546,17 +546,17 @@ int32_t GPUChainTracking::RunTPCClusterizer_prepare(bool restorePointers) } mCFContext->fragmentFirst = CfFragment{std::max(mCFContext->tpcMaxTimeBin + 1, maxFragmentLen), maxFragmentLen}; - for (int32_t iSlice = 0; iSlice < GetProcessingSettings().nTPCClustererLanes && iSlice < NSLICES; iSlice++) { - if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - mCFContext->nextPos[iSlice] = RunTPCClusterizer_transferZS(iSlice, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSlice); + for (int32_t iSector = 0; iSector < GetProcessingSettings().nTPCClustererLanes && iSector < NSECTORS; iSector++) { + if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { + mCFContext->nextPos[iSector] = RunTPCClusterizer_transferZS(iSector, mCFContext->fragmentFirst, GetProcessingSettings().nTPCClustererLanes + iSector); } } if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - mCFContext->ptrSave[iSlice].zsOffsetHost = processors()->tpcClusterer[iSlice].mPzsOffsets; - mCFContext->ptrSave[iSlice].zsOffsetDevice = processorsShadow()->tpcClusterer[iSlice].mPzsOffsets; - mCFContext->ptrSave[iSlice].zsDevice = processorsShadow()->tpcClusterer[iSlice].mPzs; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + mCFContext->ptrSave[iSector].zsOffsetHost = processors()->tpcClusterer[iSector].mPzsOffsets; + mCFContext->ptrSave[iSector].zsOffsetDevice = processorsShadow()->tpcClusterer[iSector].mPzsOffsets; + mCFContext->ptrSave[iSector].zsDevice = 
processorsShadow()->tpcClusterer[iSector].mPzs; } } return 0; @@ -595,12 +595,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) tpcHitLowOccupancyScalingFactor = std::min(3.5f, (float)threshold / nHitsBase); } } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processors()->tpcClusterer[iSlice].SetMaxData(mIOPtrs); // First iteration to set data sizes + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processors()->tpcClusterer[iSector].SetMaxData(mIOPtrs); // First iteration to set data sizes } mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - SetupGPUProcessor(&processors()->tpcClusterer[iSlice], true); // Now we allocate + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + SetupGPUProcessor(&processors()->tpcClusterer[iSector], true); // Now we allocate } if (mPipelineNotifyCtx && GetProcessingSettings().doublePipelineClusterizer) { RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) @@ -611,7 +611,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); } if (doGPU) { - WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSLICES, mRec->NStreams() - 1, &mEvents->init); + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); } size_t nClsTotal = 0; @@ -651,7 +651,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool 
synchronizeOutput) mcLinearLabels.data.reserve(mRec->MemoryScalers()->nTPCHits); } - int8_t transferRunning[NSLICES] = {0}; + int8_t transferRunning[NSECTORS] = {0}; uint32_t outputQueueStart = mOutputQueue.size(); auto notifyForeignChainFinished = [this]() { @@ -666,22 +666,22 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) }; bool synchronizeCalibUpdate = false; - for (uint32_t iSliceBase = 0; iSliceBase < NSLICES; iSliceBase += GetProcessingSettings().nTPCClustererLanes) { + for (uint32_t iSectorBase = 0; iSectorBase < NSECTORS; iSectorBase += GetProcessingSettings().nTPCClustererLanes) { std::vector laneHasData(GetProcessingSettings().nTPCClustererLanes, false); - static_assert(NSLICES <= GPUCA_MAX_STREAMS, "Stream events must be able to hold all slices"); - const int32_t maxLane = std::min(GetProcessingSettings().nTPCClustererLanes, NSLICES - iSliceBase); + static_assert(NSECTORS <= GPUCA_MAX_STREAMS, "Stream events must be able to hold all sectors"); + const int32_t maxLane = std::min(GetProcessingSettings().nTPCClustererLanes, NSECTORS - iSectorBase); for (CfFragment fragment = mCFContext->fragmentFirst; !fragment.isEnd(); fragment = fragment.next()) { if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSliceBase, iSliceBase + GetProcessingSettings().nTPCClustererLanes - 1); + GPUInfo("Processing time bins [%d, %d) for sectors %d to %d", fragment.start, fragment.last(), iSectorBase, iSectorBase + GetProcessingSettings().nTPCClustererLanes - 1); } mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { if (doGPU && fragment.index != 0) { SynchronizeStream(lane); // Don't overwrite charge map from previous iteration until cluster computation is finished } - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSlice] : clusterer; + uint32_t iSector = iSectorBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; clusterer.mPmemory->counters.nPeaks = clusterer.mPmemory->counters.nClusters = 0; clusterer.mPmemory->fragment = fragment; @@ -689,22 +689,22 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) bool setDigitsOnGPU = doGPU && not mIOPtrs.tpcZS; bool setDigitsOnHost = (not doGPU && not mIOPtrs.tpcZS) || propagateMCLabels; auto* inDigits = mIOPtrs.tpcPackedDigits; - size_t numDigits = inDigits->nTPCDigits[iSlice]; + size_t numDigits = inDigits->nTPCDigits[iSector]; if (setDigitsOnGPU) { - GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSlice], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); + GPUMemCpy(RecoStep::TPCClusterFinding, clustererShadow.mPdigits, inDigits->tpcDigits[iSector], sizeof(clustererShadow.mPdigits[0]) * numDigits, lane, true); } if (setDigitsOnHost) { - clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSlice]); // TODO: Needs fixing (invalid const cast) + clusterer.mPdigits = const_cast(inDigits->tpcDigits[iSector]); // TODO: Needs fixing (invalid const cast) } clusterer.mPmemory->counters.nDigits = numDigits; } if (mIOPtrs.tpcZS) { - if (mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { - clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSlice].first; - clusterer.mPmemory->counters.nPagesSubslice = mCFContext->nextPos[iSlice].second; + if (mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { + clusterer.mPmemory->counters.nPositions = mCFContext->nextPos[iSector].first; + clusterer.mPmemory->counters.nPagesSubsector = mCFContext->nextPos[iSector].second; } else { - clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubslice = 0; + 
clusterer.mPmemory->counters.nPositions = clusterer.mPmemory->counters.nPagesSubsector = 0; } } TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); @@ -719,46 +719,46 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); if (doGPU) { - if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSlice] && mCFContext->zsVersion != -1) { + if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, mInputsHost->mResourceZS, lane); SynchronizeStream(GetProcessingSettings().nTPCClustererLanes + lane); } SynchronizeStream(mRec->NStreams() - 1); // Wait for copying to constant memory } - if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSlice] || mCFContext->zsVersion == -1)) { + if (mIOPtrs.tpcZS && (mCFContext->abandonTimeframe || !mCFContext->nPagesSector[iSector] || mCFContext->zsVersion == -1)) { clusterer.mPmemory->counters.nPositions = 0; return; } - if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice] == 0) { + if (!mIOPtrs.tpcZS && mIOPtrs.tpcPackedDigits->nTPCDigits[iSector] == 0) { clusterer.mPmemory->counters.nPositions = 0; return; } if (propagateMCLabels && fragment.index == 0) { clusterer.PrepareMC(); - clusterer.mPinputLabels = digitsMC->v[iSlice]; + clusterer.mPinputLabels = digitsMC->v[iSector]; if (clusterer.mPinputLabels == nullptr) { - GPUFatal("MC label container missing, sector %d", iSlice); + GPUFatal("MC label container missing, sector %d", iSector); } - if (clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]) { - GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice], (int32_t)clusterer.mPinputLabels->getIndexedSize()); + if 
(clusterer.mPinputLabels->getIndexedSize() != mIOPtrs.tpcPackedDigits->nTPCDigits[iSector]) { + GPUFatal("MC label container has incorrect number of entries: %d expected, has %d\n", (int32_t)mIOPtrs.tpcPackedDigits->nTPCDigits[iSector], (int32_t)clusterer.mPinputLabels->getIndexedSize()); } } - if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSlice) { + if (GetProcessingSettings().tpcSingleSector == -1 || GetProcessingSettings().tpcSingleSector == (int32_t)iSector) { if (not mIOPtrs.tpcZS) { - runKernel({GetGrid(1, lane), {iSlice}}, mIOPtrs.tpcZS == nullptr); + runKernel({GetGrid(1, lane), {iSector}}, mIOPtrs.tpcZS == nullptr); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); } else if (propagateMCLabels) { - runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, mIOPtrs.tpcZS == nullptr); + runKernel({GetGrid(1, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, mIOPtrs.tpcZS == nullptr); TransferMemoryResourceLinkToGPU(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); } } if (mIOPtrs.tpcZS) { - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->slice[iSlice].count[0] && mIOPtrs.tpcZS->slice[iSlice].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->slice[iSlice].zsPtr[0][0]) : 0); - uint32_t nBlocks = doGPU ? clusterer.mPmemory->counters.nPagesSubslice : GPUTrackingInOutZS::NENDPOINTS; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0); + uint32_t nBlocks = doGPU ? 
clusterer.mPmemory->counters.nPagesSubsector : GPUTrackingInOutZS::NENDPOINTS; (void)tpcTimeBinCut; // TODO: To be used in decoding kernels switch (mCFContext->zsVersion) { @@ -767,48 +767,48 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) break; case ZSVersionRowBased10BitADC: case ZSVersionRowBased12BitADC: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + runKernel({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF); break; case ZSVersionLinkBasedWithMeta: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + runKernel({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF); break; case ZSVersionDenseLinkBased: - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}, firstHBF); + runKernel({GetGridBlk(nBlocks, lane), {iSector}}, firstHBF); break; } TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); } // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { - uint32_t iSlice = iSliceBase + lane; + uint32_t iSector = iSectorBase + lane; if (doGPU) { SynchronizeStream(lane); } if (mIOPtrs.tpcZS) { CfFragment f = fragment.next(); - int32_t nextSlice = iSlice; + int32_t nextSector = iSector; if (f.isEnd()) { - nextSlice += GetProcessingSettings().nTPCClustererLanes; + nextSector += GetProcessingSettings().nTPCClustererLanes; f = mCFContext->fragmentFirst; } - if (nextSlice < NSLICES && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSlice] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { - mCFContext->nextPos[nextSlice] = RunTPCClusterizer_transferZS(nextSlice, f, GetProcessingSettings().nTPCClustererLanes + lane); + if (nextSector < NSECTORS && mIOPtrs.tpcZS && mCFContext->nPagesSector[nextSector] && mCFContext->zsVersion != -1 && !mCFContext->abandonTimeframe) { + mCFContext->nextPos[nextSector] = RunTPCClusterizer_transferZS(nextSector, f, GetProcessingSettings().nTPCClustererLanes + lane); } } - GPUTPCClusterFinder& clusterer = 
processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; if (clusterer.mPmemory->counters.nPositions == 0) { return; } if (!mIOPtrs.tpcZS) { - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); } if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { clusterer.DumpChargeMap(*mDebugFile, "Charges"); } if (propagateMCLabels) { - runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nDigitsInFragment, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}); } bool checkForNoisyPads = (rec()->GetParam().rec.tpc.maxTimeBinAboveThresholdIn1000Bin > 0) || (rec()->GetParam().rec.tpc.maxConsecTimeBinAboveThreshold > 0); @@ -818,10 +818,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (checkForNoisyPads) { int32_t nBlocks = TPC_PADS_IN_SECTOR / GPUTPCCFCheckPadBaseline::PadsPerCacheline; - runKernel({GetGridBlk(nBlocks, lane), {iSlice}}); + runKernel({GetGridBlk(nBlocks, lane), {iSector}}); } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Peaks"); } @@ -831,17 +831,17 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, 
&GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + uint32_t iSector = iSectorBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; if (doGPU) { SynchronizeStream(lane); } if (clusterer.mPmemory->counters.nPeaks == 0) { return; } - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); - runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSlice}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); } @@ -851,9 +851,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { - uint32_t iSlice = iSliceBase + lane; - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + uint32_t iSector = iSectorBase + lane; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSector] : clusterer; if (doGPU) { SynchronizeStream(lane); } @@ -871,19 +871,19 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) return; } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}}); + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSlice}}, 0); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); if (doGPU && propagateMCLabels) { TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); if (doGPU) { SynchronizeStream(lane); } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 1); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); } if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSlice, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); + GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSector, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); } TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); @@ -897,22 +897,22 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) size_t nClsFirst = nClsTotal; bool anyLaneHasData = false; for (int32_t lane = 0; lane < maxLane; lane++) { - uint32_t iSlice = iSliceBase + lane; - 
std::fill(&tmpNativeAccess->nClusters[iSlice][0], &tmpNativeAccess->nClusters[iSlice][0] + MAXGLOBALPADROW, 0); + uint32_t iSector = iSectorBase + lane; + std::fill(&tmpNativeAccess->nClusters[iSector][0], &tmpNativeAccess->nClusters[iSector][0] + MAXGLOBALPADROW, 0); if (doGPU) { SynchronizeStream(lane); } - GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSlice]; - GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSlice] : clusterer; + GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; + GPUTPCClusterFinder& clustererShadow = doGPU ? processorsShadow()->tpcClusterer[iSector] : clusterer; if (laneHasData[lane]) { anyLaneHasData = true; if (buildNativeGPU && GetProcessingSettings().tpccfGatherKernel) { - runKernel({GetGridBlk(GPUCA_ROW_COUNT, mRec->NStreams() - 1), {iSlice}}, &mInputsShadow->mPclusterNativeBuffer[nClsTotal]); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, mRec->NStreams() - 1), {iSector}}, &mInputsShadow->mPclusterNativeBuffer[nClsTotal]); } for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { if (nClsTotal + clusterer.mPclusterInRow[j] > mInputsHost->mNClusterNative) { - clusterer.raiseError(GPUErrors::ERROR_CF_GLOBAL_CLUSTER_OVERFLOW, iSlice * 1000 + j, nClsTotal + clusterer.mPclusterInRow[j], mInputsHost->mNClusterNative); + clusterer.raiseError(GPUErrors::ERROR_CF_GLOBAL_CLUSTER_OVERFLOW, iSector * 1000 + j, nClsTotal + clusterer.mPclusterInRow[j], mInputsHost->mNClusterNative); continue; } if (buildNativeGPU) { @@ -922,7 +922,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } else if (buildNativeHost) { GPUMemCpyAlways(RecoStep::TPCClusterFinding, (void*)&tmpNativeClusters[nClsTotal], (const void*)&clustererShadow.mPclusterByRow[j * clusterer.mNMaxClusterPerRow], sizeof(mIOPtrs.clustersNative->clustersLinear[0]) * clusterer.mPclusterInRow[j], mRec->NStreams() - 1, false); } - tmpNativeAccess->nClusters[iSlice][j] += clusterer.mPclusterInRow[j]; + 
tmpNativeAccess->nClusters[iSector][j] += clusterer.mPclusterInRow[j]; nClsTotal += clusterer.mPclusterInRow[j]; } if (transferRunning[lane]) { @@ -937,15 +937,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) continue; } - runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}); + runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}); GPUTPCCFMCLabelFlattener::setGlobalOffsetsAndAllocate(clusterer, mcLinearLabels); - runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, &mcLinearLabels); + runKernel({GetGrid(GPUCA_ROW_COUNT, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, &mcLinearLabels); clusterer.clearMCMemory(); assert(propagateMCLabels ? mcLinearLabels.header.size() == nClsTotal : true); } if (propagateMCLabels) { for (int32_t lane = 0; lane < maxLane; lane++) { - processors()->tpcClusterer[iSliceBase + lane].clearMCMemory(); + processors()->tpcClusterer[iSectorBase + lane].clearMCMemory(); } } if (buildNativeHost && buildNativeGPU && anyLaneHasData) { @@ -956,10 +956,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } - if (mWaitForFinalInputs && iSliceBase >= 21 && (int32_t)iSliceBase < 21 + GetProcessingSettings().nTPCClustererLanes) { + if (mWaitForFinalInputs && iSectorBase >= 21 && (int32_t)iSectorBase < 21 + GetProcessingSettings().nTPCClustererLanes) { notifyForeignChainFinished(); } - if (mWaitForFinalInputs && iSliceBase >= 30 && (int32_t)iSliceBase < 30 + GetProcessingSettings().nTPCClustererLanes) { + if (mWaitForFinalInputs && iSectorBase >= 30 && (int32_t)iSectorBase < 30 + GetProcessingSettings().nTPCClustererLanes) { mWaitForFinalInputs(); synchronizeCalibUpdate = DoQueuedUpdates(0, false); } @@ -1051,7 +1051,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) SynchronizeStream(0); } if (buildNativeHost && 
(GetProcessingSettings().deterministicGPUReconstruction || GetProcessingSettings().debugLevel >= 4)) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { std::sort(&tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j]], &tmpNativeClusters[tmpNativeAccess->clusterOffset[i][j] + tmpNativeAccess->nClusters[i][j]]); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 4ea7094416d5e..94d39249d620c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -62,7 +62,7 @@ int32_t GPUChainTracking::RunTPCCompression() O->nAttachedClusters = Compressor.mMemory->nStoredAttachedClusters; O->nUnattachedClusters = Compressor.mMemory->nStoredUnattachedClusters; O->nAttachedClustersReduced = O->nAttachedClusters - O->nTracks; - O->nSliceRows = NSLICES * GPUCA_ROW_COUNT; + O->nSliceRows = NSECTORS * GPUCA_ROW_COUNT; O->nComppressionModes = param().rec.tpc.compressionTypeMask; O->solenoidBz = param().bzkG; O->maxTimeBin = param().continuousMaxTimeBin; @@ -143,11 +143,11 @@ int32_t GPUChainTracking::RunTPCCompression() gatherTimer = &getTimer("GPUTPCCompression_GatherOnCPU", 0); gatherTimer->Start(); } - GPUMemCpyAlways(myStep, O->nSliceRowClusters, P->nSliceRowClusters, NSLICES * GPUCA_ROW_COUNT * sizeof(O->nSliceRowClusters[0]), outputStream, direction); + GPUMemCpyAlways(myStep, O->nSliceRowClusters, P->nSliceRowClusters, NSECTORS * GPUCA_ROW_COUNT * sizeof(O->nSliceRowClusters[0]), outputStream, direction); GPUMemCpyAlways(myStep, O->nTrackClusters, P->nTrackClusters, O->nTracks * sizeof(O->nTrackClusters[0]), outputStream, direction); SynchronizeStream(outputStream); uint32_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { uint32_t srcOffset = 
mIOPtrs.clustersNative->clusterOffset[i][j] * Compressor.mMaxClusterFactorBase1024 / 1024; GPUMemCpyAlways(myStep, O->qTotU + offset, P->qTotU + srcOffset, O->nSliceRowClusters[i * GPUCA_ROW_COUNT + j] * sizeof(O->qTotU[0]), outputStream, direction); @@ -264,7 +264,7 @@ int32_t GPUChainTracking::RunTPCDecompression() inputGPU = cmprClsHost; bool toGPU = true; - runKernel({GetGridAutoStep(inputStream, RecoStep::TPCDecompression), krnlRunRangeNone, &mEvents->init}, DecompressorShadow.mNativeClustersIndex, NSLICES * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNativeClustersIndex[0])); + runKernel({GetGridAutoStep(inputStream, RecoStep::TPCDecompression), krnlRunRangeNone, &mEvents->init}, DecompressorShadow.mNativeClustersIndex, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNativeClustersIndex[0])); int32_t nStreams = doGPU ? mRec->NStreams() - 1 : 1; if (cmprClsHost.nAttachedClusters != 0) { std::exclusive_scan(cmprClsHost.nTrackClusters, cmprClsHost.nTrackClusters + cmprClsHost.nTracks, Decompressor.mAttachedClustersOffsets, 0u); // computing clusters offsets for first kernel @@ -294,7 +294,7 @@ int32_t GPUChainTracking::RunTPCDecompression() runKernel({GetGridAuto(iStream), krnlRunRangeNone, {&mEvents->stream[iStream], &mEvents->init}}, startTrack, endTrack); } } - GPUMemCpy(myStep, inputGPUShadow.nSliceRowClusters, cmprClsHost.nSliceRowClusters, NSLICES * GPUCA_ROW_COUNT * sizeof(cmprClsHost.nSliceRowClusters[0]), unattachedStream, toGPU); + GPUMemCpy(myStep, inputGPUShadow.nSliceRowClusters, cmprClsHost.nSliceRowClusters, NSECTORS * GPUCA_ROW_COUNT * sizeof(cmprClsHost.nSliceRowClusters[0]), unattachedStream, toGPU); GPUMemCpy(myStep, inputGPUShadow.qTotU, cmprClsHost.qTotU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.qTotU[0]), unattachedStream, toGPU); GPUMemCpy(myStep, inputGPUShadow.qMaxU, cmprClsHost.qMaxU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.qMaxU[0]), unattachedStream, toGPU); GPUMemCpy(myStep, inputGPUShadow.flagsU, 
cmprClsHost.flagsU, cmprClsHost.nUnattachedClusters * sizeof(cmprClsHost.flagsU[0]), unattachedStream, toGPU); @@ -307,7 +307,7 @@ int32_t GPUChainTracking::RunTPCDecompression() SynchronizeStream(inputStream); uint32_t offset = 0; uint32_t decodedAttachedClusters = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { uint32_t linearIndex = i * GPUCA_ROW_COUNT + j; uint32_t unattachedOffset = (linearIndex >= cmprClsHost.nSliceRows) ? 0 : cmprClsHost.nSliceRowClusters[linearIndex]; @@ -353,13 +353,13 @@ int32_t GPUChainTracking::RunTPCDecompression() *mInputsHost->mPclusterNativeAccess = *mClusterNativeAccess; } - uint32_t batchSize = doGPU ? 6 : NSLICES; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice = iSlice + batchSize) { - int32_t iStream = (iSlice / batchSize) % mRec->NStreams(); - runKernel({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSlice, batchSize); - uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSlice, mClusterNativeAccess->nClustersSector + iSlice + batchSize, 0u); + uint32_t batchSize = doGPU ? 
6 : NSECTORS; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector = iSector + batchSize) { + int32_t iStream = (iSector / batchSize) % mRec->NStreams(); + runKernel({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSector, batchSize); + uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSector, mClusterNativeAccess->nClustersSector + iSector + batchSize, 0u); if (!runFiltering) { - GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSlice][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSlice][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false); + GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSector][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSector][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false); } } SynchronizeGPU(); @@ -367,7 +367,7 @@ int32_t GPUChainTracking::RunTPCDecompression() if (runFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final buffers accordingly AllocateRegisteredMemory(Decompressor.mResourceNClusterPerSectorRow); WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream); - runKernel({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSLICES * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0])); + runKernel({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0])); runKernel(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression)); 
TransferMemoryResourceLinkToHost(RecoStep::TPCDecompression, Decompressor.mResourceNClusterPerSectorRow, unattachedStream); SynchronizeStream(unattachedStream); @@ -378,7 +378,7 @@ int32_t GPUChainTracking::RunTPCDecompression() DecompressorShadow.mNativeClustersBuffer = mInputsShadow->mPclusterNativeBuffer; Decompressor.mNativeClustersBuffer = mInputsHost->mPclusterNativeOutput; WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { mClusterNativeAccess->nClusters[i][j] = Decompressor.mNClusterPerSectorRow[i * GPUCA_ROW_COUNT + j]; } @@ -402,7 +402,7 @@ int32_t GPUChainTracking::RunTPCDecompression() runKernel(GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression)); const ClusterNativeAccess* decoded = mIOPtrs.clustersNative; if (doGPU) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { ClusterNative* begin = mInputsHost->mPclusterNativeOutput + decoded->clusterOffset[i][j]; ClusterNative* end = begin + decoded->nClusters[i][j]; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 96bc8a3083067..ec6b48a55d50d 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -140,7 +140,7 @@ void addToMap(std::string name, std::map& void GPUChainTracking::PrintMemoryStatistics() { std::map usageMap; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { #ifdef GPUCA_TPC_GEOMETRY_O2 addToMap("TPC Clusterer Sector Peaks", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nPeaks, processors()->tpcClusterer[i].mNMaxPeaks); addToMap("TPC 
Clusterer Sector Clusters", usageMap, processors()->tpcClusterer[i].mPmemory->counters.nClusters, processors()->tpcClusterer[i].mNMaxClusters); @@ -173,7 +173,7 @@ void GPUChainTracking::PrintMemoryStatistics() void GPUChainTracking::PrintMemoryRelations() { - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { GPUInfo("MEMREL StartHits NCl %d NTrkl %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NStartHits()); GPUInfo("MEMREL Tracklets NCl %d NTrkl %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracklets()); GPUInfo("MEMREL Tracklets NCl %d NTrkl %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NRowHits()); @@ -193,7 +193,7 @@ void GPUChainTracking::PrepareDebugOutput() WriteToConstantMemory(RecoStep::NoRecoStep, (char*)&processors()->debugOutput - (char*)processors(), &processorsShadow()->debugOutput, sizeof(processors()->debugOutput), -1); memset(processors()->debugOutput.memory(), 0, processors()->debugOutput.memorySize() * sizeof(processors()->debugOutput.memory()[0])); } - runKernel({{BlockCount(), ThreadCount(), 0, RecoStep::TPCSliceTracking}}, (mRec->IsGPU() ? processorsShadow() : processors())->debugOutput.memory(), processorsShadow()->debugOutput.memorySize() * sizeof(processors()->debugOutput.memory()[0])); + runKernel({{BlockCount(), ThreadCount(), 0, RecoStep::TPCSectorTracking}}, (mRec->IsGPU() ? 
processorsShadow() : processors())->debugOutput.memory(), processorsShadow()->debugOutput.memorySize() * sizeof(processors()->debugOutput.memory()[0])); #endif } @@ -272,7 +272,7 @@ void GPUChainTracking::SanityCheck() uint8_t sector, row; uint32_t cl; trk.getClusterReference(mIOPtrs.outputClusRefsTPCO2, j, sector, row, cl); - if (sector >= GPUCA_NSLICES || row >= GPUCA_ROW_COUNT) { + if (sector >= GPUCA_NSECTORS || row >= GPUCA_ROW_COUNT) { if (nErrors++ < 1000) { GPUError("Invalid sector / row %d / %d", (int32_t)sector, (int32_t)row); continue; @@ -299,7 +299,7 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster o2::tpc::ClusterNative* outputBuffer = nullptr; for (int32_t iPhase = 0; iPhase < 2; iPhase++) { uint32_t countTotal = 0; - for (uint32_t iSector = 0; iSector < GPUCA_NSLICES; iSector++) { + for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { for (uint32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) { uint32_t count = 0; for (uint32_t k = 0; k < clusters->nClusters[iSector][iRow]; k++) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index c159e333a3b18..c4dddd4b8b88f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -15,8 +15,8 @@ #include "GPUChainTracking.h" #include "GPUReconstructionIO.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutput.h" +#include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCTrack.h" @@ -77,7 +77,7 @@ void GPUChainTracking::DumpData(const char* filename) DumpData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, InOutPointerType::RAW_CLUSTERS); if (mIOPtrs.clustersNative) { if (DumpData(fp, &mIOPtrs.clustersNative->clustersLinear, &mIOPtrs.clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { - 
fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp); + fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), NSECTORS * GPUCA_ROW_COUNT, fp); if (mIOPtrs.clustersNative->clustersMCTruth) { const auto& buffer = mIOPtrs.clustersNative->clustersMCTruth->getBuffer(); std::pair tmp = {buffer.data(), buffer.size()}; @@ -87,9 +87,9 @@ void GPUChainTracking::DumpData(const char* filename) } if (mIOPtrs.tpcPackedDigits) { if (DumpData(fp, mIOPtrs.tpcPackedDigits->tpcDigits, mIOPtrs.tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && mIOPtrs.tpcPackedDigits->tpcDigitsMC) { - const char* ptrs[NSLICES]; - size_t sizes[NSLICES]; - for (uint32_t i = 0; i < NSLICES; i++) { + const char* ptrs[NSECTORS]; + size_t sizes[NSECTORS]; + for (uint32_t i = 0; i < NSECTORS; i++) { if (mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]) { const auto& buffer = mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); ptrs[i] = buffer.data(); @@ -104,10 +104,10 @@ void GPUChainTracking::DumpData(const char* filename) } if (mIOPtrs.tpcZS) { size_t total = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[i].count[j]; k++) { - total += mIOPtrs.tpcZS->slice[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { + total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -115,12 +115,12 @@ void GPUChainTracking::DumpData(const char* filename) char* ptr = pages[0].data(); GPUTrackingInOutZS::GPUTrackingInOutZSCounts counts; total = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[i].count[j]; k++) { - memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], 
mIOPtrs.tpcZS->slice[i].zsPtr[j][k], mIOPtrs.tpcZS->slice[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); - counts.count[i][j] += mIOPtrs.tpcZS->slice[i].nZSPtr[j][k]; - total += mIOPtrs.tpcZS->slice[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { + memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], mIOPtrs.tpcZS->sector[i].zsPtr[j][k], mIOPtrs.tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); + counts.count[i][j] += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -141,8 +141,8 @@ void GPUChainTracking::DumpData(const char* filename) uint32_t n = 1; DumpData(fp, &mIOPtrs.settingsTF, &n, InOutPointerType::TF_SETTINGS); } - DumpData(fp, mIOPtrs.sliceTracks, mIOPtrs.nSliceTracks, InOutPointerType::SLICE_OUT_TRACK); - DumpData(fp, mIOPtrs.sliceClusters, mIOPtrs.nSliceClusters, InOutPointerType::SLICE_OUT_CLUSTER); + DumpData(fp, mIOPtrs.sectorTracks, mIOPtrs.nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); + DumpData(fp, mIOPtrs.sectorClusters, mIOPtrs.nSectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); DumpData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); DumpData(fp, &mIOPtrs.mcInfosTPC, &mIOPtrs.nMCInfosTPC, InOutPointerType::MC_INFO_TPC); DumpData(fp, &mIOPtrs.mcInfosTPCCol, &mIOPtrs.nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); @@ -181,14 +181,14 @@ int32_t GPUChainTracking::ReadData(const char* filename) fclose(fp); return 1; } - GPUTPCClusterData* ptrClusterData[NSLICES]; + GPUTPCClusterData* ptrClusterData[NSECTORS]; ReadData(fp, mIOPtrs.clusterData, mIOPtrs.nClusterData, mIOMem.clusterData, InOutPointerType::CLUSTER_DATA, ptrClusterData); - AliHLTTPCRawCluster* ptrRawClusters[NSLICES]; + AliHLTTPCRawCluster* ptrRawClusters[NSECTORS]; ReadData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, mIOMem.rawClusters, InOutPointerType::RAW_CLUSTERS, ptrRawClusters); int32_t nClustersTotal = 0; mIOMem.clusterNativeAccess.reset(new 
ClusterNativeAccess); if (ReadData(fp, &mIOMem.clusterNativeAccess->clustersLinear, &mIOMem.clusterNativeAccess->nClustersTotal, &mIOMem.clustersNative, InOutPointerType::CLUSTERS_NATIVE)) { - r = fread(&mIOMem.clusterNativeAccess->nClusters[0][0], sizeof(mIOMem.clusterNativeAccess->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp); + r = fread(&mIOMem.clusterNativeAccess->nClusters[0][0], sizeof(mIOMem.clusterNativeAccess->nClusters[0][0]), NSECTORS * GPUCA_ROW_COUNT, fp); mIOMem.clusterNativeAccess->setOffsetPtrs(); mIOPtrs.clustersNative = mIOMem.clusterNativeAccess.get(); std::pair tmp = {nullptr, 0}; @@ -200,12 +200,12 @@ int32_t GPUChainTracking::ReadData(const char* filename) mIOMem.digitMap.reset(new GPUTrackingInOutDigits); if (ReadData(fp, mIOMem.digitMap->tpcDigits, mIOMem.digitMap->nTPCDigits, mIOMem.tpcDigits, InOutPointerType::TPC_DIGIT)) { mIOPtrs.tpcPackedDigits = mIOMem.digitMap.get(); - const char* ptrs[NSLICES]; - size_t sizes[NSLICES]; + const char* ptrs[NSECTORS]; + size_t sizes[NSECTORS]; if (ReadData(fp, ptrs, sizes, mIOMem.tpcDigitsMC, InOutPointerType::TPC_DIGIT_MC)) { mIOMem.tpcDigitMCMap = std::make_unique(); - mIOMem.tpcDigitMCView.reset(new ConstMCLabelContainerView[NSLICES]); - for (uint32_t i = 0; i < NSLICES; i++) { + mIOMem.tpcDigitMCView.reset(new ConstMCLabelContainerView[NSECTORS]); + for (uint32_t i = 0; i < NSECTORS; i++) { if (sizes[i]) { mIOMem.tpcDigitMCView.get()[i] = gsl::span(ptrs[i], ptrs[i] + sizes[i]); mIOMem.tpcDigitMCMap->v[i] = mIOMem.tpcDigitMCView.get() + i; @@ -225,13 +225,13 @@ int32_t GPUChainTracking::ReadData(const char* filename) mIOMem.tpcZSmeta.reset(new GPUTrackingInOutZS); mIOMem.tpcZSmeta2.reset(new GPUTrackingInOutZS::GPUTrackingInOutZSMeta); total = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { mIOMem.tpcZSmeta2->ptr[i][j] = &ptrZSPages[total * TPCZSHDR::TPC_ZS_PAGE_SIZE]; - 
mIOMem.tpcZSmeta->slice[i].zsPtr[j] = &mIOMem.tpcZSmeta2->ptr[i][j]; + mIOMem.tpcZSmeta->sector[i].zsPtr[j] = &mIOMem.tpcZSmeta2->ptr[i][j]; mIOMem.tpcZSmeta2->n[i][j] = counts.count[i][j]; - mIOMem.tpcZSmeta->slice[i].nZSPtr[j] = &mIOMem.tpcZSmeta2->n[i][j]; - mIOMem.tpcZSmeta->slice[i].count[j] = 1; + mIOMem.tpcZSmeta->sector[i].nZSPtr[j] = &mIOMem.tpcZSmeta2->n[i][j]; + mIOMem.tpcZSmeta->sector[i].count[j] = 1; total += counts.count[i][j]; } } @@ -242,8 +242,8 @@ int32_t GPUChainTracking::ReadData(const char* filename) } uint32_t n; ReadData(fp, &mIOPtrs.settingsTF, &n, &mIOMem.settingsTF, InOutPointerType::TF_SETTINGS); - ReadData(fp, mIOPtrs.sliceTracks, mIOPtrs.nSliceTracks, mIOMem.sliceTracks, InOutPointerType::SLICE_OUT_TRACK); - ReadData(fp, mIOPtrs.sliceClusters, mIOPtrs.nSliceClusters, mIOMem.sliceClusters, InOutPointerType::SLICE_OUT_CLUSTER); + ReadData(fp, mIOPtrs.sectorTracks, mIOPtrs.nSectorTracks, mIOMem.sectorTracks, InOutPointerType::SECTOR_OUT_TRACK); + ReadData(fp, mIOPtrs.sectorClusters, mIOPtrs.nSectorClusters, mIOMem.sectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); ReadData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, &mIOMem.mcLabelsTPC, InOutPointerType::MC_LABEL_TPC); ReadData(fp, &mIOPtrs.mcInfosTPC, &mIOPtrs.nMCInfosTPC, &mIOMem.mcInfosTPC, InOutPointerType::MC_INFO_TPC); ReadData(fp, &mIOPtrs.mcInfosTPCCol, &mIOPtrs.nMCInfosTPCCol, &mIOMem.mcInfosTPCCol, InOutPointerType::MC_INFO_TPC); @@ -266,7 +266,7 @@ int32_t GPUChainTracking::ReadData(const char* filename) return 1; } (void)r; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < mIOPtrs.nClusterData[i]; j++) { ptrClusterData[i][j].id = nClustersTotal++; if ((uint32_t)ptrClusterData[i][j].amp >= 25 * 1024) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index f28b99c0d8dd0..6c79d87e50465 100644 --- 
a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -21,22 +21,22 @@ using namespace o2::gpu; -void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType) +void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSector, int8_t mergeMode, GPUReconstruction::krnlDeviceType deviceType) { GPUTPCGMMerger& Merger = processors()->tpcMerger; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCMerging; GPUTPCGMMerger& MergerShadow = doGPU ? processorsShadow()->tpcMerger : Merger; if (GetProcessingSettings().deterministicGPUReconstruction) { - uint32_t nBorderTracks = withinSlice == 1 ? NSLICES : (2 * NSLICES); + uint32_t nBorderTracks = withinSector == 1 ? NSECTORS : (2 * NSECTORS); runKernel({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0); } - uint32_t n = withinSlice == -1 ? NSLICES / 2 : NSLICES; + uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS; if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); RecordMarker(&mEvents->single, 0); for (uint32_t i = 0; i < n; i++) { int32_t stream = i % mRec->NStreams(); - runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSlice, mergeMode); + runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? 
&mEvents->single : nullptr}}, i, withinSector, mergeMode); } ReleaseEvent(mEvents->single); SynchronizeEventAndRelease(mEvents->init); @@ -44,10 +44,10 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice int32_t stream = i % mRec->NStreams(); int32_t n1, n2; GPUTPCGMBorderTrack *b1, *b2; - int32_t jSlice; - Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSlice, i, withinSlice, mergeMode); + int32_t jSector; + Merger.MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode); gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = MergerShadow.BorderRange(i); - gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSlice) + *processors()->tpcTrackers[jSlice].NTracks(); + gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks(); runKernel({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0); runKernel({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1); deviceEvent* e = nullptr; @@ -55,24 +55,24 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSlice if (i == n - 1) { // Synchronize all execution on stream 0 with the last kernel ne = std::min(n, mRec->NStreams()); for (int32_t j = 1; j < ne; j++) { - RecordMarker(&mEvents->slice[j], j); + RecordMarker(&mEvents->sector[j], j); } - e = &mEvents->slice[1]; + e = &mEvents->sector[1]; ne--; stream = 0; } - runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, e, ne}}, i, withinSlice, mergeMode); + runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, e, ne}}, i, withinSector, mergeMode); } } else { for (uint32_t i = 0; i < n; i++) { - runKernel(GetGridAuto(0, deviceType), i, withinSlice, mergeMode); + runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); } - runKernel({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSlice, mergeMode); + runKernel({{2 * n, -WarpSize(), 0, deviceType}}, 0, withinSector, 
mergeMode); for (uint32_t i = 0; i < n; i++) { - runKernel(GetGridAuto(0, deviceType), i, withinSlice, mergeMode); + runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); } } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSlice, mergeMode); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); mRec->ReturnVolatileDeviceMemory(); } @@ -100,12 +100,12 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } const auto& threadContext = GetThreadContext(); - SynchronizeGPU(); // Need to know the full number of slice tracks + SynchronizeGPU(); // Need to know the full number of sector tracks SetupGPUProcessor(&Merger, true); AllocateRegisteredMemory(Merger.MemoryResOutput(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracks)]); AllocateRegisteredMemory(Merger.MemoryResOutputState(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::sharedClusterMap)]); - if (Merger.CheckSlices()) { + if (Merger.CheckSectors()) { return 1; } @@ -118,48 +118,48 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel(GetGridAuto(0, deviceType), 1); } - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { runKernel({{1, -WarpSize(), 0, deviceType}}, i); runKernel(GetGridAuto(0, deviceType), i); - runKernel(GetGridAuto(0, deviceType), i); + runKernel(GetGridAuto(0, deviceType), i); } if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({{1, -WarpSize(), 0, deviceType}}, NSLICES); - runKernel({{GPUCA_NSLICES, -WarpSize(), 0, deviceType}}, 0); + runKernel({{1, -WarpSize(), 0, deviceType}}, NSECTORS); + runKernel({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 0); } - for (uint32_t i = 0; i < NSLICES; i++) { - runKernel({{1, 
-WarpSize(), 0, deviceType}}, NSLICES + i); + for (uint32_t i = 0; i < NSECTORS; i++) { + runKernel({{1, -WarpSize(), 0, deviceType}}, NSECTORS + i); runKernel(GetGridAuto(0, deviceType), i); } - runKernel({{1, -WarpSize(), 0, deviceType}}, 2 * NSLICES); + runKernel({{1, -WarpSize(), 0, deviceType}}, 2 * NSECTORS); if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({{GPUCA_NSLICES, -WarpSize(), 0, deviceType}}, 1); + runKernel({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSliceTracks, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSLICES * sizeof(*MergerShadowAll.TmpCounter())); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSlices, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); - runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); + runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 2, 3, 0); RunTPCTrackingMerger_MergeBorderTracks(0, 0, 
deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); - runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); + runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 0); RunTPCTrackingMerger_MergeBorderTracks(0, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); - runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); + runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSlices, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); - runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSLICES * sizeof(*MergerShadowAll.TmpCounter())); + runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); @@ -281,9 +281,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) #ifdef GPUCA_TPC_GEOMETRY_O2 if (GetProcessingSettings().createO2Output) { - if (mTPCSliceScratchOnStack) 
{ - mRec->PopNonPersistentMemory(RecoStep::TPCSliceTracking, qStr2Tag("TPCSLCD1")); // Return the slice data memory early - mTPCSliceScratchOnStack = false; + if (mTPCSectorScratchOnStack) { + mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLCD1")); // Return the sector data memory early + mTPCSectorScratchOnStack = false; } mRec->PushNonPersistentMemory(qStr2Tag("TPCMERG2")); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx similarity index 50% rename from GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx rename to GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 760d2cf2c5f40..df7c513fc1120 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUChainTrackingSliceTracker.cxx +/// \file GPUChainTrackingSectorTracker.cxx /// \author David Rohr #include "GPUChainTracking.h" @@ -24,17 +24,17 @@ using namespace o2::gpu; -int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSlice, int32_t threadId, bool synchronizeOutput) +int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput) { - runKernel({GetGridBlk(256, iSlice % mRec->NStreams()), {iSlice}}); - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[iSlice].MemoryResCommon(), iSlice % mRec->NStreams()); + runKernel({GetGridBlk(256, iSector % mRec->NStreams()), {iSector}}); + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), iSector % mRec->NStreams()); if (synchronizeOutput) { - SynchronizeStream(iSlice % mRec->NStreams()); + SynchronizeStream(iSector % mRec->NStreams()); } return (0); } -int32_t 
GPUChainTracking::RunTPCTrackingSlices() +int32_t GPUChainTracking::RunTPCTrackingSectors() { if (mRec->GPUStuck()) { GPUWarning("This GPU is stuck, processing of tracking for this event is skipped!"); @@ -43,21 +43,21 @@ int32_t GPUChainTracking::RunTPCTrackingSlices() const auto& threadContext = GetThreadContext(); - int32_t retVal = RunTPCTrackingSlices_internal(); + int32_t retVal = RunTPCTrackingSectors_internal(); if (retVal) { SynchronizeGPU(); } return (retVal != 0); } -int32_t GPUChainTracking::RunTPCTrackingSlices_internal() +int32_t GPUChainTracking::RunTPCTrackingSectors_internal() { if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("Running TPC Slice Tracker"); + GPUInfo("Running TPC Sector Tracker"); } - bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSliceTracking; + bool doGPU = GetRecoStepsGPU() & RecoStep::TPCSectorTracking; if (!param().par.earlyTpcTransform) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { processors()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); if (doGPU) { processorsShadow()->tpcTrackers[i].Data().SetClusterData(nullptr, mIOPtrs.clustersNative->nClustersSector[i], mIOPtrs.clustersNative->clusterOffset[i][0]); // TODO: not needed I think, anyway copied in SetupGPUProcessor @@ -66,7 +66,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() mRec->MemoryScalers()->nTPCHits = mIOPtrs.clustersNative->nClustersTotal; } else { int32_t offset = 0; - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { processors()->tpcTrackers[i].Data().SetClusterData(mIOPtrs.clusterData[i], mIOPtrs.nClusterData[i], offset); if (doGPU && GetRecoSteps().isSet(RecoStep::TPCConversion)) { processorsShadow()->tpcTrackers[i].Data().SetClusterData(processorsShadow()->tpcConverter.mClusters + processors()->tpcTrackers[i].Data().ClusterIdOffset(), 
processors()->tpcTrackers[i].NHitsTotal(), processors()->tpcTrackers[i].Data().ClusterIdOffset()); @@ -77,28 +77,28 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } GPUInfo("Event has %u TPC Clusters, %d TRD Tracklets", (uint32_t)mRec->MemoryScalers()->nTPCHits, mIOPtrs.nTRDTracklets); - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processors()->tpcTrackers[iSlice].SetMaxData(mIOPtrs); // First iteration to set data sizes + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processors()->tpcTrackers[iSector].SetMaxData(mIOPtrs); // First iteration to set data sizes } mRec->ComputeReuseMax(nullptr); // Resolve maximums for shared buffers - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - SetupGPUProcessor(&processors()->tpcTrackers[iSlice], false); // Prepare custom allocation for 1st stack level - mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSlice].MemoryResSliceScratch()); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + SetupGPUProcessor(&processors()->tpcTrackers[iSector], false); // Prepare custom allocation for 1st stack level + mRec->AllocateRegisteredMemory(processors()->tpcTrackers[iSector].MemoryResSectorScratch()); } mRec->PushNonPersistentMemory(qStr2Tag("TPCSLTRK")); - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - SetupGPUProcessor(&processors()->tpcTrackers[iSlice], true); // Now we allocate - mRec->ResetRegisteredMemoryPointers(&processors()->tpcTrackers[iSlice]); // TODO: The above call breaks the GPU ptrs to already allocated memory. This fixes them. Should actually be cleaned up at the source. - processors()->tpcTrackers[iSlice].SetupCommonMemory(); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + SetupGPUProcessor(&processors()->tpcTrackers[iSector], true); // Now we allocate + mRec->ResetRegisteredMemoryPointers(&processors()->tpcTrackers[iSector]); // TODO: The above call breaks the GPU ptrs to already allocated memory. This fixes them. 
Should actually be cleaned up at the source. + processors()->tpcTrackers[iSector].SetupCommonMemory(); } bool streamInit[GPUCA_MAX_STREAMS] = {false}; if (doGPU) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - processorsShadow()->tpcTrackers[iSlice].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); // Initialize Startup Constants - processors()->tpcTrackers[iSlice].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSLICES - 1 - iSlice) / NSLICES) * getKernelProperties().nThreads; - processorsShadow()->tpcTrackers[iSlice].SetGPUTextureBase(mRec->DeviceMemoryBase()); + processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; + processorsShadow()->tpcTrackers[iSector].SetGPUTextureBase(mRec->DeviceMemoryBase()); } if (PrepareTextures()) { @@ -113,7 +113,7 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() return 2; } - WriteToConstantMemory(RecoStep::TPCSliceTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSLICES, mRec->NStreams() - 1, &mEvents->init); + WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); for (int32_t i = 0; i < mRec->NStreams() - 1; i++) { streamInit[i] = false; @@ -134,15 +134,15 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } uint32_t* ptr = doGPU ? 
mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap; auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU); - runKernel(GetGridAutoStep(streamOccMap, RecoStep::TPCSliceTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); - runKernel(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, streamOccMap), ptrTmp); + runKernel(GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); + runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap), ptrTmp); runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2); mRec->ReturnVolatileMemory(); mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage; if (doGPU) { - GPUMemCpy(RecoStep::TPCSliceTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init); + GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init); } else { - TransferMemoryResourceLinkToGPU(RecoStep::TPCSliceTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init); + TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init); } } if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { @@ -151,82 +151,82 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? 
mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap); } - int32_t streamMap[NSLICES]; + int32_t streamMap[NSECTORS]; bool error = false; - mRec->runParallelOuterLoop(doGPU, NSLICES, [&](uint32_t iSlice) { - GPUTPCTracker& trk = processors()->tpcTrackers[iSlice]; - GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSlice] : trk; - int32_t useStream = (iSlice % mRec->NStreams()); + mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { + GPUTPCTracker& trk = processors()->tpcTrackers[iSector]; + GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk; + int32_t useStream = (iSector % mRec->NStreams()); if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Creating Slice Data (Slice %d)", iSlice); + GPUInfo("Creating Sector Data (Sector %d)", iSector); } if (doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); + TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); streamInit[useStream] = true; } else { - if (ReadEvent(iSlice, 0)) { + if (ReadEvent(iSector, 0)) { GPUError("Error reading event"); error = 1; return; } } if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}}); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}}); } - if (!doGPU && trk.CheckEmptySlice() && GetProcessingSettings().debugLevel == 0) { + if (!doGPU && trk.CheckEmptySector() && GetProcessingSettings().debugLevel == 0) { return; } if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Slice " << iSlice << "/" << NSLICES << std::endl; + *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; if (GetProcessingSettings().debugMask & 1) { if (doGPU) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); } - trk.DumpSliceData(*mDebugFile); + trk.DumpTrackingData(*mDebugFile); } } // Initialize temporary memory where needed if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Slice Data to GPU and initializing temporary memory"); + GPUInfo("Copying Sector Data to GPU and initializing temporary memory"); } - runKernel(GetGridAutoStep(useStream, RecoStep::TPCSliceTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); + runKernel(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); if (!doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSliceTracking, &trk, useStream); // Copy Data to GPU Global Memory + TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory } if (GPUDebug("Initialization (3)", useStream)) { throw 
std::runtime_error("memcpy failure"); } - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSlice}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); + runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); streamInit[useStream] = true; if (GetProcessingSettings().keepDisplayMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &trk, -1, true); + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); if (GetProcessingSettings().debugMask & 2) { trk.DumpLinks(*mDebugFile, 0); } } - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSlice}}); - DoDebugAndDump(RecoStep::TPCSliceTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSector}}); + DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); - runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSlice}}); + runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); #ifdef GPUCA_SORT_STARTHITS_GPU if (doGPU) { - runKernel({GetGridAuto(useStream), {iSlice}}); + runKernel({GetGridAuto(useStream), {iSector}}); } #endif if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); + runKernel({GetGrid(1, 1, useStream), {iSector}}); } - DoDebugAndDump(RecoStep::TPCSliceTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { trk.UpdateMaxData(); @@ -235,25 +235,25 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || 
GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - DoDebugAndDump(RecoStep::TPCSliceTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + runKernel({GetGridAuto(useStream), {iSector}}); + DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { trk.DumpHitWeights(*mDebugFile); } } if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { - runKernel({GetGridAuto(useStream), {iSlice}}); - runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, 1); + runKernel({GetGridAuto(useStream), {iSector}}); + runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSlice}}); + runKernel({GetGrid(1, 1, useStream), {iSector}}); } - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, trk.MemoryResCommon(), useStream, &mEvents->slice[iSlice]); - streamMap[iSlice] = useStream; + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]); + streamMap[iSector] = useStream; if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Slice %u, Number of tracks: %d", iSlice, *trk.NTracks()); + GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); } - DoDebugAndDump(RecoStep::TPCSliceTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); } }); mRec->SetNActiveThreadsOuterLoop(1); @@ -281,32 +281,32 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } if (GetProcessingSettings().debugLevel >= 4) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - DoDebugAndDump(RecoStep::TPCSliceTracking, 128, 
processors()->tpcTrackers[iSlice], &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + DoDebugAndDump(RecoStep::TPCSectorTracking, 128, processors()->tpcTrackers[iSector], &GPUTPCTracker::DumpTrackletHits, *mDebugFile); } } - int32_t runSlices = 0; + int32_t runSectors = 0; int32_t useStream = 0; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice += runSlices) { - if (runSlices < GetProcessingSettings().trackletSelectorSlices) { - runSlices++; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector += runSectors) { + if (runSectors < GetProcessingSettings().trackletSelectorSectors) { + runSectors++; } - runSlices = CAMath::Min(runSlices, NSLICES - iSlice); - if (getKernelProperties().minBlocks * BlockCount() < (uint32_t)runSlices) { - runSlices = getKernelProperties().minBlocks * BlockCount(); + runSectors = CAMath::Min(runSectors, NSECTORS - iSector); + if (getKernelProperties().minBlocks * BlockCount() < (uint32_t)runSectors) { + runSectors = getKernelProperties().minBlocks * BlockCount(); } if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Running TPC Tracklet selector (Stream %d, Slice %d to %d)", useStream, iSlice, iSlice + runSlices); + GPUInfo("Running TPC Tracklet selector (Stream %d, Sector %d to %d)", useStream, iSector, iSector + runSectors); } - runKernel({GetGridAuto(useStream), {iSlice, runSlices}}); - runKernel({{1, -ThreadCount(), useStream}, {iSlice}}, runSlices); - for (uint32_t k = iSlice; k < iSlice + runSlices; k++) { + runKernel({GetGridAuto(useStream), {iSector, runSectors}}); + runKernel({{1, -ThreadCount(), useStream}, {iSector}}, runSectors); + for (uint32_t k = iSector; k < iSector + runSectors; k++) { if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {k}}); + runKernel({GetGrid(1, 1, useStream), {k}}); } - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[k].MemoryResCommon(), 
useStream, &mEvents->slice[k]); + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[k].MemoryResCommon(), useStream, &mEvents->sector[k]); streamMap[k] = useStream; } useStream++; @@ -316,128 +316,128 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() } } - mSliceSelectorReady = 0; + mSectorSelectorReady = 0; - std::array transferRunning; + std::array transferRunning; transferRunning.fill(true); if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { if (param().rec.tpc.extrapolationTracking) { mWriteOutputDone.fill(0); } - uint32_t tmpSlice = 0; - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + uint32_t tmpSector = 0; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Transfering Tracks from GPU to Host"); } - if (tmpSlice == iSlice) { - SynchronizeEvents(&mEvents->slice[iSlice]); + if (tmpSector == iSector) { + SynchronizeEvents(&mEvents->sector[iSector]); } - while (tmpSlice < NSLICES && (tmpSlice == iSlice || IsEventDone(&mEvents->slice[tmpSlice]))) { - ReleaseEvent(mEvents->slice[tmpSlice]); - if (*processors()->tpcTrackers[tmpSlice].NTracks() > 0) { - TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, processors()->tpcTrackers[tmpSlice].MemoryResOutput(), streamMap[tmpSlice], &mEvents->slice[tmpSlice]); + while (tmpSector < NSECTORS && (tmpSector == iSector || IsEventDone(&mEvents->sector[tmpSector]))) { + ReleaseEvent(mEvents->sector[tmpSector]); + if (*processors()->tpcTrackers[tmpSector].NTracks() > 0) { + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[tmpSector].MemoryResOutput(), streamMap[tmpSector], &mEvents->sector[tmpSector]); } else { - transferRunning[tmpSlice] = false; + transferRunning[tmpSector] = false; } - tmpSlice++; + tmpSector++; } if (GetProcessingSettings().keepAllMemory) { - 
TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &processors()->tpcTrackers[iSlice], -1, true); + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true); if (!GetProcessingSettings().trackletConstructorInPipeline) { if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - processors()->tpcTrackers[iSlice].DumpHitWeights(*mDebugFile); + processors()->tpcTrackers[iSector].DumpHitWeights(*mDebugFile); } } if (!GetProcessingSettings().trackletSelectorInPipeline) { if (GetProcessingSettings().debugMask & 512) { - processors()->tpcTrackers[iSlice].DumpTrackHits(*mDebugFile); + processors()->tpcTrackers[iSector].DumpTrackHits(*mDebugFile); } } } - if (transferRunning[iSlice]) { - SynchronizeEvents(&mEvents->slice[iSlice]); + if (transferRunning[iSector]) { + SynchronizeEvents(&mEvents->sector[iSector]); } if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSlice].NTracks(), *processors()->tpcTrackers[iSlice].NTrackHits()); + GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSector].NTracks(), *processors()->tpcTrackers[iSector].NTrackHits()); } if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Data ready for slice %d", iSlice); + GPUInfo("Data ready for sector %d", iSector); } - mSliceSelectorReady = iSlice; + mSectorSelectorReady = iSector; if (param().rec.tpc.extrapolationTracking) { - for (uint32_t tmpSlice2a = 0; tmpSlice2a <= iSlice; tmpSlice2a++) { - uint32_t tmpSlice2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(tmpSlice2a); - uint32_t sliceLeft, sliceRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(tmpSlice2, sliceLeft, sliceRight); - - if (tmpSlice2 <= iSlice && sliceLeft <= iSlice && sliceRight <= iSlice && mWriteOutputDone[tmpSlice2] == 0) { - ExtrapolationTracking(tmpSlice2, 0); - WriteOutput(tmpSlice2, 0); - 
mWriteOutputDone[tmpSlice2] = 1; + for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) { + uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a); + uint32_t sectorLeft, sectorRight; + GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight); + + if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mWriteOutputDone[tmpSector2] == 0) { + ExtrapolationTracking(tmpSector2, 0); + WriteOutput(tmpSector2, 0); + mWriteOutputDone[tmpSector2] = 1; } } } else { - WriteOutput(iSlice, 0); + WriteOutput(iSector, 0); } } } if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.extrapolationTracking) { - std::vector blocking(NSLICES * mRec->NStreams()); - for (int32_t i = 0; i < NSLICES; i++) { + std::vector blocking(NSECTORS * mRec->NStreams()); + for (int32_t i = 0; i < NSECTORS; i++) { for (int32_t j = 0; j < mRec->NStreams(); j++) { blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j; } } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t tmpSlice = GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(iSlice); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector); if (!((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)))) { - uint32_t sliceLeft, sliceRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(tmpSlice, sliceLeft, sliceRight); - if (doGPU && !blocking[tmpSlice * mRec->NStreams() + sliceLeft % mRec->NStreams()]) { - StreamWaitForEvents(tmpSlice % mRec->NStreams(), &mEvents->slice[sliceLeft]); - blocking[tmpSlice * mRec->NStreams() + sliceLeft % mRec->NStreams()] = true; + uint32_t sectorLeft, sectorRight; + 
GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); + if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) { + StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorLeft]); + blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()] = true; } - if (doGPU && !blocking[tmpSlice * mRec->NStreams() + sliceRight % mRec->NStreams()]) { - StreamWaitForEvents(tmpSlice % mRec->NStreams(), &mEvents->slice[sliceRight]); - blocking[tmpSlice * mRec->NStreams() + sliceRight % mRec->NStreams()] = true; + if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()]) { + StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorRight]); + blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()] = true; } } - ExtrapolationTracking(tmpSlice, 0, false); + ExtrapolationTracking(tmpSector, 0, false); } } - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - if (doGPU && transferRunning[iSlice]) { - ReleaseEvent(mEvents->slice[iSlice]); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + if (doGPU && transferRunning[iSector]) { + ReleaseEvent(mEvents->sector[iSector]); } } } else { - mSliceSelectorReady = NSLICES; - mRec->runParallelOuterLoop(doGPU, NSLICES, [&](uint32_t iSlice) { + mSectorSelectorReady = NSECTORS; + mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { if (param().rec.tpc.extrapolationTracking) { - ExtrapolationTracking(iSlice, 0); + ExtrapolationTracking(iSector, 0); } if (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) { - WriteOutput(iSlice, 0); + WriteOutput(iSector, 0); } }); mRec->SetNActiveThreadsOuterLoop(1); } if (param().rec.tpc.extrapolationTracking && GetProcessingSettings().debugLevel >= 3) { - for (uint32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - GPUInfo("Slice %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", 
iSlice, - processors()->tpcTrackers[iSlice].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSlice].CommonMemory()->nTracks, processors()->tpcTrackers[iSlice].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSlice].CommonMemory()->nTrackHits); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + GPUInfo("Sector %d - Tracks: Local %d Extrapolated %d - Hits: Local %d Extrapolated %d", iSector, + processors()->tpcTrackers[iSector].CommonMemory()->nLocalTracks, processors()->tpcTrackers[iSector].CommonMemory()->nTracks, processors()->tpcTrackers[iSector].CommonMemory()->nLocalTrackHits, processors()->tpcTrackers[iSector].CommonMemory()->nTrackHits); } } if (GetProcessingSettings().debugMask & 1024 && !GetProcessingSettings().deterministicGPUReconstruction) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { processors()->tpcTrackers[i].DumpOutput(*mDebugFile); } } @@ -445,42 +445,42 @@ int32_t GPUChainTracking::RunTPCTrackingSlices_internal() if (DoProfile()) { return (1); } - for (uint32_t i = 0; i < NSLICES; i++) { - mIOPtrs.nSliceTracks[i] = *processors()->tpcTrackers[i].NTracks(); - mIOPtrs.sliceTracks[i] = processors()->tpcTrackers[i].Tracks(); - mIOPtrs.nSliceClusters[i] = *processors()->tpcTrackers[i].NTrackHits(); - mIOPtrs.sliceClusters[i] = processors()->tpcTrackers[i].TrackHits(); + for (uint32_t i = 0; i < NSECTORS; i++) { + mIOPtrs.nSectorTracks[i] = *processors()->tpcTrackers[i].NTracks(); + mIOPtrs.sectorTracks[i] = processors()->tpcTrackers[i].Tracks(); + mIOPtrs.nSectorClusters[i] = *processors()->tpcTrackers[i].NTrackHits(); + mIOPtrs.sectorClusters[i] = processors()->tpcTrackers[i].TrackHits(); if (GetProcessingSettings().keepDisplayMemory && !GetProcessingSettings().keepAllMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSliceTracking, &processors()->tpcTrackers[i], -1, true); + TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[i], -1, true); 
} } if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("TPC Slice Tracker finished"); + GPUInfo("TPC Sector Tracker finished"); } - mRec->PopNonPersistentMemory(RecoStep::TPCSliceTracking, qStr2Tag("TPCSLTRK")); + mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK")); return 0; } -int32_t GPUChainTracking::ReadEvent(uint32_t iSlice, int32_t threadId) +int32_t GPUChainTracking::ReadEvent(uint32_t iSector, int32_t threadId) { if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running ReadEvent for slice %d on thread %d\n", iSlice, threadId); + GPUInfo("Running ReadEvent for sector %d on thread %d\n", iSector, threadId); } - runKernel({{GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)}, {iSlice}}); + runKernel({{GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)}, {iSector}}); if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished ReadEvent for slice %d on thread %d\n", iSlice, threadId); + GPUInfo("Finished ReadEvent for sector %d on thread %d\n", iSector, threadId); } return (0); } -void GPUChainTracking::WriteOutput(int32_t iSlice, int32_t threadId) +void GPUChainTracking::WriteOutput(int32_t iSector, int32_t threadId) { if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running WriteOutput for slice %d on thread %d\n", iSlice, threadId); + GPUInfo("Running WriteOutput for sector %d on thread %d\n", iSector, threadId); } - processors()->tpcTrackers[iSlice].WriteOutputPrepare(); - processors()->tpcTrackers[iSlice].WriteOutput(); + processors()->tpcTrackers[iSector].WriteOutputPrepare(); + processors()->tpcTrackers[iSector].WriteOutput(); if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished WriteOutput for slice %d on thread %d\n", iSlice, threadId); + GPUInfo("Finished WriteOutput for sector %d on thread %d\n", iSector, threadId); } } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx index 
d91fed4046de0..db5e5ae3aeb75 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx @@ -30,7 +30,7 @@ using namespace o2::tpc; bool GPUChainTracking::NeedTPCClustersOnGPU() { - return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression); + return (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCConversion) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging) || (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression); } int32_t GPUChainTracking::ConvertNativeToClusterData() @@ -56,7 +56,7 @@ int32_t GPUChainTracking::ConvertNativeToClusterData() } if (!param().par.earlyTpcTransform) { if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during slice data creation / refit"); + GPUInfo("Early transform inactive, skipping TPC Early transformation kernel, transformed on the fly during sector data creation / refit"); } if (transferClusters) { SynchronizeStream(0); // TODO: Synchronize implicitly with next step @@ -64,18 +64,18 @@ int32_t GPUChainTracking::ConvertNativeToClusterData() return 0; } SetupGPUProcessor(&convert, true); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { convert.mMemory->clusters[i] = convertShadow.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0]; } WriteToConstantMemory(RecoStep::TPCConversion, (char*)&processors()->tpcConverter - (char*)processors(), &convertShadow, sizeof(convertShadow), 0); TransferMemoryResourcesToGPU(RecoStep::TPCConversion, &convert, 0); - runKernel(GetGridBlk(NSLICES * GPUCA_ROW_COUNT, 
0)); + runKernel(GetGridBlk(NSECTORS * GPUCA_ROW_COUNT, 0)); TransferMemoryResourcesToHost(RecoStep::TPCConversion, &convert, 0); SynchronizeStream(0); - for (uint32_t i = 0; i < NSLICES; i++) { - mIOPtrs.nClusterData[i] = (i == NSLICES - 1 ? mIOPtrs.clustersNative->nClustersTotal : mIOPtrs.clustersNative->clusterOffset[i + 1][0]) - mIOPtrs.clustersNative->clusterOffset[i][0]; + for (uint32_t i = 0; i < NSECTORS; i++) { + mIOPtrs.nClusterData[i] = (i == NSECTORS - 1 ? mIOPtrs.clustersNative->nClustersTotal : mIOPtrs.clustersNative->clusterOffset[i + 1][0]) - mIOPtrs.clustersNative->clusterOffset[i][0]; mIOPtrs.clusterData[i] = convert.mClusters + mIOPtrs.clustersNative->clusterOffset[i][0]; } mRec->PopNonPersistentMemory(RecoStep::TPCConversion, qStr2Tag("TPCTRANS")); @@ -89,7 +89,7 @@ void GPUChainTracking::ConvertNativeToClusterDataLegacy() *tmp = *mIOPtrs.clustersNative; } GPUReconstructionConvert::ConvertNativeToClusterData(mIOMem.clusterNativeAccess.get(), mIOMem.clusterData, mIOPtrs.nClusterData, processors()->calibObjects.fastTransform, param().continuousMaxTimeBin); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { mIOPtrs.clusterData[i] = mIOMem.clusterData[i].get(); if (GetProcessingSettings().registerStandaloneInputMemory) { if (mRec->registerMemoryForGPU(mIOMem.clusterData[i].get(), mIOPtrs.nClusterData[i] * sizeof(*mIOPtrs.clusterData[i]))) { @@ -104,7 +104,7 @@ void GPUChainTracking::ConvertNativeToClusterDataLegacy() void GPUChainTracking::ConvertRun2RawToNative() { GPUReconstructionConvert::ConvertRun2RawToNative(*mIOMem.clusterNativeAccess, mIOMem.clustersNative, mIOPtrs.rawClusters, mIOPtrs.nRawClusters); - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { mIOPtrs.rawClusters[i] = nullptr; mIOPtrs.nRawClusters[i] = 0; mIOMem.rawClusters[i].reset(nullptr); @@ -129,10 +129,10 @@ void GPUChainTracking::ConvertZSEncoder(int32_t version) 
GPUReconstructionConvert::RunZSEncoderCreateMeta(mIOMem.tpcZSpages.get(), &mIOMem.tpcZSmeta2->n[0][0], &mIOMem.tpcZSmeta2->ptr[0][0], mIOMem.tpcZSmeta.get()); mIOPtrs.tpcZS = mIOMem.tpcZSmeta.get(); if (GetProcessingSettings().registerStandaloneInputMemory) { - for (uint32_t i = 0; i < NSLICES; i++) { + for (uint32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->slice[i].count[j]; k++) { - if (mRec->registerMemoryForGPU(mIOPtrs.tpcZS->slice[i].zsPtr[j][k], mIOPtrs.tpcZS->slice[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE)) { + for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { + if (mRec->registerMemoryForGPU(mIOPtrs.tpcZS->sector[i].zsPtr[j][k], mIOPtrs.tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE)) { throw std::runtime_error("Error registering memory for GPU"); } } @@ -151,10 +151,10 @@ int32_t GPUChainTracking::ForwardTPCDigits() if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) { throw std::runtime_error("Cannot forward TPC digits with Clusterizer on GPU"); } - std::vector tmp[NSLICES][GPUCA_ROW_COUNT]; + std::vector tmp[NSECTORS][GPUCA_ROW_COUNT]; uint32_t nTotal = 0; const float zsThreshold = param().rec.tpc.zsThreshold; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < mIOPtrs.tpcPackedDigits->nTPCDigits[i]; j++) { const auto& d = mIOPtrs.tpcPackedDigits->tpcDigits[i][j]; if (d.getChargeFloat() >= zsThreshold) { @@ -172,7 +172,7 @@ int32_t GPUChainTracking::ForwardTPCDigits() mIOMem.clustersNative.reset(new ClusterNative[nTotal]); nTotal = 0; mClusterNativeAccess->clustersLinear = mIOMem.clustersNative.get(); - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { mClusterNativeAccess->nClusters[i][j] = tmp[i][j].size(); memcpy(&mIOMem.clustersNative[nTotal], tmp[i][j].data(), tmp[i][j].size() * 
sizeof(*mClusterNativeAccess->clustersLinear)); diff --git a/GPU/GPUTracking/Global/GPUErrorCodes.h b/GPU/GPUTracking/Global/GPUErrorCodes.h index 6f3ba4d2b47bf..f35f5fc81a382 100644 --- a/GPU/GPUTracking/Global/GPUErrorCodes.h +++ b/GPU/GPUTracking/Global/GPUErrorCodes.h @@ -28,10 +28,10 @@ GPUCA_ERROR_CODE(8, ERROR_GLOBAL_TRACKING_TRACK_HIT_OVERFLOW, Sector, Value, Max GPUCA_ERROR_CODE(9, ERROR_LOOPER_OVERFLOW) GPUCA_ERROR_CODE(10, ERROR_MERGER_CE_HIT_OVERFLOW, Value, Max) GPUCA_ERROR_CODE(11, ERROR_MERGER_LOOPER_OVERFLOW, Value, Max) -GPUCA_ERROR_CODE(12, ERROR_SLICEDATA_FIRSTHITINBIN_OVERFLOW, Sector, Value, Max) -GPUCA_ERROR_CODE(13, ERROR_SLICEDATA_HITINROW_OVERFLOW, SectorRow, Value, Max) -GPUCA_ERROR_CODE(14, ERROR_SLICEDATA_BIN_OVERFLOW, SectorRow, Value, Max) -GPUCA_ERROR_CODE(15, ERROR_SLICEDATA_Z_OVERFLOW, Sector, Value) +GPUCA_ERROR_CODE(12, ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, Sector, Value, Max) +GPUCA_ERROR_CODE(13, ERROR_SECTORDATA_HITINROW_OVERFLOW, SectorRow, Value, Max) +GPUCA_ERROR_CODE(14, ERROR_SECTORDATA_BIN_OVERFLOW, SectorRow, Value, Max) +GPUCA_ERROR_CODE(15, ERROR_SECTORDATA_Z_OVERFLOW, Sector, Value) GPUCA_ERROR_CODE(16, ERROR_MERGER_HIT_OVERFLOW, Value, Max) GPUCA_ERROR_CODE(17, ERROR_MERGER_TRACK_OVERFLOW, Value, Max) GPUCA_ERROR_CODE(18, ERROR_COMPRESSION_ROW_HIT_OVERFLOW, SectorRow, Value, Max) diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx index 445bb1a9c56fd..a5457bf3f2f23 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx @@ -27,8 +27,8 @@ void* GPUTrackingInputProvider::SetPointersInputZS(void* mem) { if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding) { computePointerWithAlignment(mem, mPzsMeta); - computePointerWithAlignment(mem, mPzsSizes, GPUTrackingInOutZS::NSLICES * GPUTrackingInOutZS::NENDPOINTS); - computePointerWithAlignment(mem, mPzsPtrs, 
GPUTrackingInOutZS::NSLICES * GPUTrackingInOutZS::NENDPOINTS); + computePointerWithAlignment(mem, mPzsSizes, GPUTrackingInOutZS::NSECTORS * GPUTrackingInOutZS::NENDPOINTS); + computePointerWithAlignment(mem, mPzsPtrs, GPUTrackingInOutZS::NSECTORS * GPUTrackingInOutZS::NENDPOINTS); } return mem; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h index 70d9676c4fe26..77a6f262f03e0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h @@ -25,7 +25,7 @@ namespace gpu /** * @class GPUTPCGMBorderTrack * - * The class describes TPC slice tracks at sector borders. + * The class describes TPC sector tracks at sector borders. * Used in GPUTPCGMMerger * */ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index a0b2c7b12246a..43a214cf37d3e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -42,13 +42,13 @@ #include "GPUCommonConstants.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUTPCTrackLinearisation.h" #include "GPUTPCGMTrackParam.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUTPCGMBorderTrack.h" #include "DataFormatsTPC/ClusterNative.h" @@ -81,20 +81,20 @@ struct MergeLooperParam { #include "GPUMemorySizeScalers.h" GPUTPCGMMerger::GPUTPCGMMerger() - : mTrackLinks(nullptr), mNTotalSliceTracks(0), mNMaxTracks(0), mNMaxSingleSliceTracks(0), mNMaxOutputTrackClusters(0), mNMaxClusters(0), mMemoryResMemory(-1), mNClusters(0), mOutputTracks(nullptr), mSliceTrackInfos(nullptr), mSliceTrackInfoIndex(nullptr), mClusters(nullptr), mClustersXYZ(nullptr), mClusterAttachment(nullptr), mOutputTracksTPCO2(nullptr), mOutputClusRefsTPCO2(nullptr), mOutputTracksTPCO2MC(nullptr), mTrackOrderAttach(nullptr), 
mTrackOrderProcess(nullptr), mBorderMemory(nullptr), mBorderRangeMemory(nullptr), mMemory(nullptr), mRetryRefitIds(nullptr), mLoopData(nullptr) + : mTrackLinks(nullptr), mNTotalSectorTracks(0), mNMaxTracks(0), mNMaxSingleSectorTracks(0), mNMaxOutputTrackClusters(0), mNMaxClusters(0), mMemoryResMemory(-1), mNClusters(0), mOutputTracks(nullptr), mSectorTrackInfos(nullptr), mSectorTrackInfoIndex(nullptr), mClusters(nullptr), mClustersXYZ(nullptr), mClusterAttachment(nullptr), mOutputTracksTPCO2(nullptr), mOutputClusRefsTPCO2(nullptr), mOutputTracksTPCO2MC(nullptr), mTrackOrderAttach(nullptr), mTrackOrderProcess(nullptr), mBorderMemory(nullptr), mBorderRangeMemory(nullptr), mMemory(nullptr), mRetryRefitIds(nullptr), mLoopData(nullptr) { //* constructor - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - mNextSliceInd[iSlice] = iSlice + 1; - mPrevSliceInd[iSlice] = iSlice - 1; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + mNextSectorInd[iSector] = iSector + 1; + mPrevSectorInd[iSector] = iSector - 1; } - int32_t mid = NSLICES / 2 - 1; - int32_t last = NSLICES - 1; - mNextSliceInd[mid] = 0; - mPrevSliceInd[0] = mid; - mNextSliceInd[last] = NSLICES / 2; - mPrevSliceInd[NSLICES / 2] = last; + int32_t mid = NSECTORS / 2 - 1; + int32_t last = NSECTORS - 1; + mNextSectorInd[mid] = 0; + mPrevSectorInd[0] = mid; + mNextSectorInd[last] = NSECTORS / 2; + mPrevSectorInd[NSECTORS / 2] = last; } // DEBUG CODE @@ -103,13 +103,13 @@ GPUTPCGMMerger::GPUTPCGMMerger() void GPUTPCGMMerger::CheckMergedTracks() { - std::vector trkUsed(SliceTrackInfoLocalTotal()); - for (int32_t i = 0; i < SliceTrackInfoLocalTotal(); i++) { + std::vector trkUsed(SectorTrackInfoLocalTotal()); + for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) { trkUsed[i] = false; } - for (int32_t itr = 0; itr < SliceTrackInfoLocalTotal(); itr++) { - GPUTPCGMSliceTrack& track = mSliceTrackInfos[itr]; + for (int32_t itr = 0; itr < SectorTrackInfoLocalTotal(); itr++) { + GPUTPCGMSectorTrack& 
track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { continue; } @@ -117,9 +117,9 @@ void GPUTPCGMMerger::CheckMergedTracks() continue; } int32_t leg = 0; - GPUTPCGMSliceTrack *trbase = &track, *tr = &track; + GPUTPCGMSectorTrack *trbase = &track, *tr = &track; while (true) { - int32_t iTrk = tr - mSliceTrackInfos; + int32_t iTrk = tr - mSectorTrackInfos; if (trkUsed[iTrk]) { GPUError("FAILURE: double use"); } @@ -127,12 +127,12 @@ void GPUTPCGMMerger::CheckMergedTracks() int32_t jtr = tr->NextSegmentNeighbour(); if (jtr >= 0) { - tr = &(mSliceTrackInfos[jtr]); + tr = &(mSectorTrackInfos[jtr]); continue; } jtr = trbase->NextNeighbour(); if (jtr >= 0) { - trbase = &(mSliceTrackInfos[jtr]); + trbase = &(mSectorTrackInfos[jtr]); tr = trbase; if (tr->PrevSegmentNeighbour() >= 0) { break; @@ -143,7 +143,7 @@ void GPUTPCGMMerger::CheckMergedTracks() break; } } - for (int32_t i = 0; i < SliceTrackInfoLocalTotal(); i++) { + for (int32_t i = 0; i < SectorTrackInfoLocalTotal(); i++) { if (trkUsed[i] == false) { GPUError("FAILURE: trk missed"); } @@ -164,11 +164,11 @@ inline const auto* resolveMCLabels(const o2::dataformat template int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const { - GPUTPCGMSliceTrack* sliceTrack = nullptr; + GPUTPCGMSectorTrack* sectorTrack = nullptr; int32_t nClusters = 0; if constexpr (std::is_same::value) { - sliceTrack = &mSliceTrackInfos[trk.TrackID()]; - nClusters = sliceTrack->OrigTrack()->NHits(); + sectorTrack = &mSectorTrackInfos[trk.TrackID()]; + nClusters = sectorTrack->OrigTrack()->NHits(); } else { nClusters = trk.NClusters(); } @@ -176,9 +176,9 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const for (int32_t i = 0; i < nClusters; i++) { int32_t id; if constexpr (std::is_same::value) { - const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sliceTrack->Slice()]; - const GPUTPCHitId& ic = tracker.TrackHits()[sliceTrack->OrigTrack()->FirstHitID() + i]; - id = 
tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sliceTrack->Slice()][0]; + const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; + const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; + id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; } else { id = mClusters[trk.FirstClusterRef() + i].num; } @@ -203,27 +203,27 @@ int64_t GPUTPCGMMerger::GetTrackLabel(const S& trk) const #endif // END DEBUG CODE -void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSliceTrack* trk, std::ostream& out) const +void GPUTPCGMMerger::PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const { - const GPUTPCGMSliceTrack* orgTrack = trk; + const GPUTPCGMSectorTrack* orgTrack = trk; while (trk->PrevSegmentNeighbour() >= 0) { - trk = &mSliceTrackInfos[trk->PrevSegmentNeighbour()]; + trk = &mSectorTrackInfos[trk->PrevSegmentNeighbour()]; } - const GPUTPCGMSliceTrack* orgTower = trk; + const GPUTPCGMSectorTrack* orgTower = trk; while (trk->PrevNeighbour() >= 0) { - trk = &mSliceTrackInfos[trk->PrevNeighbour()]; + trk = &mSectorTrackInfos[trk->PrevNeighbour()]; } - int32_t nextId = trk - mSliceTrackInfos; - out << "Graph of track " << (orgTrack - mSliceTrackInfos) << "\n"; + int32_t nextId = trk - mSectorTrackInfos; + out << "Graph of track " << (orgTrack - mSectorTrackInfos) << "\n"; while (nextId >= 0) { - trk = &mSliceTrackInfos[nextId]; + trk = &mSectorTrackInfos[nextId]; if (trk->PrevSegmentNeighbour() >= 0) { out << "TRACK TREE INVALID!!! " << trk->PrevSegmentNeighbour() << " --> " << nextId << "\n"; } out << (trk == orgTower ? 
"--" : " "); while (nextId >= 0) { - GPUTPCGMSliceTrack* trk2 = &mSliceTrackInfos[nextId]; + GPUTPCGMSectorTrack* trk2 = &mSectorTrackInfos[nextId]; if (trk != trk2 && (trk2->PrevNeighbour() >= 0 || trk2->NextNeighbour() >= 0)) { out << " (TRACK TREE INVALID!!! " << trk2->PrevNeighbour() << " <-- " << nextId << " --> " << trk2->NextNeighbour() << ") "; } @@ -241,28 +241,28 @@ void GPUTPCGMMerger::InitializeProcessor() {} void* GPUTPCGMMerger::SetPointersMerger(void* mem) { - computePointerWithAlignment(mem, mSliceTrackInfos, mNTotalSliceTracks); - computePointerWithAlignment(mem, mSliceTrackInfoIndex, NSLICES * 2 + 1); + computePointerWithAlignment(mem, mSectorTrackInfos, mNTotalSectorTracks); + computePointerWithAlignment(mem, mSectorTrackInfoIndex, NSECTORS * 2 + 1); if (mRec->GetProcessingSettings().deterministicGPUReconstruction) { - computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSliceTracks, mNMaxTracks * 2)); + computePointerWithAlignment(mem, mTmpSortMemory, std::max(mNTotalSectorTracks, mNMaxTracks * 2)); } void* memBase = mem; - computePointerWithAlignment(mem, mBorderMemory, 2 * mNTotalSliceTracks); // MergeBorders & Resolve - computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSliceTracks); + computePointerWithAlignment(mem, mBorderMemory, 2 * mNTotalSectorTracks); // MergeBorders & Resolve + computePointerWithAlignment(mem, mBorderRangeMemory, 2 * mNTotalSectorTracks); int32_t nTracks = 0; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks(); - mBorder[iSlice] = mBorderMemory + 2 * nTracks; - mBorder[NSLICES + iSlice] = mBorderMemory + 2 * nTracks + n; - mBorderRange[iSlice] = mBorderRangeMemory + 2 * nTracks; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + const int32_t n = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks(); + mBorder[iSector] = mBorderMemory + 2 * nTracks; + mBorder[NSECTORS + iSector] = mBorderMemory + 2 * 
nTracks + n; + mBorderRange[iSector] = mBorderRangeMemory + 2 * nTracks; nTracks += n; } - computePointerWithAlignment(mem, mTrackLinks, mNTotalSliceTracks); - computePointerWithAlignment(mem, mTrackCCRoots, mNTotalSliceTracks); + computePointerWithAlignment(mem, mTrackLinks, mNTotalSectorTracks); + computePointerWithAlignment(mem, mTrackCCRoots, mNTotalSectorTracks); void* memMax = mem; mem = memBase; - computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSLICES * mNMaxSingleSliceTracks); // UnpackResetIds - RefitSliceTracks - UnpackSliceGlobal + computePointerWithAlignment(mem, mTrackIDs, GPUCA_NSECTORS * mNMaxSingleSectorTracks); // UnpackResetIds - RefitSectorTracks - UnpackSectorGlobal memMax = (void*)std::max((size_t)mem, (size_t)memMax); mem = memBase; computePointerWithAlignment(mem, mTrackSort, mNMaxTracks); // PrepareClustersForFit0 - SortTracksQPt - PrepareClustersForFit1 - PrepareClustersForFit1 / Finalize0 - Finalize2 @@ -362,28 +362,28 @@ void GPUTPCGMMerger::RegisterMemoryAllocation() void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) { - mNTotalSliceTracks = 0; + mNTotalSectorTracks = 0; mNClusters = 0; - mNMaxSingleSliceTracks = 0; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSlice].NTracks(); - mNTotalSliceTracks += ntrk; - mNClusters += *mRec->GetConstantMem().tpcTrackers[iSlice].NTrackHits(); - if (mNMaxSingleSliceTracks < ntrk) { - mNMaxSingleSliceTracks = ntrk; + mNMaxSingleSectorTracks = 0; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + uint32_t ntrk = *mRec->GetConstantMem().tpcTrackers[iSector].NTracks(); + mNTotalSectorTracks += ntrk; + mNClusters += *mRec->GetConstantMem().tpcTrackers[iSector].NTrackHits(); + if (mNMaxSingleSectorTracks < ntrk) { + mNMaxSingleSectorTracks = ntrk; } } mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * 
gpu_common_constants::kCLight)) { - mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSliceTracks, mNTotalSliceTracks); + mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); } else { - mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSliceTracks); + mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks); } if (io.clustersNative) { mNMaxClusters = io.clustersNative->nClustersTotal; - } else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking) { + } else if (mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking) { mNMaxClusters = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { mNMaxClusters += mRec->GetConstantMem().tpcTrackers[i].NHitsTotal(); } } else { @@ -392,15 +392,15 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxLooperMatches = mNMaxClusters / 4; // We have that much scratch memory anyway } -int32_t GPUTPCGMMerger::CheckSlices() +int32_t GPUTPCGMMerger::CheckSectors() { - for (int32_t i = 0; i < NSLICES; i++) { - if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSliceTracks) { - throw std::runtime_error("mNMaxSingleSliceTracks too small"); + for (int32_t i = 0; i < NSECTORS; i++) { + if (mRec->GetConstantMem().tpcTrackers[i].CommonMemory()->nLocalTracks > (int32_t)mNMaxSingleSectorTracks) { + throw std::runtime_error("mNMaxSingleSectorTracks too small"); } } - if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { - throw std::runtime_error("Must run also slice tracking"); + if (!(mRec->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { + throw std::runtime_error("Must run also sector tracking"); } return 0; } @@ -409,13 +409,13 @@ int32_t GPUTPCGMMerger::CheckSlices() GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output) { - const int32_t n = output ? 
mMemory->nOutputTracks : SliceTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) { mTrackLinks[i] = -1; } } -GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, const GPUTPCTrack* inTrack, float alpha, int32_t slice) +GPUd() int32_t GPUTPCGMMerger::RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector) { GPUTPCGMPropagator prop; prop.SetMaterialTPC(); @@ -431,9 +431,9 @@ GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, c trk.SinPhi() = inTrack->Param().GetSinPhi(); trk.DzDs() = inTrack->Param().GetDzDs(); trk.QPt() = inTrack->Param().GetQPt(); - trk.TZOffset() = Param().par.earlyTpcTransform ? inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(slice, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); - trk.ShiftZ(this, slice, sliceTrack.ClusterZT0(), sliceTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead - sliceTrack.SetX2(0.f); + trk.TZOffset() = Param().par.earlyTpcTransform ? 
inTrack->Param().GetZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, inTrack->Param().GetZOffset(), Param().continuousMaxTimeBin); + trk.ShiftZ(this, sector, sectorTrack.ClusterZT0(), sectorTrack.ClusterZTN(), inTrack->Param().GetX(), inTrack->Param().GetX()); // We do not store the inner / outer cluster X, so we just use the track X instead + sectorTrack.SetX2(0.f); for (int32_t way = 0; way < 2; way++) { if (way) { prop.SetFitInProjections(true); @@ -447,74 +447,74 @@ GPUd() int32_t GPUTPCGMMerger::RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, c for (int32_t i = start; i != end; i += incr) { float x, y, z; int32_t row, flags; - const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[slice]; + const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sector]; const GPUTPCHitId& ic = tracker.TrackHits()[inTrack->FirstHitID() + i]; int32_t clusterIndex = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()); row = ic.RowIndex(); - const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[slice][0] + clusterIndex]; + const ClusterNative& cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sector][0] + clusterIndex]; flags = cl.getFlags(); if (Param().par.earlyTpcTransform) { x = tracker.Data().ClusterData()[clusterIndex].x; y = tracker.Data().ClusterData()[clusterIndex].y; z = tracker.Data().ClusterData()[clusterIndex].z - trk.TZOffset(); } else { - GetConstantMem()->calibObjects.fastTransformHelper->Transform(slice, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); + GetConstantMem()->calibObjects.fastTransformHelper->Transform(sector, row, cl.getPad(), cl.getTime(), x, y, z, trk.TZOffset()); } if (prop.PropagateToXAlpha(x, alpha, true)) { return way == 0; } trk.ConstrainSinPhi(); - if (prop.Update(y, z, row, Param(), 
flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, slice, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge + if (prop.Update(y, z, row, Param(), flags & GPUTPCGMMergedTrackHit::clustererAndSharedFlags, 0, nullptr, false, sector, -1.f, 0.f, 0.f)) { // TODO: Use correct time / avgCharge return way == 0; } trk.ConstrainSinPhi(); } if (way) { - sliceTrack.SetParam2(trk); + sectorTrack.SetParam2(trk); } else { - sliceTrack.Set(trk, inTrack, alpha, slice); + sectorTrack.Set(trk, inTrack, alpha, sector); } } return 0; } -GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSliceTrack& track, int32_t iSlice, const GPUTPCTrack* sliceTr) +GPUd() void GPUTPCGMMerger::SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; - const GPUTPCHitId& ic1 = trk.TrackHits()[sliceTr->FirstHitID()]; - const GPUTPCHitId& ic2 = trk.TrackHits()[sliceTr->FirstHitID() + sliceTr->NHits() - 1]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; + const GPUTPCHitId& ic1 = trk.TrackHits()[sectorTr->FirstHitID()]; + const GPUTPCHitId& ic2 = trk.TrackHits()[sectorTr->FirstHitID() + sectorTr->NHits() - 1]; int32_t clusterIndex1 = trk.Data().ClusterDataIndex(trk.Data().Row(ic1.RowIndex()), ic1.HitIndex()); int32_t clusterIndex2 = trk.Data().ClusterDataIndex(trk.Data().Row(ic2.RowIndex()), ic2.HitIndex()); if (Param().par.earlyTpcTransform) { track.SetClusterZT(trk.Data().ClusterData()[clusterIndex1].z, trk.Data().ClusterData()[clusterIndex2].z); } else { - const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; + const ClusterNative* cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0]; track.SetClusterZT(cl[clusterIndex1].getTime(), cl[clusterIndex2].getTime()); } } GPUd() 
void GPUTPCGMMerger::UnpackSaveNumber(int32_t id) { - mSliceTrackInfoIndex[id] = mMemory->nUnpackedTracks; + mSectorTrackInfoIndex[id] = mMemory->nUnpackedTracks; } -GPUd() void GPUTPCGMMerger::UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice) +GPUd() void GPUTPCGMMerger::UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; - float alpha = Param().Alpha(iSlice); - const GPUTPCTrack* sliceTr = mMemory->firstExtrapolatedTracks[iSlice]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; + float alpha = Param().Alpha(iSector); + const GPUTPCTrack* sectorTr = mMemory->firstExtrapolatedTracks[iSector]; uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; uint32_t nTracks = *trk.NTracks(); for (uint32_t itr = nLocalTracks + iBlock * nThreads + iThread; itr < nTracks; itr += nBlocks * nThreads) { - sliceTr = &trk.Tracks()[itr]; - int32_t localId = mTrackIDs[(sliceTr->LocalTrackId() >> 24) * mNMaxSingleSliceTracks + (sliceTr->LocalTrackId() & 0xFFFFFF)]; + sectorTr = &trk.Tracks()[itr]; + int32_t localId = mTrackIDs[(sectorTr->LocalTrackId() >> 24) * mNMaxSingleSectorTracks + (sectorTr->LocalTrackId() & 0xFFFFFF)]; if (localId == -1) { continue; } uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u); - GPUTPCGMSliceTrack& track = mSliceTrackInfos[myTrack]; - SetTrackClusterZT(track, iSlice, sliceTr); - track.Set(this, sliceTr, alpha, iSlice); + GPUTPCGMSectorTrack& track = mSectorTrackInfos[myTrack]; + SetTrackClusterZT(track, iSector, sectorTr); + track.Set(this, sectorTr, alpha, iSector); track.SetGlobalSectorTrackCov(); track.SetPrevNeighbour(-1); track.SetNextNeighbour(-1); @@ -524,45 +524,45 @@ GPUd() void GPUTPCGMMerger::UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, } } -GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, 
int32_t iBlock, int32_t iThread, int32_t iSlice) +GPUd() void GPUTPCGMMerger::UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; for (uint32_t i = iBlock * nThreads + iThread; i < nLocalTracks; i += nBlocks * nThreads) { - mTrackIDs[iSlice * mNMaxSingleSliceTracks + i] = -1; + mTrackIDs[iSector * mNMaxSingleSectorTracks + i] = -1; } } -GPUd() void GPUTPCGMMerger::RefitSliceTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice) +GPUd() void GPUTPCGMMerger::RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[iSector]; uint32_t nLocalTracks = trk.CommonMemory()->nLocalTracks; - float alpha = Param().Alpha(iSlice); - const GPUTPCTrack* sliceTr = nullptr; + float alpha = Param().Alpha(iSector); + const GPUTPCTrack* sectorTr = nullptr; for (uint32_t itr = iBlock * nThreads + iThread; itr < nLocalTracks; itr += nBlocks * nThreads) { - sliceTr = &trk.Tracks()[itr]; - GPUTPCGMSliceTrack track; - SetTrackClusterZT(track, iSlice, sliceTr); + sectorTr = &trk.Tracks()[itr]; + GPUTPCGMSectorTrack track; + SetTrackClusterZT(track, iSector, sectorTr); if (Param().rec.tpc.mergerCovSource == 0) { - track.Set(this, sliceTr, alpha, iSlice); - if (!track.FilterErrors(this, iSlice, GPUCA_MAX_SIN_PHI, 0.1f)) { + track.Set(this, sectorTr, alpha, iSector); + if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) { continue; } } else if (Param().rec.tpc.mergerCovSource == 1) { - track.Set(this, sliceTr, alpha, iSlice); + track.Set(this, sectorTr, alpha, iSector); track.CopyBaseTrackCov(); } else if 
(Param().rec.tpc.mergerCovSource == 2) { - if (RefitSliceTrack(track, sliceTr, alpha, iSlice)) { - track.Set(this, sliceTr, alpha, iSlice); // TODO: Why does the refit fail, it shouldn't, this workaround should be removed - if (!track.FilterErrors(this, iSlice, GPUCA_MAX_SIN_PHI, 0.1f)) { + if (RefitSectorTrack(track, sectorTr, alpha, iSector)) { + track.Set(this, sectorTr, alpha, iSector); // TODO: Why does the refit fail, it shouldn't, this workaround should be removed + if (!track.FilterErrors(this, iSector, GPUCA_MAX_SIN_PHI, 0.1f)) { continue; } } } - CADEBUG(GPUInfo("INPUT Slice %d, Track %u, QPt %f DzDs %f", iSlice, itr, track.QPt(), track.DzDs())); + CADEBUG(GPUInfo("INPUT Sector %d, Track %u, QPt %f DzDs %f", iSector, itr, track.QPt(), track.DzDs())); track.SetPrevNeighbour(-1); track.SetNextNeighbour(-1); track.SetNextSegmentNeighbour(-1); @@ -570,25 +570,25 @@ GPUd() void GPUTPCGMMerger::RefitSliceTracks(int32_t nBlocks, int32_t nThreads, track.SetExtrapolatedTrackId(0, -1); track.SetExtrapolatedTrackId(1, -1); uint32_t myTrack = CAMath::AtomicAdd(&mMemory->nUnpackedTracks, 1u); - mTrackIDs[iSlice * mNMaxSingleSliceTracks + sliceTr->LocalTrackId()] = myTrack; - mSliceTrackInfos[myTrack] = track; + mTrackIDs[iSector * mNMaxSingleSectorTracks + sectorTr->LocalTrackId()] = myTrack; + mSectorTrackInfos[myTrack] = track; } } GPUd() void GPUTPCGMMerger::LinkExtrapolatedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (int32_t itr = SliceTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SliceTrackInfoGlobalLast(NSLICES - 1); itr += nThreads * nBlocks) { - GPUTPCGMSliceTrack& extrapolatedTrack = mSliceTrackInfos[itr]; - GPUTPCGMSliceTrack& localTrack = mSliceTrackInfos[extrapolatedTrack.LocalTrackId()]; + for (int32_t itr = SectorTrackInfoGlobalFirst(0) + iBlock * nThreads + iThread; itr < SectorTrackInfoGlobalLast(NSECTORS - 1); itr += nThreads * nBlocks) { + GPUTPCGMSectorTrack& extrapolatedTrack = 
mSectorTrackInfos[itr]; + GPUTPCGMSectorTrack& localTrack = mSectorTrackInfos[extrapolatedTrack.LocalTrackId()]; if (localTrack.ExtrapolatedTrackId(0) != -1 || !CAMath::AtomicCAS(&localTrack.ExtrapolatedTrackIds()[0], -1, itr)) { localTrack.SetExtrapolatedTrackId(1, itr); } } } -GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam) +GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam) { - //* prepare slice tracks for merging with next/previous/same sector + //* prepare sector tracks for merging with next/previous/same sector //* each track transported to the border line float fieldBz = Param().bzCLight; @@ -614,21 +614,21 @@ GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nTh float cosAlpha = CAMath::Cos(dAlpha); float sinAlpha = CAMath::Sin(dAlpha); - GPUTPCGMSliceTrack trackTmp; - for (int32_t itr = iBlock * nThreads + iThread; itr < SliceTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - const GPUTPCGMSliceTrack* track = &mSliceTrackInfos[itr]; - int32_t iSlice = track->Slice(); + GPUTPCGMSectorTrack trackTmp; + for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { + const GPUTPCGMSectorTrack* track = &mSectorTrackInfos[itr]; + int32_t iSector = track->Sector(); - if (track->PrevSegmentNeighbour() >= 0 && track->Slice() == mSliceTrackInfos[track->PrevSegmentNeighbour()].Slice()) { + if (track->PrevSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->PrevSegmentNeighbour()].Sector()) { continue; } - if (useOrigTrackParam) { // TODO: Check how far this makes sense with slice track refit + if (useOrigTrackParam) { // TODO: Check how far this makes 
sense with sector track refit if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperQPtB5Limit) { continue; } - const GPUTPCGMSliceTrack* trackMin = track; - while (track->NextSegmentNeighbour() >= 0 && track->Slice() == mSliceTrackInfos[track->NextSegmentNeighbour()].Slice()) { - track = &mSliceTrackInfos[track->NextSegmentNeighbour()]; + const GPUTPCGMSectorTrack* trackMin = track; + while (track->NextSegmentNeighbour() >= 0 && track->Sector() == mSectorTrackInfos[track->NextSegmentNeighbour()].Sector()) { + track = &mSectorTrackInfos[track->NextSegmentNeighbour()]; if (track->OrigTrack()->Param().X() < trackMin->OrigTrack()->Param().X()) { trackMin = track; } @@ -638,7 +638,7 @@ GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nTh if (Param().rec.tpc.mergerCovSource == 2 && trackTmp.X2() != 0.f) { trackTmp.UseParam2(); } else { - trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Slice()); + trackTmp.Set(this, trackMin->OrigTrack(), trackMin->Alpha(), trackMin->Sector()); } } else { if (CAMath::Abs(track->QPt()) * Param().qptB5Scaler < Param().rec.tpc.mergerLooperSecondHorizontalQPtB5Limit) { @@ -663,19 +663,19 @@ GPUd() void GPUTPCGMMerger::MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nTh if (CAMath::Abs(b.Cov()[4]) >= 0.5f) { b.SetCov(4, 0.5f); } - uint32_t myTrack = CAMath::AtomicAdd(&nB[iSlice], 1u); - B[iSlice][myTrack] = b; + uint32_t myTrack = CAMath::AtomicAdd(&nB[iSector], 1u); + B[iSector][myTrack] = b; } } } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, 
int32_t N2, int32_t mergeMode) { - CADEBUG(GPUInfo("\nMERGING Slices %d %d NTracks %d %d CROSS %d", iSlice1, iSlice2, N1, N2, mergeMode)); - GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); - bool sameSlice = (iSlice1 == iSlice2); + CADEBUG(GPUInfo("\nMERGING Sectors %d %d NTracks %d %d CROSS %d", iSector1, iSector2, N1, N2, mergeMode)); + GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; + GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks(); + bool sameSector = (iSector1 == iSector2); for (int32_t itr = iBlock * nThreads + iThread; itr < N1; itr += nThreads * nBlocks) { GPUTPCGMBorderTrack& b = B1[itr]; float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1])); @@ -684,17 +684,17 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea } else if (d > 3) { d = 3; } - CADEBUG(printf(" Input Slice 1 %d Track %d: ", iSlice1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); + CADEBUG(printf(" Input Sector 1 %d Track %d: ", iSector1, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); GPUTPCGMBorderRange range; range.fId = itr; range.fMin = b.Par()[1] + b.ZOffsetLinear() - d; range.fMax = b.Par()[1] + b.ZOffsetLinear() + d; range1[itr] = range; - if (sameSlice) { + if (sameSector) { range2[itr] = range; } } - if (!sameSlice) { + if (!sameSector) { for (int32_t itr = iBlock * nThreads + iThread; itr < N2; itr += nThreads * nBlocks) { GPUTPCGMBorderTrack& b = B2[itr]; float d = CAMath::Max(0.5f, 3.5f * CAMath::Sqrt(b.Cov()[1])); @@ -703,7 +703,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t 
nThrea } else if (d > 3) { d = 3; } - CADEBUG(printf(" Input Slice 2 %d Track %d: ", iSlice2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); + CADEBUG(printf(" Input Sector 2 %d Track %d: ", iSector2, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf(" - D %8.3f\n", d)); GPUTPCGMBorderRange range; range.fId = itr; range.fMin = b.Par()[1] + b.ZOffsetLinear() - d; @@ -714,11 +714,11 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThrea } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) { #if !defined(GPUCA_GPUCODE_COMPILEKERNELS) - GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); + GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; + GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks(); if (iThread == 0) { if (iBlock == 0) { @@ -790,7 +790,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThrea } template <> -GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) +GPUd() 
void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode) { // int32_t statAll = 0, statMerged = 0; float factor2ys = Param().rec.tpc.trackMergerFactor2YS; @@ -805,10 +805,10 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea int32_t minNPartHits = Param().rec.tpc.trackMergerMinPartHits; int32_t minNTotalHits = Param().rec.tpc.trackMergerMinTotalHits; - bool sameSlice = (iSlice1 == iSlice2); + bool sameSector = (iSector1 == iSector2); - GPUTPCGMBorderRange* range1 = mBorderRange[iSlice1]; - GPUTPCGMBorderRange* range2 = mBorderRange[iSlice2] + *GetConstantMem()->tpcTrackers[iSlice2].NTracks(); + GPUTPCGMBorderRange* range1 = mBorderRange[iSector1]; + GPUTPCGMBorderRange* range2 = mBorderRange[iSector2] + *GetConstantMem()->tpcTrackers[iSector2].NTracks(); int32_t i2 = 0; for (int32_t i1 = iBlock * nThreads + iThread; i1 < N1; i1 += nThreads * nBlocks) { @@ -829,7 +829,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea if (r2.fMin > r1.fMax) { break; } - if (sameSlice && (r1.fId >= r2.fId)) { + if (sameSector && (r1.fId >= r2.fId)) { continue; } // do check @@ -857,7 +857,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea } } - GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksAll, b1.TrackID(), weight)) { MergedTrackStreamer(b1, b2, "merge_all_tracks", iSlice1, iSlice2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksAll)); }); + GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksAll, b1.TrackID(), weight)) { 
MergedTrackStreamer(b1, b2, "merge_all_tracks", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksAll)); }); if (!b1.CheckChi2Y(b2, factor2ys)) { CADEBUG2(continue, printf("!Y\n")); @@ -891,7 +891,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea if (iBest2 < 0) { continue; } - GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSlice1, iSlice2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); }); + GPUCA_DEBUG_STREAMER_CHECK(float weight = b1.Par()[4] * b1.Par()[4]; if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamMergeBorderTracksBest, b1.TrackID(), weight)) { MergedTrackStreamer(b1, MergedTrackStreamerFindBorderTrack(B2, N2, iBest2), "merge_best_track", iSector1, iSector2, mergeMode, weight, o2::utils::DebugStreamer::getSamplingFrequency(o2::utils::StreamFlags::streamMergeBorderTracksBest)); }); // statMerged++; @@ -906,77 +906,77 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea #endif } } - // GPUInfo("STAT: slices %d, %d: all %d merged %d", iSlice1, iSlice2, statAll, statMerged); + // GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged); } -GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSlice, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) const +GPUdii() void GPUTPCGMMerger::MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) 
const { - if (withinSlice == 1) { // Merge tracks within the same slice - jSlice = iSlice; - n1 = n2 = mMemory->tmpCounter[iSlice]; - b1 = b2 = mBorder[iSlice]; - } else if (withinSlice == -1) { // Merge tracks accross the central electrode - jSlice = (iSlice + NSLICES / 2); - const int32_t offset = mergeMode == 2 ? NSLICES : 0; - n1 = mMemory->tmpCounter[iSlice + offset]; - n2 = mMemory->tmpCounter[jSlice + offset]; - b1 = mBorder[iSlice + offset]; - b2 = mBorder[jSlice + offset]; - } else { // Merge tracks of adjacent slices - jSlice = mNextSliceInd[iSlice]; - n1 = mMemory->tmpCounter[iSlice]; - n2 = mMemory->tmpCounter[NSLICES + jSlice]; - b1 = mBorder[iSlice]; - b2 = mBorder[NSLICES + jSlice]; + if (withinSector == 1) { // Merge tracks within the same sector + jSector = iSector; + n1 = n2 = mMemory->tmpCounter[iSector]; + b1 = b2 = mBorder[iSector]; + } else if (withinSector == -1) { // Merge tracks accross the central electrode + jSector = (iSector + NSECTORS / 2); + const int32_t offset = mergeMode == 2 ? 
NSECTORS : 0; + n1 = mMemory->tmpCounter[iSector + offset]; + n2 = mMemory->tmpCounter[jSector + offset]; + b1 = mBorder[iSector + offset]; + b2 = mBorder[jSector + offset]; + } else { // Merge tracks of adjacent sectors + jSector = mNextSectorInd[iSector]; + n1 = mMemory->tmpCounter[iSector]; + n2 = mMemory->tmpCounter[NSECTORS + jSector]; + b1 = mBorder[iSector]; + b2 = mBorder[NSECTORS + jSector]; } } template -GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) +GPUd() void GPUTPCGMMerger::MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode) { int32_t n1, n2; GPUTPCGMBorderTrack *b1, *b2; - int32_t jSlice; - MergeBorderTracksSetup(n1, n2, b1, b2, jSlice, iSlice, withinSlice, mergeMode); - MergeBorderTracks(nBlocks, nThreads, iBlock, iThread, iSlice, b1, n1, jSlice, b2, n2, mergeMode); + int32_t jSector; + MergeBorderTracksSetup(n1, n2, b1, b2, jSector, iSector, withinSector, mergeMode); + MergeBorderTracks(nBlocks, nThreads, iBlock, iThread, iSector, b1, n1, jSector, b2, n2, mergeMode); } #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code. 
-template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); -template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); -template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); +template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); +template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); +template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); #endif -GPUd() void GPUTPCGMMerger::MergeWithinSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) +GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { float x0 = Param().tpcGeometry.Row2X(63); const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); - for (int32_t itr = iBlock * nThreads + iThread; itr < SliceTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSliceTrack& track = mSliceTrackInfos[itr]; - int32_t iSlice = track.Slice(); + for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { + GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; + int32_t iSector = track.Sector(); GPUTPCGMBorderTrack b; if (track.TransportToX(this, x0, Param().bzCLight, b, maxSin)) { b.SetTrackID(itr); - 
CADEBUG(printf("WITHIN SLICE %d Track %d - ", iSlice, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n")); + CADEBUG(printf("WITHIN SECTOR %d Track %d - ", iSector, itr); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Par()[i]); } printf(" - "); for (int32_t i = 0; i < 5; i++) { printf("%8.3f ", b.Cov()[i]); } printf("\n")); b.SetNClusters(track.NClusters()); - uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSlice], 1u); - mBorder[iSlice][myTrack] = b; + uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[iSector], 1u); + mBorder[iSector][myTrack] = b; } } } -GPUd() void GPUTPCGMMerger::MergeSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam) +GPUd() void GPUTPCGMMerger::MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam) { bool part2 = iBlock & 1; int32_t border = part2 ? 
border1 : border0; GPUAtomic(uint32_t)* n = mMemory->tmpCounter; GPUTPCGMBorderTrack** b = mBorder; if (part2) { - n += NSLICES; - b += NSLICES; + n += NSECTORS; + b += NSECTORS; } - MergeSlicesPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam); + MergeSectorsPrepareStep2((nBlocks + !part2) >> 1, nThreads, iBlock >> 1, iThread, border, b, n, useOrigTrackParam); } GPUdi() void GPUTPCGMMerger::setBlockRange(int32_t elems, int32_t nBlocks, int32_t iBlock, int32_t& start, int32_t& end) @@ -1013,7 +1013,7 @@ GPUd() void GPUTPCGMMerger::hookEdge(int32_t u, int32_t v) GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsSetup(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); for (int32_t i = start + iThread; i < end; i += nThreads) { mTrackCCRoots[i] = i; } @@ -1024,7 +1024,7 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookLinks(int32_t nBlo // Compute connected components in parallel, step 1. 
// Source: Adaptive Work-Efficient Connected Components on the GPU, Sutton et al, 2016 (https://arxiv.org/pdf/1612.01178.pdf) int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); for (int32_t itr = start + iThread; itr < end; itr += nThreads) { hookEdge(itr, mTrackLinks[itr]); } @@ -1039,12 +1039,12 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsHookNeighbors(int32_t } int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks / 4, iBlock / 4, start, end); int32_t myNeighbor = iBlock % 4; for (int32_t itr = start + iThread; itr < end; itr += nThreads) { - int32_t v = mSliceTrackInfos[itr].AnyNeighbour(myNeighbor); + int32_t v = mSectorTrackInfos[itr].AnyNeighbour(myNeighbor); hookEdge(itr, v); } } @@ -1053,7 +1053,7 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlo { // Compute connected components in parallel, step 2. int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); for (int32_t itr = start + iThread; itr < end; itr += nThreads) { int32_t root = itr; int32_t next = mTrackCCRoots[root]; @@ -1068,7 +1068,7 @@ GPUd() void GPUTPCGMMerger::ResolveFindConnectedComponentsMultiJump(int32_t nBlo } } -GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll) +GPUd() void GPUTPCGMMerger::ResolveMergeSectors(GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll) { if (!mergeAll) { /*int32_t neighborType = useOrigTrackParam ? 
1 : 0; @@ -1079,7 +1079,7 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int if (neighborType) old1 = newTrack2.PrevNeighbour(1); if ( old1 >= 0 ) { - GPUTPCGMSliceTrack &oldTrack1 = mSliceTrackInfos[old1]; + GPUTPCGMSectorTrack &oldTrack1 = mSectorTrackInfos[old1]; if ( oldTrack1.NClusters() < newTrack1.NClusters() ) { newTrack2.SetPrevNeighbour( -1, neighborType ); oldTrack1.SetNextNeighbour( -1, neighborType ); @@ -1090,7 +1090,7 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int if (neighborType) old2 = newTrack1.NextNeighbour(1); if ( old2 >= 0 ) { - GPUTPCGMSliceTrack &oldTrack2 = mSliceTrackInfos[old2]; + GPUTPCGMSectorTrack &oldTrack2 = mSectorTrackInfos[old2]; if ( oldTrack2.NClusters() < newTrack2.NClusters() ) { oldTrack2.SetPrevNeighbour( -1, neighborType ); @@ -1101,26 +1101,26 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int } int32_t start, end; - setBlockRange(SliceTrackInfoLocalTotal(), nBlocks, iBlock, start, end); + setBlockRange(SectorTrackInfoLocalTotal(), nBlocks, iBlock, start, end); - for (int32_t baseIdx = 0; baseIdx < SliceTrackInfoLocalTotal(); baseIdx += nThreads) { + for (int32_t baseIdx = 0; baseIdx < SectorTrackInfoLocalTotal(); baseIdx += nThreads) { int32_t itr = baseIdx + iThread; - bool inRange = itr < SliceTrackInfoLocalTotal(); + bool inRange = itr < SectorTrackInfoLocalTotal(); int32_t itr2 = -1; if (inRange) { itr2 = mTrackLinks[itr]; } - bool resolveSlice = (itr2 > -1); - if (resolveSlice) { + bool resolveSector = (itr2 > -1); + if (resolveSector) { int32_t root = mTrackCCRoots[itr]; - resolveSlice &= (start <= root) && (root < end); + resolveSector &= (start <= root) && (root < end); } - int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSlice)); + int16_t smemIdx = work_group_scan_inclusive_add(int16_t(resolveSector)); - if (resolveSlice) { + if (resolveSector) { smem.iTrack1[smemIdx - 1] = itr; 
smem.iTrack2[smemIdx - 1] = itr2; } @@ -1130,16 +1130,16 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int continue; } - const int32_t nSlices = smemIdx; + const int32_t nSectors = smemIdx; - for (int32_t i = 0; i < nSlices; i++) { + for (int32_t i = 0; i < nSectors; i++) { itr = smem.iTrack1[i]; itr2 = smem.iTrack2[i]; - GPUTPCGMSliceTrack* track1 = &mSliceTrackInfos[itr]; - GPUTPCGMSliceTrack* track2 = &mSliceTrackInfos[itr2]; - GPUTPCGMSliceTrack* track1Base = track1; - GPUTPCGMSliceTrack* track2Base = track2; + GPUTPCGMSectorTrack* track1 = &mSectorTrackInfos[itr]; + GPUTPCGMSectorTrack* track2 = &mSectorTrackInfos[itr2]; + GPUTPCGMSectorTrack* track1Base = track1; + GPUTPCGMSectorTrack* track2Base = track2; bool sameSegment = CAMath::Abs(track1->NClusters() > track2->NClusters() ? track1->QPt() : track2->QPt()) * Param().qptB5Scaler < 2 || track1->QPt() * track2->QPt() > 0; // GPUInfo("\nMerge %d with %d - same segment %d", itr, itr2, (int32_t) sameSegment); @@ -1147,23 +1147,23 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int // PrintMergeGraph(track2, std::cout); while (track2->PrevSegmentNeighbour() >= 0) { - track2 = &mSliceTrackInfos[track2->PrevSegmentNeighbour()]; + track2 = &mSectorTrackInfos[track2->PrevSegmentNeighbour()]; } if (sameSegment) { if (track1 == track2) { continue; } while (track1->PrevSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->PrevSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()]; if (track1 == track2) { goto NextTrack; } } GPUCommonAlgorithm::swap(track1, track1Base); for (int32_t k = 0; k < 2; k++) { - GPUTPCGMSliceTrack* tmp = track1Base; + GPUTPCGMSectorTrack* tmp = track1Base; while (tmp->Neighbour(k) >= 0) { - tmp = &mSliceTrackInfos[tmp->Neighbour(k)]; + tmp = &mSectorTrackInfos[tmp->Neighbour(k)]; if (tmp == track2) { goto NextTrack; } @@ -1171,23 +1171,23 @@ GPUd() void 
GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int } while (track1->NextSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->NextSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()]; if (track1 == track2) { goto NextTrack; } } } else { while (track1->PrevSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->PrevSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->PrevSegmentNeighbour()]; } if (track1 == track2) { continue; } for (int32_t k = 0; k < 2; k++) { - GPUTPCGMSliceTrack* tmp = track1; + GPUTPCGMSectorTrack* tmp = track1; while (tmp->Neighbour(k) >= 0) { - tmp = &mSliceTrackInfos[tmp->Neighbour(k)]; + tmp = &mSectorTrackInfos[tmp->Neighbour(k)]; if (tmp == track2) { goto NextTrack; } @@ -1210,18 +1210,18 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int bool goUp = z2max - z1min > z1max - z2min; if (track1->Neighbour(goUp) < 0 && track2->Neighbour(!goUp) < 0) { - track1->SetNeighbor(track2 - mSliceTrackInfos, goUp); - track2->SetNeighbor(track1 - mSliceTrackInfos, !goUp); + track1->SetNeighbor(track2 - mSectorTrackInfos, goUp); + track2->SetNeighbor(track1 - mSectorTrackInfos, !goUp); // GPUInfo("Result (simple neighbor)"); // PrintMergeGraph(track1, std::cout); continue; } else if (track1->Neighbour(goUp) < 0) { - track2 = &mSliceTrackInfos[track2->Neighbour(!goUp)]; + track2 = &mSectorTrackInfos[track2->Neighbour(!goUp)]; GPUCommonAlgorithm::swap(track1, track2); } else if (track2->Neighbour(!goUp) < 0) { - track1 = &mSliceTrackInfos[track1->Neighbour(goUp)]; + track1 = &mSectorTrackInfos[track1->Neighbour(goUp)]; } else { // Both would work, but we use the simpler one - track1 = &mSliceTrackInfos[track1->Neighbour(goUp)]; + track1 = &mSectorTrackInfos[track1->Neighbour(goUp)]; } track1Base = track1; } @@ -1229,11 +1229,11 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int track2Base = track2; if (!sameSegment) { 
while (track1->NextSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->NextSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()]; } } - track1->SetNextSegmentNeighbour(track2 - mSliceTrackInfos); - track2->SetPrevSegmentNeighbour(track1 - mSliceTrackInfos); + track1->SetNextSegmentNeighbour(track2 - mSectorTrackInfos); + track2->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos); // k = 0: Merge right side // k = 1: Merge left side for (int32_t k = 0; k < 2; k++) { @@ -1241,23 +1241,23 @@ GPUd() void GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int track2 = track2Base; while (track2->Neighbour(k) >= 0) { if (track1->Neighbour(k) >= 0) { - GPUTPCGMSliceTrack* track1new = &mSliceTrackInfos[track1->Neighbour(k)]; - GPUTPCGMSliceTrack* track2new = &mSliceTrackInfos[track2->Neighbour(k)]; + GPUTPCGMSectorTrack* track1new = &mSectorTrackInfos[track1->Neighbour(k)]; + GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)]; track2->SetNeighbor(-1, k); track2new->SetNeighbor(-1, k ^ 1); track1 = track1new; while (track1->NextSegmentNeighbour() >= 0) { - track1 = &mSliceTrackInfos[track1->NextSegmentNeighbour()]; + track1 = &mSectorTrackInfos[track1->NextSegmentNeighbour()]; } - track1->SetNextSegmentNeighbour(track2new - mSliceTrackInfos); - track2new->SetPrevSegmentNeighbour(track1 - mSliceTrackInfos); + track1->SetNextSegmentNeighbour(track2new - mSectorTrackInfos); + track2new->SetPrevSegmentNeighbour(track1 - mSectorTrackInfos); track1 = track1new; track2 = track2new; } else { - GPUTPCGMSliceTrack* track2new = &mSliceTrackInfos[track2->Neighbour(k)]; + GPUTPCGMSectorTrack* track2new = &mSectorTrackInfos[track2->Neighbour(k)]; track1->SetNeighbor(track2->Neighbour(k), k); track2->SetNeighbor(-1, k); - track2new->SetNeighbor(track1 - mSliceTrackInfos, k ^ 1); + track2new->SetNeighbor(track1 - mSectorTrackInfos, k ^ 1); } } } @@ -1268,7 +1268,7 @@ GPUd() void 
GPUTPCGMMerger::ResolveMergeSlices(GPUResolveSharedMemory& smem, int } } -GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSliceTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr) +GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr) { if (Param().rec.tpc.mergerCERowLimit > 0 && CAMath::Abs(track->QPt()) * Param().qptB5Scaler < 0.3f && (cls.row < Param().rec.tpc.mergerCERowLimit || cls.row >= GPUCA_ROW_COUNT - Param().rec.tpc.mergerCERowLimit)) { return; @@ -1280,13 +1280,13 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSliceTrack* track, const G } else { float x, y; auto& cln = mConstantMem->ioPtrs.clustersNative->clustersLinear[cls.num]; - GPUTPCConvertImpl::convert(*mConstantMem, cls.slice, cls.row, cln.getPad(), cln.getTime(), x, y, z); + GPUTPCConvertImpl::convert(*mConstantMem, cls.sector, cls.row, cln.getPad(), cln.getTime(), x, y, z); } if (!Param().par.continuousTracking && CAMath::Abs(z) > 10) { return; } - int32_t slice = track->Slice(); + int32_t sector = track->Sector(); for (int32_t attempt = 0; attempt < 2; attempt++) { GPUTPCGMBorderTrack b; const float x0 = Param().tpcGeometry.Row2X(attempt == 0 ? 
63 : cls.row); @@ -1301,7 +1301,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSliceTrack* track, const G b.SetZOffsetLinear(-b.ZOffsetLinear()); } b.SetRow(cls.row); - uint32_t id = slice + attempt * NSLICES; + uint32_t id = sector + attempt * NSECTORS; uint32_t myTrack = CAMath::AtomicAdd(&mMemory->tmpCounter[id], 1u); mBorder[id][myTrack] = b; break; @@ -1385,8 +1385,8 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i cls[mClusters[trk[1]->FirstClusterRef()].num].getTime(), cls[mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1].num].getTime(), &mClusters[trk[0]->FirstClusterRef()], &mClusters[trk[0]->FirstClusterRef() + trk[0]->NClusters() - 1], &mClusters[trk[1]->FirstClusterRef()], &mClusters[trk[1]->FirstClusterRef() + trk[1]->NClusters() - 1], clsmax); - const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->slice, clsmax->row, cls[clsmax->num].getPad()), 0.f); - trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSLICES / 2, trk[1]->Param().TZOffset() - offset); + const float offset = CAMath::Max(tmax - mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(clsmax->sector, clsmax->row, cls[clsmax->num].getPad()), 0.f); + trk[1]->Param().Z() += mConstantMem->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trk[1]->CSide() * NSECTORS / 2, trk[1]->Param().TZOffset() - offset); trk[1]->Param().TZOffset() = offset; } } @@ -1492,11 +1492,11 @@ struct GPUTPCGMMerger_CompareClusterIds { GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - GPUTPCGMSliceTrack* trackParts[kMaxParts]; + GPUTPCGMSectorTrack* trackParts[kMaxParts]; - for (int32_t itr = iBlock * nThreads + iThread; itr < SliceTrackInfoLocalTotal(); itr += nThreads * nBlocks) 
{ + for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSliceTrack& track = mSliceTrackInfos[itr]; + GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { continue; @@ -1507,7 +1507,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread int32_t nParts = 0; int32_t nHits = 0; int32_t leg = 0; - GPUTPCGMSliceTrack *trbase = &track, *tr = &track; + GPUTPCGMSectorTrack *trbase = &track, *tr = &track; tr->SetPrevSegmentNeighbour(1000000000); while (true) { if (nParts >= kMaxParts) { @@ -1525,23 +1525,23 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (nParts >= kMaxParts) { break; } - if (nHits + mSliceTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { + if (nHits + mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters() > kMaxClusters) { break; } - trackParts[nParts] = &mSliceTrackInfos[tr->ExtrapolatedTrackId(i)]; + trackParts[nParts] = &mSectorTrackInfos[tr->ExtrapolatedTrackId(i)]; trackParts[nParts++]->SetLeg(leg); - nHits += mSliceTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); + nHits += mSectorTrackInfos[tr->ExtrapolatedTrackId(i)].NClusters(); } } int32_t jtr = tr->NextSegmentNeighbour(); if (jtr >= 0) { - tr = &(mSliceTrackInfos[jtr]); + tr = &(mSectorTrackInfos[jtr]); tr->SetPrevSegmentNeighbour(1000000002); continue; } jtr = trbase->NextNeighbour(); if (jtr >= 0) { - trbase = &(mSliceTrackInfos[jtr]); + trbase = &(mSectorTrackInfos[jtr]); tr = trbase; if (tr->PrevSegmentNeighbour() >= 0) { break; @@ -1555,7 +1555,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread // unpack and sort clusters if (nParts > 1 && leg == 0) { - GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSliceTrack* a, const GPUTPCGMSliceTrack* b) { + GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const 
GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { #ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith if (a->X() != b->X()) { return (a->X() > b->X()); @@ -1581,15 +1581,15 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread trackCluster trackClusters[kMaxClusters]; nHits = 0; for (int32_t ipart = 0; ipart < nParts; ipart++) { - const GPUTPCGMSliceTrack* t = trackParts[ipart]; + const GPUTPCGMSectorTrack* t = trackParts[ipart]; CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs())); int32_t nTrackHits = t->NClusters(); trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; for (int32_t i = 0; i < nTrackHits; i++, c2--) { - const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Slice()]; + const GPUTPCTracker& trk = GetConstantMem()->tpcTrackers[t->Sector()]; const GPUTPCHitId& ic = trk.TrackHits()[t->OrigTrack()->FirstHitID() + i]; - uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Slice()][0]; - *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Slice(), t->Leg()}; + uint32_t id = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[t->Sector()][0]; + *c2 = trackCluster{id, (uint8_t)ic.RowIndex(), t->Sector(), t->Leg()}; } nHits += nTrackHits; } @@ -1654,7 +1654,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } GPUTPCGMMerger_CompareClusterIdsLooper::clcomparestruct clusterSort[kMaxClusters]; for (int32_t iPart = 0; iPart < nParts; iPart++) { - const GPUTPCGMSliceTrack* t = trackParts[iPart]; + const GPUTPCGMSectorTrack* t = trackParts[iPart]; int32_t nTrackHits = t->NClusters(); for (int32_t j = 0; j < nTrackHits; j++) { int32_t i = nTmpHits + j; @@ -1706,7 +1706,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, 
int32_t nThread for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { - const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].slice].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].slice].Data().ClusterIdOffset()]; + const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1723,7 +1723,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread cl[i].state = state & GPUTPCGMMergedTrackHit::clustererAndSharedFlags; // Only allow edge, deconvoluted, and shared flags cl[i].row = trackClusters[i].row; cl[i].num = trackClusters[i].id; - cl[i].slice = trackClusters[i].slice; + cl[i].sector = trackClusters[i].sector; cl[i].leg = trackClusters[i].leg; } @@ -1743,7 +1743,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetNClusters(nHits); mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster); GPUTPCGMTrackParam& p1 = mergedTrack.Param(); - const GPUTPCGMSliceTrack& p2 = *trackParts[firstTrackIndex]; + const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; @@ -2018,8 +2018,8 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { - const int32_t slice = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].slice; - const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? 
p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(slice, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100); + const int32_t sector = mClusters[trk.FirstClusterRef() + trk.NClusters() - 1].sector; + const float refz = p.GetZ() + (Param().par.earlyTpcTransform ? p.GetTZOffset() : GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, p.GetTZOffset(), Param().continuousMaxTimeBin)) + (trk.CSide() ? -100 : 100); float sinA, cosA; CAMath::SinCos(trk.GetAlpha(), sinA, cosA); float gx = cosA * p.GetX() - sinA * p.GetY(); @@ -2043,20 +2043,20 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, for (uint32_t k = 0;k < trk.NClusters();k++) { float xx, yy, zz; if (Param().par.earlyTpcTransform) { - const float zOffset = (mClusters[trk.FirstClusterRef() + k].slice < 18) == (mClusters[trk.FirstClusterRef() + 0].slice < 18) ? p.GetTZOffset() : -p.GetTZOffset(); + const float zOffset = (mClusters[trk.FirstClusterRef() + k].sector < 18) == (mClusters[trk.FirstClusterRef() + 0].sector < 18) ? 
p.GetTZOffset() : -p.GetTZOffset(); xx = mClustersXYZ[trk.FirstClusterRef() + k].x; yy = mClustersXYZ[trk.FirstClusterRef() + k].y; zz = mClustersXYZ[trk.FirstClusterRef() + k].z - zOffset; } else { const ClusterNative& GPUrestrict() cl = GetConstantMem()->ioPtrs.clustersNative->clustersLinear[mClusters[trk.FirstClusterRef() + k].num]; - GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].slice, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset()); + GetConstantMem()->calibObjects.fastTransformHelper->Transform(mClusters[trk.FirstClusterRef() + k].sector, mClusters[trk.FirstClusterRef() + k].row, cl.getPad(), cl.getTime(), xx, yy, zz, p.GetTZOffset()); } float sa2, ca2; - CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].slice), sa2, ca2); + CAMath::SinCos(Param().Alpha(mClusters[trk.FirstClusterRef() + k].sector), sa2, ca2); float cx = ca2 * xx - sa2 * yy; float cy = ca2 * yy + sa2 * xx; float dist = CAMath::Sqrt((cx - gmx) * (cx - gmx) + (cy - gmy) * (cy - gmy)); - printf("Hit %3d/%3d slice %d xy %f %f R %f\n", k, trk.NClusters(), (int32_t)mClusters[trk.FirstClusterRef() + k].slice, cx, cy, dist); + printf("Hit %3d/%3d sector %d xy %f %f R %f\n", k, trk.NClusters(), (int32_t)mClusters[trk.FirstClusterRef() + k].sector, cx, cy, dist); }*/ } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 7e309dcb79a9c..c04b3eb1a1703 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -19,7 +19,7 @@ #include "GPUTPCDef.h" #include "GPUTPCGMBorderTrack.h" #include "GPUTPCGMMergedTrack.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUCommonDef.h" #include "GPUProcessor.h" #include "GPUTPCGMMergerTypes.h" @@ -46,8 +46,8 @@ namespace o2 { namespace gpu { -class GPUTPCSliceTrack; -class GPUTPCSliceOutput; +class GPUTPCSectorTrack; +class 
GPUTPCSectorOutput; class GPUTPCGMTrackParam; class GPUTPCTracker; class GPUChainTracking; @@ -66,7 +66,7 @@ class GPUTPCGMMerger : public GPUProcessor ~GPUTPCGMMerger() = default; GPUTPCGMMerger(const GPUTPCGMMerger&) = delete; const GPUTPCGMMerger& operator=(const GPUTPCGMMerger&) const = delete; - static constexpr const int32_t NSLICES = GPUCA_NSLICES; //* N slices + static constexpr const int32_t NSECTORS = GPUCA_NSECTORS; //* N sectors struct memory { GPUAtomic(uint32_t) nRetryRefit; @@ -76,15 +76,15 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nOutputTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; - const GPUTPCTrack* firstExtrapolatedTracks[NSLICES]; - GPUAtomic(uint32_t) tmpCounter[2 * NSLICES]; + const GPUTPCTrack* firstExtrapolatedTracks[NSECTORS]; + GPUAtomic(uint32_t) tmpCounter[2 * NSECTORS]; GPUAtomic(uint32_t) nLooperMatchCandidates; }; struct trackCluster { uint32_t id; uint8_t row; - uint8_t slice; + uint8_t sector; uint8_t leg; }; @@ -141,8 +141,8 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() o2::MCCompLabel* OutputTracksTPCO2MC() { return mOutputTracksTPCO2MC; } GPUhdi() uint32_t NOutputTracksTPCO2() const { return mMemory->nO2Tracks; } GPUhdi() uint32_t NOutputClusRefsTPCO2() const { return mMemory->nO2ClusRefs; } - GPUhdi() GPUTPCGMSliceTrack* SliceTrackInfos() { return mSliceTrackInfos; } - GPUhdi() int32_t NMaxSingleSliceTracks() const { return mNMaxSingleSliceTracks; } + GPUhdi() GPUTPCGMSectorTrack* SectorTrackInfos() { return mSectorTrackInfos; } + GPUhdi() int32_t NMaxSingleSectorTracks() const { return mNMaxSingleSectorTracks; } GPUhdi() int32_t* TrackIDs() { return mTrackIDs; } GPUhdi() int32_t* TmpSortMemory() { return mTmpSortMemory; } @@ -154,21 +154,21 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() uint16_t MemoryResOutputO2MC() const { return mMemoryResOutputO2MC; } GPUd() uint16_t MemoryResOutputO2Scratch() const { return mMemoryResOutputO2Scratch; } - GPUd() 
int32_t RefitSliceTrack(GPUTPCGMSliceTrack& sliceTrack, const GPUTPCTrack* inTrack, float alpha, int32_t slice); - GPUd() void SetTrackClusterZT(GPUTPCGMSliceTrack& track, int32_t iSlice, const GPUTPCTrack* sliceTr); + GPUd() int32_t RefitSectorTrack(GPUTPCGMSectorTrack& sectorTrack, const GPUTPCTrack* inTrack, float alpha, int32_t sector); + GPUd() void SetTrackClusterZT(GPUTPCGMSectorTrack& track, int32_t iSector, const GPUTPCTrack* sectorTr); - int32_t CheckSlices(); - GPUd() void RefitSliceTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice); - GPUd() void UnpackSliceGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice); + int32_t CheckSectors(); + GPUd() void RefitSectorTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector); + GPUd() void UnpackSectorGlobal(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector); GPUd() void UnpackSaveNumber(int32_t id); - GPUd() void UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice); + GPUd() void UnpackResetIds(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector); GPUd() void MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output); - GPUd() void MergeWithinSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void MergeSlicesPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam); + GPUd() void MergeWithinSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); + GPUd() void MergeSectorsPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t border0, int32_t border1, int8_t useOrigTrackParam); template - 
GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); - GPUd() void MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSlice, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) const; + GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector, int8_t withinSector, int8_t mergeMode); + GPUd() void MergeBorderTracksSetup(int32_t& n1, int32_t& n2, GPUTPCGMBorderTrack*& b1, GPUTPCGMBorderTrack*& b2, int32_t& jSector, int32_t iSector, int8_t withinSector, int8_t mergeMode) const; template GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, gputpcgmmergertypes::GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax); GPUd() void SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); @@ -186,17 +186,17 @@ class GPUTPCGMMerger : public GPUProcessor GPUd() void ResolveFindConnectedComponentsHookNeighbors(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void ResolveFindConnectedComponentsHookLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void ResolveFindConnectedComponentsMultiJump(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); - GPUd() void ResolveMergeSlices(gputpcgmmergertypes::GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll); + GPUd() void ResolveMergeSectors(gputpcgmmergertypes::GPUResolveSharedMemory& smem, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int8_t useOrigTrackParam, int8_t mergeAll); GPUd() void MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); GPUd() void MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); 
GPUd() void MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread); #ifndef GPUCA_GPUCODE - void DumpSliceTracks(std::ostream& out) const; - void DumpMergeRanges(std::ostream& out, int32_t withinSlice, int32_t mergeMode) const; + void DumpSectorTracks(std::ostream& out) const; + void DumpMergeRanges(std::ostream& out, int32_t withinSector, int32_t mergeMode) const; void DumpTrackLinks(std::ostream& out, bool output, const char* type) const; - void DumpMergedWithinSlices(std::ostream& out) const; - void DumpMergedBetweenSlices(std::ostream& out) const; + void DumpMergedWithinSectors(std::ostream& out) const; + void DumpMergedBetweenSectors(std::ostream& out) const; void DumpCollected(std::ostream& out) const; void DumpMergeCE(std::ostream& out) const; void DumpFitPrepare(std::ostream& out) const; @@ -204,33 +204,33 @@ class GPUTPCGMMerger : public GPUProcessor void DumpFinal(std::ostream& out) const; template - void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const; - void MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const; + void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; + void MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; const GPUTPCGMBorderTrack& MergedTrackStreamerFindBorderTrack(const GPUTPCGMBorderTrack* tracks, int32_t N, int32_t trackId) const; void DebugRefitMergedTrack(const GPUTPCGMMergedTrack& track) const; - std::vector StreamerOccupancyBin(int32_t iSlice, int32_t 
iRow, float time) const; - std::vector StreamerUncorrectedZY(int32_t iSlice, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const; + std::vector StreamerOccupancyBin(int32_t iSector, int32_t iRow, float time) const; + std::vector StreamerUncorrectedZY(int32_t iSector, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const; void DebugStreamerUpdate(int32_t iTrk, int32_t ihit, float xx, float yy, float zz, const GPUTPCGMMergedTrackHit& cluster, const o2::tpc::ClusterNative& clusterNative, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop, const gputpcgmmergertypes::InterpolationErrorHit& interpolation, int8_t rejectChi2, bool refit, int32_t retVal, float avgInvCharge, float posY, float posZ, int16_t clusterState, int32_t retValReject, float err2Y, float err2Z) const; #endif - GPUdi() int32_t SliceTrackInfoFirst(int32_t iSlice) const { return mSliceTrackInfoIndex[iSlice]; } - GPUdi() int32_t SliceTrackInfoLast(int32_t iSlice) const { return mSliceTrackInfoIndex[iSlice + 1]; } - GPUdi() int32_t SliceTrackInfoGlobalFirst(int32_t iSlice) const { return mSliceTrackInfoIndex[NSLICES + iSlice]; } - GPUdi() int32_t SliceTrackInfoGlobalLast(int32_t iSlice) const { return mSliceTrackInfoIndex[NSLICES + iSlice + 1]; } - GPUdi() int32_t SliceTrackInfoLocalTotal() const { return mSliceTrackInfoIndex[NSLICES]; } - GPUdi() int32_t SliceTrackInfoTotal() const { return mSliceTrackInfoIndex[2 * NSLICES]; } + GPUdi() int32_t SectorTrackInfoFirst(int32_t iSector) const { return mSectorTrackInfoIndex[iSector]; } + GPUdi() int32_t SectorTrackInfoLast(int32_t iSector) const { return mSectorTrackInfoIndex[iSector + 1]; } + GPUdi() int32_t SectorTrackInfoGlobalFirst(int32_t iSector) const { return mSectorTrackInfoIndex[NSECTORS + iSector]; } + GPUdi() int32_t SectorTrackInfoGlobalLast(int32_t iSector) const { return mSectorTrackInfoIndex[NSECTORS + iSector + 1]; } + GPUdi() int32_t SectorTrackInfoLocalTotal() const { 
return mSectorTrackInfoIndex[NSECTORS]; } + GPUdi() int32_t SectorTrackInfoTotal() const { return mSectorTrackInfoIndex[2 * NSECTORS]; } private: - GPUd() void MergeSlicesPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam = false); + GPUd() void MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iBorder, GPUTPCGMBorderTrack** B, GPUAtomic(uint32_t) * nB, bool useOrigTrackParam = false); template - GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSlice1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSlice2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0); + GPUd() void MergeBorderTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t iSector1, GPUTPCGMBorderTrack* B1, int32_t N1, int32_t iSector2, GPUTPCGMBorderTrack* B2, int32_t N2, int32_t mergeMode = 0); - GPUd() void MergeCEFill(const GPUTPCGMSliceTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr); + GPUd() void MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUTPCGMMergedTrackHit& cls, const GPUTPCGMMergedTrackHitXYZ* clsXYZ, int32_t itr); void CheckMergedTracks(); #ifndef GPUCA_GPUCODE - void PrintMergeGraph(const GPUTPCGMSliceTrack* trk, std::ostream& out) const; + void PrintMergeGraph(const GPUTPCGMSectorTrack* trk, std::ostream& out) const; template int64_t GetTrackLabelA(const S& trk) const; template @@ -240,15 +240,15 @@ class GPUTPCGMMerger : public GPUProcessor GPUdi() void setBlockRange(int32_t elems, int32_t nBlocks, int32_t iBlock, int32_t& start, int32_t& end); GPUdi() void hookEdge(int32_t u, int32_t v); - int32_t mNextSliceInd[NSLICES]; - int32_t mPrevSliceInd[NSLICES]; + int32_t mNextSectorInd[NSECTORS]; + int32_t mPrevSectorInd[NSECTORS]; int32_t* mTrackLinks; int32_t* 
mTrackCCRoots; // root of the connected component of this track - uint32_t mNTotalSliceTracks; // maximum number of incoming slice tracks + uint32_t mNTotalSectorTracks; // maximum number of incoming sector tracks uint32_t mNMaxTracks; // maximum number of output tracks - uint32_t mNMaxSingleSliceTracks; // max N tracks in one slice + uint32_t mNMaxSingleSectorTracks; // max N tracks in one sector uint32_t mNMaxOutputTrackClusters; // max number of clusters in output tracks (double-counting shared clusters) uint32_t mNMaxClusters; // max total unique clusters (in event) uint32_t mNMaxLooperMatches; // Maximum number of candidate pairs for looper matching @@ -261,11 +261,11 @@ class GPUTPCGMMerger : public GPUProcessor uint16_t mMemoryResOutputO2MC; uint16_t mMemoryResOutputO2Scratch; - int32_t mNClusters; // Total number of incoming clusters (from slice tracks) + int32_t mNClusters; // Total number of incoming clusters (from sector tracks) GPUTPCGMMergedTrack* mOutputTracks; //* array of output merged tracks GPUdEdxInfo* mOutputTracksdEdx; //* dEdx information - GPUTPCGMSliceTrack* mSliceTrackInfos; //* additional information for slice tracks - int32_t* mSliceTrackInfoIndex; + GPUTPCGMSectorTrack* mSectorTrackInfos; //* additional information for sector tracks + int32_t* mSectorTrackInfoIndex; GPUTPCGMMergedTrackHit* mClusters; GPUTPCGMMergedTrackHitXYZ* mClustersXYZ; GPUAtomic(uint32_t) * mClusterAttachment; @@ -284,9 +284,9 @@ class GPUTPCGMMerger : public GPUProcessor tmpSort* mTrackSortO2; GPUAtomic(uint32_t) * mSharedCount; // Must be uint32_t unfortunately for atomic support GPUTPCGMBorderTrack* mBorderMemory; // memory for border tracks - GPUTPCGMBorderTrack* mBorder[2 * NSLICES]; + GPUTPCGMBorderTrack* mBorder[2 * NSECTORS]; gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRangeMemory; // memory for border tracks - gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRange[NSLICES]; // memory for border tracks + gputpcgmmergertypes::GPUTPCGMBorderRange* 
mBorderRange[NSECTORS]; // memory for border tracks memory* mMemory; uint32_t* mRetryRefitIds; GPUTPCGMLoopData* mLoopData; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 0463966c582a5..d6dfcc8424e65 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -25,12 +25,12 @@ #include "GPUO2DataTypes.h" #include "GPUCommonMath.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUParam.inc" #include "GPUTPCGMTrackParam.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUTPCGMBorderTrack.h" #include "GPUReconstruction.h" #include "GPUDebugStreamer.h" @@ -41,17 +41,17 @@ using namespace o2::gpu; using namespace gputpcgmmergertypes; -void GPUTPCGMMerger::DumpSliceTracks(std::ostream& out) const +void GPUTPCGMMerger::DumpSectorTracks(std::ostream& out) const { std::streamsize ss = out.precision(); out << std::setprecision(2); - out << "\nTPC Merger Slice Tracks\n"; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - out << "Slice Track Info Index " << (mSliceTrackInfoIndex[iSlice + 1] - mSliceTrackInfoIndex[iSlice]) << " / " << (mSliceTrackInfoIndex[NSLICES + iSlice + 1] - mSliceTrackInfoIndex[NSLICES + iSlice]) << "\n"; + out << "\nTPC Merger Sector Tracks\n"; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + out << "Sector Track Info Index " << (mSectorTrackInfoIndex[iSector + 1] - mSectorTrackInfoIndex[iSector]) << " / " << (mSectorTrackInfoIndex[NSECTORS + iSector + 1] - mSectorTrackInfoIndex[NSECTORS + iSector]) << "\n"; for (int32_t iGlobal = 0; iGlobal < 2; iGlobal++) { out << " Track type " << iGlobal << "\n"; - for (int32_t j = mSliceTrackInfoIndex[iSlice + NSLICES * iGlobal]; j < mSliceTrackInfoIndex[iSlice + NSLICES * iGlobal + 1]; j++) { - const auto& trk = mSliceTrackInfos[j]; + 
for (int32_t j = mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal]; j < mSectorTrackInfoIndex[iSector + NSECTORS * iGlobal + 1]; j++) { + const auto& trk = mSectorTrackInfos[j]; out << " Track " << j << ": LocalId " << (iGlobal ? (trk.LocalTrackId() >> 24) : -1) << "/" << (iGlobal ? (trk.LocalTrackId() & 0xFFFFFF) : -1) << " X " << trk.X() << " offsetz " << trk.TZOffset() << " A " << trk.Alpha() << " Y " << trk.Y() << " Z " << trk.Z() << " SinPhi " << trk.SinPhi() << " CosPhi " << trk.CosPhi() << " SecPhi " << trk.SecPhi() << " Tgl " << trk.DzDs() << " QPt " << trk.QPt() << "\n"; } } @@ -59,29 +59,29 @@ void GPUTPCGMMerger::DumpSliceTracks(std::ostream& out) const out << std::setprecision(ss); } -void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSlice, int32_t mergeMode) const +void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSector, int32_t mergeMode) const { - int32_t n = withinSlice == -1 ? NSLICES / 2 : NSLICES; + int32_t n = withinSector == -1 ? 
NSECTORS / 2 : NSECTORS; for (int32_t i = 0; i < n; i++) { int32_t n1, n2; GPUTPCGMBorderTrack *b1, *b2; - int32_t jSlice; - MergeBorderTracksSetup(n1, n2, b1, b2, jSlice, i, withinSlice, mergeMode); - const int32_t nTrk = *mRec->GetConstantMem().tpcTrackers[jSlice].NTracks(); + int32_t jSector; + MergeBorderTracksSetup(n1, n2, b1, b2, jSector, i, withinSector, mergeMode); + const int32_t nTrk = *mRec->GetConstantMem().tpcTrackers[jSector].NTracks(); const gputpcgmmergertypes::GPUTPCGMBorderRange* range1 = BorderRange(i); - const gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = BorderRange(jSlice) + nTrk; - out << "\nBorder Tracks : i " << i << " withinSlice " << withinSlice << " mergeMode " << mergeMode << "\n"; + const gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = BorderRange(jSector) + nTrk; + out << "\nBorder Tracks : i " << i << " withinSector " << withinSector << " mergeMode " << mergeMode << "\n"; for (int32_t k = 0; k < n1; k++) { out << " " << k << ": t " << b1[k].TrackID() << " ncl " << b1[k].NClusters() << " row " << (mergeMode > 0 ? b1[k].Row() : -1) << " par " << b1[k].Par()[0] << " " << b1[k].Par()[1] << " " << b1[k].Par()[2] << " " << b1[k].Par()[3] << " " << b1[k].Par()[4] << " offset " << b1[k].ZOffsetLinear() << " cov " << b1[k].Cov()[0] << " " << b1[k].Cov()[1] << " " << b1[k].Cov()[2] << " " << b1[k].Cov()[3] << " " << b1[k].Cov()[4] << " covd " << b1[k].CovD()[0] << " " << b1[k].CovD()[1] << "\n"; } - if (i != jSlice) { + if (i != jSector) { for (int32_t k = 0; k < n2; k++) { out << " " << k << ": t " << b2[k].TrackID() << " ncl " << b2[k].NClusters() << " row " << (mergeMode > 0 ? 
b2[k].Row() : -1) << " par " << b2[k].Par()[0] << " " << b2[k].Par()[1] << " " << b2[k].Par()[2] << " " << b2[k].Par()[3] << " " << b2[k].Par()[4] << " offset " << b2[k].ZOffsetLinear() << " cov " << b2[k].Cov()[0] << " " << b2[k].Cov()[1] << " " << b2[k].Cov()[2] << " " << b2[k].Cov()[3] << " " << b2[k].Cov()[4] << " covd " << b2[k].CovD()[0] << " " << b2[k].CovD()[1] << "\n"; } } - out << "\nBorder Range : i " << i << " withinSlice " << withinSlice << " mergeMode " << mergeMode << "\n"; + out << "\nBorder Range : i " << i << " withinSector " << withinSector << " mergeMode " << mergeMode << "\n"; for (int32_t k = 0; k < n1; k++) { out << " " << k << ": " << range1[k].fId << " " << range1[k].fMin << " " << range1[k].fMax << "\n"; } @@ -94,7 +94,7 @@ void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSlice, int void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* type) const { out << "\nTPC Merger Links " << type << "\n"; - const int32_t n = output ? mMemory->nOutputTracks : SliceTrackInfoLocalTotal(); + const int32_t n = output ? 
mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); for (int32_t i = 0; i < n; i++) { if (mTrackLinks[i] != -1) { out << " " << i << ": " << mTrackLinks[i] << "\n"; @@ -102,13 +102,13 @@ void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* } } -void GPUTPCGMMerger::DumpMergedWithinSlices(std::ostream& out) const +void GPUTPCGMMerger::DumpMergedWithinSectors(std::ostream& out) const { - DumpTrackLinks(out, false, "within Slices"); - out << "\nTPC Merger Merge Within Slices\n"; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (int32_t j = mSliceTrackInfoIndex[iSlice]; j < mSliceTrackInfoIndex[iSlice + 1]; j++) { - const auto& trk = mSliceTrackInfos[j]; + DumpTrackLinks(out, false, "within Sectors"); + out << "\nTPC Merger Merge Within Sectors\n"; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (int32_t j = mSectorTrackInfoIndex[iSector]; j < mSectorTrackInfoIndex[iSector + 1]; j++) { + const auto& trk = mSectorTrackInfos[j]; if (trk.NextSegmentNeighbour() >= 0 || trk.PrevSegmentNeighbour() >= 0) { out << " Track " << j << ": Neighbour " << trk.PrevSegmentNeighbour() << " / " << trk.NextSegmentNeighbour() << "\n"; } @@ -116,13 +116,13 @@ void GPUTPCGMMerger::DumpMergedWithinSlices(std::ostream& out) const } } -void GPUTPCGMMerger::DumpMergedBetweenSlices(std::ostream& out) const +void GPUTPCGMMerger::DumpMergedBetweenSectors(std::ostream& out) const { - DumpTrackLinks(out, false, "between Slices"); - out << "\nTPC Merger Merge Between Slices\n"; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - for (int32_t j = mSliceTrackInfoIndex[iSlice]; j < mSliceTrackInfoIndex[iSlice + 1]; j++) { - const auto& trk = mSliceTrackInfos[j]; + DumpTrackLinks(out, false, "between Sectors"); + out << "\nTPC Merger Merge Between Sectors\n"; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (int32_t j = mSectorTrackInfoIndex[iSector]; j < mSectorTrackInfoIndex[iSector + 1]; j++) { + const auto& trk = 
mSectorTrackInfos[j]; if (trk.NextNeighbour() >= 0 || trk.PrevNeighbour() >= 0) { out << " Track " << j << ": Neighbour " << trk.PrevNeighbour() << " / " << trk.NextNeighbour() << "\n"; } @@ -242,24 +242,24 @@ void GPUTPCGMMerger::DumpFinal(std::ostream& out) const } template -inline void GPUTPCGMMerger::MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const +inline void GPUTPCGMMerger::MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const { #ifdef DEBUG_STREAMER std::vector hits1(152), hits2(152); for (int32_t i = 0; i < 152; i++) { hits1[i] = hits2[i] = -1; } - const GPUTPCTracker& tracker1 = GetConstantMem()->tpcTrackers[slice1]; - const GPUTPCGMSliceTrack& sliceTrack1 = mSliceTrackInfos[b1.TrackID()]; - const GPUTPCTrack& inTrack1 = *sliceTrack1.OrigTrack(); + const GPUTPCTracker& tracker1 = GetConstantMem()->tpcTrackers[sector1]; + const GPUTPCGMSectorTrack& sectorTrack1 = mSectorTrackInfos[b1.TrackID()]; + const GPUTPCTrack& inTrack1 = *sectorTrack1.OrigTrack(); for (int32_t i = 0; i < inTrack1.NHits(); i++) { const GPUTPCHitId& ic1 = tracker1.TrackHits()[inTrack1.FirstHitID() + i]; int32_t clusterIndex = tracker1.Data().ClusterDataIndex(tracker1.Data().Row(ic1.RowIndex()), ic1.HitIndex()); hits1[ic1.RowIndex()] = clusterIndex; } - const GPUTPCTracker& tracker2 = GetConstantMem()->tpcTrackers[slice2]; - const GPUTPCGMSliceTrack& sliceTrack2 = mSliceTrackInfos[b2.TrackID()]; - const GPUTPCTrack& inTrack2 = *sliceTrack2.OrigTrack(); + const GPUTPCTracker& tracker2 = GetConstantMem()->tpcTrackers[sector2]; + const GPUTPCGMSectorTrack& sectorTrack2 = mSectorTrackInfos[b2.TrackID()]; + const GPUTPCTrack& inTrack2 = *sectorTrack2.OrigTrack(); for (int32_t i = 0; i < inTrack2.NHits(); i++) { 
const GPUTPCHitId& ic2 = tracker2.TrackHits()[inTrack2.FirstHitID() + i]; int32_t clusterIndex = tracker2.Data().ClusterDataIndex(tracker2.Data().Row(ic2.RowIndex()), ic2.HitIndex()); @@ -268,17 +268,17 @@ inline void GPUTPCGMMerger::MergedTrackStreamerInternal(const GPUTPCGMBorderTrac std::string debugname = std::string("debug_") + name; std::string treename = std::string("tree_") + name; - o2::utils::DebugStreamer::instance()->getStreamer(debugname.c_str(), "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName(treename.c_str()).data() << "slice1=" << slice1 << "slice2=" << slice2 << "b1=" << b1 << "b2=" << b2 << "clusters1=" << hits1 << "clusters2=" << hits2 << "sliceTrack1=" << sliceTrack1 << "sliceTrack2=" << sliceTrack2 << "mergeMode=" << mergeMode << "weight=" << weight << "fraction=" << frac << "\n"; + o2::utils::DebugStreamer::instance()->getStreamer(debugname.c_str(), "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName(treename.c_str()).data() << "sector1=" << sector1 << "sector2=" << sector2 << "b1=" << b1 << "b2=" << b2 << "clusters1=" << hits1 << "clusters2=" << hits2 << "sectorTrack1=" << sectorTrack1 << "sectorTrack2=" << sectorTrack2 << "mergeMode=" << mergeMode << "weight=" << weight << "fraction=" << frac << "\n"; #endif } -void GPUTPCGMMerger::MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t slice1, int32_t slice2, int32_t mergeMode, float weight, float frac) const +void GPUTPCGMMerger::MergedTrackStreamer(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const { #ifdef DEBUG_STREAMER if (mergeMode == 0) { - MergedTrackStreamerInternal<0>(b1, b2, name, slice1, slice2, mergeMode, weight, frac); + MergedTrackStreamerInternal<0>(b1, b2, name, sector1, sector2, mergeMode, weight, frac); } else if (mergeMode >= 1 && mergeMode <= 0) { - // 
MergedTrackStreamerInternal<1>(b1, b2, name, slice1, slice2, mergeMode, weight, frac); Not yet working + // MergedTrackStreamerInternal<1>(b1, b2, name, sector1, sector2, mergeMode, weight, frac); Not yet working } #endif } @@ -323,7 +323,7 @@ void GPUTPCGMMerger::DebugRefitMergedTrack(const GPUTPCGMMergedTrack& track) con } } -std::vector GPUTPCGMMerger::StreamerOccupancyBin(int32_t iSlice, int32_t iRow, float time) const +std::vector GPUTPCGMMerger::StreamerOccupancyBin(int32_t iSector, int32_t iRow, float time) const { static int32_t size = getenv("O2_DEBUG_STREAMER_OCCUPANCY_NBINS") ? atoi(getenv("O2_DEBUG_STREAMER_OCCUPANCY_NBINS")) : Param().rec.tpc.occupancyMapTimeBinsAverage; std::vector retVal(1 + 2 * size); @@ -337,11 +337,11 @@ std::vector GPUTPCGMMerger::StreamerOccupancyBin(int32_t iSlice, int32 return retVal; } -std::vector GPUTPCGMMerger::StreamerUncorrectedZY(int32_t iSlice, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const +std::vector GPUTPCGMMerger::StreamerUncorrectedZY(int32_t iSector, int32_t iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const { std::vector retVal(2); #ifdef DEBUG_STREAMER - GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(iSlice, iRow, track.GetY(), track.GetZ(), retVal[0], retVal[1]); + GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(iSector, iRow, track.GetY(), track.GetZ(), retVal[0], retVal[1]); #endif return retVal; } @@ -350,12 +350,12 @@ void GPUTPCGMMerger::DebugStreamerUpdate(int32_t iTrk, int32_t ihit, float xx, f { #ifdef DEBUG_STREAMER float time = clusterNative.getTime(); - auto occupancyBins = StreamerOccupancyBin(cluster.slice, cluster.row, time); - auto uncorrectedYZ = StreamerUncorrectedZY(cluster.slice, cluster.row, track, prop); + auto occupancyBins = StreamerOccupancyBin(cluster.sector, cluster.row, time); + auto uncorrectedYZ = StreamerUncorrectedZY(cluster.sector, cluster.row, 
track, prop); float invCharge = 1.f / clusterNative.qMax; int32_t iRow = cluster.row; float unscaledMult = (time >= 0.f ? Param().GetUnscaledMult(time) / Param().tpcGeometry.Row2X(iRow) : 0.f); - const float clAlpha = Param().Alpha(cluster.slice); + const float clAlpha = Param().Alpha(cluster.sector); uint32_t occupancyTotal = Param().occupancyTotal; o2::utils::DebugStreamer::instance()->getStreamer("debug_update_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_update_track").data() << "iTrk=" << iTrk diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 4f654c0fa7beb..d72d59a6250e7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -37,21 +37,21 @@ GPUdii() void GPUTPCGMMergerFollowLoopers::Thread<0>(int32_t nBlocks, int32_t nT } template <> -GPUdii() void GPUTPCGMMergerUnpackResetIds::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice) +GPUdii() void GPUTPCGMMergerUnpackResetIds::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector) { - merger.UnpackResetIds(nBlocks, nThreads, iBlock, iThread, iSlice); + merger.UnpackResetIds(nBlocks, nThreads, iBlock, iThread, iSector); } template <> -GPUdii() void GPUTPCGMMergerSliceRefit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice) +GPUdii() void GPUTPCGMMergerSectorRefit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector) { - merger.RefitSliceTracks(nBlocks, nThreads, iBlock, iThread, iSlice); + 
merger.RefitSectorTracks(nBlocks, nThreads, iBlock, iThread, iSector); } template <> -GPUdii() void GPUTPCGMMergerUnpackGlobal::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice) +GPUdii() void GPUTPCGMMergerUnpackGlobal::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector) { - merger.UnpackSliceGlobal(nBlocks, nThreads, iBlock, iThread, iSlice); + merger.UnpackSectorGlobal(nBlocks, nThreads, iBlock, iThread, iSector); } template <> @@ -89,7 +89,7 @@ GPUdii() void GPUTPCGMMergerResolve::Thread<3>(int32_t nBlocks, int32_t nThreads template <> GPUdii() void GPUTPCGMMergerResolve::Thread<4>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t useOrigTrackParam, int8_t mergeAll) { - merger.ResolveMergeSlices(smem, nBlocks, nThreads, iBlock, iThread, useOrigTrackParam, mergeAll); + merger.ResolveMergeSectors(smem, nBlocks, nThreads, iBlock, iThread, useOrigTrackParam, mergeAll); } template <> @@ -101,13 +101,13 @@ GPUdii() void GPUTPCGMMergerClearLinks::Thread<0>(int32_t nBlocks, int32_t nThre template <> GPUdii() void GPUTPCGMMergerMergeWithinPrepare::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - merger.MergeWithinSlicesPrepare(nBlocks, nThreads, iBlock, iThread); + merger.MergeWithinSectorsPrepare(nBlocks, nThreads, iBlock, iThread); } template <> -GPUdii() void GPUTPCGMMergerMergeSlicesPrepare::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t border0, int32_t border1, int8_t useOrigTrackParam) +GPUdii() void 
GPUTPCGMMergerMergeSectorsPrepare::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t border0, int32_t border1, int8_t useOrigTrackParam) { - merger.MergeSlicesPrepare(nBlocks, nThreads, iBlock, iThread, border0, border1, useOrigTrackParam); + merger.MergeSectorsPrepare(nBlocks, nThreads, iBlock, iThread, border0, border1, useOrigTrackParam); } template @@ -116,14 +116,14 @@ GPUdii() void GPUTPCGMMergerMergeBorders::Thread(int32_t nBlocks, int32_t nThrea merger.MergeBorderTracks(nBlocks, nThreads, iBlock, iThread, args...); } #if !defined(GPUCA_GPUCODE) || defined(GPUCA_GPUCODE_DEVICE) // FIXME: DR: WORKAROUND to avoid CUDA bug creating host symbols for device code. -template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); -template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice, int8_t withinSlice, int8_t mergeMode); +template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector, int8_t withinSector, int8_t mergeMode); +template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<2>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector, int8_t withinSector, int8_t mergeMode); template GPUdni() void GPUTPCGMMergerMergeBorders::Thread<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, 
processorType& GPUrestrict() merger, gputpcgmmergertypes::GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax); #endif template <> -GPUdii() void GPUTPCGMMergerMergeBorders::Thread<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSlice, int8_t withinSlice, int8_t mergeMode) +GPUdii() void GPUTPCGMMergerMergeBorders::Thread<1>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t iSector, int8_t withinSector, int8_t mergeMode) { - merger.MergeBorderTracks<1>(2, nThreads, iBlock & 1, iThread, iBlock / 2, withinSlice, mergeMode); + merger.MergeBorderTracks<1>(2, nThreads, iBlock & 1, iThread, iBlock / 2, withinSector, mergeMode); } template <> diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h index dec9befa25ce2..63592eb09eb3d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h @@ -48,18 +48,18 @@ class GPUTPCGMMergerFollowLoopers : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -class GPUTPCGMMergerSliceRefit : public GPUTPCGMMergerGeneral +class GPUTPCGMMergerSectorRefit : public GPUTPCGMMergerGeneral { public: template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSlice); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSector); }; class GPUTPCGMMergerUnpackGlobal : public GPUTPCGMMergerGeneral { public: template - GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, 
int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSlice); + GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger, int32_t iSector); }; class GPUTPCGMMergerUnpackSaveNumber : public GPUTPCGMMergerGeneral @@ -100,7 +100,7 @@ class GPUTPCGMMergerMergeWithinPrepare : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -class GPUTPCGMMergerMergeSlicesPrepare : public GPUTPCGMMergerGeneral +class GPUTPCGMMergerMergeSectorsPrepare : public GPUTPCGMMergerGeneral { public: template diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 203968e091014..45293bae9820b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -193,7 +193,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks continue; } int32_t clusterIdGlobal = trackClusters[tracks[i].FirstClusterRef() + j].num; - int32_t sector = trackClusters[tracks[i].FirstClusterRef() + j].slice; + int32_t sector = trackClusters[tracks[i].FirstClusterRef() + j].sector; int32_t globalRow = trackClusters[tracks[i].FirstClusterRef() + j].row; int32_t clusterIdInRow = clusterIdGlobal - clusters->clusterOffset[sector][globalRow]; clIndArr[nOutCl2] = clusterIdInRow; @@ -214,11 +214,11 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks if (merger.Param().par.continuousTracking) { time0 = tracks[i].GetParam().GetTZOffset(); if (cce) { - bool lastSide = trackClusters[tracks[i].FirstClusterRef()].slice < MAXSECTOR / 2; + bool lastSide = trackClusters[tracks[i].FirstClusterRef()].sector < MAXSECTOR / 2; float delta = 0.f; for (uint32_t iCl = 1; iCl < tracks[i].NClusters(); iCl++) { auto& cacl1 = trackClusters[tracks[i].FirstClusterRef() + iCl]; - if 
(lastSide ^ (cacl1.slice < MAXSECTOR / 2)) { + if (lastSide ^ (cacl1.sector < MAXSECTOR / 2)) { auto& cl1 = clusters->clustersLinear[cacl1.num]; auto& cl2 = clusters->clustersLinear[trackClusters[tracks[i].FirstClusterRef() + iCl - 1].num]; delta = CAMath::Abs(cl1.getTime() - cl2.getTime()) * 0.5f; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 9f344a04739fd..e15d6fe8b17bd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -614,8 +614,8 @@ GPUd() void GPUTPCGMPropagator::GetErr2(float& GPUrestrict() err2Y, float& GPUre param.GetClusterErrors2(sector, iRow, posZ, snp, tgl, time, avgCharge, charge, err2Y, err2Z); } param.UpdateClusterError2ByState(clusterState, err2Y, err2Z); - float statErr2 = param.GetSystematicClusterErrorIFC2(trackX, trackY, posZ, sector >= (GPUCA_NSLICES / 2)); - if (sector >= GPUCA_NSLICES / 2 + 1 && sector <= GPUCA_NSLICES / 2 + 2) { + float statErr2 = param.GetSystematicClusterErrorIFC2(trackX, trackY, posZ, sector >= (GPUCA_NSECTORS / 2)); + if (sector >= GPUCA_NSECTORS / 2 + 1 && sector <= GPUCA_NSECTORS / 2 + 2) { statErr2 += param.GetSystematicClusterErrorC122(trackX, trackY, sector); } err2Y += statErr2; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx similarity index 85% rename from GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx rename to GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx index 6c8641517b80d..a439e6e653039 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx @@ -9,12 +9,12 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUTPCGMSliceTrack.cxx +/// \file GPUTPCGMSectorTrack.cxx /// \author Sergey Gorbunov, David Rohr #include "GPUParam.h" #include "GPUTPCGMBorderTrack.h" -#include "GPUTPCGMSliceTrack.h" +#include "GPUTPCGMSectorTrack.h" #include "GPUO2DataTypes.h" #include "GPUTPCGMMerger.h" #include "GPUTPCConvertImpl.h" @@ -23,10 +23,10 @@ using namespace o2::gpu; using namespace o2::tpc; -GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sliceTr, float alpha, int32_t slice) +GPUd() void GPUTPCGMSectorTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sectorTr, float alpha, int32_t sector) { - const GPUTPCBaseTrackParam& t = sliceTr->Param(); - mOrigTrack = sliceTr; + const GPUTPCBaseTrackParam& t = sectorTr->Param(); + mOrigTrack = sectorTr; mParam.mX = t.GetX(); mParam.mY = t.GetY(); mParam.mZ = t.GetZ(); @@ -36,18 +36,18 @@ GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMMerger* merger, const GPUTPCTr mParam.mCosPhi = CAMath::Sqrt(1.f - mParam.mSinPhi * mParam.mSinPhi); mParam.mSecPhi = 1.f / mParam.mCosPhi; mAlpha = alpha; - mSlice = slice; + mSector = sector; if (merger->Param().par.earlyTpcTransform) { mTZOffset = t.GetZOffset(); } else { - mTZOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(slice, t.GetZOffset(), merger->Param().continuousMaxTimeBin); + mTZOffset = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convZOffsetToVertexTime(sector, t.GetZOffset(), merger->Param().continuousMaxTimeBin); } - mNClusters = sliceTr->NHits(); + mNClusters = sectorTr->NHits(); } -GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sliceTr, float alpha, int32_t slice) +GPUd() void GPUTPCGMSectorTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sectorTr, float alpha, int32_t sector) { - mOrigTrack = sliceTr; + mOrigTrack = sectorTr; mParam.mX = trk.GetX(); mParam.mY = trk.GetY(); mParam.mZ = trk.GetZ(); 
@@ -57,9 +57,9 @@ GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCT mParam.mCosPhi = CAMath::Sqrt(1.f - mParam.mSinPhi * mParam.mSinPhi); mParam.mSecPhi = 1.f / mParam.mCosPhi; mAlpha = alpha; - mSlice = slice; + mSector = sector; mTZOffset = trk.GetTZOffset(); - mNClusters = sliceTr->NHits(); + mNClusters = sectorTr->NHits(); mParam.mC0 = trk.GetCov(0); mParam.mC2 = trk.GetCov(2); mParam.mC3 = trk.GetCov(3); @@ -71,7 +71,7 @@ GPUd() void GPUTPCGMSliceTrack::Set(const GPUTPCGMTrackParam& trk, const GPUTPCT mParam.mC14 = trk.GetCov(14); } -GPUd() void GPUTPCGMSliceTrack::SetParam2(const GPUTPCGMTrackParam& trk) +GPUd() void GPUTPCGMSectorTrack::SetParam2(const GPUTPCGMTrackParam& trk) { mParam2.mX = trk.GetX(); mParam2.mY = trk.GetY(); @@ -92,18 +92,18 @@ GPUd() void GPUTPCGMSliceTrack::SetParam2(const GPUTPCGMTrackParam& trk) mParam2.mC14 = trk.GetCov(14); } -GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* merger, int32_t iSlice, float maxSinPhi, float sinPhiMargin) +GPUd() bool GPUTPCGMSectorTrack::FilterErrors(const GPUTPCGMMerger* merger, int32_t iSector, float maxSinPhi, float sinPhiMargin) { float lastX; // float lastX = merger->Param().tpcGeometry.Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: Why is this needed to be set below, Row2X should work, but looses some tracks float y, z; int32_t row, index; - const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSlice]; + const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSector]; const GPUTPCHitId& ic = trk.TrackHits()[mOrigTrack->FirstHitID() + mOrigTrack->NHits() - 1]; - index = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + merger->GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSlice][0]; + index = trk.Data().ClusterDataIndex(trk.Data().Row(ic.RowIndex()), ic.HitIndex()) + merger->GetConstantMem()->ioPtrs.clustersNative->clusterOffset[iSector][0]; row = ic.RowIndex(); 
const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[index]; - GPUTPCConvertImpl::convert(*merger->GetConstantMem(), iSlice, row, cl.getPad(), cl.getTime(), lastX, y, z); + GPUTPCConvertImpl::convert(*merger->GetConstantMem(), iSector, row, cl.getPad(), cl.getTime(), lastX, y, z); const int32_t N = 3; @@ -116,10 +116,10 @@ GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* merger, int32 float kdx205 = 2.f + kdx * kdx * 0.5f; { - merger->Param().GetClusterErrors2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, mParam.mC0, mParam.mC2); // TODO: provide correct time and row + merger->Param().GetClusterErrors2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, mParam.mC0, mParam.mC2); // TODO: provide correct time and row #ifndef GPUCA_TPC_GEOMETRY_O2 float C0a, C2a; - merger->Param().GetClusterErrorsSeeding2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); + merger->Param().GetClusterErrorsSeeding2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); if (C0a > mParam.mC0) { mParam.mC0 = C0a; } @@ -176,10 +176,10 @@ GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* merger, int32 float dz = dS * mParam.mDzDs; float ex1i = 1.f / ex1; { - merger->Param().GetClusterErrors2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Provide correct time / row + merger->Param().GetClusterErrors2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Provide correct time / row #ifndef GPUCA_TPC_GEOMETRY_O2 float C0a, C2a; - merger->Param().GetClusterErrorsSeeding2(iSlice, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); + merger->Param().GetClusterErrorsSeeding2(iSector, 0, mParam.mZ, mParam.mSinPhi, mParam.mDzDs, -1.f, C0a, C2a); if (C0a > err2Y) { err2Y = C0a; } @@ -274,7 +274,7 @@ GPUd() bool GPUTPCGMSliceTrack::FilterErrors(const GPUTPCGMMerger* 
merger, int32 return ok; } -GPUd() bool GPUTPCGMSliceTrack::TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const +GPUd() bool GPUTPCGMSectorTrack::TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov) const { Bz = -Bz; float ex = mParam.mCosPhi; @@ -318,7 +318,7 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToX(GPUTPCGMMerger* merger, float x, fl if (merger->Param().par.earlyTpcTransform) { b.SetZOffsetLinear(mTZOffset); } else { - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSlice, mTZOffset, merger->Param().continuousMaxTimeBin)); + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); } if (!doCov) { @@ -366,7 +366,7 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToX(GPUTPCGMMerger* merger, float x, fl return 1; } -GPUd() bool GPUTPCGMSliceTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float newX, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const +GPUd() bool GPUTPCGMSectorTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float newX, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const { //* @@ -474,7 +474,7 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float if (merger->Param().par.earlyTpcTransform) { b.SetZOffsetLinear(mTZOffset); } else { - b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSlice, mTZOffset, merger->Param().continuousMaxTimeBin)); + b.SetZOffsetLinear(merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(mSector, mTZOffset, merger->Param().continuousMaxTimeBin)); } b.SetCov(0, c00 + h2 * h2c22 + h4 * h4c44 + 2.f * (h2 * 
c20ph4c42 + h4 * c40)); @@ -490,9 +490,9 @@ GPUd() bool GPUTPCGMSliceTrack::TransportToXAlpha(GPUTPCGMMerger* merger, float return 1; } -GPUd() void GPUTPCGMSliceTrack::CopyBaseTrackCov() +GPUd() void GPUTPCGMSectorTrack::CopyBaseTrackCov() { - const float* GPUrestrict() cov = mOrigTrack->Param().mC; + const float* GPUrestrict() cov = mOrigTrack -> Param().mC; mParam.mC0 = cov[0]; mParam.mC2 = cov[2]; mParam.mC3 = cov[3]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h similarity index 84% rename from GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h rename to GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h index 47841a616a13e..924100c435fd9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSliceTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h @@ -9,11 +9,11 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCGMSliceTrack.h +/// \file GPUTPCGMSectorTrack.h /// \author Sergey Gorbunov, David Rohr -#ifndef GPUTPCGMSLICETRACK_H -#define GPUTPCGMSLICETRACK_H +#ifndef GPUTPCGMSECTORTRACK_H +#define GPUTPCGMSECTORTRACK_H #include "GPUTPCTrack.h" #include "GPUTPCGMTrackParam.h" @@ -25,17 +25,17 @@ namespace o2 namespace gpu { /** - * @class GPUTPCGMSliceTrack + * @class GPUTPCGMSectorTrack * - * The class describes TPC slice tracks used in GPUTPCGMMerger + * The class describes TPC sector tracks used in GPUTPCGMMerger */ class GPUTPCGMMerger; -class GPUTPCGMSliceTrack +class GPUTPCGMSectorTrack { public: GPUd() float Alpha() const { return mAlpha; } - GPUd() uint8_t Slice() const { return mSlice; } - GPUd() bool CSide() const { return mSlice >= 18; } + GPUd() uint8_t Sector() const { return mSector; } + GPUd() bool CSide() const { return mSector >= 18; } GPUd() int32_t NClusters() const { return mNClusters; } GPUd() int32_t PrevNeighbour() const { return mNeighbour[0]; } GPUd() int32_t NextNeighbour() const { return mNeighbour[1]; } @@ 
-75,9 +75,9 @@ class GPUTPCGMSliceTrack mClusterZT[1] = v2; } - GPUd() void Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sliceTr, float alpha, int32_t slice); + GPUd() void Set(const GPUTPCGMTrackParam& trk, const GPUTPCTrack* sectorTr, float alpha, int32_t sector); GPUd() void SetParam2(const GPUTPCGMTrackParam& trk); - GPUd() void Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sliceTr, float alpha, int32_t slice); + GPUd() void Set(const GPUTPCGMMerger* merger, const GPUTPCTrack* sectorTr, float alpha, int32_t sector); GPUd() void UseParam2() { mParam = mParam2; } GPUd() void SetX2(float v) { mParam2.mX = v; } GPUd() float X2() const { return mParam2.mX; } @@ -103,7 +103,7 @@ class GPUTPCGMSliceTrack GPUd() void SetNextSegmentNeighbour(int32_t v) { mSegmentNeighbour[1] = v; } GPUd() void SetLeg(uint8_t v) { mLeg = v; } - GPUd() void CopyParamFrom(const GPUTPCGMSliceTrack& t) + GPUd() void CopyParamFrom(const GPUTPCGMSectorTrack& t) { mParam.mX = t.mParam.mX; mParam.mY = t.mParam.mY; @@ -116,31 +116,31 @@ class GPUTPCGMSliceTrack mAlpha = t.mAlpha; } - GPUd() bool FilterErrors(const GPUTPCGMMerger* merger, int32_t iSlice, float maxSinPhi = GPUCA_MAX_SIN_PHI, float sinPhiMargin = 0.f); + GPUd() bool FilterErrors(const GPUTPCGMMerger* merger, int32_t iSector, float maxSinPhi = GPUCA_MAX_SIN_PHI, float sinPhiMargin = 0.f); GPUd() bool TransportToX(GPUTPCGMMerger* merger, float x, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi, bool doCov = true) const; GPUd() bool TransportToXAlpha(GPUTPCGMMerger* merger, float x, float sinAlpha, float cosAlpha, float Bz, GPUTPCGMBorderTrack& b, float maxSinPhi) const; GPUd() void CopyBaseTrackCov(); - struct sliceTrackParam { + struct sectorTrackParam { float mX, mY, mZ, mSinPhi, mDzDs, mQPt, mCosPhi, mSecPhi; // parameters float mC0, mC2, mC3, mC5, mC7, mC9, mC10, mC12, mC14; // covariances }; private: - const GPUTPCTrack* mOrigTrack; // pointer to original slice track - sliceTrackParam mParam; // Track parameters 
- sliceTrackParam mParam2; // Parameters at other side + const GPUTPCTrack* mOrigTrack; // pointer to original sector track + sectorTrackParam mParam; // Track parameters + sectorTrackParam mParam2; // Parameters at other side float mTZOffset; // Z offset with early transform, T offset otherwise float mAlpha; // alpha angle float mClusterZT[2]; // Minimum maximum cluster Z / T int32_t mNClusters; // N clusters int32_t mNeighbour[2]; // int32_t mSegmentNeighbour[2]; // - int32_t mLocalTrackId; // Corrected local track id in terms of GMSliceTracks array for extrapolated tracks, UNDEFINED for local tracks! + int32_t mLocalTrackId; // Corrected local track id in terms of GMSectorTracks array for extrapolated tracks, UNDEFINED for local tracks! int32_t mExtrapolatedTrackIds[2]; // IDs of associated extrapolated tracks - uint8_t mSlice; // slice of this track segment + uint8_t mSector; // sector of this track segment uint8_t mLeg; // Leg of this track segment - ClassDefNV(GPUTPCGMSliceTrack, 1); + ClassDefNV(GPUTPCGMSectorTrack, 1); }; } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 790e911a1d865..be1d3803312fe 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -67,7 +67,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ prop.SetPolynomialField(¶m.polynomialField); prop.SetMaxSinPhi(maxSinPhi); prop.SetToyMCEventsFlag(param.par.toyMCEventsFlag); - if ((clusters[0].slice < 18) == (clusters[N - 1].slice < 18)) { + if ((clusters[0].sector < 18) == (clusters[N - 1].sector < 18)) { ShiftZ2(clusters, clustersXYZ, merger, N); } if (param.rec.tpc.mergerInterpolateErrors) { @@ -82,7 +82,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float covYYUpd = 0.f; float lastUpdateX = -1.f; uint8_t lastRow = 255; - uint8_t lastSlice = 255; + uint8_t 
lastSector = 255; uint8_t storeOuter = 0; for (int32_t iWay = 0; iWay < nWays; iWay++) { @@ -124,9 +124,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ bool noFollowCircle = false, noFollowCircle2 = false; int32_t goodRows = 0; for (int32_t ihit = ihitStart; ihit >= 0 && ihit < maxN; ihit += wayDirection) { - const bool crossCE = lastSlice != 255 && ((lastSlice < 18) ^ (clusters[ihit].slice < 18)); + const bool crossCE = lastSector != 255 && ((lastSector < 18) ^ (clusters[ihit].sector < 18)); if (crossCE) { - lastSlice = clusters[ihit].slice; + lastSector = clusters[ihit].sector; noFollowCircle2 = true; } @@ -151,19 +151,19 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ int32_t ihitMergeFirst = ihit; uint8_t clusterState = clusters[ihit].state; - const float clAlpha = param.Alpha(clusters[ihit].slice); + const float clAlpha = param.Alpha(clusters[ihit].sector); float xx, yy, zz; if (param.par.earlyTpcTransform) { - const float zOffset = (clusters[ihit].slice < 18) == (clusters[0].slice < 18) ? mTZOffset : -mTZOffset; + const float zOffset = (clusters[ihit].sector < 18) == (clusters[0].sector < 18) ? 
mTZOffset : -mTZOffset; xx = clustersXYZ[ihit].x; yy = clustersXYZ[ihit].y; zz = clustersXYZ[ihit].z - zOffset; } else { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].slice, clusters[ihit].row, cl.getPad(), cl.getTime(), xx, yy, zz, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), xx, yy, zz, mTZOffset); } // clang-format off - CADEBUG(printf("\tHit %3d/%3d Row %3d: Cluster Alpha %8.3f %3d, X %8.3f - Y %8.3f, Z %8.3f (Missed %d)\n", ihit, maxN, (int32_t)clusters[ihit].row, clAlpha, (int32_t)clusters[ihit].slice, xx, yy, zz, nMissed)); + CADEBUG(printf("\tHit %3d/%3d Row %3d: Cluster Alpha %8.3f %3d, X %8.3f - Y %8.3f, Z %8.3f (Missed %d)\n", ihit, maxN, (int32_t)clusters[ihit].row, clAlpha, (int32_t)clusters[ihit].sector, xx, yy, zz, nMissed)); // CADEBUG(if ((uint32_t)merger->GetTrackingChain()->mIOPtrs.nMCLabelsTPC > clusters[ihit].num)) // CADEBUG({printf(" MC:"); for (int32_t i = 0; i < 3; i++) {int32_t mcId = merger->GetTrackingChain()->mIOPtrs.mcLabelsTPC[clusters[ihit].num].fClusterID[i].fMCID; if (mcId >= 0) printf(" %d", mcId); } } printf("\n")); // clang-format on @@ -186,14 +186,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ bool changeDirection = (cluster.leg - lastLeg) & 1; // clang-format off CADEBUG(if (changeDirection) printf("\t\tChange direction\n")); - CADEBUG(printf("\tLeg %3d Slice %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.leg, (int32_t)cluster.slice, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? 
" " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); + CADEBUG(printf("\tLeg %3d Sector %2d %4sTrack Alpha %8.3f %s, X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) %28s --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", (int32_t)cluster.leg, (int32_t)cluster.sector, "", prop.GetAlpha(), (CAMath::Abs(prop.GetAlpha() - clAlpha) < 0.01 ? " " : " R!"), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), "", sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10])); // clang-format on if (allowModification && changeDirection && !noFollowCircle && !noFollowCircle2) { bool tryFollow = lastRow != 255; if (tryFollow) { const GPUTPCGMTrackParam backup = *this; const float backupAlpha = prop.GetAlpha(); - if (FollowCircle<0>(merger, prop, lastSlice, lastRow, iTrk, clAlpha, xx, yy, cluster.slice, cluster.row, inFlyDirection)) { + if (FollowCircle<0>(merger, prop, lastSector, lastRow, iTrk, clAlpha, xx, yy, cluster.sector, cluster.row, inFlyDirection)) { CADEBUG(printf("Error during follow circle, resetting track!\n")); *this = backup; prop.SetTrack(this, backupAlpha); @@ -202,10 +202,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } if (tryFollow) { - MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, false, cluster.slice); + MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, false, cluster.sector); lastUpdateX = mX; lastLeg = cluster.leg; - lastSlice = cluster.slice; + lastSector = cluster.sector; lastRow = 255; N++; resetT0 = initResetT0(); @@ -217,7 +217,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { bool dodEdx = param.par.dodEdx && param.dodEdxDownscaled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == 
nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; - dodEdx = AttachClustersPropagate(merger, cluster.slice, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection, param); } @@ -234,14 +234,14 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ err = prop.PropagateToXAlpha(xx, clAlpha, inFlyDirection); } } - if (lastRow == 255 || CAMath::Abs((int32_t)lastRow - (int32_t)cluster.row) > 5 || lastSlice != cluster.slice || (param.rec.tpc.trackFitRejectMode < 0 && -nMissed <= param.rec.tpc.trackFitRejectMode)) { + if (lastRow == 255 || CAMath::Abs((int32_t)lastRow - (int32_t)cluster.row) > 5 || lastSector != cluster.sector || (param.rec.tpc.trackFitRejectMode < 0 && -nMissed <= param.rec.tpc.trackFitRejectMode)) { goodRows = 0; } else { goodRows++; } if (err == 0) { lastRow = cluster.row; - lastSlice = cluster.slice; + lastSector = cluster.sector; } // clang-format off CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f - Err %d", "", prop.GetAlpha(), mX, mP[0], mP[1], mP[4], prop.GetQPt0(), mP[2], prop.GetSinPhi0(), mP[0] - yy, mP[1] - zz, sqrtf(mC[0]), sqrtf(mC[2]), sqrtf(mC[5]), sqrtf(mC[14]), mC[10], err)); @@ -265,9 +265,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (CAMath::Abs(yy - mP[0]) > CAMath::Abs(yy - mirrordY)) { CADEBUG(printf(" - Mirroring!!!")); if (allowModification) { - AttachClustersMirror<0>(merger, cluster.slice, cluster.row, iTrk, yy, prop); // TODO: Never true, will always call FollowCircle above, 
really??? + AttachClustersMirror<0>(merger, cluster.sector, cluster.row, iTrk, yy, prop); // TODO: Never true, will always call FollowCircle above, really??? } - MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, true, cluster.slice); + MirrorTo(prop, yy, zz, inFlyDirection, param, cluster.row, clusterState, true, cluster.sector); noFollowCircle = false; lastUpdateX = mX; @@ -285,7 +285,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ float uncorrectedY = -1e6f; if (allowModification) { - uncorrectedY = AttachClusters(merger, cluster.slice, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop); + uncorrectedY = AttachClusters(merger, cluster.sector, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop); } const int32_t err2 = mNDF > 0 && CAMath::Abs(prop.GetSinPhi0()) >= maxSinForUpdate; @@ -335,7 +335,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ const float invCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? 
(1.f / merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; float invAvgCharge = (sumInvSqrtCharge += invSqrtCharge) / ++nAvgCharge; invAvgCharge *= invAvgCharge; - retVal = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, cluster.slice, time, invAvgCharge, invCharge GPUCA_DEBUG_STREAMER_CHECK(, &debugVals)); + retVal = prop.Update(yy, zz, cluster.row, param, clusterState, rejectChi2, &interpolation.hit[ihit], refit, cluster.sector, time, invAvgCharge, invCharge GPUCA_DEBUG_STREAMER_CHECK(, &debugVals)); } GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamUpdateTrack, iTrk)) { merger->DebugStreamerUpdate(iTrk, ihit, xx, yy, zz, cluster, merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num], *this, prop, interpolation.hit[ihit], rejectChi2, refit, retVal, sumInvSqrtCharge / nAvgCharge * sumInvSqrtCharge / nAvgCharge, yy, zz, clusterState, debugVals.retVal, debugVals.err2Y, debugVals.err2Z); @@ -384,7 +384,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ pad /= clusterCount; relTime /= clusterCount; relTime = relTime - CAMath::Round(relTime); - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.slice, mP[2], mP[3], param, merger->GetConstantMem()->calibObjects, zz, pad, relTime); + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], param, merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } else if (retVal >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track if (allowModification) { @@ -398,7 +398,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ break; // bad chi2 for the whole track, stop the fit } } - if (((nWays - iWay) & 1) && (clusters[0].slice < 18) == (clusters[maxN - 1].slice < 18)) { + if (((nWays - iWay) & 1) && (clusters[0].sector < 18) == (clusters[maxN - 
1].sector < 18)) { ShiftZ2(clusters, clustersXYZ, merger, maxN); } } @@ -486,9 +486,9 @@ GPUd() void GPUTPCGMTrackParam::MirrorTo(GPUTPCGMPropagator& GPUrestrict() prop, GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* GPUrestrict() clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* GPUrestrict() merger, GPUTPCGMPropagator& GPUrestrict() prop, float& GPUrestrict() xx, float& GPUrestrict() yy, float& GPUrestrict() zz, int32_t maxN, float clAlpha, uint8_t& GPUrestrict() clusterState, bool rejectChi2) { - if (ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].slice == clusters[ihit + wayDirection].slice && clusters[ihit].leg == clusters[ihit + wayDirection].leg) { + if (ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector && clusters[ihit].leg == clusters[ihit + wayDirection].leg) { float maxDistY, maxDistZ; - prop.GetErr2(maxDistY, maxDistZ, merger->Param(), zz, clusters[ihit].row, 0, clusters[ihit].slice, -1.f, 0.f, 0.f); // TODO: Use correct time, avgCharge + prop.GetErr2(maxDistY, maxDistZ, merger->Param(), zz, clusters[ihit].row, 0, clusters[ihit].sector, -1.f, 0.f, 0.f); // TODO: Use correct time, avgCharge maxDistY = (maxDistY + mC[0]) * 20.f; maxDistZ = (maxDistZ + mC[2]) * 20.f; int32_t noReject = 0; // Cannot reject if simple estimation of y/z fails (extremely unlike case) @@ -505,7 +505,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t while (true) { float clx, cly, clz, clamp; if (merger->Param().par.earlyTpcTransform) { - const float zOffset = (clusters[ihit].slice < 18) == (clusters[0].slice < 18) ? mTZOffset : -mTZOffset; + const float zOffset = (clusters[ihit].sector < 18) == (clusters[0].sector < 18) ? 
mTZOffset : -mTZOffset; clx = clustersXYZ[ihit].x; cly = clustersXYZ[ihit].y; clz = clustersXYZ[ihit].z - zOffset; @@ -513,7 +513,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t } else { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[clusters[ihit].num]; clamp = cl.qTot; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].slice, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(clusters[ihit].sector, clusters[ihit].row, cl.getPad(), cl.getTime(), clx, cly, clz, mTZOffset); } float dy = cly - projY; float dz = clz - projZ; @@ -530,7 +530,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t clusterState |= clusters[ihit].state; count += clamp; } - if (!(ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].slice == clusters[ihit + wayDirection].slice && clusters[ihit].leg == clusters[ihit + wayDirection].leg)) { + if (!(ihit + wayDirection >= 0 && ihit + wayDirection < maxN && clusters[ihit].row == clusters[ihit + wayDirection].row && clusters[ihit].sector == clusters[ihit + wayDirection].sector && clusters[ihit].leg == clusters[ihit + wayDirection].leg)) { break; } ihit += wayDirection; @@ -546,7 +546,7 @@ GPUd() int32_t GPUTPCGMTrackParam::MergeDoubleRowClusters(int32_t& ihit, int32_t return 0; } -GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop) +GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop) { float Y, Z; if (Merger->Param().par.earlyTpcTransform) { @@ -554,21 +554,21 @@ GPUd() 
float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric Z = mP[1]; } else { float X = 0; - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(slice, iRow, mP[0], mP[1], X); + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(sector, iRow, mP[0], mP[1], X); if (prop.GetPropagatedYZ(X, Y, Z)) { Y = mP[0]; Z = mP[1]; } } - return AttachClusters(Merger, slice, iRow, iTrack, goodLeg, Y, Z); + return AttachClusters(Merger, sector, iRow, iTrack, goodLeg, Y, Z); } -GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z) +GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z) { if (Merger->Param().rec.tpc.disableRefitAttachment & 1) { return -1e6f; } - const GPUTPCTracker& GPUrestrict() tracker = *(Merger->GetConstantMem()->tpcTrackers + slice); + const GPUTPCTracker& GPUrestrict() tracker = *(Merger -> GetConstantMem()->tpcTrackers + sector); const GPUTPCRow& GPUrestrict() row = tracker.Row(iRow); #ifndef GPUCA_TEXTURE_FETCH_CONSTRUCTOR GPUglobalref() const cahit2* hits = tracker.HitData(row); @@ -578,7 +578,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (slice >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(slice, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (sector >= 18)) ? 
-mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -586,7 +586,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric int32_t bin, ny, nz; float err2Y, err2Z; - Merger->Param().GetClusterErrors2(slice, iRow, Z, mP[2], mP[3], -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Use correct time/avgCharge + Merger->Param().GetClusterErrors2(sector, iRow, Z, mP[2], mP[3], -1.f, 0.f, 0.f, err2Y, err2Z); // TODO: Use correct time/avgCharge const float sy2 = CAMath::Min(Merger->Param().rec.tpc.tubeMaxSize2, Merger->Param().rec.tpc.tubeChi2 * (err2Y + CAMath::Abs(mC[0]))); // Cov can be bogus when following circle const float sz2 = CAMath::Min(Merger->Param().rec.tpc.tubeMaxSize2, Merger->Param().rec.tpc.tubeChi2 * (err2Z + CAMath::Abs(mC[2]))); // In that case we should provide the track error externally const float tubeY = CAMath::Sqrt(sy2); @@ -598,7 +598,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric uncorrectedY = Y; uncorrectedZ = Z; } else { - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(slice, iRow, Y, Z, uncorrectedY, uncorrectedZ); + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, Y, Z, uncorrectedY, uncorrectedZ); } if (CAMath::Abs(uncorrectedY) > row.getTPCMaxY()) { @@ -640,7 +640,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return uncorrectedY; } -GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& 
GPUrestrict() prop, bool inFlyDirection, float maxSinPhi, bool dodEdx) +GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& GPUrestrict() prop, bool inFlyDirection, float maxSinPhi, bool dodEdx) { static constexpr float kSectAngle = 2 * M_PI / 18.f; if (Merger->Param().rec.tpc.disableRefitAttachment & 2) { @@ -664,14 +664,14 @@ GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GP } if (dodEdx && iRow + step == toRow) { float yUncorrected, zUncorrected; - Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(slice, iRow, mP[0], mP[1], yUncorrected, zUncorrected); - uint32_t pad = CAMath::Float2UIntRn(Merger->Param().tpcGeometry.LinearY2Pad(slice, iRow, yUncorrected)); - if (pad >= Merger->Param().tpcGeometry.NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(slice, iRow, pad))) { + Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, mP[0], mP[1], yUncorrected, zUncorrected); + uint32_t pad = CAMath::Float2UIntRn(Merger->Param().tpcGeometry.LinearY2Pad(sector, iRow, yUncorrected)); + if (pad >= Merger->Param().tpcGeometry.NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(sector, iRow, pad))) { dodEdx = false; } } CADEBUG(printf("Attaching in row %d\n", iRow)); - AttachClusters(Merger, slice, iRow, iTrack, goodLeg, prop); + AttachClusters(Merger, sector, iRow, iTrack, goodLeg, prop); } return dodEdx; } @@ -696,7 +696,7 @@ GPUdii() void GPUTPCGMTrackParam::StoreOuter(gputpcgmmergertypes::GPUTPCOuterPar outerParam->alpha = prop.GetAlpha(); } -GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* 
GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSlice, int32_t toRow, bool inFlyDirection, float alpha) +GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha) { uint32_t nLoopData = CAMath::AtomicAdd(&Merger->Memory()->nLoopData, 1u); if (nLoopData >= Merger->NMaxTracks()) { @@ -711,9 +711,9 @@ GPUdic(0, 1) void GPUTPCGMTrackParam::StoreAttachMirror(const GPUTPCGMMerger* GP data.toAlpha = toAlpha; data.toY = toY; data.toX = toX; - data.slice = slice; + data.sector = sector; data.row = iRow; - data.toSlice = toSlice; + data.toSector = toSector; data.toRow = toRow; data.inFlyDirection = inFlyDirection; Merger->LoopData()[nLoopData] = data; @@ -733,28 +733,28 @@ GPUdii() void GPUTPCGMTrackParam::RefitLoop(const GPUTPCGMMerger* GPUrestrict() GPUTPCGMLoopData& data = Merger->LoopData()[loopIdx]; prop.SetTrack(&data.param, data.alpha); - if (data.toSlice == -1) { - data.param.AttachClustersMirror<1>(Merger, data.slice, data.row, data.track, data.toY, prop, true); + if (data.toSector == -1) { + data.param.AttachClustersMirror<1>(Merger, data.sector, data.row, data.track, data.toY, prop, true); } else { - data.param.FollowCircle<1>(Merger, prop, data.slice, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSlice, data.toRow, data.inFlyDirection, true); + data.param.FollowCircle<1>(Merger, prop, data.sector, data.row, data.track, data.toAlpha, data.toX, data.toY, data.toSector, data.toRow, data.inFlyDirection, true); } } template -GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSlice, int32_t toRow, bool inFlyDirection, bool 
phase2) +GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& GPUrestrict() prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection, bool phase2) { static constexpr float kSectAngle = 2 * M_PI / 18.f; if (Merger->Param().rec.tpc.disableRefitAttachment & 4) { return 1; } if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { - StoreAttachMirror(Merger, slice, iRow, iTrack, toAlpha, toY, toX, toSlice, toRow, inFlyDirection, prop.GetAlpha()); + StoreAttachMirror(Merger, sector, iRow, iTrack, toAlpha, toY, toX, toSector, toRow, inFlyDirection, prop.GetAlpha()); return 1; } const GPUParam& GPUrestrict() param = Merger->Param(); bool right; float dAlpha = toAlpha - prop.GetAlpha(); - int32_t sliceSide = slice >= (GPUCA_NSLICES / 2) ? (GPUCA_NSLICES / 2) : 0; + int32_t sectorSide = sector >= (GPUCA_NSECTORS / 2) ? (GPUCA_NSECTORS / 2) : 0; if (CAMath::Abs(dAlpha) > 0.001f) { right = CAMath::Abs(dAlpha) < CAMath::Pi() ? (dAlpha > 0) : (dAlpha < 0); } else { @@ -764,16 +764,16 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr int32_t targetRow = up ? (GPUCA_ROW_COUNT - 1) : 0; float lrFactor = mP[2] < 0 ? -1.f : 1.f; // !(right ^ down) // TODO: shouldn't it be "right ? 1.f : -1.f", but that gives worse results... 
// clang-format off - CADEBUG(printf("CIRCLE Track %d: Slice %d Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f - Next hit: Slice %d Alpha %f X %f Y %f - Right %d Up %d dAlpha %f lrFactor %f\n", iTrack, slice, prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3], toSlice, toAlpha, toX, toY, (int32_t)right, (int32_t)up, dAlpha, lrFactor)); + CADEBUG(printf("CIRCLE Track %d: Sector %d Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f - Next hit: Sector %d Alpha %f X %f Y %f - Right %d Up %d dAlpha %f lrFactor %f\n", iTrack, sector, prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3], toSector, toAlpha, toX, toY, (int32_t)right, (int32_t)up, dAlpha, lrFactor)); // clang-format on - AttachClustersPropagate(Merger, slice, iRow, targetRow, iTrack, false, prop, inFlyDirection, 0.7f); + AttachClustersPropagate(Merger, sector, iRow, targetRow, iTrack, false, prop, inFlyDirection, 0.7f); if (prop.RotateToAlpha(prop.GetAlpha() + (CAMath::Pi() / 2.f) * lrFactor)) { return 1; } CADEBUG(printf("\tRotated: X %f Y %f Z %f SinPhi %f (Alpha %f / %f)\n", mP[0], mX, mP[1], mP[2], prop.GetAlpha(), prop.GetAlpha() + CAMath::Pi() / 2.f)); - while (slice != toSlice || FollowCircleChk(lrFactor, toY, toX, up, right)) { - while ((slice != toSlice) ? (CAMath::Abs(mX) <= CAMath::Abs(mP[0]) * CAMath::Tan(kSectAngle / 2.f)) : FollowCircleChk(lrFactor, toY, toX, up, right)) { + while (sector != toSector || FollowCircleChk(lrFactor, toY, toX, up, right)) { + while ((sector != toSector) ? 
(CAMath::Abs(mX) <= CAMath::Abs(mP[0]) * CAMath::Tan(kSectAngle / 2.f)) : FollowCircleChk(lrFactor, toY, toX, up, right)) { int32_t err = prop.PropagateToXAlpha(mX + 1.f, prop.GetAlpha(), inFlyDirection); if (err) { CADEBUG(printf("\t\tpropagation error (%d)\n", err)); @@ -785,22 +785,22 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr float rowX = Merger->Param().tpcGeometry.Row2X(j); if (CAMath::Abs(rowX - (-mP[0] * lrFactor)) < 1.5f) { CADEBUG(printf("\t\tAttempt row %d (Y %f Z %f)\n", j, mX * lrFactor, mP[1])); - AttachClusters(Merger, slice, j, iTrack, false, mX * lrFactor, mP[1]); + AttachClusters(Merger, sector, j, iTrack, false, mX * lrFactor, mP[1]); } } } - if (slice != toSlice) { + if (sector != toSector) { if (right) { - if (++slice >= sliceSide + 18) { - slice -= 18; + if (++sector >= sectorSide + 18) { + sector -= 18; } } else { - if (--slice < sliceSide) { - slice += 18; + if (--sector < sectorSide) { + sector += 18; } } - CADEBUG(printf("\tRotating to slice %d\n", slice)); - if (prop.RotateToAlpha(param.Alpha(slice) + (CAMath::Pi() / 2.f) * lrFactor)) { + CADEBUG(printf("\tRotating to sector %d\n", sector)); + if (prop.RotateToAlpha(param.Alpha(sector) + (CAMath::Pi() / 2.f) * lrFactor)) { CADEBUG(printf("\t\trotation error\n")); prop.RotateToAlpha(prop.GetAlpha() - (CAMath::Pi() / 2.f) * lrFactor); return 1; @@ -835,7 +835,7 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr } } prop.PropagateToXAlpha(Merger->Param().tpcGeometry.Row2X(iRow) + dx, prop.GetAlpha(), inFlyDirection); - AttachClustersPropagate(Merger, slice, iRow, toRow, iTrack, false, prop, inFlyDirection); + AttachClustersPropagate(Merger, sector, iRow, toRow, iTrack, false, prop, inFlyDirection); } if (prop.PropagateToXAlpha(toX, prop.GetAlpha(), inFlyDirection)) { mX = toX; @@ -845,7 +845,7 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr } template -GPUdni() void 
GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& GPUrestrict() prop, bool phase2) +GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& GPUrestrict() prop, bool phase2) { static constexpr float kSectAngle = 2 * M_PI / 18.f; @@ -853,7 +853,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU return; } if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { - StoreAttachMirror(Merger, slice, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); + StoreAttachMirror(Merger, sector, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); return; } // Note that the coordinate system is rotated by 90 degree swapping X and Y! @@ -908,7 +908,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU float rowX = mX + Merger->Param().tpcGeometry.Row2X(j) - myRowX; if (CAMath::Abs(rowX - paramX) < 1.5f) { // printf("Attempt row %d\n", j); - AttachClusters(Merger, slice, j, iTrack, false, mP[2] > 0 ? X : -X, Z); + AttachClusters(Merger, sector, j, iTrack, false, mP[2] > 0 ? 
X : -X, Z); } } } @@ -933,10 +933,10 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, xInner = merger->Param().tpcGeometry.Row2X(clusters[N - 1].row); xOuter = merger->Param().tpcGeometry.Row2X(clusters[0].row); } - ShiftZ(merger, clusters[0].slice, tzInner, tzOuter, xInner, xOuter); + ShiftZ(merger, clusters[0].sector, tzInner, tzOuter, xInner, xOuter); } -GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t slice, float tz1, float tz2, float x1, float x2) +GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merger, int32_t sector, float tz1, float tz2, float x1, float x2) { if (!merger->Param().par.continuousTracking) { return; @@ -989,9 +989,9 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge baset = tz2; basex = x2; } - float refZ = ((slice < GPUCA_NSLICES / 2) ? merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; + float refZ = ((sector < GPUCA_NSECTORS / 2) ? 
merger->Param().rec.tpc.defaultZOffsetOverR : -merger->Param().rec.tpc.defaultZOffsetOverR) * basex; float basez; - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(slice, baset, basez, mTZOffset); + merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->TransformIdealZ(sector, baset, basez, mTZOffset); deltaZ = basez - refZ; } } @@ -1016,11 +1016,11 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge mTZOffset += deltaZ; mP[1] -= deltaZ; } else { - float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(slice, deltaZ); + float deltaT = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaZtoDeltaTimeInTimeFrame(sector, deltaZ); mTZOffset += deltaT; mP[1] -= deltaZ; const float maxT = CAMath::Min(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getT0(); - const float minT = CAMath::Max(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(slice); + const float minT = CAMath::Max(tz1, tz2) - merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->getMaxDriftTime(sector); // printf("T Check: Clusters %f %f, min %f max %f vtx %f\n", tz1, tz2, minT, maxT, mTZOffset); deltaT = 0.f; if (mTZOffset < minT) { @@ -1030,7 +1030,7 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ(const GPUTPCGMMerger* GPUrestrict() merge deltaT = maxT - mTZOffset; } if (deltaT != 0.f) { - deltaZ = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(slice, deltaT); + deltaZ = merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(sector, deltaT); // printf("Moving clusters to TPC Range: QPt %f, New mTZOffset %f, t1 %f, t2 %f, Shift %f in Z: %f to %f --> %f to %f in T\n", mP[4], mTZOffset + deltaT, tz1, tz2, 
deltaZ, tz2 - mTZOffset, tz1 - mTZOffset, tz2 - mTZOffset - deltaT, tz1 - mTZOffset - deltaT); mTZOffset += deltaT; mP[1] -= deltaZ; @@ -1126,7 +1126,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr if (track.OK()) { int32_t ind = track.FirstClusterRef(); const GPUParam& GPUrestrict() param = merger->Param(); - float alphaa = param.Alpha(merger->Clusters()[ind].slice); + float alphaa = param.Alpha(merger->Clusters()[ind].sector); float xx, yy, zz; if (merger->Param().par.earlyTpcTransform) { xx = merger->ClustersXYZ()[ind].x; @@ -1134,7 +1134,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr zz = merger->ClustersXYZ()[ind].z - track.Param().GetTZOffset(); } else { const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[merger->Clusters()[ind].num]; - merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].slice, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); + merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].sector, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); } float sinA, cosA; CAMath::SinCos(alphaa - track.Alpha(), sinA, cosA); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index a2d7dcf2b3e3d..1b2468b51000e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -43,7 +43,7 @@ class GPUTPCGMPropagator; * @class GPUTPCGMTrackParam * * GPUTPCGMTrackParam class describes the track parametrisation - * which is used by the GPUTPCGMTracker slice tracker. + * which is used by the GPUTPCGMTracker sector tracker. 
* */ class GPUTPCGMTrackParam @@ -148,15 +148,15 @@ class GPUTPCGMTrackParam GPUd() void MirrorTo(GPUTPCGMPropagator& prop, float toY, float toZ, bool inFlyDirection, const GPUParam& param, uint8_t row, uint8_t clusterState, bool mirrorParameters, int8_t sector); GPUd() int32_t MergeDoubleRowClusters(int32_t& ihit, int32_t wayDirection, GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* merger, GPUTPCGMPropagator& prop, float& xx, float& yy, float& zz, int32_t maxN, float clAlpha, uint8_t& clusterState, bool rejectChi2); - GPUd() bool AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop, bool inFlyDirection, float maxSinPhi = GPUCA_MAX_SIN_PHI, bool checkdEdx = false); - GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use - GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); + GPUd() bool AttachClustersPropagate(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t lastRow, int32_t toRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop, bool inFlyDirection, float maxSinPhi = GPUCA_MAX_SIN_PHI, bool checkdEdx = false); + GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, GPUTPCGMPropagator& prop); // Returns uncorrectedY for later use + GPUd() float AttachClusters(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, bool goodLeg, float Y, float Z); // We force to compile these twice, for RefitLoop and for Fit, for better optimization template - GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t 
slice, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop, bool phase2 = false); + GPUd() void AttachClustersMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toY, GPUTPCGMPropagator& prop, bool phase2 = false); template - GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSlice, int32_t toRow, bool inFlyDirection, bool phase2 = false); - GPUd() void StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t slice, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSlice, int32_t toRow, bool inFlyDirection, float alpha); + GPUd() int32_t FollowCircle(const GPUTPCGMMerger* GPUrestrict() Merger, GPUTPCGMPropagator& prop, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toX, float toY, int32_t toSector, int32_t toRow, bool inFlyDirection, bool phase2 = false); + GPUd() void StoreAttachMirror(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t sector, int32_t iRow, int32_t iTrack, float toAlpha, float toY, float toX, int32_t toSector, int32_t toRow, bool inFlyDirection, float alpha); GPUd() void StoreOuter(gputpcgmmergertypes::GPUTPCOuterParam* outerParam, const GPUTPCGMPropagator& prop, int32_t phase); GPUd() static void RefitLoop(const GPUTPCGMMerger* GPUrestrict() Merger, int32_t loopIdx); @@ -187,7 +187,7 @@ class GPUTPCGMTrackParam } GPUd() void Rotate(float alpha); - GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t slice, float tzInner, float tzOuter, float x1, float x2); + GPUd() void ShiftZ(const GPUTPCGMMerger* merger, int32_t sector, float tzInner, float tzOuter, float x1, float x2); GPUd() void ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, GPUTPCGMMergedTrackHitXYZ* clustersXYZ, const GPUTPCGMMerger* merger, int32_t N); GPUd() static float Reciprocal(float x) { return 1.f / x; } @@ -235,9 
+235,9 @@ struct GPUTPCGMLoopData { float toX; float alpha; float toAlpha; - uint8_t slice; + uint8_t sector; uint8_t row; - int8_t toSlice; + int8_t toSector; uint8_t toRow; uint8_t inFlyDirection; }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx index 78015b347a8c6..78eea63edecdd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx @@ -62,7 +62,7 @@ void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPC continue; } - AliTPCtrackerRow& row = tpctracker->GetRow(cls.slice % 18, cls.row); + AliTPCtrackerRow& row = tpctracker->GetRow(cls.sector % 18, cls.row); uint32_t clIndexOffline = 0; AliTPCclusterMI* clOffline = row.FindNearest2(cls.y, cls.z, 0.01f, 0.01f, clIndexOffline); if (!clOffline) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index be057172a968f..a21593b7ba9e9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -26,7 +26,7 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCGlobalDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int8_t) { - for (int32_t i = iBlock * nThreads + iThread; i < GPUCA_NSLICES * merger.NMaxSingleSliceTracks(); i++) { + for (int32_t i = iBlock * nThreads + iThread; i < GPUCA_NSECTORS * merger.NMaxSingleSectorTracks(); i++) { merger.TrackIDs()[i] = -1; } } @@ -37,12 +37,12 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread= 0 && tmp[j] != j) { - auto getTrackIDIndex = [&merger](const int32_t iSlice, const int32_t iTrack) { - const int32_t kEnd = merger.NMaxSingleSliceTracks(); + auto getTrackIDIndex = [&merger](const int32_t iSector, const int32_t iTrack) { + const int32_t 
kEnd = merger.NMaxSingleSectorTracks(); for (int32_t k = 0; k < kEnd; k++) { - if (merger.TrackIDs()[iSlice * merger.NMaxSingleSliceTracks() + k] == iTrack) { + if (merger.TrackIDs()[iSector * merger.NMaxSingleSectorTracks() + k] == iTrack) { return k; } } @@ -70,23 +70,23 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::ThreadUniform(-1.5, 1.5); double theta = 2 * TMath::ATan(1. / TMath::Exp(eta)); double lambda = theta - TMath::Pi() / 2; // double theta = gRandom->Uniform(-60,60)*TMath::Pi()/180.; double pt = .1 * std::pow(10, gRandom->Uniform(0, 2.2)); double q = 1.; - int32_t iSlice = GetSlice(phi); - phi = phi - GetSliceAngle(iSlice); + int32_t iSector = GetSector(phi); + phi = phi - GetSectorAngle(iSector); // std::cout<<"phi = "<) { cl = &trkX.getCluster(mPtrackHitReferences, i, *mPclusterNative, sector, row); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h b/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h similarity index 98% rename from GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h rename to GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h index 0eabd82e59a02..7f30d0b568517 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCBaseTrackParam.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h @@ -27,7 +27,7 @@ class GPUTPCTrackParam; * @class GPUTPCBaseTrackParam * * GPUTPCBaseTrackParam class contains track parameters - * used in output of the GPUTPCTracker slice tracker. + * used in output of the GPUTPCTracker sector tracker. 
* This class is used for transfer between tracker and merger and does not contain the covariance matrice */ struct GPUTPCBaseTrackParam { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h b/GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCClusterData.h rename to GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.cxx similarity index 78% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.cxx index bada60b9cec80..ae71bcdb541ca 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.cxx @@ -23,15 +23,15 @@ GPUdii() void GPUTPCCreateOccupancyMap::Thread(i const GPUTrackingInOutPointers& GPUrestrict() ioPtrs = processors.ioPtrs; const o2::tpc::ClusterNativeAccess* GPUrestrict() clusters = ioPtrs.clustersNative; GPUParam& GPUrestrict() param = processors.param; - const int32_t iSliceRow = iBlock * nThreads + iThread; - if (iSliceRow >= GPUCA_ROW_COUNT * GPUCA_NSLICES) { + const int32_t iSectorRow = iBlock * nThreads + iThread; + if (iSectorRow >= GPUCA_ROW_COUNT * GPUCA_NSECTORS) { return; } - const uint32_t iSlice = iSliceRow / GPUCA_ROW_COUNT; - const uint32_t iRow = iSliceRow % GPUCA_ROW_COUNT; - for (uint32_t i = 0; i < clusters->nClusters[iSlice][iRow]; i++) { - const uint32_t bin = clusters->clusters[iSlice][iRow][i].getTime() / param.rec.tpc.occupancyMapTimeBins; - map[bin].bin[iSlice][iRow]++; + const uint32_t iSector = iSectorRow / GPUCA_ROW_COUNT; + const uint32_t iRow = iSectorRow % GPUCA_ROW_COUNT; + for (uint32_t i = 0; i < clusters->nClusters[iSector][iRow]; i++) { + const uint32_t bin = clusters->clusters[iSector][iRow][i].getTime() / param.rec.tpc.occupancyMapTimeBins; + 
map[bin].bin[iSector][iRow]++; } } @@ -47,8 +47,8 @@ GPUdii() void GPUTPCCreateOccupancyMap::Thread(i int32_t binmax = CAMath::Min(GPUTPCClusterOccupancyMapBin::getNBins(param), bin + param.rec.tpc.occupancyMapTimeBinsAverage + 1); uint32_t sum = 0; for (int32_t i = binmin; i < binmax; i++) { - for (int32_t iSliceRow = 0; iSliceRow < GPUCA_NSLICES * GPUCA_ROW_COUNT; iSliceRow++) { - sum += (&map[i].bin[0][0])[iSliceRow]; + for (int32_t iSectorRow = 0; iSectorRow < GPUCA_NSECTORS * GPUCA_ROW_COUNT; iSectorRow++) { + sum += (&map[i].bin[0][0])[iSectorRow]; } } sum /= binmax - binmin; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.h similarity index 95% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h rename to GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.h index 91f5816f69df2..de8eb8622adb1 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateOccupancyMap.h @@ -29,7 +29,7 @@ class GPUTPCCreateOccupancyMap : public GPUKernelTemplate enum K { defaultKernel = 0, fill = 0, fold = 1 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... 
args); }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.cxx similarity index 68% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.cxx index bd33927408a26..641326a8a2caa 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.cxx @@ -9,17 +9,17 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCCreateSliceData.cxx +/// \file GPUTPCCreateTrackingData.cxx /// \author David Rohr -#include "GPUTPCCreateSliceData.h" +#include "GPUTPCCreateTrackingData.h" #include "GPUTPCTracker.h" #include "GPUCommonMath.h" using namespace o2::gpu; template <> -GPUdii() void GPUTPCCreateSliceData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCCreateTrackingData::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& s, processorType& GPUrestrict() tracker) { - tracker.Data().InitFromClusterData(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem(), tracker.ISlice(), s.tmp); + tracker.Data().InitFromClusterData(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem(), tracker.ISector(), s.tmp); } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h similarity index 83% rename from GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h rename to GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h index 9065b220bb44d..99bfc2d8fa804 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCCreateSliceData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h @@ -9,11 +9,11 @@ // granted to it by virtue of its 
status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCCreateSliceData.h +/// \file GPUTPCCreateTrackingData.h /// \author David Rohr -#ifndef GPUTPCCREATESLICEDATA_H -#define GPUTPCCREATESLICEDATA_H +#ifndef GPUTPCCREATESECTORDATA_H +#define GPUTPCCREATESECTORDATA_H #include "GPUTPCDef.h" #include "GPUTPCHitId.h" @@ -26,7 +26,7 @@ namespace gpu { class GPUTPCTracker; -class GPUTPCCreateSliceData : public GPUKernelTemplate +class GPUTPCCreateTrackingData : public GPUKernelTemplate { public: struct GPUSharedMemory { @@ -34,7 +34,7 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -45,4 +45,4 @@ class GPUTPCCreateSliceData : public GPUKernelTemplate } // namespace gpu } // namespace o2 -#endif // GPUTPCCREATESLICEDATA_H +#endif // GPUTPCCREATESECTORDATA_H diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCDef.h b/GPU/GPUTracking/SectorTracker/GPUTPCDef.h similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCDef.h rename to GPU/GPUTracking/SectorTracker/GPUTPCDef.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCDefinitions.h b/GPU/GPUTracking/SectorTracker/GPUTPCDefinitions.h similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCDefinitions.h rename to GPU/GPUTracking/SectorTracker/GPUTPCDefinitions.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx similarity index 80% rename from GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx index 
1a5e99f0f52ca..3ffead1c5902b 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.cxx @@ -22,7 +22,7 @@ using namespace o2::gpu; -GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) +GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& GPUrestrict() sectorSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction) { /*for (int32_t j = 0;j < Tracks()[j].NHits();j++) { @@ -37,7 +37,7 @@ GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUT tParam.SetCov(5, 0.001f); tParam.SetCov(9, 0.001f); tParam.SetCov(14, 0.05f); - tParam.SetParam(sliceSource.Tracks()[iTrack].Param()); + tParam.SetParam(sectorSource.Tracks()[iTrack].Param()); // GPUInfo("Parameters X %f Y %f Z %f SinPhi %f DzDs %f QPt %f SignCosPhi %f", tParam.X(), tParam.Y(), tParam.Z(), tParam.SinPhi(), tParam.DzDs(), tParam.QPt(), tParam.SignCosPhi()); if (!tParam.Rotate(angle, GPUCA_MAX_SIN_PHI)) { @@ -73,13 +73,13 @@ GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUT // GPUInfo("%d hits found", nHits); uint32_t hitId = CAMath::AtomicAdd(&tracker.CommonMemory()->nTrackHits, (uint32_t)nHits); if (hitId + nHits > tracker.NMaxTrackHits()) { - tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_HIT_OVERFLOW, tracker.ISlice(), hitId + nHits, tracker.NMaxTrackHits()); + tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_HIT_OVERFLOW, tracker.ISector(), hitId + nHits, tracker.NMaxTrackHits()); CAMath::AtomicExch(&tracker.CommonMemory()->nTrackHits, tracker.NMaxTrackHits()); return 0; } uint32_t trackId = 
CAMath::AtomicAdd(&tracker.CommonMemory()->nTracks, 1u); if (trackId >= tracker.NMaxTracks()) { // >= since will increase by 1 - tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_OVERFLOW, tracker.ISlice(), trackId, tracker.NMaxTracks()); + tracker.raiseError(GPUErrors::ERROR_GLOBAL_TRACKING_TRACK_OVERFLOW, tracker.ISector(), trackId, tracker.NMaxTracks()); CAMath::AtomicExch(&tracker.CommonMemory()->nTracks, tracker.NMaxTracks()); return 0; } @@ -112,13 +112,13 @@ GPUd() int32_t GPUTPCExtrapolationTracking::PerformExtrapolationTrackingRun(GPUT track.SetParam(tParam.GetParam()); track.SetNHits(nHits); track.SetFirstHitID(hitId); - track.SetLocalTrackId((sliceSource.ISlice() << 24) | sliceSource.Tracks()[iTrack].LocalTrackId()); + track.SetLocalTrackId((sectorSource.ISector() << 24) | sectorSource.Tracks()[iTrack].LocalTrackId()); } return (nHits >= tracker.Param().rec.tpc.extrapolationTrackingMinHits); } -GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sliceTarget, bool right) +GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& GPUrestrict() sectorTarget, bool right) { for (int32_t i = iBlock * nThreads + iThread; i < tracker.CommonMemory()->nLocalTracks; i += nThreads * nBlocks) { { @@ -129,11 +129,11 @@ GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nB float Y = (float)tracker.Data().HitDataY(row, tracker.TrackHits()[tmpHit].HitIndex()) * row.HstepY() + row.Grid().YMin(); if (!right && Y < -row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeLower) { // GPUInfo("Track %d, lower row %d, left border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, -row.MaxY()); - 
PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, -1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, -1); } if (right && Y > row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeLower) { // GPUInfo("Track %d, lower row %d, right border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, row.MaxY()); - PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, -1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, -1); } } } @@ -146,11 +146,11 @@ GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nB float Y = (float)tracker.Data().HitDataY(row, tracker.TrackHits()[tmpHit].HitIndex()) * row.HstepY() + row.Grid().YMin(); if (!right && Y < -row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeUpper) { // GPUInfo("Track %d, upper row %d, left border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, -row.MaxY()); - PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, 1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, -tracker.Param().par.dAlpha, 1); } if (right && Y > row.MaxY() * tracker.Param().rec.tpc.extrapolationTrackingYRangeUpper) { // GPUInfo("Track %d, upper row %d, right border (%f of %f)", i, mTrackHits[tmpHit].RowIndex(), Y, row.MaxY()); - PerformExtrapolationTrackingRun(sliceTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, 1); + PerformExtrapolationTrackingRun(sectorTarget, smem, tracker, i, rowIndex, tracker.Param().par.dAlpha, 1); } } } @@ -160,42 +160,42 @@ GPUd() void GPUTPCExtrapolationTracking::PerformExtrapolationTracking(int32_t nB template <> GPUdii() void GPUTPCExtrapolationTracking::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, 
processorType& GPUrestrict() tracker) { - CA_SHARED_CACHE(&smem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); + CA_SHARED_CACHE(&smem.mRows[0], tracker.TrackingDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); if (tracker.NHitsTotal() == 0) { return; } - const int32_t iSlice = tracker.ISlice(); - int32_t sliceLeft = (iSlice + (GPUDataTypes::NSLICES / 2 - 1)) % (GPUDataTypes::NSLICES / 2); - int32_t sliceRight = (iSlice + 1) % (GPUDataTypes::NSLICES / 2); - if (iSlice >= (int32_t)GPUDataTypes::NSLICES / 2) { - sliceLeft += GPUDataTypes::NSLICES / 2; - sliceRight += GPUDataTypes::NSLICES / 2; + const int32_t iSector = tracker.ISector(); + int32_t sectorLeft = (iSector + (GPUDataTypes::NSECTORS / 2 - 1)) % (GPUDataTypes::NSECTORS / 2); + int32_t sectorRight = (iSector + 1) % (GPUDataTypes::NSECTORS / 2); + if (iSector >= (int32_t)GPUDataTypes::NSECTORS / 2) { + sectorLeft += GPUDataTypes::NSECTORS / 2; + sectorRight += GPUDataTypes::NSECTORS / 2; } - PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sliceLeft], smem, tracker, true); - PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sliceRight], smem, tracker, false); + PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sectorLeft], smem, tracker, true); + PerformExtrapolationTracking(nBlocks, nThreads, iBlock, iThread, tracker.GetConstantMem()->tpcTrackers[sectorRight], smem, tracker, false); } -GPUd() int32_t GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceOrder(int32_t iSlice) +GPUd() int32_t GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(int32_t iSector) { - iSlice++; - if (iSlice == GPUDataTypes::NSLICES / 2) { - iSlice = 0; + iSector++; + if (iSector == GPUDataTypes::NSECTORS / 2) { + iSector = 0; } - if (iSlice == GPUDataTypes::NSLICES) { - iSlice = GPUDataTypes::NSLICES / 2; + if (iSector == 
GPUDataTypes::NSECTORS) { + iSector = GPUDataTypes::NSECTORS / 2; } - return iSlice; + return iSector; } -GPUd() void GPUTPCExtrapolationTracking::ExtrapolationTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right) +GPUd() void GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(uint32_t iSector, uint32_t& left, uint32_t& right) { - left = (iSlice + (GPUDataTypes::NSLICES / 2 - 1)) % (GPUDataTypes::NSLICES / 2); - right = (iSlice + 1) % (GPUDataTypes::NSLICES / 2); - if (iSlice >= (int32_t)GPUDataTypes::NSLICES / 2) { - left += GPUDataTypes::NSLICES / 2; - right += GPUDataTypes::NSLICES / 2; + left = (iSector + (GPUDataTypes::NSECTORS / 2 - 1)) % (GPUDataTypes::NSECTORS / 2); + right = (iSector + 1) % (GPUDataTypes::NSECTORS / 2); + if (iSector >= (int32_t)GPUDataTypes::NSECTORS / 2) { + left += GPUDataTypes::NSECTORS / 2; + right += GPUDataTypes::NSECTORS / 2; } } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h similarity index 85% rename from GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h rename to GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h index cd6533a3439ed..593bc172303fe 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCExtrapolationTracking.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h @@ -32,7 +32,7 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -40,19 +40,19 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t 
iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); - GPUd() static int32_t ExtrapolationTrackingSliceOrder(int32_t iSlice); - GPUd() static void ExtrapolationTrackingSliceLeftRight(uint32_t iSlice, uint32_t& left, uint32_t& right); + GPUd() static int32_t ExtrapolationTrackingSectorOrder(int32_t iSector); + GPUd() static void ExtrapolationTrackingSectorLeftRight(uint32_t iSector, uint32_t& left, uint32_t& right); private: - GPUd() static int32_t PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& sliceSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); - GPUd() static void PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sliceTarget, bool right); + GPUd() static int32_t PerformExtrapolationTrackingRun(GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, const GPUTPCTracker& sectorSource, int32_t iTrack, int32_t rowIndex, float angle, int32_t direction); + GPUd() static void PerformExtrapolationTracking(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& smem, GPUTPCTracker& sectorTarget, bool right); }; class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.cxx similarity index 100% rename from 
GPU/GPUTracking/SliceTracker/GPUTPCGrid.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCGrid.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h similarity index 82% rename from GPU/GPUTracking/SliceTracker/GPUTPCGrid.h rename to GPU/GPUTracking/SectorTracker/GPUTPCGrid.h index 1fbb1c5a23c45..df8706d8c65b4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCGrid.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h @@ -37,8 +37,8 @@ class GPUTPCGrid GPUd() int32_t GetBin(float Y, float Z) const; /** - * returns -1 if the row is empty == no hits - */ + * returns -1 if the row is empty == no hits + */ GPUd() int32_t GetBinBounded(float Y, float Z) const; GPUd() void GetBin(float Y, float Z, int32_t* const bY, int32_t* const bZ) const; GPUd() void GetBinArea(float Y, float Z, float dy, float dz, int32_t& bin, int32_t& ny, int32_t& nz) const; @@ -56,15 +56,15 @@ class GPUTPCGrid private: friend class GPUTPCNeighboursFinder; - uint32_t mNy; //* N bins in Y - uint32_t mNz; //* N bins in Z - uint32_t mN; //* total N bins - float mYMin; //* minimal Y value - float mYMax; //* maximal Y value - float mZMin; //* minimal Z value - float mZMax; //* maximal Z value - float mStepYInv; //* inverse bin size in Y - float mStepZInv; //* inverse bin size in Z + uint32_t mNy; //* N bins in Y + uint32_t mNz; //* N bins in Z + uint32_t mN; //* total N bins + float mYMin; //* minimal Y value + float mYMax; //* maximal Y value + float mZMin; //* minimal Z value + float mZMax; //* maximal Z value + float mStepYInv; //* inverse bin size in Y + float mStepZInv; //* inverse bin size in Z }; } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCHit.h b/GPU/GPUTracking/SectorTracker/GPUTPCHit.h similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCHit.h rename to GPU/GPUTracking/SectorTracker/GPUTPCHit.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCHitId.h b/GPU/GPUTracking/SectorTracker/GPUTPCHitId.h 
similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCHitId.h rename to GPU/GPUTracking/SectorTracker/GPUTPCHitId.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h b/GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCMCInfo.h rename to GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h rename to GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h index 1682e18244732..e1a7437f695b4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h @@ -40,7 +40,7 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index 36254243e81b8..ec348b59ce7a5 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.cxx +++ 
b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -15,7 +15,7 @@ #include "GPUTPCHit.h" #include "GPUTPCNeighboursFinder.h" #include "GPUTPCTracker.h" -//#include "GPUCommonMath.h" +// #include "GPUCommonMath.h" #include "GPUDefMacros.h" using namespace o2::gpu; @@ -26,10 +26,10 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh #ifdef GPUCA_GPUCODE for (uint32_t i = iThread; i < sizeof(GPUTPCRow) / sizeof(int32_t); i += nThreads) { - reinterpret_cast(&s.mRow)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock])[i]; + reinterpret_cast(&s.mRow)[i] = reinterpret_cast(&tracker.TrackingDataRows()[iBlock])[i]; if (iBlock >= 2 && iBlock < GPUCA_ROW_COUNT - 2) { - reinterpret_cast(&s.mRowUp)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock + 2])[i]; - reinterpret_cast(&s.mRowDown)[i] = reinterpret_cast(&tracker.SliceDataRows()[iBlock - 2])[i]; + reinterpret_cast(&s.mRowUp)[i] = reinterpret_cast(&tracker.TrackingDataRows()[iBlock + 2])[i]; + reinterpret_cast(&s.mRowDown)[i] = reinterpret_cast(&tracker.TrackingDataRows()[iBlock - 2])[i]; } } GPUbarrier(); diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h similarity index 86% rename from GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h rename to GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 2d71d948ad9e1..cc61eaacf994f 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -34,11 +34,11 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate { public: struct GPUSharedMemory { - int32_t mNHits; // n hits - float mUpDx; // x distance to the next row - float mDnDx; // x distance to the previous row - float mUpTx; // normalized x distance to the next row - float mDnTx; // normalized x distance to the previous row + int32_t mNHits; // n hits + float mUpDx; // x distance to the next row + float mDnDx; // x 
distance to the previous row + float mUpTx; // normalized x distance to the next row + float mDnTx; // normalized x distance to the previous row int32_t mIRow; // row number int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number @@ -51,7 +51,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCRow.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCRow.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCRow.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h similarity index 90% rename from GPU/GPUTracking/SliceTracker/GPUTPCRow.h rename to GPU/GPUTracking/SectorTracker/GPUTPCRow.h index d401311683f28..c7e5b3ebc7217 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCRow.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h @@ -31,7 +31,7 @@ namespace gpu */ class GPUTPCRow { - friend class GPUTPCSliceData; + friend class GPUTPCTrackingData; public: #if !defined(GPUCA_GPUCODE) @@ -61,9 +61,9 @@ class GPUTPCRow friend class GPUTPCNeighboursFinder; friend class GPUTPCStartHitsFinder; - int32_t mNHits; // number of hits - float mX; // X coordinate of the row - float mMaxY; // maximal Y coordinate of the row + int32_t mNHits; // number of hits + float mX; // X coordinate of the row + float mMaxY; // maximal Y coordinate of the row GPUTPCGrid mGrid; // grid of hits // hit packing: @@ -75,7 +75,7 @@ class GPUTPCRow float mHstepZi; // inverse step size int32_t mHitNumberOffset; // index of the first hit in the hit 
array, used as - // offset in GPUTPCSliceData::LinkUp/DownData/HitDataY/... + // offset in GPUTPCTrackingData::LinkUp/DownData/HitDataY/... uint32_t mFirstHitInBinOffset; // offset in Tracker::mRowData to find the FirstHitInBin }; } // namespace gpu diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx similarity index 95% rename from GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx index 9f06b00f30c3f..7981ef5af26d8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.cxx @@ -15,7 +15,7 @@ #include "GPUParam.h" #include "GPUTPCClusterData.h" #include "GPUTPCHit.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUProcessor.h" #include "GPUO2DataTypes.h" #include "GPUCommonMath.h" @@ -86,7 +86,7 @@ GPUdii() void GPUTPCSectorDebugSortKernels::Thread -GPUdii() void GPUTPCSectorDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCSectorDebugSortKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() tracker) { if (iThread || iBlock) { return; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.h similarity index 94% rename from GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.h index 5617f9745311e..520a791b0eb43 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSectorDebugSortKernels.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorDebugSortKernels.h @@ -29,8 +29,8 @@ class GPUTPCSectorDebugSortKernels 
: public GPUKernelTemplate enum K { defaultKernel = 0, hitData = 0, startHits = 1, - sliceTracks = 2 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; } + sectorTracks = 2 }; + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } typedef GPUTPCTracker processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h similarity index 70% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h index 1d958de1ff7a4..5a51f160576eb 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutCluster.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h @@ -9,11 +9,11 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCSliceOutCluster.h +/// \file GPUTPCSectorOutCluster.h /// \author Sergey Gorbunov, David Rohr -#ifndef GPUTPCSLICEOUTCLUSTER_H -#define GPUTPCSLICEOUTCLUSTER_H +#ifndef GPUTPCSECTOROUTCLUSTER_H +#define GPUTPCSECTOROUTCLUSTER_H #include "GPUTPCDef.h" @@ -22,11 +22,11 @@ namespace o2 namespace gpu { /** - * @class GPUTPCSliceOutCluster - * GPUTPCSliceOutCluster class contains clusters which are assigned to slice tracks. - * It is used to send the data from TPC slice trackers to the GlobalMerger + * @class GPUTPCSectorOutCluster + * GPUTPCSectorOutCluster class contains clusters which are assigned to sector tracks. 
+ * It is used to send the data from TPC sector trackers to the GlobalMerger */ -class GPUTPCSliceOutCluster +class GPUTPCSectorOutCluster { public: GPUhd() void Set(uint32_t id, uint8_t row, uint8_t flags, uint16_t amp, float x, float y, float z) @@ -49,13 +49,13 @@ class GPUTPCSliceOutCluster GPUhd() uint8_t GetFlags() const { return mFlags; } private: - uint32_t mId; // Id - uint8_t mRow; // row - uint8_t mFlags; // flags - uint16_t mAmp; // amplitude - float mX; // coordinates - float mY; // coordinates - float mZ; // coordinates + uint32_t mId; // Id + uint8_t mRow; // row + uint8_t mFlags; // flags + uint16_t mAmp; // amplitude + float mX; // coordinates + float mY; // coordinates + float mZ; // coordinates #ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME public: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx similarity index 71% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx index 06b87c7a682d3..864a5c6b7106e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx @@ -9,26 +9,26 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUTPCSliceOutput.cxx +/// \file GPUTPCSectorOutput.cxx /// \author Sergey Gorbunov, Ivan Kisel, David Rohr #include "GPUOutputControl.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUCommonMath.h" #include using namespace o2::gpu; -uint32_t GPUTPCSliceOutput::EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters) +uint32_t GPUTPCSectorOutput::EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters) { // calculate the amount of memory [bytes] needed for the event - return sizeof(GPUTPCSliceOutput) + sizeof(GPUTPCTrack) * nOfTracks + sizeof(GPUTPCSliceOutCluster) * nOfTrackClusters; + return sizeof(GPUTPCSectorOutput) + sizeof(GPUTPCTrack) * nOfTracks + sizeof(GPUTPCSectorOutCluster) * nOfTrackClusters; } #ifndef GPUCA_GPUCODE -void GPUTPCSliceOutput::Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory) +void GPUTPCSectorOutput::Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory) { - // Allocate All memory needed for slice output + // Allocate All memory needed for sector output const size_t memsize = EstimateSize(nTracks, nTrackHits); if (outputControl && outputControl->useExternal()) { @@ -42,7 +42,7 @@ void GPUTPCSliceOutput::Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, lock.clear(std::memory_order_release); return; } - ptrOutput = reinterpret_cast(outputControl->ptrCurrent); + ptrOutput = reinterpret_cast(outputControl->ptrCurrent); outputControl->ptrCurrent = (char*)outputControl->ptrCurrent + memsize; lock.clear(std::memory_order_release); } else { @@ -50,7 +50,7 @@ void GPUTPCSliceOutput::Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, free(internalMemory); } internalMemory = malloc(memsize); - ptrOutput = reinterpret_cast(internalMemory); + ptrOutput = reinterpret_cast(internalMemory); } ptrOutput->SetMemorySize(memsize); } diff 
--git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h similarity index 68% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h rename to GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h index 3b5712ccbb8f4..4d294dadc7974 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceOutput.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h @@ -9,11 +9,11 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCSliceOutput.h +/// \file GPUTPCSectorOutput.h /// \author Sergey Gorbunov, Ivan Kisel, David Rohr -#ifndef GPUTPCSLICEOUTPUT_H -#define GPUTPCSLICEOUTPUT_H +#ifndef GPUTPCSECTOROUTPUT_H +#define GPUTPCSECTOROUTPUT_H #include "GPUTPCDef.h" #include "GPUTPCTrack.h" @@ -25,17 +25,17 @@ namespace gpu struct GPUOutputControl; /** - * @class GPUTPCSliceOutput + * @class GPUTPCSectorOutput * - * GPUTPCSliceOutput class is used to store the output of GPUTPCTracker{Component} + * GPUTPCSectorOutput class is used to store the output of GPUTPCTracker{Component} * and transport the output to GPUTPCGBMerger{Component} * - * The class contains all the necessary information about TPC tracks, reconstructed in one slice. + * The class contains all the necessary information about TPC tracks, reconstructed in one sector. * This includes the reconstructed track parameters and some compressed information * about the assigned clusters: clusterId, position and amplitude. 
* */ -class GPUTPCSliceOutput +class GPUTPCSectorOutput { public: GPUhd() uint32_t NTracks() const @@ -58,24 +58,24 @@ class GPUTPCSliceOutput } static uint32_t EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters); - static void Allocate(GPUTPCSliceOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory); + static void Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory); GPUhd() void SetNTracks(uint32_t v) { mNTracks = v; } GPUhd() void SetNLocalTracks(uint32_t v) { mNLocalTracks = v; } GPUhd() void SetNTrackClusters(uint32_t v) { mNTrackClusters = v; } private: - GPUTPCSliceOutput() = delete; // NOLINT: Must be private or ROOT tries to use them! - ~GPUTPCSliceOutput() = delete; // NOLINT - GPUTPCSliceOutput(const GPUTPCSliceOutput&) = delete; // NOLINT - GPUTPCSliceOutput& operator=(const GPUTPCSliceOutput&) = delete; // NOLINT + GPUTPCSectorOutput() = delete; // NOLINT: Must be private or ROOT tries to use them! 
+ ~GPUTPCSectorOutput() = delete; // NOLINT + GPUTPCSectorOutput(const GPUTPCSectorOutput&) = delete; // NOLINT + GPUTPCSectorOutput& operator=(const GPUTPCSectorOutput&) = delete; // NOLINT GPUhd() void SetMemorySize(size_t val) { mMemorySize = val; } uint32_t mNTracks; // number of reconstructed tracks uint32_t mNLocalTracks; - uint32_t mNTrackClusters; // total number of track clusters - size_t mMemorySize; // Amount of memory really used + uint32_t mNTrackClusters; // total number of track clusters + size_t mMemorySize; // Amount of memory really used }; } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx similarity index 92% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx index 7b60e0621e78f..af79dddae554e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx @@ -43,7 +43,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; uint32_t nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); if (nextRowStartHits >= tracker.mNMaxRowStartHits) { - tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISlice() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); + tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); CAMath::AtomicExchShared(&s.mNRowStartHits, tracker.mNMaxRowStartHits); break; } @@ -51,7 +51,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletStartHits; uint32_t nextRowStartHits = 
CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, 1u); if (nextRowStartHits >= tracker.mNMaxStartHits) { - tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISlice() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); break; } @@ -66,7 +66,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; if (nOffset + s.mNRowStartHits > tracker.mNMaxStartHits) { - tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISlice() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); } } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h index f818e6986dbc6..5005e4f5e15d0 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h @@ -40,7 +40,7 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { 
return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h rename to GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h index 0877b6c15a511..2e40fdc549d32 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h @@ -40,7 +40,7 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.cxx similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrack.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrack.cxx diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h similarity index 71% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrack.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrack.h index fcf9d1149c588..8c88e89e92b25 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h @@ -17,7 +17,7 @@ #include "GPUTPCBaseTrackParam.h" #include "GPUTPCDef.h" -#include "GPUTPCSliceOutCluster.h" +#include "GPUTPCSectorOutCluster.h" namespace o2 { @@ -51,18 +51,18 @@ 
class GPUTPCTrack GPUhd() void SetParam(const GPUTPCBaseTrackParam& v) { mParam = v; } - // Only if used as replacement for SliceOutTrack - GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSliceOutCluster); } + // Only if used as replacement for SectorOutTrack + GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSectorOutCluster); } GPUhd() const GPUTPCTrack* GetNextTrack() const { return (const GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } GPUhd() GPUTPCTrack* NextTrack() { return (GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } - GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSliceOutCluster& v) { ((GPUTPCSliceOutCluster*)((char*)this + sizeof(*this)))[i] = v; } - GPUhd() const GPUTPCSliceOutCluster* OutTrackClusters() const { return (const GPUTPCSliceOutCluster*)((char*)this + sizeof(*this)); } - GPUhd() const GPUTPCSliceOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } + GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSectorOutCluster& v) { ((GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)))[i] = v; } + GPUhd() const GPUTPCSectorOutCluster* OutTrackClusters() const { return (const GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)); } + GPUhd() const GPUTPCSectorOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } private: - int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array - int32_t mNHits; // number of track cells - int32_t mLocalTrackId; // Id of local track this extrapolated track belongs to, index of this track itself if it is a local track + int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array + int32_t mNHits; // number of track cells + int32_t mLocalTrackId; // Id of local track this extrapolated track belongs to, index of this track itself if it is a local track GPUTPCBaseTrackParam mParam; // track 
parameters private: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h similarity index 100% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackLinearisation.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx similarity index 99% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx index 68ced574a18a9..af6f8e6cddc08 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.cxx @@ -709,7 +709,7 @@ GPUd() bool GPUTPCTrackParam::CheckNumericalQuality() const GPUd() void GPUTPCTrackParam::ConstrainZ(float& z, int32_t sector, float& z0, float& lastZ) { - if (sector < GPUCA_NSLICES / 2) { + if (sector < GPUCA_NSECTORS / 2) { if (z < 0) { mParam.mZOffset += z; mParam.mP[1] -= z; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h similarity index 98% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h index 72f9d5fbaa23d..ae86ad97e41c9 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackParam.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h @@ -29,7 +29,7 @@ class GPUTPCTrackLinearisation; * @class GPUTPCTrackParam * * GPUTPCTrackParam class describes the track parametrisation - * which is used by the GPUTPCTracker slice tracker. + * which is used by the GPUTPCTracker sector tracker. * */ class GPUTPCTrackParam @@ -143,7 +143,7 @@ class GPUTPCTrackParam #ifndef GPUCA_GPUCODE private: -#endif //! GPUCA_GPUCODE +#endif //! 
GPUCA_GPUCODE GPUTPCBaseTrackParam mParam; // Track Parameters private: diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx similarity index 78% rename from GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index cece49073f11b..bb49548163ff8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -18,7 +18,7 @@ #include "GPUCommonMath.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" @@ -40,7 +40,7 @@ using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) GPUTPCTracker::GPUTPCTracker() - : GPUProcessor(), mLinkTmpMemory(nullptr), mISlice(-1), mData(), mNMaxStartHits(0), mNMaxRowStartHits(0), mNMaxTracklets(0), mNMaxRowHits(0), mNMaxTracks(0), mNMaxTrackHits(0), mMemoryResLinks(-1), mMemoryResScratchHost(-1), mMemoryResCommon(-1), mMemoryResTracklets(-1), mMemoryResOutput(-1), mMemoryResSliceScratch(-1), mRowStartHitCountOffset(nullptr), mTrackletTmpStartHits(nullptr), mGPUTrackletTemp(nullptr), mGPUParametersConst(), mCommonMem(nullptr), mTrackletStartHits(nullptr), mTracklets(nullptr), mTrackletRowHits(nullptr), mTracks(nullptr), mTrackHits(nullptr), mOutput(nullptr), mOutputMemory(nullptr) + : GPUProcessor(), mLinkTmpMemory(nullptr), mISector(-1), mData(), mNMaxStartHits(0), mNMaxRowStartHits(0), mNMaxTracklets(0), mNMaxRowHits(0), mNMaxTracks(0), mNMaxTrackHits(0), mMemoryResLinks(-1), mMemoryResScratchHost(-1), mMemoryResCommon(-1), mMemoryResTracklets(-1), mMemoryResOutput(-1), mMemoryResSectorScratch(-1), mRowStartHitCountOffset(nullptr), mTrackletTmpStartHits(nullptr), mGPUTrackletTemp(nullptr), mGPUParametersConst(), mCommonMem(nullptr), mTrackletStartHits(nullptr), mTracklets(nullptr), mTrackletRowHits(nullptr), mTracks(nullptr), mTrackHits(nullptr), 
mOutput(nullptr), mOutputMemory(nullptr) { } @@ -52,11 +52,11 @@ GPUTPCTracker::~GPUTPCTracker() } // ---------------------------------------------------------------------------------- -void GPUTPCTracker::SetSlice(int32_t iSlice) { mISlice = iSlice; } +void GPUTPCTracker::SetSector(int32_t iSector) { mISector = iSector; } void GPUTPCTracker::InitializeProcessor() { - if (mISlice < 0) { - throw std::runtime_error("Slice not set"); + if (mISector < 0) { + throw std::runtime_error("Sector not set"); } InitializeRows(&Param()); SetupCommonMemory(); @@ -73,7 +73,7 @@ void* GPUTPCTracker::SetPointersScratch(void* mem) if (mRec->GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_INDIVIDUAL) { mem = SetPointersTracklets(mem); } - if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) { + if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) { computePointerWithAlignment(mem, mTrackletTmpStartHits, GPUCA_ROW_COUNT * mNMaxRowStartHits); computePointerWithAlignment(mem, mRowStartHitCountOffset, GPUCA_ROW_COUNT); } @@ -98,17 +98,17 @@ void* GPUTPCTracker::SetPointersCommon(void* mem) void GPUTPCTracker::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); - GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; - mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSliceLinks", reLinks); - mMemoryResSliceScratch = mRec->RegisterMemoryAllocation(this, 
&GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSliceScratch"); - GPUMemoryReuse reWeights{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataWeights, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; - mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataWeights, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSliceWeights", reWeights); - GPUMemoryReuse reScratch{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerScratch, (uint16_t)(mISlice % mRec->GetProcessingSettings().nStreams)}; + bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); + GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; + mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorLinks", reLinks); + mMemoryResSectorScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSectorScratch"); + GPUMemoryReuse reWeights{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataWeights, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; + mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataWeights, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorWeights", reWeights); + GPUMemoryReuse reScratch{reuseCondition, 
GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerScratch, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCTrackerScratch", reScratch); mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersScratchHost, GPUMemoryResource::MEMORY_SCRATCH_HOST, "TPCTrackerHost"); mMemoryResCommon = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersCommon, GPUMemoryResource::MEMORY_PERMANENT, "TPCTrackerCommon"); - mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataRows, GPUMemoryResource::MEMORY_PERMANENT, "TPCSliceRows"); + mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataRows, GPUMemoryResource::MEMORY_PERMANENT, "TPCSectorRows"); uint32_t type = GPUMemoryResource::MEMORY_SCRATCH; if (mRec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { // For individual scheme, we allocate tracklets separately, and change the type for the following allocations to custom @@ -142,8 +142,8 @@ void GPUTPCTracker::SetMaxData(const GPUTrackingInOutPointers& io) if (io.clustersNative) { uint32_t maxRowHits = 0; for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { - if (io.clustersNative->nClusters[mISlice][i] > maxRowHits) { - maxRowHits = io.clustersNative->nClusters[mISlice][i]; + if (io.clustersNative->nClusters[mISector][i] > maxRowHits) { + maxRowHits = io.clustersNative->nClusters[mISector][i]; } } mNMaxRowStartHits = mRec->MemoryScalers()->NTPCRowStartHits(maxRowHits * GPUCA_ROW_COUNT); @@ -155,7 +155,7 @@ void GPUTPCTracker::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxTracks = mRec->MemoryScalers()->NTPCSectorTracks(mData.NumberOfHits()); mNMaxTrackHits = mRec->MemoryScalers()->NTPCSectorTrackHits(mData.NumberOfHits(), mRec->GetProcessingSettings().tpcInputWithClusterRejection); #ifdef GPUCA_SORT_STARTHITS_GPU - if 
(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSliceTracking) { + if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) { if (mNMaxStartHits > mNMaxRowStartHits * GPUCA_ROW_COUNT) { mNMaxStartHits = mNMaxRowStartHits * GPUCA_ROW_COUNT; } @@ -173,9 +173,9 @@ void GPUTPCTracker::UpdateMaxData() void GPUTPCTracker::SetupCommonMemory() { new (mCommonMem) commonMemoryStruct; } -GPUh() int32_t GPUTPCTracker::CheckEmptySlice() +GPUh() int32_t GPUTPCTracker::CheckEmptySector() { - // Check if the Slice is empty, if so set the output apropriate and tell the reconstuct procesdure to terminate + // Check if the Sector is empty, if so set the output apropriate and tell the reconstuct procesdure to terminate if (NHitsTotal() < 1) { mCommonMem->nTracks = mCommonMem->nTrackHits = 0; if (mOutput) { @@ -188,7 +188,7 @@ GPUh() int32_t GPUTPCTracker::CheckEmptySlice() return 0; } -GPUh() void GPUTPCTracker::WriteOutputPrepare() { GPUTPCSliceOutput::Allocate(mOutput, mCommonMem->nTracks, mCommonMem->nTrackHits, &mRec->OutputControl(), mOutputMemory); } +GPUh() void GPUTPCTracker::WriteOutputPrepare() { GPUTPCSectorOutput::Allocate(mOutput, mCommonMem->nTracks, mCommonMem->nTrackHits, &mRec->OutputControl(), mOutputMemory); } template static inline bool SortComparison(const T& a, const T& b) @@ -205,7 +205,7 @@ GPUh() void GPUTPCTracker::WriteOutput() if (mCommonMem->nTracks == 0) { return; } - if (mCommonMem->nTracks > GPUCA_MAX_SLICE_NTRACK) { + if (mCommonMem->nTracks > GPUCA_MAX_SECTOR_NTRACK) { GPUError("Maximum number of tracks exceeded, cannot store"); return; } @@ -241,12 +241,12 @@ GPUh() void GPUTPCTracker::WriteOutput() int32_t clusterIndex = mData.ClusterDataIndex(row, ih); #ifdef GPUCA_ARRAY_BOUNDS_CHECKS if (ih >= row.NHits() || ih < 0) { - GPUError("Array out of bounds access (Sector Row) (Hit %d / %d - NumC %d): Sector %d Row %d Index %d", ith, iTrack.NHits(), NHitsTotal(), mISlice, iRow, ih); + GPUError("Array out of bounds access (Sector Row) 
(Hit %d / %d - NumC %d): Sector %d Row %d Index %d", ith, iTrack.NHits(), NHitsTotal(), mISector, iRow, ih); fflush(stdout); continue; } if (clusterIndex >= NHitsTotal() || clusterIndex < 0) { - GPUError("Array out of bounds access (Cluster Data) (Hit %d / %d - NumC %d): Sector %d Row %d Hit %d, Clusterdata Index %d", ith, iTrack.NHits(), NHitsTotal(), mISlice, iRow, ih, clusterIndex); + GPUError("Array out of bounds access (Cluster Data) (Hit %d / %d - NumC %d): Sector %d Row %d Hit %d, Clusterdata Index %d", ith, iTrack.NHits(), NHitsTotal(), mISector, iRow, ih, clusterIndex); fflush(stdout); continue; } @@ -265,12 +265,12 @@ GPUh() void GPUTPCTracker::WriteOutput() id = mData.ClusterData()[clusterIndex].id; } else { const ClusterNativeAccess& cls = *mConstantMem->ioPtrs.clustersNative; - id = clusterIndex + cls.clusterOffset[mISlice][0]; - GPUTPCConvertImpl::convert(*mConstantMem, mISlice, iRow, cls.clustersLinear[id].getPad(), cls.clustersLinear[id].getTime(), origX, origY, origZ); + id = clusterIndex + cls.clusterOffset[mISector][0]; + GPUTPCConvertImpl::convert(*mConstantMem, mISector, iRow, cls.clustersLinear[id].getPad(), cls.clustersLinear[id].getTime(), origX, origY, origZ); flags = cls.clustersLinear[id].getFlags(); amp = cls.clustersLinear[id].qTot; } - GPUTPCSliceOutCluster c; + GPUTPCSectorOutCluster c; c.Set(id, iRow, flags, amp, origX, origY, origZ); #ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME c.mPad = mData.ClusterData()[clusterIndex].pad; @@ -294,7 +294,7 @@ GPUh() void GPUTPCTracker::WriteOutput() mOutput->SetNLocalTracks(nStoredLocalTracks); mOutput->SetNTrackClusters(nStoredHits); if (Param().par.debugLevel >= 3) { - GPUInfo("Slice %d, Output: Tracks %d, local tracks %d, hits %d", mISlice, nStoredTracks, nStoredLocalTracks, nStoredHits); + GPUInfo("Sector %d, Output: Tracks %d, local tracks %d, hits %d", mISector, nStoredTracks, nStoredLocalTracks, nStoredHits); } } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h 
b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h similarity index 74% rename from GPU/GPUTracking/SliceTracker/GPUTPCTracker.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index c5d4d40a2bef8..ba5d95e1cc53e 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -22,7 +22,7 @@ #endif #include "GPUTPCHitId.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUTPCTrackParam.h" #include "GPUTPCTracklet.h" #include "GPUProcessor.h" @@ -31,7 +31,7 @@ namespace o2 { namespace gpu { -class GPUTPCSliceOutput; +class GPUTPCSectorOutput; struct GPUTPCClusterData; struct GPUParam; class GPUTPCTrack; @@ -47,22 +47,22 @@ class GPUTPCTracker : public GPUProcessor GPUTPCTracker(const GPUTPCTracker&) = delete; GPUTPCTracker& operator=(const GPUTPCTracker&) = delete; - void SetSlice(int32_t iSlice); + void SetSector(int32_t iSector); void InitializeProcessor(); void InitializeRows(const GPUParam* param) { mData.InitializeRows(*param); } - int32_t CheckEmptySlice(); + int32_t CheckEmptySector(); void WriteOutputPrepare(); void WriteOutput(); // Debugging Stuff - void DumpSliceData(std::ostream& out); // Dump Input Slice Data + void DumpTrackingData(std::ostream& out); // Dump Input Sector Data void DumpLinks(std::ostream& out, int32_t phase); // Dump all links to file (for comparison after NeighboursFinder/Cleaner) - void DumpStartHits(std::ostream& out); // Same for Start Hits - void DumpHitWeights(std::ostream& out); //.... - void DumpTrackHits(std::ostream& out); // Same for Track Hits - void DumpTrackletHits(std::ostream& out); // Same for Track Hits - void DumpOutput(std::ostream& out); // Similar for output + void DumpStartHits(std::ostream& out); // Same for Start Hits + void DumpHitWeights(std::ostream& out); //.... 
+ void DumpTrackHits(std::ostream& out); // Same for Track Hits + void DumpTrackletHits(std::ostream& out); // Same for Track Hits + void DumpOutput(std::ostream& out); // Similar for output #endif struct StructGPUParameters { @@ -75,14 +75,14 @@ class GPUTPCTracker : public GPUProcessor struct commonMemoryStruct { commonMemoryStruct() : nStartHits(0), nTracklets(0), nRowHits(0), nTracks(0), nLocalTracks(0), nTrackHits(0), nLocalTrackHits(0), gpuParameters() {} - GPUAtomic(uint32_t) nStartHits; // number of start hits - GPUAtomic(uint32_t) nTracklets; // number of tracklets - GPUAtomic(uint32_t) nRowHits; // number of tracklet hits - GPUAtomic(uint32_t) nTracks; // number of reconstructed tracks - int32_t nLocalTracks; // number of reconstructed tracks before extrapolation tracking - GPUAtomic(uint32_t) nTrackHits; // number of track hits - int32_t nLocalTrackHits; // see above - StructGPUParameters gpuParameters; // GPU parameters + GPUAtomic(uint32_t) nStartHits; // number of start hits + GPUAtomic(uint32_t) nTracklets; // number of tracklets + GPUAtomic(uint32_t) nRowHits; // number of tracklet hits + GPUAtomic(uint32_t) nTracks; // number of reconstructed tracks + int32_t nLocalTracks; // number of reconstructed tracks before extrapolation tracking + GPUAtomic(uint32_t) nTrackHits; // number of track hits + int32_t nLocalTrackHits; // see above + StructGPUParameters gpuParameters; // GPU parameters }; GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const @@ -90,7 +90,7 @@ class GPUTPCTracker : public GPUProcessor return mData.ClusterData(); } GPUhdi() const GPUTPCRow& Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } - GPUhdi() GPUglobalref() GPUTPCSliceOutput* Output() const { return mOutput; } + GPUhdi() GPUglobalref() GPUTPCSectorOutput* Output() const { return mOutput; } GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { return (mCommonMem); @@ -104,13 +104,13 @@ class GPUTPCTracker : public 
GPUProcessor GPUdi() void GetErrors2Seeding(int32_t iRow, const GPUTPCTrackParam& t, float time, float& ErrY2, float& ErrZ2) const { - // Param().GetClusterErrors2(mISlice, iRow, Param().GetContinuousTracking() != 0. ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); - Param().GetClusterErrorsSeeding2(mISlice, iRow, Param().par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); + // Param().GetClusterErrors2(mISector, iRow, Param().GetContinuousTracking() != 0. ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, 0.f, 0.f, ErrY2, ErrZ2); + Param().GetClusterErrorsSeeding2(mISector, iRow, Param().par.continuousTracking != 0.f ? 125.f : t.Z(), t.SinPhi(), t.DzDs(), time, ErrY2, ErrZ2); } GPUdi() void GetErrors2Seeding(int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { - // Param().GetClusterErrors2(mISlice, iRow, Param().GetContinuousTracking() != 0. ? 125.f : z, sinPhi, DzDs, time, 0.f, 0.f, ErrY2, ErrZ2); - Param().GetClusterErrorsSeeding2(mISlice, iRow, Param().par.continuousTracking != 0.f ? 125.f : z, sinPhi, DzDs, time, ErrY2, ErrZ2); + // Param().GetClusterErrors2(mISector, iRow, Param().GetContinuousTracking() != 0. ? 125.f : z, sinPhi, DzDs, time, 0.f, 0.f, ErrY2, ErrZ2); + Param().GetClusterErrorsSeeding2(mISector, iRow, Param().par.continuousTracking != 0.f ? 
125.f : z, sinPhi, DzDs, time, ErrY2, ErrZ2); } void SetupCommonMemory(); @@ -130,15 +130,15 @@ class GPUTPCTracker : public GPUProcessor int16_t MemoryResCommon() const { return mMemoryResCommon; } int16_t MemoryResTracklets() const { return mMemoryResTracklets; } int16_t MemoryResOutput() const { return mMemoryResOutput; } - int16_t MemoryResSliceScratch() const { return mMemoryResSliceScratch; } + int16_t MemoryResSectorScratch() const { return mMemoryResSectorScratch; } void SetMaxData(const GPUTrackingInOutPointers& io); void UpdateMaxData(); - GPUhd() int32_t ISlice() const { return mISlice; } + GPUhd() int32_t ISector() const { return mISector; } - GPUhd() GPUconstantref() const GPUTPCSliceData& Data() const { return mData; } - GPUhdi() GPUconstantref() GPUTPCSliceData& Data() + GPUhd() GPUconstantref() const GPUTPCTrackingData& Data() const { return mData; } + GPUhdi() GPUconstantref() GPUTPCTrackingData& Data() { return mData; } @@ -172,13 +172,13 @@ class GPUTPCTracker : public GPUProcessor GPUhd() int32_t HitInputID(const GPUTPCRow& row, int32_t hitIndex) const { return mData.ClusterDataIndex(row, hitIndex); } /** - * The hit weight is used to determine whether a hit belongs to a certain tracklet or another one - * competing for the same hit. The tracklet that has a higher weight wins. Comparison is done - * using the the number of hits in the tracklet (the more hits it has the more it keeps). If - * tracklets have the same number of hits then it doesn't matter who gets it, but it should be - * only one. So a unique number (row index is good) is added in the least significant part of - * the weight - */ + * The hit weight is used to determine whether a hit belongs to a certain tracklet or another one + * competing for the same hit. The tracklet that has a higher weight wins. Comparison is done + * using the the number of hits in the tracklet (the more hits it has the more it keeps). 
If + * tracklets have the same number of hits then it doesn't matter who gets it, but it should be + * only one. So a unique number (row index is good) is added in the least significant part of + * the weight + */ GPUdi() static int32_t CalculateHitWeight(int32_t NHits, float chi2) { const float chi2_suppress = 6.f; @@ -210,7 +210,7 @@ class GPUTPCTracker : public GPUProcessor GPUhd() GPUglobalref() GPUAtomic(uint32_t) * NTrackHits() const { return &mCommonMem->nTrackHits; } GPUhd() GPUglobalref() GPUTPCHitId* TrackHits() const { return mTrackHits; } - GPUhd() GPUglobalref() GPUTPCRow* SliceDataRows() const { return (mData.Rows()); } + GPUhd() GPUglobalref() GPUTPCRow* TrackingDataRows() const { return (mData.Rows()); } GPUhd() GPUglobalref() int32_t* RowStartHitCountOffset() const { return (mRowStartHitCountOffset); } GPUhd() GPUglobalref() StructGPUParameters* GPUParameters() const { return (&mCommonMem->gpuParameters); } GPUhd() StructGPUParametersConst* GPUParametersConst() @@ -222,7 +222,7 @@ class GPUTPCTracker : public GPUProcessor struct trackSortData { int32_t fTtrack; // Track ID - float fSortVal; // Value to sort for + float fSortVal; // Value to sort for }; void* LinkTmpMemory() { return mLinkTmpMemory; } @@ -237,9 +237,9 @@ class GPUTPCTracker : public GPUProcessor friend class GPUTPCStartHitsFinder; char* mLinkTmpMemory; // tmp memory for hits after neighbours finder - int32_t mISlice; // Number of slice + int32_t mISector; // Number of sector - GPUTPCSliceData mData; // The SliceData object. It is used to encapsulate the storage in memory from the access + GPUTPCTrackingData mData; // The TrackingData object. 
It is used to encapsulate the storage in memory from the access uint32_t mNMaxStartHits; uint32_t mNMaxRowStartHits; @@ -253,7 +253,7 @@ class GPUTPCTracker : public GPUProcessor int16_t mMemoryResCommon; int16_t mMemoryResTracklets; int16_t mMemoryResOutput; - int16_t mMemoryResSliceScratch; + int16_t mMemoryResSectorScratch; // GPU Temp Arrays GPUglobalref() int32_t* mRowStartHitCountOffset; // Offset, length and new offset of start hits in row @@ -263,16 +263,16 @@ class GPUTPCTracker : public GPUProcessor StructGPUParametersConst mGPUParametersConst; // Parameters for GPU if this is a GPU tracker // event - GPUglobalref() commonMemoryStruct* mCommonMem; // common event memory - GPUglobalref() GPUTPCHitId* mTrackletStartHits; // start hits for the tracklets - GPUglobalref() GPUTPCTracklet* mTracklets; // tracklets - GPUglobalref() calink* mTrackletRowHits; // Hits for each Tracklet in each row - GPUglobalref() GPUTPCTrack* mTracks; // reconstructed tracks - GPUglobalref() GPUTPCHitId* mTrackHits; // array of track hit numbers + GPUglobalref() commonMemoryStruct* mCommonMem; // common event memory + GPUglobalref() GPUTPCHitId* mTrackletStartHits; // start hits for the tracklets + GPUglobalref() GPUTPCTracklet* mTracklets; // tracklets + GPUglobalref() calink* mTrackletRowHits; // Hits for each Tracklet in each row + GPUglobalref() GPUTPCTrack* mTracks; // reconstructed tracks + GPUglobalref() GPUTPCHitId* mTrackHits; // array of track hit numbers // output - GPUglobalref() GPUTPCSliceOutput* mOutput; // address of pointer pointing to SliceOutput Object - void* mOutputMemory; // Pointer to output memory if stored internally + GPUglobalref() GPUTPCSectorOutput* mOutput; // address of pointer pointing to SectorOutput Object + void* mOutputMemory; // Pointer to output memory if stored internally static int32_t StarthitSortComparison(const void* a, const void* b); }; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx similarity index 90% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx index 5c2ed83d47966..ba1727fa602a4 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackerDump.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx @@ -13,7 +13,7 @@ /// \author David Rohr #include "GPUTPCTracker.h" -#include "GPUTPCSliceOutput.h" +#include "GPUTPCSectorOutput.h" #include "GPUReconstruction.h" #include "GPUTPCHitId.h" #include "GPUTPCTrack.h" @@ -29,7 +29,7 @@ using namespace o2::gpu; void GPUTPCTracker::DumpOutput(std::ostream& out) { if (Param().par.earlyTpcTransform) { - out << "\nSlice " << mISlice << "\n"; + out << "\nSector " << mISector << "\n"; const GPUTPCTrack* track = (Output())->GetFirstTrack(); for (uint32_t j = 0; j < (Output())->NTracks(); j++) { out << "Track " << j << " (" << track->NHits() << "): "; @@ -42,10 +42,10 @@ void GPUTPCTracker::DumpOutput(std::ostream& out) } } -void GPUTPCTracker::DumpSliceData(std::ostream& out) +void GPUTPCTracker::DumpTrackingData(std::ostream& out) { - // Dump Slice Input Data to File - out << "\nSlice Data (Slice" << mISlice << "):" << std::endl; + // Dump Sector Input Data to File + out << "\nSector Data (Sector" << mISector << "):" << std::endl; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { if (Row(i).NHits() == 0) { continue; @@ -64,7 +64,7 @@ void GPUTPCTracker::DumpSliceData(std::ostream& out) void GPUTPCTracker::DumpLinks(std::ostream& out, int32_t phase) { // Dump Links (after Neighbours Finder / Cleaner) to file - out << "\nHit Links (Phase " << phase << ", Slice" << mISlice << "):" << std::endl; + out << "\nHit Links (Phase " << phase << ", Sector" << mISector << "):" << std::endl; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { if (Row(i).NHits() == 0) { continue; @@ -83,7 +83,7 @@ void GPUTPCTracker::DumpLinks(std::ostream& out, int32_t phase) void 
GPUTPCTracker::DumpHitWeights(std::ostream& out) { // dump hit weights to file - out << "\nHit Weights(Slice" << mISlice << "):" << std::endl; + out << "\nHit Weights(Sector" << mISector << "):" << std::endl; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { if (Row(i).NHits() == 0) { continue; @@ -102,7 +102,7 @@ void GPUTPCTracker::DumpHitWeights(std::ostream& out) void GPUTPCTracker::DumpStartHits(std::ostream& out) { // dump start hits to file - out << "\nStart Hits: (Slice" << mISlice << ") (" << *NStartHits() << ")" << std::endl; + out << "\nStart Hits: (Sector" << mISector << ") (" << *NStartHits() << ")" << std::endl; for (uint32_t i = 0; i < *NStartHits(); i++) { out << TrackletStartHit(i).RowIndex() << "-" << TrackletStartHit(i).HitIndex() << std::endl; } @@ -112,7 +112,7 @@ void GPUTPCTracker::DumpStartHits(std::ostream& out) void GPUTPCTracker::DumpTrackHits(std::ostream& out) { // dump tracks to file - out << "\nTracks: (Slice" << mISlice << ") (" << *NTracks() << ")" << std::endl; + out << "\nTracks: (Sector" << mISector << ") (" << *NTracks() << ")" << std::endl; for (uint32_t j = 0; j < *NTracks(); j++) { if (Tracks()[j].NHits() == 0) { continue; @@ -140,7 +140,7 @@ void GPUTPCTracker::DumpTrackletHits(std::ostream& out) if (nTracklets < 0) { nTracklets = 0; } - out << "\nTracklets: (Slice" << mISlice << ") (" << nTracklets << ")" << std::endl; + out << "\nTracklets: (Sector" << mISector << ") (" << nTracklets << ")" << std::endl; std::vector Ids(nTracklets); std::iota(Ids.begin(), Ids.end(), 0); if (mRec->GetProcessingSettings().deterministicGPUReconstruction) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx similarity index 86% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx index 3cc3e3805dce8..a3e73c377ed44 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.cxx +++ 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx @@ -9,13 +9,13 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCSliceData.cxx +/// \file GPUTPCTrackingData.cxx /// \author Matthias Kretz, Sergey Gorbunov, David Rohr #include "GPUParam.h" #include "GPUTPCClusterData.h" #include "GPUTPCHit.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUProcessor.h" #include "GPUO2DataTypes.h" #include "GPUTPCConvertImpl.h" @@ -32,7 +32,7 @@ using namespace o2::gpu; #ifndef GPUCA_GPUCODE -void GPUTPCSliceData::InitializeRows(const GPUParam& p) +void GPUTPCTrackingData::InitializeRows(const GPUParam& p) { // initialisation of rows for (int32_t i = 0; i < GPUCA_ROW_COUNT + 1; i++) { @@ -44,34 +44,34 @@ void GPUTPCSliceData::InitializeRows(const GPUParam& p) } } -void GPUTPCSliceData::SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset) +void GPUTPCTrackingData::SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset) { mClusterData = data; mNumberOfHits = nClusters; mClusterIdOffset = clusterIdOffset; } -void GPUTPCSliceData::SetMaxData() +void GPUTPCTrackingData::SetMaxData() { int32_t hitMemCount = GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT + mNumberOfHits; const uint32_t kVectorAlignment = 256; mNumberOfHitsPlusAlign = GPUProcessor::nextMultipleOf<(kVectorAlignment > GPUCA_ROWALIGNMENT ? 
kVectorAlignment : GPUCA_ROWALIGNMENT) / sizeof(int32_t)>(hitMemCount); } -void* GPUTPCSliceData::SetPointersLinks(void* mem) +void* GPUTPCTrackingData::SetPointersLinks(void* mem) { GPUProcessor::computePointerWithAlignment(mem, mLinkUpData, mNumberOfHitsPlusAlign); GPUProcessor::computePointerWithAlignment(mem, mLinkDownData, mNumberOfHitsPlusAlign); return mem; } -void* GPUTPCSliceData::SetPointersWeights(void* mem) +void* GPUTPCTrackingData::SetPointersWeights(void* mem) { GPUProcessor::computePointerWithAlignment(mem, mHitWeights, mNumberOfHitsPlusAlign + 16 / sizeof(*mHitWeights)); return mem; } -void* GPUTPCSliceData::SetPointersScratch(void* mem, bool idsOnGPU) +void* GPUTPCTrackingData::SetPointersScratch(void* mem, bool idsOnGPU) { const int32_t firstHitInBinSize = GetGridSize(mNumberOfHits, GPUCA_ROW_COUNT) + GPUCA_ROW_COUNT * GPUCA_ROWALIGNMENT / sizeof(int32_t); GPUProcessor::computePointerWithAlignment(mem, mHitData, mNumberOfHitsPlusAlign); @@ -82,7 +82,7 @@ void* GPUTPCSliceData::SetPointersScratch(void* mem, bool idsOnGPU) return mem; } -void* GPUTPCSliceData::SetPointersClusterIds(void* mem, bool idsOnGPU) +void* GPUTPCTrackingData::SetPointersClusterIds(void* mem, bool idsOnGPU) { if (!idsOnGPU) { GPUProcessor::computePointerWithAlignment(mem, mClusterDataIndex, mNumberOfHitsPlusAlign); @@ -90,7 +90,7 @@ void* GPUTPCSliceData::SetPointersClusterIds(void* mem, bool idsOnGPU) return mem; } -void* GPUTPCSliceData::SetPointersRows(void* mem) +void* GPUTPCTrackingData::SetPointersRows(void* mem) { GPUProcessor::computePointerWithAlignment(mem, mRows, GPUCA_ROW_COUNT + 1); return mem; @@ -98,19 +98,19 @@ void* GPUTPCSliceData::SetPointersRows(void* mem) #endif -GPUd() void GPUTPCSliceData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) +GPUd() void GPUTPCTrackingData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) { 
maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1; maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50; maxZ = maxZ / GPUCA_MIN_BIN_SIZE + 1; } -GPUd() uint32_t GPUTPCSliceData::GetGridSize(uint32_t nHits, uint32_t nRows) +GPUd() uint32_t GPUTPCTrackingData::GetGridSize(uint32_t nHits, uint32_t nRows) { return 128 * nRows + 4 * nHits; } -GPUdi() void GPUTPCSliceData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) +GPUdi() void GPUTPCTrackingData::CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax) { float dz = zMax - zMin; float tfFactor = 1.f; @@ -144,7 +144,7 @@ GPUdi() static void UpdateMinMaxYZ(float& yMin, float& yMax, float& zMin, float& } } -GPUdii() void GPUTPCSliceData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) +GPUdii() void GPUTPCTrackingData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) { GPUAtomic(calink)* c = (GPUAtomic(calink)*)mFirstHitInBin + row.mFirstHitInBinOffset; row.mGrid.CreateEmpty(); @@ -161,7 +161,7 @@ GPUdii() void GPUTPCSliceData::SetRowGridEmpty(GPUTPCRow& GPUrestrict() row) } } -GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSlice, float* tmpMinMax) +GPUdii() int32_t GPUTPCTrackingData::InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* GPUrestrict() mem, int32_t iSector, float* tmpMinMax) { #ifdef GPUCA_GPUCODE constexpr bool EarlyTransformWithoutClusterNative = false; @@ -220,8 +220,8 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n float zMin = 1.e6f; float zMax = 
-1.e6f; - const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSlice][rowIndex]; - const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSlice][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSlice][0]); + const uint32_t NumberOfClusters = EarlyTransformWithoutClusterNative ? NumberOfClustersInRow[rowIndex] : mem->ioPtrs.clustersNative->nClusters[iSector][rowIndex]; + const uint32_t RowOffset = EarlyTransformWithoutClusterNative ? RowOffsets[rowIndex] : (mem->ioPtrs.clustersNative->clusterOffset[iSector][rowIndex] - mem->ioPtrs.clustersNative->clusterOffset[iSector][0]); constexpr const uint32_t maxN = 1u << (sizeof(calink) < 3 ? (sizeof(calink) * 8) : 24); GPUTPCRow& row = mRows[rowIndex]; if (iThread == 0) { @@ -229,7 +229,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n } if (NumberOfClusters >= maxN) { if (iThread == 0) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_HITINROW_OVERFLOW, iSlice * 1000 + rowIndex, NumberOfClusters, maxN); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_HITINROW_OVERFLOW, iSector * 1000 + rowIndex, NumberOfClusters, maxN); SetRowGridEmpty(row); } continue; @@ -265,7 +265,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n } else { for (uint32_t i = iThread; i < NumberOfClusters; i += nThreads) { float x, y, z; - GPUTPCConvertImpl::convert(*mem, iSlice, rowIndex, mem->ioPtrs.clustersNative->clusters[iSlice][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSlice][rowIndex][i].getTime(), x, y, z); + GPUTPCConvertImpl::convert(*mem, iSector, rowIndex, mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getPad(), mem->ioPtrs.clustersNative->clusters[iSector][rowIndex][i].getTime(), x, y, z); UpdateMinMaxYZ(yMin, yMax, zMin, zMax, y, z); YZData[RowOffset + i] 
= CAMath::MakeFloat2(y, z); } @@ -310,7 +310,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n constexpr const int32_t maxBins = sizeof(calink) < 4 ? (int32_t)(1ul << (sizeof(calink) * 8)) : 0x7FFFFFFF; // NOLINT: false warning if (sizeof(calink) < 4 && numberOfBins >= maxBins) { if (iThread == 0) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_BIN_OVERFLOW, iSlice * 1000 + rowIndex, numberOfBins, maxBins); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_BIN_OVERFLOW, iSector * 1000 + rowIndex, numberOfBins, maxBins); SetRowGridEmpty(row); } continue; @@ -319,7 +319,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n const uint32_t maxnn = GetGridSize(NumberOfClusters, 1); if (nn >= maxnn) { if (iThread == 0) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_FIRSTHITINBIN_OVERFLOW, iSlice, nn, maxnn); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_FIRSTHITINBIN_OVERFLOW, iSector, nn, maxnn); SetRowGridEmpty(row); } continue; @@ -399,7 +399,7 @@ GPUdii() int32_t GPUTPCSliceData::InitFromClusterData(int32_t nBlocks, int32_t n if (iThread == 0 && !mem->param.par.continuousTracking) { const float maxAbsZ = CAMath::Max(CAMath::Abs(tmpMinMax[2]), CAMath::Abs(tmpMinMax[3])); if (maxAbsZ > 300) { - mem->errorCodes.raiseError(GPUErrors::ERROR_SLICEDATA_Z_OVERFLOW, iSlice, (uint32_t)maxAbsZ); + mem->errorCodes.raiseError(GPUErrors::ERROR_SECTORDATA_Z_OVERFLOW, iSector, (uint32_t)maxAbsZ); SetRowGridEmpty(row); continue; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h similarity index 58% rename from GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h index 200a123b9bb83..656bb1c5e68f8 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCSliceData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h @@ -9,11 +9,11 @@ // granted to it by virtue of 
its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUTPCSliceData.h +/// \file GPUTPCTrackingData.h /// \author Matthias Kretz, Sergey Gorbunov, David Rohr -#ifndef GPUTPCSLICEDATA_H -#define GPUTPCSLICEDATA_H +#ifndef GPUTPCSECTORDATA_H +#define GPUTPCSECTORDATA_H #include "GPUTPCDef.h" #include "GPUTPCRow.h" @@ -28,13 +28,13 @@ namespace gpu struct GPUTPCClusterData; class GPUTPCHit; -class GPUTPCSliceData +class GPUTPCTrackingData { public: - GPUTPCSliceData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mGPUTextureBase(nullptr), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} + GPUTPCTrackingData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mGPUTextureBase(nullptr), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} #ifndef GPUCA_GPUCODE_DEVICE - ~GPUTPCSliceData() = default; + ~GPUTPCTrackingData() = default; void InitializeRows(const GPUParam& p); void SetMaxData(); void SetClusterData(const GPUTPCClusterData* data, int32_t nClusters, int32_t clusterIdOffset); @@ -45,20 +45,20 @@ class GPUTPCSliceData void* SetPointersRows(void* mem); #endif - GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* mem, int32_t iSlice, float* tmpMinMax); + GPUd() int32_t InitFromClusterData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUconstantref() const GPUConstantMem* mem, int32_t iSector, float* tmpMinMax); /** - * Return the number of hits in this slice. - */ + * Return the number of hits in this sector. + */ GPUhd() int32_t NumberOfHits() const { return mNumberOfHits; } GPUhd() int32_t NumberOfHitsPlusAlign() const { return mNumberOfHitsPlusAlign; } GPUhd() int32_t ClusterIdOffset() const { return mClusterIdOffset; } /** - * Access to the hit links. 
- * - * The links values give the hit index in the row above/below. Or -1 if there is no link. - */ + * Access to the hit links. + * + * The links values give the hit index in the row above/below. Or -1 if there is no link. + */ GPUd() calink HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const; GPUd() calink HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const; @@ -74,41 +74,41 @@ class GPUTPCSliceData GPUd() void SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value); /** - * Return the y and z coordinate(s) of the given hit(s). - */ + * Return the y and z coordinate(s) of the given hit(s). + */ GPUd() cahit HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const; GPUd() cahit HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const; GPUd() cahit2 HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const; /** - * For a given bin index, content tells how many hits there are in the preceding bins. This maps - * directly to the hit index in the given row. - * - * \param binIndexes in the range 0 to row.Grid.N + row.Grid.Ny + 3. - */ + * For a given bin index, content tells how many hits there are in the preceding bins. This maps + * directly to the hit index in the given row. + * + * \param binIndexes in the range 0 to row.Grid.N + row.Grid.Ny + 3. + */ GPUd() calink FirstHitInBin(const GPUTPCRow& row, calink binIndex) const; /** - * If the given weight is higher than what is currently stored replace with the new weight. - */ + * If the given weight is higher than what is currently stored replace with the new weight. 
+ */ GPUd() void MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); GPUd() void SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight); /** - * Return the maximal weight the given hit got from one tracklet - */ + * Return the maximal weight the given hit got from one tracklet + */ GPUd() int32_t HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const; /** - * Returns the index in the original GPUTPCClusterData object of the given hit - */ + * Returns the index in the original GPUTPCClusterData object of the given hit + */ GPUhd() int32_t ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const; GPUd() GPUglobalref() const int32_t* ClusterDataIndex() const { return mClusterDataIndex; } GPUd() GPUglobalref() int32_t* ClusterDataIndex() { return mClusterDataIndex; } /** - * Return the row object for the given row index. - */ + * Return the row object for the given row index. + */ GPUhdi() GPUglobalref() const GPUTPCRow& Row(int32_t rowIndex) const { return mRows[rowIndex]; } GPUhdi() GPUglobalref() GPUTPCRow* Rows() const { return mRows; } @@ -122,8 +122,8 @@ class GPUTPCSliceData private: #ifndef GPUCA_GPUCODE - GPUTPCSliceData& operator=(const GPUTPCSliceData&) = delete; // ROOT 5 tries to use this if it is not private - GPUTPCSliceData(const GPUTPCSliceData&) = delete; // + GPUTPCTrackingData& operator=(const GPUTPCTrackingData&) = delete; // ROOT 5 tries to use this if it is not private + GPUTPCTrackingData(const GPUTPCTrackingData&) = delete; // #endif GPUd() void CreateGrid(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, float yMin, float yMax, float zMin, float zMax); GPUd() void SetRowGridEmpty(GPUTPCRow& GPUrestrict() row); @@ -133,7 +133,7 @@ class GPUTPCSliceData friend class GPUTPCNeighboursFinder; friend class GPUTPCStartHitsFinder; - int32_t mNumberOfHits; // the number of hits in this slice + int32_t mNumberOfHits; // the number of hits in this sector int32_t 
mNumberOfHitsPlusAlign; int32_t mClusterIdOffset; @@ -141,56 +141,56 @@ class GPUTPCSliceData GPUglobalref() GPUTPCRow* mRows; // The row objects needed for most accessor functions - GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index - GPUglobalref() calink* mLinkDownData; // hit index in the row below which is linked to the given (global) hit index - GPUglobalref() cahit2* mHitData; // packed y,z coordinate of the given (global) hit index + GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index + GPUglobalref() calink* mLinkDownData; // hit index in the row below which is linked to the given (global) hit index + GPUglobalref() cahit2* mHitData; // packed y,z coordinate of the given (global) hit index GPUglobalref() int32_t* mClusterDataIndex; // see ClusterDataIndex() /* - * The size of the array is row.Grid.N + row.Grid.Ny + 3. The row.Grid.Ny + 3 is an optimization - * to remove the need for bounds checking. The last values are the same as the entry at [N - 1]. - */ - GPUglobalref() calink* mFirstHitInBin; // see FirstHitInBin - GPUglobalref() GPUAtomic(uint32_t) * mHitWeights; // the weight of the longest tracklet crossed the cluster + * The size of the array is row.Grid.N + row.Grid.Ny + 3. The row.Grid.Ny + 3 is an optimization + * to remove the need for bounds checking. The last values are the same as the entry at [N - 1]. 
+ */ + GPUglobalref() calink* mFirstHitInBin; // see FirstHitInBin + GPUglobalref() GPUAtomic(uint32_t) * mHitWeights; // the weight of the longest tracklet crossed the cluster GPUglobalref() const GPUTPCClusterData* mClusterData; }; -GPUdi() calink GPUTPCSliceData::HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCTrackingData::HitLinkUpData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkUpData[row.mHitNumberOffset + hitIndex]; } -GPUdi() calink GPUTPCSliceData::HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } +GPUdi() calink GPUTPCTrackingData::HitLinkDownData(const GPUTPCRow& row, const calink& hitIndex) const { return mLinkDownData[row.mHitNumberOffset + hitIndex]; } -GPUdi() void GPUTPCSliceData::SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCTrackingData::SetHitLinkUpData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkUpData[row.mHitNumberOffset + hitIndex] = value; } -GPUdi() void GPUTPCSliceData::SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) +GPUdi() void GPUTPCTrackingData::SetHitLinkDownData(const GPUTPCRow& row, const calink& hitIndex, const calink& value) { mLinkDownData[row.mHitNumberOffset + hitIndex] = value; } -GPUdi() cahit GPUTPCSliceData::HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } +GPUdi() cahit GPUTPCTrackingData::HitDataY(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].x; } -GPUdi() cahit GPUTPCSliceData::HitDataZ(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } +GPUdi() cahit GPUTPCTrackingData::HitDataZ(const GPUTPCRow& row, const uint32_t& 
hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex].y; } -GPUdi() cahit2 GPUTPCSliceData::HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } +GPUdi() cahit2 GPUTPCTrackingData::HitData(const GPUTPCRow& row, const uint32_t& hitIndex) const { return mHitData[row.mHitNumberOffset + hitIndex]; } -GPUdi() calink GPUTPCSliceData::FirstHitInBin(const GPUTPCRow& row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } +GPUdi() calink GPUTPCTrackingData::FirstHitInBin(const GPUTPCRow& row, calink binIndex) const { return mFirstHitInBin[row.mFirstHitInBinOffset + binIndex]; } -GPUhdi() int32_t GPUTPCSliceData::ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } +GPUhdi() int32_t GPUTPCTrackingData::ClusterDataIndex(const GPUTPCRow& row, uint32_t hitIndex) const { return mClusterDataIndex[row.mHitNumberOffset + hitIndex]; } -GPUdi() void GPUTPCSliceData::MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCTrackingData::MaximizeHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { CAMath::AtomicMax(&mHitWeights[row.mHitNumberOffset + hitIndex], weight); } -GPUdi() void GPUTPCSliceData::SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) +GPUdi() void GPUTPCTrackingData::SetHitWeight(const GPUTPCRow& row, uint32_t hitIndex, uint32_t weight) { mHitWeights[row.mHitNumberOffset + hitIndex] = weight; } -GPUdi() int32_t GPUTPCSliceData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } +GPUdi() int32_t GPUTPCTrackingData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } } // namespace gpu } // namespace o2 -#endif // GPUTPCSLICEDATA_H +#endif // GPUTPCSECTORDATA_H diff --git 
a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h similarity index 86% rename from GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 873368f1635a0..6d6d466c903b5 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -32,7 +32,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; #endif //! GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } @@ -48,11 +48,11 @@ class GPUTPCTracklet GPUhd() void SetHitWeight(const int32_t w) { mHitWeight = w; } private: - int32_t mFirstRow; // first TPC row // TODO: We can use smaller data format here! - int32_t mLastRow; // last TPC row + int32_t mFirstRow; // first TPC row // TODO: We can use smaller data format here! 
+ int32_t mLastRow; // last TPC row GPUTPCBaseTrackParam mParam; // tracklet parameters - int32_t mHitWeight; // Hit Weight of Tracklet - uint32_t mFirstHit; // first hit in row hit array + int32_t mHitWeight; // Hit Weight of Tracklet + uint32_t mFirstHit; // first hit in row hit array }; } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx similarity index 91% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 04833375ad6df..3aac31c87498c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -67,13 +67,13 @@ GPUd() void GPUTPCTrackletConstructor::StoreTracklet(int32_t /*nBlocks*/, int32_ uint32_t hitout = CAMath::AtomicAdd(tracker.NRowHits(), nHits); if (hitout + nHits > tracker.NMaxRowHits()) { - tracker.raiseError(GPUErrors::ERROR_TRACKLET_HIT_OVERFLOW, tracker.ISlice(), hitout + nHits, tracker.NMaxRowHits()); + tracker.raiseError(GPUErrors::ERROR_TRACKLET_HIT_OVERFLOW, tracker.ISector(), hitout + nHits, tracker.NMaxRowHits()); CAMath::AtomicExch(tracker.NRowHits(), tracker.NMaxRowHits()); return; } uint32_t itrout = CAMath::AtomicAdd(tracker.NTracklets(), 1u); if (itrout >= tracker.NMaxTracklets()) { - tracker.raiseError(GPUErrors::ERROR_TRACKLET_OVERFLOW, tracker.ISlice(), itrout, tracker.NMaxTracklets()); + tracker.raiseError(GPUErrors::ERROR_TRACKLET_OVERFLOW, tracker.ISector(), itrout, tracker.NMaxTracklets()); CAMath::AtomicExch(tracker.NTracklets(), tracker.NMaxTracklets()); return; } @@ -132,14 +132,14 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, float y = y0 + hh.x * stepY; float z = z0 + hh.y * stepZ; if (iRow != r.mStartRow || !tracker.Param().par.continuousTracking) { - tParam.ConstrainZ(z, tracker.ISlice(), z0, 
r.mLastZ); - tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, z); + tParam.ConstrainZ(z, tracker.ISector(), z0, r.mLastZ); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISector(), iRow, x, y, z); } if (iRow == r.mStartRow) { if (tracker.Param().par.continuousTracking) { float refZ = ((z > 0) ? tracker.Param().rec.tpc.defaultZOffsetOverR : -tracker.Param().rec.tpc.defaultZOffsetOverR) * x; float zTmp = refZ; - tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISlice(), iRow, x, y, zTmp); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->TransformXYZ(tracker.ISector(), iRow, x, y, zTmp); z += zTmp - refZ; // Add zCorrection (=zTmp - refZ) to z, such that zOffset is set such, that transformed (z - zOffset) becomes refZ tParam.SetZOffset(z - refZ); tParam.SetZ(refZ); @@ -194,7 +194,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, tracker.GetErrors2Seeding(iRow, tParam.GetZ(), sinPhi, tParam.GetDzDs(), -1.f, err2Y, err2Z); // TODO: Use correct time if (r.mNHits >= 10) { - const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISlice() >= 18); + const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISector() >= 18); err2Y += sErr2; err2Z += sErr2; const float kFactor = tracker.Param().rec.tpc.hitPickUpFactor * tracker.Param().rec.tpc.hitPickUpFactor * 3.5f * 3.5f; @@ -262,8 +262,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, rowHit = CALINK_INVAL; break; } - tParam.ConstrainZ(tmpZ, tracker.ISlice(), z0, r.mLastZ); - tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISlice(), iRow, tmpY, tmpZ, x); + tParam.ConstrainZ(tmpZ, tracker.ISector(), z0, r.mLastZ); + 
tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISector(), iRow, tmpY, tmpZ, x); } CADEBUG(printf("%14s: SEA TRACK ROW %3d X %8.3f -", "", iRow, tParam.X()); for (int32_t i = 0; i < 5; i++) { printf(" %8.3f", tParam.Par()[i]); } printf(" -"); for (int32_t i = 0; i < 15; i++) { printf(" %8.3f", tParam.Cov()[i]); } printf("\n")); @@ -286,7 +286,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); #endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR - tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISlice(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISector(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); if (tracker.Param().rec.tpc.rejectEdgeClustersInSeeding && tracker.Param().rejectEdgeClusterByY(yUncorrected, iRow, CAMath::Sqrt(tParam.Err2Y()))) { rowHit = CALINK_INVAL; @@ -297,7 +297,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, float err2Y, err2Z; tracker.GetErrors2Seeding(iRow, *((GPUTPCTrackParam*)&tParam), -1.f, err2Y, err2Z); // TODO: Use correct time if (r.mNHits >= 10) { - const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISlice() >= 18); + const float sErr2 = tracker.Param().GetSystematicClusterErrorIFC2(x, tParam.GetY(), tParam.GetZ(), tracker.ISector() >= 18); err2Y += sErr2; err2Z += sErr2; } @@ -377,8 +377,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } while (false); (void)found; if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { - uint32_t pad = 
CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISlice(), iRow, yUncorrected)); - if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISlice(), iRow, pad)) { + uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISector(), iRow, yUncorrected)); + if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISector(), iRow, pad)) { r.mNMissed--; rowHit = CALINK_DEAD_CHANNEL; } @@ -446,12 +446,12 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCT { float tmpY, tmpZ; if (tParam.GetPropagatedYZ(tracker.Param().bzCLight, x, tmpY, tmpZ)) { - if (tracker.ISlice() < GPUCA_NSLICES / 2 ? (tmpZ < 0) : (tmpZ > 0)) { + if (tracker.ISector() < GPUCA_NSECTORS / 2 ? (tmpZ < 0) : (tmpZ > 0)) { tmpZ = 0; - } else if (tracker.ISlice() < GPUCA_NSLICES / 2 ? (tmpZ > GPUTPCGeometry::TPCLength()) : (tmpZ < -GPUTPCGeometry::TPCLength())) { - tmpZ = tracker.ISlice() < GPUCA_NSLICES / 2 ? GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); + } else if (tracker.ISector() < GPUCA_NSECTORS / 2 ? (tmpZ > GPUTPCGeometry::TPCLength()) : (tmpZ < -GPUTPCGeometry::TPCLength())) { + tmpZ = tracker.ISector() < GPUCA_NSECTORS / 2 ? 
GPUTPCGeometry::TPCLength() : -GPUTPCGeometry::TPCLength(); } - tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISlice(), iRow, tmpY, tmpZ, x); + tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoX(tracker.ISector(), iRow, tmpY, tmpZ, x); } else { r.mGo = 0; continue; @@ -476,12 +476,12 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCT } template <> -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) { if (get_local_id(0) == 0) { sMem.mNStartHits = *tracker.NStartHits(); } - CA_SHARED_CACHE(&sMem.mRows[0], tracker.SliceDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); + CA_SHARED_CACHE(&sMem.mRows[0], tracker.TrackingDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); GPUbarrier(); GPUTPCThreadMemory rMem; @@ -492,19 +492,19 @@ GPUdii() void GPUTPCTrackletConstructor::Thread -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) { GPUconstantref() GPUTPCTracker* GPUrestrict() pTracker = &tracker0; #ifdef GPUCA_GPUCODE - int32_t mySlice = get_group_id(0) % GPUCA_NSLICES; - int32_t currentSlice = -1; + int32_t mySector = get_group_id(0) % GPUCA_NSECTORS; + int32_t currentSector = -1; if (get_local_id(0) == 0) { sMem.mNextStartHitFirstRun = 1; } GPUCA_UNROLL(, U()) - for (uint32_t iSlice = 0; iSlice < 
GPUCA_NSLICES; iSlice++) { - GPUconstantref() GPUTPCTracker& GPUrestrict() tracker = pTracker[mySlice]; + for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + GPUconstantref() GPUTPCTracker& GPUrestrict() tracker = pTracker[mySector]; GPUTPCThreadMemory rMem; @@ -515,13 +515,13 @@ GPUdii() void GPUTPCTrackletConstructor::Thread= 0 && rMem.mISH < sMem.mNStartHits) { @@ -529,13 +529,13 @@ GPUdii() void GPUTPCTrackletConstructor::Thread= GPUCA_NSLICES) { - mySlice = 0; + if (++mySector >= GPUCA_NSECTORS) { + mySector = 0; } } #else - for (int32_t iSlice = 0; iSlice < GPUCA_NSLICES; iSlice++) { - Thread(nBlocks, nThreads, iBlock, iThread, sMem, pTracker[iSlice]); + for (int32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + Thread(nBlocks, nThreads, iBlock, iThread, sMem, pTracker[iSector]); } #endif } @@ -549,7 +549,7 @@ GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() GPUTPCT if (get_local_id(0) == 0) { int32_t firstStartHit = -2; if (sMem.mNextStartHitFirstRun == 1) { - firstStartHit = (get_group_id(0) - tracker.ISlice()) / GPUCA_NSLICES * GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor); + firstStartHit = (get_group_id(0) - tracker.ISector()) / GPUCA_NSECTORS * GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor); sMem.mNextStartHitFirstRun = 0; } else { if (tracker.GPUParameters()->nextStartHit < nStartHit) { diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h similarity index 76% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 9af1eeb0ae7b2..a98fe9af0e74c 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -34,8 +34,8 @@ class GPUTPCTrackletConstructor { public: enum K { - singleSlice = 0, - allSlices = 1 + singleSector = 0, + allSectors = 1 }; 
class GPUTPCThreadMemory @@ -53,31 +53,31 @@ class GPUTPCTrackletConstructor protected: // WARNING: This data is copied element by element in CopyTrackletTempData. Changes to members of this class must be reflected in CopyTrackletTempData!!! - int32_t mISH; // track index - int32_t mFirstRow; // first row index - int32_t mLastRow; // last row index - int32_t mStartRow; // row index of first hit in seed - int32_t mEndRow; // row index of last hit in seed - calink mCurrIH; // indef of the current hit - int8_t mGo; // do fit/searching flag - int32_t mStage; // reco stage - int32_t mNHits; // n track hits + int32_t mISH; // track index + int32_t mFirstRow; // first row index + int32_t mLastRow; // last row index + int32_t mStartRow; // row index of first hit in seed + int32_t mEndRow; // row index of last hit in seed + calink mCurrIH; // indef of the current hit + int8_t mGo; // do fit/searching flag + int32_t mStage; // reco stage + int32_t mNHits; // n track hits int32_t mNHitsEndRow; // n hits at end row int32_t mNMissed; // n missed hits during search - float mLastY; // Y of the last fitted cluster - float mLastZ; // Z of the last fitted cluster + float mLastY; // Y of the last fitted cluster + float mLastZ; // Z of the last fitted cluster }; struct GPUSharedMemory { - CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows - int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next iteration - int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block during next iteration - int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? 
- int32_t mNStartHits; // Total number of start hits + CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows + int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next iteration + int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block during next iteration + int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? + int32_t mNStartHits; // Total number of start hits #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE int32_t fMaxSync; // temporary shared variable during profile creation -#endif // GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE +#endif // GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE }; GPUd() static void InitTracklet(GPUTPCTrackParam& tParam); @@ -99,7 +99,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index d5492602a4283..8810b692e1377 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -86,13 +86,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread if (nHits >= minHits) { uint32_t nFirstTrackHit = 
CAMath::AtomicAdd(tracker.NTrackHits(), (uint32_t)nHits); if (nFirstTrackHit + nHits > tracker.NMaxTrackHits()) { - tracker.raiseError(GPUErrors::ERROR_TRACK_HIT_OVERFLOW, tracker.ISlice(), nFirstTrackHit + nHits, tracker.NMaxTrackHits()); + tracker.raiseError(GPUErrors::ERROR_TRACK_HIT_OVERFLOW, tracker.ISector(), nFirstTrackHit + nHits, tracker.NMaxTrackHits()); CAMath::AtomicExch(tracker.NTrackHits(), tracker.NMaxTrackHits()); return; } uint32_t itrout = CAMath::AtomicAdd(tracker.NTracks(), 1u); if (itrout >= tracker.NMaxTracks()) { - tracker.raiseError(GPUErrors::ERROR_TRACK_OVERFLOW, tracker.ISlice(), itrout, tracker.NMaxTracks()); + tracker.raiseError(GPUErrors::ERROR_TRACK_OVERFLOW, tracker.ISector(), itrout, tracker.NMaxTracks()); CAMath::AtomicExch(tracker.NTracks(), tracker.NMaxTracks()); return; } diff --git a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h similarity index 97% rename from GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h rename to GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index 80a29d21edac3..52ebbf2af1eeb 100644 --- a/GPU/GPUTracking/SliceTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -44,7 +44,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSliceTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 53ed77fe62d8c..58866224943c0 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ 
-578,7 +578,7 @@ int32_t LoadEvent(int32_t iEvent, int32_t x) if (!configStandalone.runTransformation) { chainTracking->mIOPtrs.clustersNative = nullptr; } else { - for (int32_t i = 0; i < chainTracking->NSLICES; i++) { + for (int32_t i = 0; i < chainTracking->NSECTORS; i++) { if (chainTracking->mIOPtrs.rawClusters[i]) { if (configStandalone.proc.debugLevel >= 2) { printf("Converting Legacy Raw Cluster to Native\n"); @@ -687,7 +687,7 @@ int32_t RunBenchmark(GPUReconstruction* recUse, GPUChainTracking* chainTrackingU chainTrackingAsync->mIOPtrs.nMCInfosTPCCol = 0; chainTrackingAsync->mIOPtrs.mcLabelsTPC = nullptr; chainTrackingAsync->mIOPtrs.nMCLabelsTPC = 0; - for (int32_t i = 0; i < chainTracking->NSLICES; i++) { + for (int32_t i = 0; i < chainTracking->NSECTORS; i++) { chainTrackingAsync->mIOPtrs.clusterData[i] = nullptr; chainTrackingAsync->mIOPtrs.nClusterData[i] = 0; chainTrackingAsync->mIOPtrs.rawClusters[i] = nullptr; diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 32cdb246cf417..1c61316ed454e 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -144,7 +144,7 @@ include_directories(${GPU_DIR}/Common ${GPUTRACKING_DIR}/Merger ${GPUTRACKING_DIR}/Refit ${GPUTRACKING_DIR}/qa - ${GPUTRACKING_DIR}/SliceTracker + ${GPUTRACKING_DIR}/SectorTracker ${GPUTRACKING_DIR}/DataCompression ${GPUTRACKING_DIR}/TRDTracking ${GPUTRACKING_DIR}/TPCClusterFinder diff --git a/GPU/GPUTracking/Standalone/tools/createGeo.sh b/GPU/GPUTracking/Standalone/tools/createGeo.sh index 33973cf26079a..a449e1997a16e 100755 --- a/GPU/GPUTracking/Standalone/tools/createGeo.sh +++ b/GPU/GPUTracking/Standalone/tools/createGeo.sh @@ -7,5 +7,5 @@ alienv load O2/latest o2-sim -n 1 -export 
ROOT_INCLUDE_PATH=$ROOT_INCLUDE_PATH:/home/qon/alice/GPU/Common/:/home/qon/alice/GPU/GPUTracking/Base:/home/qon/alice/GPU/GPUTracking/SliceTracker:/home/qon/alice/GPU/GPUTracking/Merger:/home/qon/alice/GPU/GPUTracking/TRDTracking +export ROOT_INCLUDE_PATH=$ROOT_INCLUDE_PATH:/home/qon/alice/GPU/Common/:/home/qon/alice/GPU/GPUTracking/Base:/home/qon/alice/GPU/GPUTracking/SectorTracker:/home/qon/alice/GPU/GPUTracking/Merger:/home/qon/alice/GPU/GPUTracking/TRDTracking root -l -q -b createGeo.C+ diff --git a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h index de5dfe9e1d683..dcea2bf8e966c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h @@ -27,9 +27,9 @@ struct CfFragment { OverlapTimebins = 8, }; - // Time offset of this sub slice within the entire time slice + // Time offset of this sub sector within the entire time sector tpccf::TPCTime start = 0; - // Number of time bins to process in this slice + // Number of time bins to process in this sector tpccf::TPCFragmentTime length = 0; size_t digitsStart = 0; // Start digits in this fragment. Only used when zero suppression is skipped @@ -38,23 +38,23 @@ struct CfFragment { bool hasBacklog = false; bool hasFuture = false; - tpccf::TPCTime totalSliceLength = 0; - tpccf::TPCFragmentTime maxSubSliceLength = 0; + tpccf::TPCTime totalSectorLength = 0; + tpccf::TPCFragmentTime maxSubSectorLength = 0; GPUdDefault() CfFragment() = default; - GPUd() CfFragment(tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen) : CfFragment(0, false, 0, totalSliceLen, maxSubSliceLen) {} + GPUd() CfFragment(tpccf::TPCTime totalSectorLen, tpccf::TPCFragmentTime maxSubSectorLen) : CfFragment(0, false, 0, totalSectorLen, maxSubSectorLen) {} GPUdi() bool isEnd() const { return length == 0; } GPUdi() CfFragment next() const { - return CfFragment{index + 1, hasFuture, tpccf::TPCTime(start + length - (hasFuture ? 
2 * OverlapTimebins : 0)), totalSliceLength, maxSubSliceLength}; + return CfFragment{index + 1, hasFuture, tpccf::TPCTime(start + length - (hasFuture ? 2 * OverlapTimebins : 0)), totalSectorLength, maxSubSectorLength}; } GPUdi() uint32_t count() const { - return (totalSliceLength + maxSubSliceLength - 4 * OverlapTimebins - 1) / (maxSubSliceLength - 2 * OverlapTimebins); + return (totalSectorLength + maxSubSectorLength - 4 * OverlapTimebins - 1) / (maxSubSectorLength - 2 * OverlapTimebins); } GPUdi() tpccf::TPCTime first() const @@ -104,16 +104,16 @@ struct CfFragment { } private: - GPUd() CfFragment(uint32_t index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen) + GPUd() CfFragment(uint32_t index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSectorLen, tpccf::TPCFragmentTime maxSubSectorLen) { this->index = index_; this->hasBacklog = hasBacklog_; this->start = start_; - tpccf::TPCTime remainder = totalSliceLen - start; - this->hasFuture = remainder > tpccf::TPCTime(maxSubSliceLen); - this->length = hasFuture ? maxSubSliceLen : remainder; - this->totalSliceLength = totalSliceLen; - this->maxSubSliceLength = maxSubSliceLen; + tpccf::TPCTime remainder = totalSectorLen - start; + this->hasFuture = remainder > tpccf::TPCTime(maxSubSectorLen); + this->length = hasFuture ? 
maxSubSectorLen : remainder; + this->totalSectorLength = totalSectorLen; + this->maxSubSectorLength = maxSubSectorLen; } }; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h index d6107a6503e86..0dc691ae6044a 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h @@ -29,10 +29,10 @@ namespace gpu struct GPUTPCCFChainContext { struct FragmentData { - uint32_t nDigits[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - uint32_t nPages[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - std::vector pageDigits[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - GPUTPCClusterFinder::MinMaxCN minMaxCN[GPUCA_NSLICES][GPUTrackingInOutZS::NENDPOINTS]; + uint32_t nDigits[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + uint32_t nPages[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + std::vector pageDigits[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + GPUTPCClusterFinder::MinMaxCN minMaxCN[GPUCA_NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; }; struct PtrSave { @@ -45,21 +45,21 @@ struct GPUTPCCFChainContext { std::vector fragmentData; uint32_t nPagesTotal; uint32_t nPagesFragmentMax; - uint32_t nPagesSector[GPUCA_NSLICES]; - uint32_t nDigitsEndpointMax[GPUCA_NSLICES]; + uint32_t nPagesSector[GPUCA_NSECTORS]; + uint32_t nDigitsEndpointMax[GPUCA_NSECTORS]; uint32_t tpcMaxTimeBin; bool abandonTimeframe; uint32_t nFragments; CfFragment fragmentFirst; - std::pair nextPos[GPUCA_NSLICES]; - PtrSave ptrSave[GPUCA_NSLICES]; + std::pair nextPos[GPUCA_NSECTORS]; + PtrSave ptrSave[GPUCA_NSECTORS]; const o2::tpc::ClusterNativeAccess* ptrClusterNativeSave; void prepare(bool tpcZS, const CfFragment& fragmentMax) { abandonTimeframe = false; nPagesTotal = nPagesFragmentMax = 0; - for (uint32_t i = 0; i < GPUCA_NSLICES; i++) { + for (uint32_t i = 0; i < GPUCA_NSECTORS; i++) { nPagesSector[i] = 0; nDigitsEndpointMax[i] = 0; } @@ -72,7 +72,7 @@ 
struct GPUTPCCFChainContext { } for (uint32_t i = 0; i < nFragments; i++) { - for (uint32_t j = 0; j < GPUCA_NSLICES; j++) { + for (uint32_t j = 0; j < GPUCA_NSECTORS; j++) { for (uint32_t k = 0; k < GPUTrackingInOutZS::NENDPOINTS; k++) { fragmentData[i].nDigits[j][k] = fragmentData[i].nPages[j][k] = 0; fragmentData[i].pageDigits[j][k].clear(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx index 8f184836de6df..8dbc5804f8fb8 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx @@ -64,7 +64,7 @@ GPUd() void GPUTPCCFChargeMapFiller::fillFromDigitsImpl(int32_t nBlocks, int32_t ChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp())); positions[idx] = pos; float q = digit.getChargeFloat(); - q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(clusterer.mISlice, digit.getRow(), digit.getPad()); + q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(clusterer.mISector, digit.getRow(), digit.getPad()); chargeMap[pos] = PackedCharge(q); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index ad07f2b93f3e0..1aeae812f5193 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -261,7 +261,7 @@ GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, c if (index < maxElemsPerBucket) { buckets[maxElemsPerBucket * row + index] = cluster; } else { - clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISlice * 1000 + row, index, maxElemsPerBucket); + clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket); CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket); 
} return index; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index 3727e23bcf16c..e7634fa397bae 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -42,13 +42,13 @@ GPUdii() void GPUTPCCFDecodeZS::Thread(int32_t nBloc GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUSharedMemory& s, int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, int32_t firstHBF) { - const uint32_t slice = clusterer.mISlice; + const uint32_t sector = clusterer.mISector; #ifdef GPUCA_GPUCODE const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint; #else const uint32_t endpoint = iBlock; #endif - const GPUTrackingInOutZS::GPUTrackingInOutZSSlice& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->slice[slice]; + const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector]; if (zs.count[endpoint] == 0) { return; } @@ -179,7 +179,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared if (inFragment) { float q = float(byte & mask) * decodeBitsFactor; - q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(slice, row, pad); + q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, row, pad); chargeMap[pos] = PackedCharge(q); } pad++; @@ -277,7 +277,7 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, processorT (void)nDecoded; #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (iThread == 0 && nDecoded != decHdr->nADCsamples) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISlice * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHdr->cruID, decHdr->nADCsamples, nDecoded); /*#ifndef GPUCA_GPUCODE FILE* foo = fopen("dump.bin", "w+b"); 
fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo); @@ -463,7 +463,7 @@ GPUd() bool GPUTPCCFDecodeZSLink::ChannelIsActive(const uint32_t* chan, uint8_t template GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, typename Decoder::GPUSharedMemory& smem, processorType& clusterer, int32_t firstHBF) { - const uint32_t slice = clusterer.mISlice; + const uint32_t sector = clusterer.mISector; #ifdef GPUCA_GPUCODE const uint32_t endpoint = clusterer.mPzsOffsets[iBlock].endpoint; @@ -471,7 +471,7 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, const uint32_t endpoint = iBlock; #endif - const GPUTrackingInOutZS::GPUTrackingInOutZSSlice& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->slice[slice]; + const GPUTrackingInOutZS::GPUTrackingInOutZSSector& zs = clusterer.GetConstantMem()->ioPtrs.tpcZS->sector[sector]; if (zs.count[endpoint] == 0) { return; } @@ -514,7 +514,7 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::Decode(int32_t nBlocks, int32_t nThreads, if (iThread == 0 && iBlock < nBlocks - 1) { uint32_t maxOffset = clusterer.mPzsOffsets[iBlock + 1].offset; if (pageDigitOffset != maxOffset) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISlice * 1000 + endpoint, pageDigitOffset, maxOffset); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_OFFSET, clusterer.mISector * 1000 + endpoint, pageDigitOffset, maxOffset); } } #endif @@ -550,12 +550,12 @@ GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorTy GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset) { - const uint32_t slice = clusterer.mISlice; + const uint32_t sector = clusterer.mISector; ChargePos* positions = clusterer.mPpositions; #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (padAndRow.getRow() >= GPUCA_ROW_COUNT) { positions[positionOffset] = 
INVALID_CHARGE_POS; - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_ROW, clusterer.mISlice * 1000 + padAndRow.getRow()); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_ROW, clusterer.mISector * 1000 + padAndRow.getRow()); return; } #endif @@ -564,7 +564,7 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, floa ChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime); positions[positionOffset] = pos; - charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(slice, padAndRow.getRow(), padAndRow.getPad()); + charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad()); chargeMap[pos] = PackedCharge(charge); } @@ -627,7 +627,7 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, pro nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten); #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (iThread == 0) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISlice * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage)); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage)); } #endif } @@ -642,7 +642,7 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, pro #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (iThread == 0 && nSamplesWritten != nSamplesInPage) { - clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISlice * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten); + clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten); /*#ifndef GPUCA_GPUCODE FILE* foo = fopen("dump.bin", "w+b"); fwrite(pageSrc, 1, 
o2::raw::RDHUtils::getMemorySize(*rdHdr), foo); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx index edc4fd6bab56c..efed3643800b6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx @@ -120,7 +120,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread bufferSize) { - clusterer.raiseError(stage ? GPUErrors::ERROR_CF_CLUSTER_OVERFLOW : GPUErrors::ERROR_CF_PEAK_OVERFLOW, clusterer.mISlice, nFinal, bufferSize); + clusterer.raiseError(stage ? GPUErrors::ERROR_CF_CLUSTER_OVERFLOW : GPUErrors::ERROR_CF_PEAK_OVERFLOW, clusterer.mISector, nFinal, bufferSize); nFinal = bufferSize; } if (stage) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index e009ac12389b4..613c4ad9e5fa6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -103,11 +103,11 @@ void GPUTPCClusterFinder::RegisterMemoryAllocation() if (mRec->GetProcessingSettings().runMC) { scratchType |= GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU; } - mScratchId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersScratch, scratchType, "TPCClustererScratch", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererScratch, (uint16_t)(mISlice % mRec->GetProcessingSettings().nTPCClustererLanes)}); + mScratchId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersScratch, scratchType, "TPCClustererScratch", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererScratch, (uint16_t)(mISector % mRec->GetProcessingSettings().nTPCClustererLanes)}); mMemoryId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersMemory, GPUMemoryResource::MEMORY_PERMANENT, "TPCClustererMemory"); 
mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | GPUMemoryResource::MEMORY_STACK, "TPCClustererOutput"); - mZSId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersZS, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_CUSTOM_TRANSFER | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK, "TPCClustererZSData", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererZS, (uint16_t)(mISlice % mRec->GetProcessingSettings().nTPCClustererLanes)}); + mZSId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersZS, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_CUSTOM_TRANSFER | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK, "TPCClustererZSData", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::ClustererZS, (uint16_t)(mISector % mRec->GetProcessingSettings().nTPCClustererLanes)}); mZSOffsetId = mRec->RegisterMemoryAllocation(this, &GPUTPCClusterFinder::SetPointersZSOffset, GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_CUSTOM_TRANSFER | GPUMemoryResource::MEMORY_INPUT | GPUMemoryResource::MEMORY_STACK, "TPCClustererZSOffsets"); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index a02d32f250604..f59102aa6b5c3 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -61,7 +61,7 @@ class GPUTPCClusterFinder : public GPUProcessor tpccf::SizeT nPeaks = 0; tpccf::SizeT nClusters = 0; uint32_t maxTimeBin = 0; - uint32_t nPagesSubslice = 0; + uint32_t nPagesSubsector = 0; } counters; CfFragment fragment; }; @@ -123,7 +123,7 @@ class GPUTPCClusterFinder : public GPUProcessor uint32_t mPlabelsHeaderGlobalOffset = 0; uint32_t mPlabelsDataGlobalOffset = 0; - int32_t mISlice = 0; + int32_t mISector = 0; constexpr static int32_t 
mScanWorkGroupSize = GPUCA_THREAD_COUNT_SCAN; uint32_t mNMaxClusterPerRow = 0; uint32_t mNMaxClusters = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index eb5d7505eea22..a9fbc1b5f40e0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -24,7 +24,7 @@ void GPUTPCClusterFinder::DumpDigits(std::ostream& out) { const auto nPositions = mPmemory->counters.nPositions; - out << "\nClusterer - Digits - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << ": " << nPositions << "\n"; + out << "\nClusterer - Digits - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << ": " << nPositions << "\n"; out << std::hex; for (size_t i = 0; i < mPmemory->counters.nPositions; i++) { @@ -36,7 +36,7 @@ void GPUTPCClusterFinder::DumpDigits(std::ostream& out) void GPUTPCClusterFinder::DumpChargeMap(std::ostream& out, std::string_view title) { - out << "\nClusterer - " << title << " - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; Array2D map(mPchargeMap); out << std::hex; @@ -69,7 +69,7 @@ void GPUTPCClusterFinder::DumpChargeMap(std::ostream& out, std::string_view titl void GPUTPCClusterFinder::DumpPeakMap(std::ostream& out, std::string_view title) { - out << "\nClusterer - " << title << " - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; Array2D map(mPpeakMap); @@ -105,7 +105,7 @@ void GPUTPCClusterFinder::DumpPeakMap(std::ostream& out, std::string_view title) void GPUTPCClusterFinder::DumpPeaks(std::ostream& out) { - out << "\nClusterer - Peaks - Slice " << mISlice << " - Fragment " << 
mPmemory->fragment.index << "\n"; + out << "\nClusterer - Peaks - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; for (uint32_t i = 0; i < mPmemory->counters.nPositions; i++) { out << int32_t{mPisPeak[i]}; if ((i + 1) % 100 == 0) { @@ -118,7 +118,7 @@ void GPUTPCClusterFinder::DumpPeaksCompacted(std::ostream& out) { const auto nPeaks = mPmemory->counters.nPeaks; - out << "\nClusterer - Compacted Peaks - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << ": " << nPeaks << "\n"; + out << "\nClusterer - Compacted Peaks - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << ": " << nPeaks << "\n"; for (size_t i = 0; i < nPeaks; i++) { const auto& pos = mPpeakPositions[i]; out << pos.time() << " " << int32_t{pos.pad()} << " " << int32_t{pos.row()} << "\n"; @@ -130,7 +130,7 @@ void GPUTPCClusterFinder::DumpSuppressedPeaks(std::ostream& out) const auto& fragment = mPmemory->fragment; const auto nPeaks = mPmemory->counters.nPeaks; - out << "\nClusterer - NoiseSuppression - Slice " << mISlice << " - Fragment " << fragment.index << mISlice << "\n"; + out << "\nClusterer - NoiseSuppression - Sector " << mISector << " - Fragment " << fragment.index << mISector << "\n"; for (uint32_t i = 0; i < nPeaks; i++) { out << int32_t{mPisPeak[i]}; if ((i + 1) % 100 == 0) { @@ -144,7 +144,7 @@ void GPUTPCClusterFinder::DumpSuppressedPeaksCompacted(std::ostream& out) const auto& fragment = mPmemory->fragment; const auto nPeaks = mPmemory->counters.nClusters; - out << "\nClusterer - Noise Suppression Peaks Compacted - Slice " << mISlice << " - Fragment " << fragment.index << ": " << nPeaks << "\n"; + out << "\nClusterer - Noise Suppression Peaks Compacted - Sector " << mISector << " - Fragment " << fragment.index << ": " << nPeaks << "\n"; for (size_t i = 0; i < nPeaks; i++) { const auto& peak = mPfilteredPeakPositions[i]; out << peak.time() << " " << int32_t{peak.pad()} << " " << int32_t{peak.row()} << "\n"; @@ -153,7 +153,7 @@ 
void GPUTPCClusterFinder::DumpSuppressedPeaksCompacted(std::ostream& out) void GPUTPCClusterFinder::DumpClusters(std::ostream& out) { - out << "\nClusterer - Clusters - Slice " << mISlice << " - Fragment " << mPmemory->fragment.index << "\n"; + out << "\nClusterer - Clusters - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { size_t N = mPclusterInRow[i]; diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h index 222c2ffa65648..52beb195d7c83 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h @@ -39,10 +39,10 @@ class GPUTPCConvert : public GPUProcessor void* SetPointersMemory(void* mem); #endif - constexpr static uint32_t NSLICES = GPUCA_NSLICES; + constexpr static uint32_t NSECTORS = GPUCA_NSECTORS; struct Memory { - GPUTPCClusterData* clusters[NSLICES]; + GPUTPCClusterData* clusters[NSECTORS]; }; protected: diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h index 8dfe4ac6c28bc..4a142debcaafa 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h @@ -28,20 +28,20 @@ namespace gpu class GPUTPCConvertImpl { public: - GPUd() static void convert(const GPUConstantMem& GPUrestrict() cm, int32_t slice, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) + GPUd() static void convert(const GPUConstantMem& GPUrestrict() cm, int32_t sector, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) { if (cm.param.par.continuousTracking) { - cm.calibObjects.fastTransformHelper->getCorrMap()->TransformInTimeFrame(slice, row, pad, time, x, y, z, cm.param.continuousMaxTimeBin); + cm.calibObjects.fastTransformHelper->getCorrMap()->TransformInTimeFrame(sector, row, pad, time, x, y, z, 
cm.param.continuousMaxTimeBin); } else { - cm.calibObjects.fastTransformHelper->Transform(slice, row, pad, time, x, y, z); + cm.calibObjects.fastTransformHelper->Transform(sector, row, pad, time, x, y, z); } } - GPUd() static void convert(const TPCFastTransform& GPUrestrict() transform, const GPUParam& GPUrestrict() param, int32_t slice, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) + GPUd() static void convert(const TPCFastTransform& GPUrestrict() transform, const GPUParam& GPUrestrict() param, int32_t sector, int32_t row, float pad, float time, float& GPUrestrict() x, float& GPUrestrict() y, float& GPUrestrict() z) { if (param.par.continuousTracking) { - transform.TransformInTimeFrame(slice, row, pad, time, x, y, z, param.continuousMaxTimeBin); + transform.TransformInTimeFrame(sector, row, pad, time, x, y, z, param.continuousMaxTimeBin); } else { - transform.Transform(slice, row, pad, time, x, y, z); + transform.Transform(sector, row, pad, time, x, y, z); } } }; diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx index dc01b3782daf9..e17bfc1dff025 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.cxx @@ -24,18 +24,18 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCConvertKernel::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors) { - const int32_t iSlice = iBlock / GPUCA_ROW_COUNT; + const int32_t iSector = iBlock / GPUCA_ROW_COUNT; const int32_t iRow = iBlock % GPUCA_ROW_COUNT; GPUTPCConvert& GPUrestrict() convert = processors.tpcConverter; const o2::tpc::ClusterNativeAccess* GPUrestrict() native = processors.ioPtrs.clustersNative; - GPUTPCClusterData* GPUrestrict() clusters = convert.mMemory->clusters[iSlice]; - const int32_t idOffset = 
native->clusterOffset[iSlice][iRow]; - const int32_t indexOffset = native->clusterOffset[iSlice][iRow] - native->clusterOffset[iSlice][0]; + GPUTPCClusterData* GPUrestrict() clusters = convert.mMemory -> clusters[iSector]; + const int32_t idOffset = native->clusterOffset[iSector][iRow]; + const int32_t indexOffset = native->clusterOffset[iSector][iRow] - native->clusterOffset[iSector][0]; - for (uint32_t k = get_local_id(0); k < native->nClusters[iSlice][iRow]; k += get_local_size(0)) { - const auto& GPUrestrict() clin = native->clusters[iSlice][iRow][k]; + for (uint32_t k = get_local_id(0); k < native->nClusters[iSector][iRow]; k += get_local_size(0)) { + const auto& GPUrestrict() clin = native -> clusters[iSector][iRow][k]; float x, y, z; - GPUTPCConvertImpl::convert(processors, iSlice, iRow, clin.getPad(), clin.getTime(), x, y, z); + GPUTPCConvertImpl::convert(processors, iSector, iRow, clin.getPad(), clin.getTime(), x, y, z); auto& GPUrestrict() clout = clusters[indexOffset + k]; clout.x = x; clout.y = y; diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 4604a8cdbdf70..286409ef146b1 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -34,7 +34,7 @@ class GPUdEdx public: // The driver must call clear(), fill clusters row by row outside-in, then run computedEdx() to get the result GPUd() void clear(); - GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t slice, float trackSnp, float trackTgl, const GPUParam& param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); + GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); GPUd() void fillSubThreshold(int32_t padRow, const GPUParam& param); GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); @@ -94,7 +94,7 @@ GPUdi() void 
GPUdEdx::checkSubThresh(int32_t roc) mLastROC = roc; } -GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t slice, float trackSnp, float trackTgl, const GPUParam& GPUrestrict() param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) +GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& GPUrestrict() param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) { if (mCount >= MAX_NCL) { return; @@ -123,10 +123,10 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint const float absRelPad = CAMath::Abs(pad - padPos); const int32_t region = param.tpcGeometry.GetRegion(padRow); z = CAMath::Abs(z); - const float threshold = calibContainer->getZeroSupressionThreshold(slice, padRow, padPos); // TODO: Use the mean zero supresion threshold of all pads in the cluster? + const float threshold = calibContainer->getZeroSupressionThreshold(sector, padRow, padPos); // TODO: Use the mean zero supresion threshold of all pads in the cluster? 
const bool useFullGainMap = calibContainer->isUsageOfFullGainMap(); float qTotIn = qtot; - const float fullGainMapGain = calibContainer->getGain(slice, padRow, padPos); + const float fullGainMapGain = calibContainer->getGain(sector, padRow, padPos); if (useFullGainMap) { qmax /= fullGainMapGain; qtot /= fullGainMapGain; @@ -140,7 +140,7 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qtot /= qTotTopologyCorr; tpc::StackID stack{ - slice, + sector, static_cast(roc)}; const float qMaxResidualCorr = calibContainer->getResidualCorrection(stack, tpc::ChargeType::Max, trackTgl, trackSnp); @@ -148,7 +148,7 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qmax /= qMaxResidualCorr; qtot /= qTotResidualCorr; - const float residualGainMapGain = calibContainer->getResidualGain(slice, padRow, padPos); + const float residualGainMapGain = calibContainer->getResidualGain(sector, padRow, padPos); qmax /= residualGainMapGain; qtot /= residualGainMapGain; @@ -164,13 +164,13 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamdEdx)) { float padlx = param.tpcGeometry.Row2X(padRow); - float padly = param.tpcGeometry.LinearPad2Y(slice, padRow, padPos); + float padly = param.tpcGeometry.LinearPad2Y(sector, padRow, padPos); o2::utils::DebugStreamer::instance()->getStreamer("debug_dedx", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_dedx").data() << "qTot=" << mChargeTot[mCount - 1] << "qMax=" << mChargeMax[mCount - 1] << "region=" << region << "padRow=" << padRow - << "sector=" << slice + << "sector=" << sector << "lx=" << padlx << "ly=" << padly << "tanTheta=" << tanTheta diff --git a/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h b/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h index 1bbe21a5e7c20..ce1fbba68838a 100644 --- 
a/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h +++ b/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h @@ -3952,9 +3952,9 @@ GLAPI void APIENTRY glMaxShaderCompilerThreadsKHR(GLuint count); #define GL_KHR_texture_compression_astc_ldr 1 #endif /* GL_KHR_texture_compression_astc_ldr */ -#ifndef GL_KHR_texture_compression_astc_sliced_3d -#define GL_KHR_texture_compression_astc_sliced_3d 1 -#endif /* GL_KHR_texture_compression_astc_sliced_3d */ +#ifndef GL_KHR_texture_compression_astc_sectord_3d +#define GL_KHR_texture_compression_astc_sectord_3d 1 +#endif /* GL_KHR_texture_compression_astc_sectord_3d */ #ifndef GL_AMD_framebuffer_multisample_advanced #define GL_AMD_framebuffer_multisample_advanced 1 diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index e42a4fa3e4bf1..c2b74489e6250 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -27,7 +27,7 @@ #include "GPUChainTracking.h" #include "GPUQA.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUChainTracking.h" #include "GPUTPCTrack.h" #include "GPUTPCTracker.h" @@ -106,11 +106,11 @@ void GPUDisplay::calcXYZ(const float* matrix) }*/ } -void GPUDisplay::SetCollisionFirstCluster(uint32_t collision, int32_t slice, int32_t cluster) +void GPUDisplay::SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster) { mNCollissions = std::max(mNCollissions, collision + 1); mOverlayTFClusters.resize(mNCollissions); - mOverlayTFClusters[collision][slice] = cluster; + mOverlayTFClusters[collision][sector] = cluster; } void GPUDisplay::mAnimationCloseAngle(float& newangle, float lastAngle) @@ -234,7 +234,7 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla // Calculate rotation / translation scaling factors float scalefactor = mFrontend->mKeys[mFrontend->KEY_SHIFT] ? 
0.2f : 1.0f; float rotatescalefactor = scalefactor * 0.25f; - if (mCfgL.drawSlice != -1) { + if (mCfgL.drawSector != -1) { scalefactor *= 0.2f; } float sqrdist = sqrtf(sqrtf(mViewMatrixP[12] * mViewMatrixP[12] + mViewMatrixP[13] * mViewMatrixP[13] + mViewMatrixP[14] * mViewMatrixP[14]) * GL_SCALE_FACTOR) * 0.8f; @@ -258,7 +258,7 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla mCfgL.pointSize = 2.0f; mCfgL.lineWidth = 1.4f; - mCfgL.drawSlice = -1; + mCfgL.drawSector = -1; mCfgH.xAdd = mCfgH.zAdd = 0; mCfgR.camLookOrigin = mCfgR.camYUp = false; mAngleRollOrigin = -1e9f; @@ -411,8 +411,8 @@ void GPUDisplay::DrawGLScene_cameraAndAnimation(float animateTime, float& mixSla void GPUDisplay::DrawGLScene_drawCommands() { -#define LOOP_SLICE for (int32_t iSlice = (mCfgL.drawSlice == -1 ? 0 : mCfgL.drawRelatedSlices ? (mCfgL.drawSlice % (NSLICES / 4)) : mCfgL.drawSlice); iSlice < NSLICES; iSlice += (mCfgL.drawSlice == -1 ? 1 : mCfgL.drawRelatedSlices ? (NSLICES / 4) : NSLICES)) -#define LOOP_SLICE2 for (int32_t iSlice = (mCfgL.drawSlice == -1 ? 0 : mCfgL.drawRelatedSlices ? (mCfgL.drawSlice % (NSLICES / 4)) : mCfgL.drawSlice) % (NSLICES / 2); iSlice < NSLICES / 2; iSlice += (mCfgL.drawSlice == -1 ? 1 : mCfgL.drawRelatedSlices ? (NSLICES / 4) : NSLICES)) +#define LOOP_SECTOR for (int32_t iSector = (mCfgL.drawSector == -1 ? 0 : mCfgL.drawRelatedSectors ? (mCfgL.drawSector % (NSECTORS / 4)) : mCfgL.drawSector); iSector < NSECTORS; iSector += (mCfgL.drawSector == -1 ? 1 : mCfgL.drawRelatedSectors ? (NSECTORS / 4) : NSECTORS)) +#define LOOP_SECTOR2 for (int32_t iSector = (mCfgL.drawSector == -1 ? 0 : mCfgL.drawRelatedSectors ? (mCfgL.drawSector % (NSECTORS / 4)) : mCfgL.drawSector) % (NSECTORS / 2); iSector < NSECTORS / 2; iSector += (mCfgL.drawSector == -1 ? 1 : mCfgL.drawRelatedSectors ? (NSECTORS / 4) : NSECTORS)) #define LOOP_COLLISION for (int32_t iCol = (mCfgL.showCollision == -1 ? 
0 : mCfgL.showCollision); iCol < mNCollissions; iCol += (mCfgL.showCollision == -1 ? 1 : mNCollissions)) #define LOOP_COLLISION_COL(cmd) \ LOOP_COLLISION \ @@ -426,37 +426,37 @@ void GPUDisplay::DrawGLScene_drawCommands() if (mCfgL.drawGrid) { if (mCfgL.drawTPC) { SetColorGrid(); - LOOP_SLICE drawVertices(mGlDLGrid[iSlice], GPUDisplayBackend::LINES); + LOOP_SECTOR drawVertices(mGlDLGrid[iSector], GPUDisplayBackend::LINES); } if (mCfgL.drawTRD) { SetColorGridTRD(); - LOOP_SLICE2 drawVertices(mGlDLGridTRD[iSlice], GPUDisplayBackend::LINES); + LOOP_SECTOR2 drawVertices(mGlDLGridTRD[iSector], GPUDisplayBackend::LINES); } } if (mCfgL.drawClusters) { if (mCfgL.drawTRD) { SetColorTRD(); mBackend->lineWidthFactor(2); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tTRDCLUSTER][iCol], GPUDisplayBackend::LINES)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tTRDCLUSTER][iCol], GPUDisplayBackend::LINES)); if (mCfgL.drawFinal && mCfgL.colorClusters) { SetColorFinal(); } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tTRDATTACHED][iCol], GPUDisplayBackend::LINES)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tTRDATTACHED][iCol], GPUDisplayBackend::LINES)); mBackend->lineWidthFactor(1); } if (mCfgL.drawTOF) { SetColorTOF(); mBackend->pointSizeFactor(2); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tTOFCLUSTER][0], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tTOFCLUSTER][0], GPUDisplayBackend::POINTS)); mBackend->pointSizeFactor(1); } if (mCfgL.drawITS) { SetColorITS(); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tITSCLUSTER][0], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[0][tITSCLUSTER][0], GPUDisplayBackend::POINTS)); } if (mCfgL.drawTPC) { SetColorClusters(); - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tCLUSTER][iCol], GPUDisplayBackend::POINTS)); + 
LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tCLUSTER][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawInitLinks) { if (mCfgL.excludeClusters) { @@ -466,7 +466,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorInitLinks(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tINITLINK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tINITLINK][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawLinks) { if (mCfgL.excludeClusters) { @@ -478,7 +478,7 @@ void GPUDisplay::DrawGLScene_drawCommands() } else { SetColorClusters(); } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tLINK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tLINK][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawSeeds) { if (mCfgL.excludeClusters) { @@ -488,7 +488,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorSeeds(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tSEED][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tSEED][iCol], GPUDisplayBackend::POINTS)); skip1: SetColorClusters(); @@ -500,7 +500,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorTracklets(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tTRACKLET][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tTRACKLET][iCol], GPUDisplayBackend::POINTS)); if (mCfgL.drawTracks) { if (mCfgL.excludeClusters) { @@ -510,7 +510,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorTracks(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tSLICETRACK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tSECTORTRACK][iCol], GPUDisplayBackend::POINTS)); skip2:; if (mCfgL.drawExtrapolatedTracks) { @@ -523,7 +523,7 @@ void 
GPUDisplay::DrawGLScene_drawCommands() } else { SetColorClusters(); } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tEXTRAPOLATEDTRACK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tEXTRAPOLATEDTRACK][iCol], GPUDisplayBackend::POINTS)); SetColorClusters(); if (mCfgL.drawFinal && mCfgL.propagateTracks < 2) { @@ -534,7 +534,7 @@ void GPUDisplay::DrawGLScene_drawCommands() SetColorFinal(); } } - LOOP_SLICE LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSlice][tFINALTRACK][iCol], GPUDisplayBackend::POINTS)); + LOOP_SECTOR LOOP_COLLISION_COL(drawVertices(mGlDLPoints[iSector][tFINALTRACK][iCol], GPUDisplayBackend::POINTS)); skip3:; } } @@ -543,47 +543,47 @@ void GPUDisplay::DrawGLScene_drawCommands() if (mCfgL.drawTPC) { if (mCfgL.drawInitLinks) { SetColorInitLinks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tINITLINK], GPUDisplayBackend::LINES); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tINITLINK], GPUDisplayBackend::LINES); } if (mCfgL.drawLinks) { SetColorLinks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tLINK], GPUDisplayBackend::LINES); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tLINK], GPUDisplayBackend::LINES); } if (mCfgL.drawSeeds) { SetColorSeeds(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tSEED], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tSEED], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.drawTracklets) { SetColorTracklets(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tTRACKLET], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tTRACKLET], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.drawTracks) { SetColorTracks(); - LOOP_SLICE drawVertices(mGlDLLines[iSlice][tSLICETRACK], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tSECTORTRACK], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.drawExtrapolatedTracks) { SetColorExtrapolatedTracks(); - LOOP_SLICE 
drawVertices(mGlDLLines[iSlice][tEXTRAPOLATEDTRACK], GPUDisplayBackend::LINE_STRIP); + LOOP_SECTOR drawVertices(mGlDLLines[iSector][tEXTRAPOLATEDTRACK], GPUDisplayBackend::LINE_STRIP); } } if (mCfgL.drawFinal) { SetColorFinal(); - LOOP_SLICE LOOP_COLLISION + LOOP_SECTOR LOOP_COLLISION { if (mCfgL.colorCollisions) { SetCollisionColor(iCol); } if (mCfgL.propagateTracks < 2) { - drawVertices(mGlDLFinal[iSlice][iCol][0], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][0], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.propagateTracks > 0 && mCfgL.propagateTracks < 3) { - drawVertices(mGlDLFinal[iSlice][iCol][1], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][1], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.propagateTracks == 2) { - drawVertices(mGlDLFinal[iSlice][iCol][2], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][2], GPUDisplayBackend::LINE_STRIP); } if (mCfgL.propagateTracks == 3) { - drawVertices(mGlDLFinal[iSlice][iCol][3], GPUDisplayBackend::LINE_STRIP); + drawVertices(mGlDLFinal[iSector][iCol][3], GPUDisplayBackend::LINE_STRIP); } } if (mCfgH.drawTracksAndFilter ? 
(mCfgH.drawTPCTracks || mCfgH.drawTRDTracks || mCfgH.drawTOFTracks) : mCfgH.drawITSTracks) { @@ -595,7 +595,7 @@ void GPUDisplay::DrawGLScene_drawCommands() mBackend->pointSizeFactor(3); } SetColorMarked(); - LOOP_SLICE LOOP_COLLISION drawVertices(mGlDLPoints[iSlice][tMARKED][iCol], GPUDisplayBackend::POINTS); + LOOP_SECTOR LOOP_COLLISION drawVertices(mGlDLPoints[iSector][tMARKED][iCol], GPUDisplayBackend::POINTS); if (mCfgH.markFakeClusters) { mBackend->pointSizeFactor(1); } @@ -665,9 +665,9 @@ void GPUDisplay::DrawGLScene_internal(float animateTime, bool renderToMixBuffer) char info[1024]; float fps = (double)mFramesDoneFPS / fpstime; snprintf(info, 1024, - "FPS: %6.2f (Slice: %d, 1:Clusters %d, 2:Prelinks %d, 3:Links %d, 4:Seeds %d, 5:Tracklets %d, 6:Tracks %d, 7:GTracks %d, 8:Merger %d) (%d frames, %d draw calls) " + "FPS: %6.2f (Sector: %d, 1:Clusters %d, 2:Prelinks %d, 3:Links %d, 4:Seeds %d, 5:Tracklets %d, 6:Tracks %d, 7:GTracks %d, 8:Merger %d) (%d frames, %d draw calls) " "(X %1.2f Y %1.2f Z %1.2f / R %1.2f Phi %1.1f Theta %1.1f) / Yaw %1.1f Pitch %1.1f Roll %1.1f)", - fps, mCfgL.drawSlice, mCfgL.drawClusters, mCfgL.drawInitLinks, mCfgL.drawLinks, mCfgL.drawSeeds, mCfgL.drawTracklets, mCfgL.drawTracks, mCfgL.drawExtrapolatedTracks, mCfgL.drawFinal, mFramesDone, mNDrawCalls, mXYZ[0], mXYZ[1], mXYZ[2], mRPhiTheta[0], mRPhiTheta[1] * 180 / CAMath::Pi(), + fps, mCfgL.drawSector, mCfgL.drawClusters, mCfgL.drawInitLinks, mCfgL.drawLinks, mCfgL.drawSeeds, mCfgL.drawTracklets, mCfgL.drawTracks, mCfgL.drawExtrapolatedTracks, mCfgL.drawFinal, mFramesDone, mNDrawCalls, mXYZ[0], mXYZ[1], mXYZ[2], mRPhiTheta[0], mRPhiTheta[1] * 180 / CAMath::Pi(), mRPhiTheta[2] * 180 / CAMath::Pi(), mAngle[1] * 180 / CAMath::Pi(), mAngle[0] * 180 / CAMath::Pi(), mAngle[2] * 180 / CAMath::Pi()); if (fpstime > 1.) 
{ if (mPrintInfoText & 2) { diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 1c4b751bbf85b..8f5808eb41261 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -47,7 +47,7 @@ class GPUDisplay : public GPUDisplayInterface int32_t StartDisplay() override; void ShowNextEvent(const GPUTrackingInOutPointers* ptrs = nullptr) override; void WaitForNextEvent() override; - void SetCollisionFirstCluster(uint32_t collision, int32_t slice, int32_t cluster) override; + void SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster) override; void UpdateCalib(const GPUCalibObjectsConst* calib) override { mCalib = calib; } void UpdateParam(const GPUParam* param) override { mParam = param; } @@ -79,7 +79,7 @@ class GPUDisplay : public GPUDisplayInterface int32_t& drawTextFontSize() { return mDrawTextFontSize; } private: - static constexpr int32_t NSLICES = GPUChainTracking::NSLICES; + static constexpr int32_t NSECTORS = GPUChainTracking::NSECTORS; static constexpr float GL_SCALE_FACTOR = (1.f / 100.f); static constexpr const int32_t N_POINTS_TYPE = 15; @@ -95,7 +95,7 @@ class GPUDisplay : public GPUDisplayInterface tLINK = 2, tSEED = 3, tTRACKLET = 4, - tSLICETRACK = 5, + tSECTORTRACK = 5, tEXTRAPOLATEDTRACK = 6, tFINALTRACK = 7, tMARKED = 8, @@ -153,11 +153,11 @@ class GPUDisplay : public GPUDisplayInterface void updateOptions(); void disableUnsupportedOptions(); int32_t buildTrackFilter(); - const GPUTPCTracker& sliceTracker(int32_t iSlice); + const GPUTPCTracker& sectorTracker(int32_t iSector); const GPUTRDGeometry* trdGeometry(); const GPUTrackingInOutPointers* mIOPtrs = nullptr; void insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last); - void insertVertexList(int32_t iSlice, size_t first, size_t last); + void insertVertexList(int32_t iSector, size_t first, size_t last); template void SetInfo(Args... 
args) { @@ -195,19 +195,19 @@ class GPUDisplay : public GPUDisplayInterface void SetColorMarked(); void SetCollisionColor(int32_t col); void updateConfig(); - void drawPointLinestrip(int32_t iSlice, int32_t cid, int32_t id, int32_t id_limit = TRACK_TYPE_ID_LIMIT); - vboList DrawClusters(int32_t iSlice, int32_t select, uint32_t iCol); - vboList DrawSpacePointsTRD(int32_t iSlice, int32_t select, int32_t iCol); - vboList DrawSpacePointsTOF(int32_t iSlice, int32_t select, int32_t iCol); - vboList DrawSpacePointsITS(int32_t iSlice, int32_t select, int32_t iCol); + void drawPointLinestrip(int32_t iSector, int32_t cid, int32_t id, int32_t id_limit = TRACK_TYPE_ID_LIMIT); + vboList DrawClusters(int32_t iSector, int32_t select, uint32_t iCol); + vboList DrawSpacePointsTRD(int32_t iSector, int32_t select, int32_t iCol); + vboList DrawSpacePointsTOF(int32_t iSector, int32_t select, int32_t iCol); + vboList DrawSpacePointsITS(int32_t iSector, int32_t select, int32_t iCol); vboList DrawLinks(const GPUTPCTracker& tracker, int32_t id, bool dodown = false); vboList DrawSeeds(const GPUTPCTracker& tracker); vboList DrawTracklets(const GPUTPCTracker& tracker); vboList DrawTracks(const GPUTPCTracker& tracker, int32_t global); - void DrawTrackITS(int32_t trackId, int32_t iSlice); + void DrawTrackITS(int32_t trackId, int32_t iSector); GPUDisplay::vboList DrawFinalITS(); template - void DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer); + void DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer); vboList DrawGrid(const GPUTPCTracker& tracker); vboList DrawGridTRD(int32_t sector); void DoScreenshot(const char* filename, std::vector& pixels, float animateTime = -1.f); @@ -252,9 +252,9 @@ class GPUDisplay : public GPUDisplayInterface vecpod> mOverlayTFClusters; int32_t mNCollissions = 1; - vecpod mVertexBuffer[NSLICES]; - 
vecpod mVertexBufferStart[NSLICES]; - vecpod mVertexBufferCount[NSLICES]; + vecpod mVertexBuffer[NSECTORS]; + vecpod mVertexBufferStart[NSECTORS]; + vecpod mVertexBufferCount[NSECTORS]; std::unique_ptr mGlobalPosPtr; std::unique_ptr mGlobalPosPtrTRD; @@ -303,18 +303,18 @@ class GPUDisplay : public GPUDisplayInterface HighResTimer mInfoText2Timer, mInfoHelpTimer; std::vector mThreadBuffers; - std::vector, 2>, NSLICES>>> mThreadTracks; + std::vector, 2>, NSECTORS>>> mThreadTracks; volatile int32_t mInitResult = 0; float mFPSScale = 1, mFPSScaleadjust = 0; int32_t mFramesDone = 0, mFramesDoneFPS = 0; HighResTimer mTimerFPS, mTimerDisplay, mTimerDraw; - vboList mGlDLLines[NSLICES][N_LINES_TYPE]; - vecpod> mGlDLFinal[NSLICES]; + vboList mGlDLLines[NSECTORS][N_LINES_TYPE]; + vecpod> mGlDLFinal[NSECTORS]; vboList mGlDLFinalITS; - vecpod mGlDLPoints[NSLICES][N_POINTS_TYPE]; - vboList mGlDLGrid[NSLICES]; - vboList mGlDLGridTRD[NSLICES / 2]; + vecpod mGlDLPoints[NSECTORS][N_POINTS_TYPE]; + vboList mGlDLGrid[NSECTORS]; + vboList mGlDLGridTRD[NSECTORS / 2]; bool mRequestScreenshot = false; std::string mScreenshotFile; diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h b/GPU/GPUTracking/display/GPUDisplayInterface.h index 44ae35068cac3..0f2aabffbc5af 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -33,7 +33,7 @@ class GPUDisplayInterface virtual int32_t StartDisplay() = 0; virtual void ShowNextEvent(const GPUTrackingInOutPointers* ptrs = nullptr) = 0; virtual void WaitForNextEvent() = 0; - virtual void SetCollisionFirstCluster(uint32_t collision, int32_t slice, int32_t cluster) = 0; + virtual void SetCollisionFirstCluster(uint32_t collision, int32_t sector, int32_t cluster) = 0; virtual void UpdateCalib(const GPUCalibObjectsConst* calib) = 0; virtual void UpdateParam(const GPUParam* param) = 0; static GPUDisplayInterface* getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* 
qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr); diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx index 508c9d0b2e4ff..ded8803801fb7 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx @@ -138,12 +138,12 @@ std::vector GPUDisplayBackend::getPixels() void GPUDisplayBackend::fillIndirectCmdBuffer() { mCmdBuffer.clear(); - mIndirectSliceOffset.resize(GPUCA_NSLICES); + mIndirectSectorOffset.resize(GPUCA_NSECTORS); // TODO: Check if this can be parallelized - for (int32_t iSlice = 0; iSlice < GPUCA_NSLICES; iSlice++) { - mIndirectSliceOffset[iSlice] = mCmdBuffer.size(); - for (uint32_t k = 0; k < mDisplay->vertexBufferStart()[iSlice].size(); k++) { - mCmdBuffer.emplace_back(mDisplay->vertexBufferCount()[iSlice][k], 1, mDisplay->vertexBufferStart()[iSlice][k], 0); + for (int32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + mIndirectSectorOffset[iSector] = mCmdBuffer.size(); + for (uint32_t k = 0; k < mDisplay->vertexBufferStart()[iSector].size(); k++) { + mCmdBuffer.emplace_back(mDisplay->vertexBufferCount()[iSector][k], 1, mDisplay->vertexBufferStart()[iSector][k], 0); } } } diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h index c2c23f659e418..dc56dedf587ed 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.h +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.h @@ -113,7 +113,7 @@ class GPUDisplayBackend bool smoothFont(); GPUDisplay* mDisplay = nullptr; - std::vector mIndirectSliceOffset; + std::vector mIndirectSectorOffset; vecpod mCmdBuffer; bool mFreetypeInitialized = false; bool mFrontendCompatTextDraw = false; diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx 
index 10acbea3a2586..3ee3384c8e118 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx @@ -186,7 +186,7 @@ uint32_t GPUDisplayBackendOpenGL::drawVertices(const vboList& v, const drawType GLenum t = types[tt]; auto first = std::get<0>(v); auto count = std::get<1>(v); - auto iSlice = std::get<2>(v); + auto iSector = std::get<2>(v); if (count == 0) { return 0; } @@ -195,7 +195,7 @@ uint32_t GPUDisplayBackendOpenGL::drawVertices(const vboList& v, const drawType if (mDisplay->cfgR().openGLCore) { CHKERR(glBindVertexArray(mVertexArray)); } - CHKERR(glBindBuffer(GL_ARRAY_BUFFER, mVBOId[iSlice])); + CHKERR(glBindBuffer(GL_ARRAY_BUFFER, mVBOId[iSector])); #ifndef GPUCA_DISPLAY_OPENGL_CORE if (!mDisplay->cfgR().openGLCore) { CHKERR(glVertexPointer(3, GL_FLOAT, 0, nullptr)); @@ -208,14 +208,14 @@ uint32_t GPUDisplayBackendOpenGL::drawVertices(const vboList& v, const drawType } if (mDisplay->cfgR().useGLIndirectDraw) { - CHKERR(glMultiDrawArraysIndirect(t, (void*)(size_t)((mIndirectSliceOffset[iSlice] + first) * sizeof(DrawArraysIndirectCommand)), count, 0)); + CHKERR(glMultiDrawArraysIndirect(t, (void*)(size_t)((mIndirectSectorOffset[iSector] + first) * sizeof(DrawArraysIndirectCommand)), count, 0)); } else if (OPENGL_EMULATE_MULTI_DRAW) { for (uint32_t k = 0; k < count; k++) { - CHKERR(glDrawArrays(t, mDisplay->vertexBufferStart()[iSlice][first + k], mDisplay->vertexBufferCount()[iSlice][first + k])); + CHKERR(glDrawArrays(t, mDisplay->vertexBufferStart()[iSector][first + k], mDisplay->vertexBufferCount()[iSector][first + k])); } } else { - static_assert(sizeof(GLsizei) == sizeof(*mDisplay->vertexBufferCount()[iSlice].data()), "Invalid counter size does not match GLsizei"); - CHKERR(glMultiDrawArrays(t, mDisplay->vertexBufferStart()[iSlice].data() + first, ((const GLsizei*)mDisplay->vertexBufferCount()[iSlice].data()) + first, count)); + static_assert(sizeof(GLsizei) == 
sizeof(*mDisplay->vertexBufferCount()[iSector].data()), "Invalid counter size does not match GLsizei"); + CHKERR(glMultiDrawArrays(t, mDisplay->vertexBufferStart()[iSector].data() + first, ((const GLsizei*)mDisplay->vertexBufferCount()[iSector].data()) + first, count)); } return count; } @@ -315,7 +315,7 @@ int32_t GPUDisplayBackendOpenGL::InitBackendA() GPUError("Unsupported OpenGL runtime %d.%d < %d.%d", glVersion[0], glVersion[1], GPUDisplayFrontend::GL_MIN_VERSION_MAJOR, GPUDisplayFrontend::GL_MIN_VERSION_MINOR); return (1); } - mVBOId.resize(GPUCA_NSLICES); + mVBOId.resize(GPUCA_NSECTORS); CHKERR(glCreateBuffers(mVBOId.size(), mVBOId.data())); CHKERR(glBindBuffer(GL_ARRAY_BUFFER, mVBOId[0])); CHKERR(glGenBuffers(1, &mIndirectId)); @@ -457,7 +457,7 @@ void GPUDisplayBackendOpenGL::loadDataToGPU(size_t totalVertizes) { // TODO: Check if this can be parallelized if (mDisplay->useMultiVBO()) { - for (int32_t i = 0; i < GPUCA_NSLICES; i++) { + for (int32_t i = 0; i < GPUCA_NSECTORS; i++) { CHKERR(glNamedBufferData(mVBOId[i], mDisplay->vertexBuffer()[i].size() * sizeof(mDisplay->vertexBuffer()[i][0]), mDisplay->vertexBuffer()[i].data(), GL_STATIC_DRAW)); } } else { diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx index 6f0ebb9baf945..2324c194d04b9 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx @@ -1469,7 +1469,7 @@ uint32_t GPUDisplayBackendVulkan::drawVertices(const vboList& v, const drawType { auto first = std::get<0>(v); auto count = std::get<1>(v); - auto iSlice = std::get<2>(v); + auto iSector = std::get<2>(v); if (count == 0) { return 0; } @@ -1482,10 +1482,10 @@ uint32_t GPUDisplayBackendVulkan::drawVertices(const vboList& v, const drawType mCurrentCommandBufferLastPipeline = tt; } if (mDisplay->cfgR().useGLIndirectDraw) { - 
mCurrentCommandBuffer.drawIndirect(mIndirectCommandBuffer.buffer, (mIndirectSliceOffset[iSlice] + first) * sizeof(DrawArraysIndirectCommand), count, sizeof(DrawArraysIndirectCommand)); + mCurrentCommandBuffer.drawIndirect(mIndirectCommandBuffer.buffer, (mIndirectSectorOffset[iSector] + first) * sizeof(DrawArraysIndirectCommand), count, sizeof(DrawArraysIndirectCommand)); } else { for (uint32_t k = 0; k < count; k++) { - mCurrentCommandBuffer.draw(mDisplay->vertexBufferCount()[iSlice][first + k], 1, mDisplay->vertexBufferStart()[iSlice][first + k], 0); + mCurrentCommandBuffer.draw(mDisplay->vertexBufferCount()[iSector][first + k], 1, mDisplay->vertexBufferStart()[iSector][first + k], 0); } } diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index acf5566489f49..32ff6c73e110c 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -20,8 +20,8 @@ const char* HelpText[] = { "[ESC] Quit", "[n] Next event", "[r] Reset Display Settings", - "[l] / [k] / [J] Draw single slice (next / previous slice), draw related slices (same plane in phi)", - "[;] / [:] Show splitting of TPC in slices by extruding volume, [:] resets", + "[l] / [k] / [J] Draw single sector (next / previous sector), draw related sectors (same plane in phi)", + "[;] / [:] Show splitting of TPC in sectors by extruding volume, [:] resets", "[#] Invert colors", "[y] / [Y] / [X] / [M] Start Animation, Add / remove Animation point, Reset Points, Cycle animation camera mode (resets)", "[>] / [<] Toggle config interpolation during Animation / change Animation interval (via movement)", @@ -110,27 +110,27 @@ void GPUDisplay::HandleKey(uint8_t key) } else if (key == mFrontend->KEY_ALT) { mFrontend->mKeys[mFrontend->KEY_CTRL] = false; // Release CTRL with alt, to avoid orienting along y automatically! } else if (key == 'l') { - if (mCfgL.drawSlice >= (mCfgL.drawRelatedSlices ? 
(NSLICES / 4 - 1) : (NSLICES - 1))) { - mCfgL.drawSlice = -1; - SetInfo("Showing all slices", 1); + if (mCfgL.drawSector >= (mCfgL.drawRelatedSectors ? (NSECTORS / 4 - 1) : (NSECTORS - 1))) { + mCfgL.drawSector = -1; + SetInfo("Showing all sectors", 1); } else { - mCfgL.drawSlice++; - SetInfo("Showing slice %d", mCfgL.drawSlice); + mCfgL.drawSector++; + SetInfo("Showing sector %d", mCfgL.drawSector); } } else if (key == 'k') { - if (mCfgL.drawSlice <= -1) { - mCfgL.drawSlice = mCfgL.drawRelatedSlices ? (NSLICES / 4 - 1) : (NSLICES - 1); + if (mCfgL.drawSector <= -1) { + mCfgL.drawSector = mCfgL.drawRelatedSectors ? (NSECTORS / 4 - 1) : (NSECTORS - 1); } else { - mCfgL.drawSlice--; + mCfgL.drawSector--; } - if (mCfgL.drawSlice == -1) { - SetInfo("Showing all slices", 1); + if (mCfgL.drawSector == -1) { + SetInfo("Showing all sectors", 1); } else { - SetInfo("Showing slice %d", mCfgL.drawSlice); + SetInfo("Showing sector %d", mCfgL.drawSector); } } else if (key == 'J') { - mCfgL.drawRelatedSlices ^= 1; - SetInfo("Drawing of related slices %s", mCfgL.drawRelatedSlices ? "enabled" : "disabled"); + mCfgL.drawRelatedSectors ^= 1; + SetInfo("Drawing of related sectors %s", mCfgL.drawRelatedSectors ? "enabled" : "disabled"); } else if (key == 'L') { if (mCfgL.showCollision >= mNCollissions - 1) { mCfgL.showCollision = -1; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 8c42cfa46abb9..188df5467e83d 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -40,10 +40,10 @@ using namespace o2::gpu; -#define GET_CID(slice, i) (mParam->par.earlyTpcTransform ? mIOPtrs->clusterData[slice][i].id : (mIOPtrs->clustersNative->clusterOffset[slice][0] + i)) +#define GET_CID(sector, i) (mParam->par.earlyTpcTransform ? 
mIOPtrs->clusterData[sector][i].id : (mIOPtrs->clustersNative->clusterOffset[sector][0] + i)) const GPUTRDGeometry* GPUDisplay::trdGeometry() { return (GPUTRDGeometry*)mCalib->trdGeometry; } -const GPUTPCTracker& GPUDisplay::sliceTracker(int32_t iSlice) { return mChain->GetTPCSliceTrackers()[iSlice]; } +const GPUTPCTracker& GPUDisplay::sectorTracker(int32_t iSector) { return mChain->GetTPCSectorTrackers()[iSector]; } inline void GPUDisplay::insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last) { @@ -53,15 +53,15 @@ inline void GPUDisplay::insertVertexList(std::pair*, vecpodemplace_back(first); vBuf.second->emplace_back(last - first); } -inline void GPUDisplay::insertVertexList(int32_t iSlice, size_t first, size_t last) +inline void GPUDisplay::insertVertexList(int32_t iSector, size_t first, size_t last) { - std::pair*, vecpod*> vBuf(mVertexBufferStart + iSlice, mVertexBufferCount + iSlice); + std::pair*, vecpod*> vBuf(mVertexBufferStart + iSector, mVertexBufferCount + iSector); insertVertexList(vBuf, first, last); } -inline void GPUDisplay::drawPointLinestrip(int32_t iSlice, int32_t cid, int32_t id, int32_t id_limit) +inline void GPUDisplay::drawPointLinestrip(int32_t iSector, int32_t cid, int32_t id, int32_t id_limit) { - mVertexBuffer[iSlice].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPos[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); float curVal; while ((curVal = mGlobalPos[cid].w) < id_limit) { if (GPUCommonMath::AtomicCAS(&mGlobalPos[cid].w, curVal, (float)id)) { @@ -71,66 +71,66 @@ inline void GPUDisplay::drawPointLinestrip(int32_t iSlice, int32_t cid, int32_t } } -GPUDisplay::vboList GPUDisplay::DrawSpacePointsTRD(int32_t iSlice, int32_t select, int32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawSpacePointsTRD(int32_t iSector, int32_t select, int32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); if (iCol == 0) { for (uint32_t i = 0; i < mIOPtrs->nTRDTracklets; i++) { int32_t iSec = trdGeometry()->GetSector(mIOPtrs->trdTracklets[i].GetDetector()); - bool draw = iSlice == iSec && mGlobalPosTRD[i].w == select; + bool draw = iSector == iSec && mGlobalPosTRD[i].w == select; if (draw) { - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD[i].x, mGlobalPosTRD[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[i].z); - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD2[i].x, mGlobalPosTRD2[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD2[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD[i].x, mGlobalPosTRD[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD2[i].x, mGlobalPosTRD2[i].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosTRD2[i].z); } } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -GPUDisplay::vboList GPUDisplay::DrawSpacePointsTOF(int32_t iSlice, int32_t select, int32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawSpacePointsTOF(int32_t iSector, int32_t select, int32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); - if (iCol == 0 && iSlice == 0) { + if (iCol == 0 && iSector == 0) { for (uint32_t i = 0; i < mIOPtrs->nTOFClusters; i++) { - mVertexBuffer[iSlice].emplace_back(mGlobalPosTOF[i].x, mGlobalPosTOF[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTOF[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTOF[i].x, mGlobalPosTOF[i].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosTOF[i].z); } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -GPUDisplay::vboList GPUDisplay::DrawSpacePointsITS(int32_t iSlice, int32_t select, int32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawSpacePointsITS(int32_t iSector, int32_t select, int32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); - if (iCol == 0 && iSlice == 0 && mIOPtrs->itsClusters) { + if (iCol == 0 && iSector == 0 && mIOPtrs->itsClusters) { for (uint32_t i = 0; i < mIOPtrs->nItsClusters; i++) { - mVertexBuffer[iSlice].emplace_back(mGlobalPosITS[i].x, mGlobalPosITS[i].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosITS[i].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosITS[i].x, mGlobalPosITS[i].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosITS[i].z); } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSlice, int32_t select, uint32_t iCol) +GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSector, int32_t select, uint32_t iCol) { - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); if (mOverlayTFClusters.size() > 0 || iCol == 0 || mNCollissions) { - const int32_t firstCluster = (mOverlayTFClusters.size() > 1 && iCol > 0) ? mOverlayTFClusters[iCol - 1][iSlice] : 0; - const int32_t lastCluster = (mOverlayTFClusters.size() > 1 && iCol + 1 < mOverlayTFClusters.size()) ? mOverlayTFClusters[iCol][iSlice] : (mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSlice] : mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSlice] : 0); + const int32_t firstCluster = (mOverlayTFClusters.size() > 1 && iCol > 0) ? mOverlayTFClusters[iCol - 1][iSector] : 0; + const int32_t lastCluster = (mOverlayTFClusters.size() > 1 && iCol + 1 < mOverlayTFClusters.size()) ? mOverlayTFClusters[iCol][iSector] : (mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSector] : 0); const bool checkClusterCollision = mQA && mNCollissions && mOverlayTFClusters.size() == 0 && mIOPtrs->clustersNative && mIOPtrs->clustersNative->clustersMCTruth; - for (int32_t cidInSlice = firstCluster; cidInSlice < lastCluster; cidInSlice++) { - const int32_t cid = GET_CID(iSlice, cidInSlice); + for (int32_t cidInSector = firstCluster; cidInSector < lastCluster; cidInSector++) { + const int32_t cid = GET_CID(iSector, cidInSector); #ifdef GPUCA_TPC_GEOMETRY_O2 if (checkClusterCollision) { const auto& labels = mIOPtrs->clustersNative->clustersMCTruth->getLabels(cid); @@ -170,7 +170,7 @@ GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSlice, int32_t select, uin } else if (mCfgH.markClusters) { int16_t flags; if (mParam->par.earlyTpcTransform) { - flags = mIOPtrs->clusterData[iSlice][cidInSlice].flags; + flags = mIOPtrs->clusterData[iSector][cidInSector].flags; } else { flags = mIOPtrs->clustersNative->clustersLinear[cid].getFlags(); } @@ -181,22 +181,22 @@ GPUDisplay::vboList GPUDisplay::DrawClusters(int32_t iSlice, int32_t select, uin draw = (select == tMARKED) ? (fake) : (draw && !fake); } if (draw) { - mVertexBuffer[iSlice].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPos[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); } } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t id, bool dodown) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { const GPUTPCRow& row = tracker.Data().Row(i); @@ -204,10 +204,10 @@ GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t const GPUTPCRow& rowUp = tracker.Data().Row(i + 2); for (int32_t j = 0; j < row.NHits(); j++) { if (tracker.Data().HitLinkUpData(row, j) != CALINK_INVAL) { - const int32_t cid1 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, j)); - const int32_t cid2 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(rowUp, tracker.Data().HitLinkUpData(row, j))); - drawPointLinestrip(iSlice, cid1, id); - drawPointLinestrip(iSlice, cid2, id); + const int32_t cid1 = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, j)); + const int32_t cid2 = GET_CID(iSector, tracker.Data().ClusterDataIndex(rowUp, tracker.Data().HitLinkUpData(row, j))); + drawPointLinestrip(iSector, cid1, id); + drawPointLinestrip(iSector, cid2, id); } } } @@ -216,114 +216,114 @@ GPUDisplay::vboList GPUDisplay::DrawLinks(const GPUTPCTracker& tracker, int32_t const GPUTPCRow& rowDown = tracker.Data().Row(i - 2); for (int32_t j = 0; j < row.NHits(); j++) { if 
(tracker.Data().HitLinkDownData(row, j) != CALINK_INVAL) { - const int32_t cid1 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, j)); - const int32_t cid2 = GET_CID(iSlice, tracker.Data().ClusterDataIndex(rowDown, tracker.Data().HitLinkDownData(row, j))); - drawPointLinestrip(iSlice, cid1, id); - drawPointLinestrip(iSlice, cid2, id); + const int32_t cid1 = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, j)); + const int32_t cid2 = GET_CID(iSector, tracker.Data().ClusterDataIndex(rowDown, tracker.Data().HitLinkDownData(row, j))); + drawPointLinestrip(iSector, cid1, id); + drawPointLinestrip(iSector, cid2, id); } } } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawSeeds(const GPUTPCTracker& tracker) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = 0; i < *tracker.NStartHits(); i++) { const GPUTPCHitId& hit = tracker.TrackletStartHit(i); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); int32_t ir = hit.RowIndex(); calink ih = hit.HitIndex(); do { const GPUTPCRow& row = tracker.Data().Row(ir); - const int32_t cid = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, ih)); - drawPointLinestrip(iSlice, cid, tSEED); + const int32_t cid = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, ih)); + drawPointLinestrip(iSector, cid, tSEED); ir += 2; ih = tracker.Data().HitLinkUpData(row, ih); } while (ih != CALINK_INVAL); - 
insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawTracklets(const GPUTPCTracker& tracker) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = 0; i < *tracker.NTracklets(); i++) { const GPUTPCTracklet& tracklet = tracker.Tracklet(i); - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); float4 oldpos; for (int32_t j = tracklet.FirstRow(); j <= tracklet.LastRow(); j++) { const calink rowHit = tracker.TrackletRowHits()[tracklet.FirstHit() + (j - tracklet.FirstRow())]; if (rowHit != CALINK_INVAL && rowHit != CALINK_DEAD_CHANNEL) { const GPUTPCRow& row = tracker.Data().Row(j); - const int32_t cid = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, rowHit)); + const int32_t cid = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, rowHit)); oldpos = mGlobalPos[cid]; - drawPointLinestrip(iSlice, cid, tTRACKLET); + drawPointLinestrip(iSector, cid, tTRACKLET); } } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawTracks(const GPUTPCTracker& tracker, int32_t global) { - int32_t iSlice = tracker.ISlice(); + int32_t iSector = 
tracker.ISector(); if (mCfgH.clustersOnly) { - return (vboList(0, 0, iSlice)); + return (vboList(0, 0, iSector)); } - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = (global ? tracker.CommonMemory()->nLocalTracks : 0); i < (global ? *tracker.NTracks() : tracker.CommonMemory()->nLocalTracks); i++) { GPUTPCTrack& track = tracker.Tracks()[i]; - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t j = 0; j < track.NHits(); j++) { const GPUTPCHitId& hit = tracker.TrackHits()[track.FirstHitID() + j]; const GPUTPCRow& row = tracker.Data().Row(hit.RowIndex()); - const int32_t cid = GET_CID(iSlice, tracker.Data().ClusterDataIndex(row, hit.HitIndex())); - drawPointLinestrip(iSlice, cid, tSLICETRACK + global); + const int32_t cid = GET_CID(iSector, tracker.Data().ClusterDataIndex(row, hit.HitIndex())); + drawPointLinestrip(iSector, cid, tSECTORTRACK + global); } - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } -void GPUDisplay::DrawTrackITS(int32_t trackId, int32_t iSlice) +void GPUDisplay::DrawTrackITS(int32_t trackId, int32_t iSector) { const auto& trk = mIOPtrs->itsTracks[trackId]; for (int32_t k = 0; k < trk.getNClusters(); k++) { int32_t cid = mIOPtrs->itsTrackClusIdx[trk.getFirstClusterEntry() + k]; - mVertexBuffer[iSlice].emplace_back(mGlobalPosITS[cid].x, mGlobalPosITS[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosITS[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosITS[cid].x, mGlobalPosITS[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosITS[cid].z); mGlobalPosITS[cid].w = tITSATTACHED; } } GPUDisplay::vboList GPUDisplay::DrawFinalITS() { - const int32_t iSlice = 0; - size_t startCount = mVertexBufferStart[iSlice].size(); + const int32_t iSector = 0; + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t i = 0; i < mIOPtrs->nItsTracks; i++) { if (mITSStandaloneTracks[i]) { - size_t startCountInner = mVertexBuffer[iSlice].size(); - DrawTrackITS(i, iSlice); - insertVertexList(iSlice, startCountInner, mVertexBuffer[iSlice].size()); + size_t startCountInner = mVertexBuffer[iSector].size(); + DrawTrackITS(i, iSector); + insertVertexList(iSector, startCountInner, mVertexBuffer[iSector].size()); } } - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + return (vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector)); } template -void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer) +void GPUDisplay::DrawFinal(int32_t iSector, int32_t /*iCol*/, const GPUTPCGMPropagator* prop, std::array, 2>& trackList, threadVertexBuffer& threadBuffer) { auto& vBuf = threadBuffer.vBuf; auto& buffer = threadBuffer.buffer; @@ -354,7 +354,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa throw std::runtime_error("invalid type"); } - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); bool drawing = false; if constexpr (std::is_same_v) { @@ -375,7 +375,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa if (mIOPtrs->tpcLinkTOF && mIOPtrs->tpcLinkTOF[i] != -1 && mIOPtrs->nTOFClusters) { int32_t cid = mIOPtrs->tpcLinkTOF[i]; drawing = true; - mVertexBuffer[iSlice].emplace_back(mGlobalPosTOF[cid].x, mGlobalPosTOF[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPosTOF[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTOF[cid].x, mGlobalPosTOF[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTOF[cid].z); mGlobalPosTOF[cid].w = tTOFATTACHED; } } @@ -388,8 +388,8 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa continue; } drawing = true; - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD2[cid].x, mGlobalPosTRD2[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD2[cid].z); - mVertexBuffer[iSlice].emplace_back(mGlobalPosTRD[cid].x, mGlobalPosTRD[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD2[cid].x, mGlobalPosTRD2[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD2[cid].z); + mVertexBuffer[iSector].emplace_back(mGlobalPosTRD[cid].x, mGlobalPosTRD[cid].y * mYFactor, mCfgH.projectXY ? 0 : mGlobalPosTRD[cid].z); mGlobalPosTRD[cid].w = tTRDATTACHED; } }; @@ -429,21 +429,21 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa int32_t w = mGlobalPos[cid].w; if (drawing) { if (mCfgH.splitCETracks && lastSide != (mGlobalPos[cid].z < 0)) { - insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSector].size()); drawing = false; lastCluster = -1; } else { - drawPointLinestrip(iSlice, cid, tFINALTRACK, separateExtrapolatedTracksLimit); + drawPointLinestrip(iSector, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } } if (w == separateExtrapolatedTracksLimit) { if (drawing) { - insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSector].size()); } drawing = false; } else { if (!drawing) { - startCountInner = mVertexBuffer[iSlice].size(); + startCountInner = mVertexBuffer[iSector].size(); if (lastCluster != -1 && (!mCfgH.splitCETracks || lastSide == (mGlobalPos[cid].z < 0))) { int32_t lastcid; if constexpr 
(std::is_same_v) { @@ -451,9 +451,9 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa } else { lastcid = &track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative) - mIOPtrs->clustersNative->clustersLinear; } - drawPointLinestrip(iSlice, lastcid, tFINALTRACK, separateExtrapolatedTracksLimit); + drawPointLinestrip(iSector, lastcid, tFINALTRACK, separateExtrapolatedTracksLimit); } - drawPointLinestrip(iSlice, cid, tFINALTRACK, separateExtrapolatedTracksLimit); + drawPointLinestrip(iSector, cid, tFINALTRACK, separateExtrapolatedTracksLimit); } drawing = true; } @@ -464,10 +464,10 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa // Print ITS part of track if constexpr (std::is_same_v) { if (mIOPtrs->tpcLinkITS && mIOPtrs->tpcLinkITS[i] != -1 && mIOPtrs->nItsTracks && mIOPtrs->nItsClusters) { - DrawTrackITS(mIOPtrs->tpcLinkITS[i], iSlice); + DrawTrackITS(mIOPtrs->tpcLinkITS[i], iSector); } } - insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[0], startCountInner, mVertexBuffer[iSector].size()); break; } @@ -491,7 +491,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa } } - size_t startCountInner = mVertexBuffer[iSlice].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t inFlyDirection = 0; inFlyDirection < 2; inFlyDirection++) { GPUTPCGMPhysicalTrackModel trkParam; float ZOffset = 0; @@ -503,7 +503,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa } if constexpr (std::is_same_v) { trkParam.Set(track->GetParam()); - alphaOrg = mParam->Alpha(iSlice); + alphaOrg = mParam->Alpha(iSector); } else { GPUTPCGMTrackParam t; convertTrackParam(t, *track); @@ -521,8 +521,8 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa if constexpr (std::is_same_v) { auto cl = 
mIOPtrs->mergedTrackHits[track->FirstClusterRef() + lastCluster]; const auto& cln = mIOPtrs->clustersNative->clustersLinear[cl.num]; - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, cl.slice, cl.row, cln.getPad(), cln.getTime(), x, y, z); - ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSlice, track->GetParam().GetTZOffset(), mParam->continuousMaxTimeBin); + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, cl.sector, cl.row, cln.getPad(), cln.getTime(), x, y, z); + ZOffset = mCalib->fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(iSector, track->GetParam().GetTZOffset(), mParam->continuousMaxTimeBin); } else { uint8_t sector, row; auto cln = track->getCluster(mIOPtrs->outputClusRefsTPCO2, lastCluster, *mIOPtrs->clustersNative, sector, row); @@ -539,7 +539,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa break; } - alphaOrg = mParam->Alpha(iSlice); + alphaOrg = mParam->Alpha(iSector); float c = cosf(alphaOrg); float s = sinf(alphaOrg); float mclocal[4]; @@ -577,7 +577,7 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa break; } float alpha = alphaOrg; - vecpod& useBuffer = iMC && inFlyDirection == 0 ? buffer : mVertexBuffer[iSlice]; + vecpod& useBuffer = iMC && inFlyDirection == 0 ? 
buffer : mVertexBuffer[iSector]; int32_t nPoints = 0; while (nPoints++ < 5000) { @@ -623,24 +623,24 @@ void GPUDisplay::DrawFinal(int32_t iSlice, int32_t /*iCol*/, const GPUTPCGMPropa if (inFlyDirection == 0) { if (iMC) { for (int32_t k = (int32_t)buffer.size() - 1; k >= 0; k--) { - mVertexBuffer[iSlice].emplace_back(buffer[k]); + mVertexBuffer[iSector].emplace_back(buffer[k]); } } else { - insertVertexList(vBuf[1], startCountInner, mVertexBuffer[iSlice].size()); - startCountInner = mVertexBuffer[iSlice].size(); + insertVertexList(vBuf[1], startCountInner, mVertexBuffer[iSector].size()); + startCountInner = mVertexBuffer[iSector].size(); } } } - insertVertexList(vBuf[iMC ? 3 : 2], startCountInner, mVertexBuffer[iSlice].size()); + insertVertexList(vBuf[iMC ? 3 : 2], startCountInner, mVertexBuffer[iSector].size()); } } } GPUDisplay::vboList GPUDisplay::DrawGrid(const GPUTPCTracker& tracker) { - int32_t iSlice = tracker.ISlice(); - size_t startCount = mVertexBufferStart[iSlice].size(); - size_t startCountInner = mVertexBuffer[iSlice].size(); + int32_t iSector = tracker.ISector(); + size_t startCount = mVertexBufferStart[iSector].size(); + size_t startCountInner = mVertexBuffer[iSector].size(); for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { const GPUTPCRow& row = tracker.Data().Row(i); for (int32_t j = 0; j <= (signed)row.Grid().Ny(); j++) { @@ -649,17 +649,17 @@ GPUDisplay::vboList GPUDisplay::DrawGrid(const GPUTPCTracker& tracker) float x = row.X() + mCfgH.xAdd; float y = row.Grid().YMin() + (float)j / row.Grid().StepYInv(); float zz1, zz2, yy1, yy2, xx1, xx2; - mParam->Slice2Global(tracker.ISlice(), x, y, z1, &xx1, &yy1, &zz1); - mParam->Slice2Global(tracker.ISlice(), x, y, z2, &xx2, &yy2, &zz2); - if (iSlice < 18) { + mParam->Sector2Global(tracker.ISector(), x, y, z1, &xx1, &yy1, &zz1); + mParam->Sector2Global(tracker.ISector(), x, y, z2, &xx2, &yy2, &zz2); + if (iSector < 18) { zz1 += mCfgH.zAdd; zz2 += mCfgH.zAdd; } else { zz1 -= mCfgH.zAdd; zz2 -= mCfgH.zAdd; } 
- mVertexBuffer[iSlice].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); - mVertexBuffer[iSlice].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); } for (int32_t j = 0; j <= (signed)row.Grid().Nz(); j++) { float y1 = row.Grid().YMin(); @@ -667,21 +667,21 @@ GPUDisplay::vboList GPUDisplay::DrawGrid(const GPUTPCTracker& tracker) float x = row.X() + mCfgH.xAdd; float z = row.Grid().ZMin() + (float)j / row.Grid().StepZInv(); float zz1, zz2, yy1, yy2, xx1, xx2; - mParam->Slice2Global(tracker.ISlice(), x, y1, z, &xx1, &yy1, &zz1); - mParam->Slice2Global(tracker.ISlice(), x, y2, z, &xx2, &yy2, &zz2); - if (iSlice < 18) { + mParam->Sector2Global(tracker.ISector(), x, y1, z, &xx1, &yy1, &zz1); + mParam->Sector2Global(tracker.ISector(), x, y2, z, &xx2, &yy2, &zz2); + if (iSector < 18) { zz1 += mCfgH.zAdd; zz2 += mCfgH.zAdd; } else { zz1 -= mCfgH.zAdd; zz2 -= mCfgH.zAdd; } - mVertexBuffer[iSlice].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); - mVertexBuffer[iSlice].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx1 * GL_SCALE_FACTOR, yy1 * GL_SCALE_FACTOR * mYFactor, zz1 * GL_SCALE_FACTOR); + mVertexBuffer[iSector].emplace_back(xx2 * GL_SCALE_FACTOR, yy2 * GL_SCALE_FACTOR * mYFactor, zz2 * GL_SCALE_FACTOR); } } - insertVertexList(tracker.ISlice(), startCountInner, mVertexBuffer[iSlice].size()); - return (vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice)); + insertVertexList(tracker.ISector(), startCountInner, mVertexBuffer[iSector].size()); + return (vboList(startCount, 
mVertexBufferStart[iSector].size() - startCount, iSector)); } GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) @@ -691,7 +691,7 @@ GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) size_t startCountInner = mVertexBuffer[sector].size(); auto* geo = trdGeometry(); if (geo) { - int32_t trdsector = NSLICES / 2 - 1 - sector; + int32_t trdsector = NSECTORS / 2 - 1 - sector; float alpha = geo->GetAlpha() / 2.f + geo->GetAlpha() * trdsector; if (trdsector >= 9) { alpha -= 2 * CAMath::Pi(); @@ -753,7 +753,7 @@ GPUDisplay::vboList GPUDisplay::DrawGridTRD(int32_t sector) size_t GPUDisplay::DrawGLScene_updateVertexList() { - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { mVertexBuffer[i].clear(); mVertexBufferStart[i].clear(); mVertexBufferCount[i].clear(); @@ -766,46 +766,46 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlobalPosTRD[i].w = tTRDCLUSTER; } - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = 0; i < N_POINTS_TYPE; i++) { - mGlDLPoints[iSlice][i].resize(mNCollissions); + mGlDLPoints[iSector][i].resize(mNCollissions); } for (int32_t i = 0; i < N_FINAL_TYPE; i++) { - mGlDLFinal[iSlice].resize(mNCollissions); + mGlDLFinal[iSector].resize(mNCollissions); } } int32_t numThreads = getNumThreads(); tbb::task_arena(numThreads).execute([&] { - if (mChain && (mChain->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSliceTracking)) { - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { - GPUTPCTracker& tracker = (GPUTPCTracker&)sliceTracker(iSlice); + if (mChain && (mChain->GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { + GPUTPCTracker& tracker = (GPUTPCTracker&)sectorTracker(iSector); tracker.SetPointersDataLinks(tracker.LinkTmpMemory()); - mGlDLLines[iSlice][tINITLINK] = DrawLinks(tracker, tINITLINK, true); + mGlDLLines[iSector][tINITLINK] = DrawLinks(tracker, 
tINITLINK, true); tracker.SetPointersDataLinks(mChain->rec()->Res(tracker.MemoryResLinks()).Ptr()); // clang-format off }, tbb::simple_partitioner()); // clang-format on - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { - const GPUTPCTracker& tracker = sliceTracker(iSlice); + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { + const GPUTPCTracker& tracker = sectorTracker(iSector); - mGlDLLines[iSlice][tLINK] = DrawLinks(tracker, tLINK); - mGlDLLines[iSlice][tSEED] = DrawSeeds(tracker); - mGlDLLines[iSlice][tTRACKLET] = DrawTracklets(tracker); - mGlDLLines[iSlice][tSLICETRACK] = DrawTracks(tracker, 0); - mGlDLGrid[iSlice] = DrawGrid(tracker); - if (iSlice < NSLICES / 2) { - mGlDLGridTRD[iSlice] = DrawGridTRD(iSlice); + mGlDLLines[iSector][tLINK] = DrawLinks(tracker, tLINK); + mGlDLLines[iSector][tSEED] = DrawSeeds(tracker); + mGlDLLines[iSector][tTRACKLET] = DrawTracklets(tracker); + mGlDLLines[iSector][tSECTORTRACK] = DrawTracks(tracker, 0); + mGlDLGrid[iSector] = DrawGrid(tracker); + if (iSector < NSECTORS / 2) { + mGlDLGridTRD[iSector] = DrawGridTRD(iSector); } // clang-format off }, tbb::simple_partitioner()); // clang-format on - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { - const GPUTPCTracker& tracker = sliceTracker(iSlice); - mGlDLLines[iSlice][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); // clang-format off + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { + const GPUTPCTracker& tracker = sectorTracker(iSector); + mGlDLLines[iSector][tEXTRAPOLATEDTRACK] = DrawTracks(tracker, 1); // clang-format off }, tbb::simple_partitioner()); // clang-format on } tbb::parallel_for(0, numThreads, [&](int32_t iThread) { mThreadTracks[iThread].resize(mNCollissions); for (int32_t i = 0; i < mNCollissions; i++) { - for (int32_t j = 0; j < NSLICES; j++) { + for (int32_t j = 0; j < NSECTORS; j++) { for (int32_t k = 0; k < 2; k++) { mThreadTracks[iThread][i][j][k].clear(); } @@ -837,19 +837,19 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if 
(mCfgH.hideRejectedTracks && !track->OK()) { return; } - int32_t slice = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + track->NClusters() - 1].slice; + int32_t sector = mIOPtrs->mergedTrackHits[track->FirstClusterRef() + track->NClusters() - 1].sector; uint32_t col = 0; if (mQA) { const auto& label = mQA->GetMCTrackLabel(i); #ifdef GPUCA_TPC_GEOMETRY_O2 col = mQA->GetMCLabelCol(label); #else - while (label.isValid() && col < mOverlayTFClusters.size() && mOverlayTFClusters[col][NSLICES] < label.track) { + while (label.isValid() && col < mOverlayTFClusters.size() && mOverlayTFClusters[col][NSECTORS] < label.track) { col++; } #endif } - mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][slice][0].emplace_back(i); + mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][sector][0].emplace_back(i); }); } for (uint32_t col = 0; col < mIOPtrs->nMCInfosTPCCol; col++) { @@ -866,11 +866,11 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if (alpha < 0) { alpha += 2 * CAMath::Pi(); } - int32_t slice = alpha / (2 * CAMath::Pi()) * 18; + int32_t sector = alpha / (2 * CAMath::Pi()) * 18; if (mc.z < 0) { - slice += 18; + sector += 18; } - mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][slice][1].emplace_back(i); + mThreadTracks[GPUReconstruction::getHostThreadIndex()][col][sector][1].emplace_back(i); }); } @@ -879,33 +879,33 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() prop.SetMaterialTPC(); prop.SetPolynomialField(&mParam->polynomialField); - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { int32_t numThread = GPUReconstruction::getHostThreadIndex(); for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { mThreadBuffers[numThread].clear(); for (int32_t iSet = 0; iSet < numThreads; iSet++) { if (mConfig.showTPCTracksFromO2Format) { - DrawFinal(iSlice, iCol, &prop, mThreadTracks[iSet][iCol][iSlice], mThreadBuffers[numThread]); + DrawFinal(iSector, iCol, &prop, 
mThreadTracks[iSet][iCol][iSector], mThreadBuffers[numThread]); } else { - DrawFinal(iSlice, iCol, &prop, mThreadTracks[iSet][iCol][iSlice], mThreadBuffers[numThread]); + DrawFinal(iSector, iCol, &prop, mThreadTracks[iSet][iCol][iSector], mThreadBuffers[numThread]); } } - vboList* list = &mGlDLFinal[iSlice][iCol][0]; + vboList* list = &mGlDLFinal[iSector][iCol][0]; for (int32_t i = 0; i < N_FINAL_TYPE; i++) { - size_t startCount = mVertexBufferStart[iSlice].size(); + size_t startCount = mVertexBufferStart[iSector].size(); for (uint32_t j = 0; j < mThreadBuffers[numThread].start[i].size(); j++) { - mVertexBufferStart[iSlice].emplace_back(mThreadBuffers[numThread].start[i][j]); - mVertexBufferCount[iSlice].emplace_back(mThreadBuffers[numThread].count[i][j]); + mVertexBufferStart[iSector].emplace_back(mThreadBuffers[numThread].start[i][j]); + mVertexBufferCount[iSector].emplace_back(mThreadBuffers[numThread].count[i][j]); } - list[i] = vboList(startCount, mVertexBufferStart[iSlice].size() - startCount, iSlice); + list[i] = vboList(startCount, mVertexBufferStart[iSector].size() - startCount, iSector); } } // clang-format off }, tbb::simple_partitioner()); // clang-format on - tbb::parallel_for(0, NSLICES, [&](int32_t iSlice) { + tbb::parallel_for(0, NSECTORS, [&](int32_t iSector) { for (int32_t i = 0; i < N_POINTS_TYPE_TPC; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSlice][i][iCol] = DrawClusters(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawClusters(iSector, i, iCol); } } // clang-format off }, tbb::simple_partitioner()); // clang-format on @@ -914,35 +914,35 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() mGlDLFinalITS = DrawFinalITS(); - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = N_POINTS_TYPE_TPC; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - 
mGlDLPoints[iSlice][i][iCol] = DrawSpacePointsTRD(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTRD(iSector, i, iCol); } } } - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSlice][i][iCol] = DrawSpacePointsTOF(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsTOF(iSector, i, iCol); } } - break; // TODO: Only slice 0 filled for now + break; // TODO: Only sector 0 filled for now } - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { for (int32_t i = N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF; i < N_POINTS_TYPE_TPC + N_POINTS_TYPE_TRD + N_POINTS_TYPE_TOF + N_POINTS_TYPE_ITS; i++) { for (int32_t iCol = 0; iCol < mNCollissions; iCol++) { - mGlDLPoints[iSlice][i][iCol] = DrawSpacePointsITS(iSlice, i, iCol); + mGlDLPoints[iSector][i][iCol] = DrawSpacePointsITS(iSector, i, iCol); } } - break; // TODO: Only slice 0 filled for now + break; // TODO: Only sector 0 filled for now } mUpdateVertexLists = 0; size_t totalVertizes = 0; - for (int32_t i = 0; i < NSLICES; i++) { + for (int32_t i = 0; i < NSECTORS; i++) { totalVertizes += mVertexBuffer[i].size(); } if (totalVertizes > 0xFFFFFFFF) { @@ -953,7 +953,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() if (!mUseMultiVBO) { size_t totalYet = mVertexBuffer[0].size(); mVertexBuffer[0].resize(totalVertizes); - for (int32_t i = 1; i < GPUCA_NSLICES; i++) { + for (int32_t i = 1; i < GPUCA_NSECTORS; i++) { for (uint32_t j = 0; j < mVertexBufferStart[i].size(); j++) { mVertexBufferStart[i][j] += totalYet; } @@ -963,7 +963,7 @@ size_t GPUDisplay::DrawGLScene_updateVertexList() } } mBackend->loadDataToGPU(totalVertizes); - for (int32_t i = 0; i < 
(mUseMultiVBO ? GPUCA_NSLICES : 1); i++) { + for (int32_t i = 0; i < (mUseMultiVBO ? GPUCA_NSECTORS : 1); i++) { mVertexBuffer[i].clear(); } return totalVertizes; diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index f53fa185029f8..6fd70354c9486 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -44,8 +44,8 @@ void GPUDisplay::DrawGLScene_updateEventData() mCurrentClusters = mIOPtrs->clustersNative->nClustersTotal; } else { mCurrentClusters = 0; - for (int32_t iSlice = 0; iSlice < NSLICES; iSlice++) { - mCurrentClusters += mIOPtrs->nClusterData[iSlice]; + for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { + mCurrentClusters += mIOPtrs->nClusterData[iSector]; } } if (mNMaxClusters < mCurrentClusters) { @@ -128,19 +128,19 @@ void GPUDisplay::DrawGLScene_updateEventData() } mUpdateTrackFilter = false; - mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, NSLICES, 1), float(0.f), [&](const tbb::blocked_range& r, float maxClusterZ) { - for (int32_t iSlice = r.begin(); iSlice < r.end(); iSlice++) { + mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS, 1), float(0.f), [&](const tbb::blocked_range& r, float maxClusterZ) { + for (int32_t iSector = r.begin(); iSector < r.end(); iSector++) { int32_t row = 0; - uint32_t nCls = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSlice] : (mIOPtrs->clustersNative ? mIOPtrs->clustersNative->nClustersSector[iSlice] : 0); + uint32_t nCls = mParam->par.earlyTpcTransform ? mIOPtrs->nClusterData[iSector] : (mIOPtrs->clustersNative ? 
mIOPtrs->clustersNative->nClustersSector[iSector] : 0); for (uint32_t i = 0; i < nCls; i++) { int32_t cid; if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSlice][i]; + const auto& cl = mIOPtrs->clusterData[iSector][i]; cid = cl.id; row = cl.row; } else { - cid = mIOPtrs->clustersNative->clusterOffset[iSlice][0] + i; - while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSlice][row + 1] <= (uint32_t)cid) { + cid = mIOPtrs->clustersNative->clusterOffset[iSector][0] + i; + while (row < GPUCA_ROW_COUNT - 1 && mIOPtrs->clustersNative->clusterOffset[iSector][row + 1] <= (uint32_t)cid) { row++; } } @@ -149,22 +149,22 @@ void GPUDisplay::DrawGLScene_updateEventData() } float4* ptr = &mGlobalPos[cid]; if (mParam->par.earlyTpcTransform) { - const auto& cl = mIOPtrs->clusterData[iSlice][i]; - mParam->Slice2Global(iSlice, (mCfgH.clustersOnNominalRow ? mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); + const auto& cl = mIOPtrs->clusterData[iSector][i]; + mParam->Sector2Global(iSector, (mCfgH.clustersOnNominalRow ? mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); } else { float x, y, z; - const auto& cln = mIOPtrs->clustersNative->clusters[iSlice][0][i]; - GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSlice, row, cln.getPad(), cln.getTime(), x, y, z); + const auto& cln = mIOPtrs->clustersNative->clusters[iSector][0][i]; + GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSector, row, cln.getPad(), cln.getTime(), x, y, z); if (mCfgH.clustersOnNominalRow) { x = mParam->tpcGeometry.Row2X(row); } - mParam->Slice2Global(iSlice, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSector, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); } if (fabsf(ptr->z) > maxClusterZ) { maxClusterZ = fabsf(ptr->z); } - ptr->z += iSlice < 18 ? mCfgH.zAdd : -mCfgH.zAdd; + ptr->z += iSector < 18 ? 
mCfgH.zAdd : -mCfgH.zAdd; ptr->x *= GL_SCALE_FACTOR; ptr->y *= GL_SCALE_FACTOR; ptr->z *= GL_SCALE_FACTOR; @@ -186,7 +186,7 @@ void GPUDisplay::DrawGLScene_updateEventData() const auto& sp = mIOPtrs->trdSpacePoints[i]; int32_t iSec = trdGeometry()->GetSector(mIOPtrs->trdTracklets[i].GetDetector()); float4* ptr = &mGlobalPosTRD[i]; - mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd, sp.getY(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSec, sp.getX() + mCfgH.xAdd, sp.getY(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); ptr->z += ptr->z > 0 ? trdZoffset : -trdZoffset; if (fabsf(ptr->z) > maxClusterZ) { maxClusterZ = fabsf(ptr->z); @@ -196,7 +196,7 @@ void GPUDisplay::DrawGLScene_updateEventData() ptr->z *= GL_SCALE_FACTOR; ptr->w = tTRDCLUSTER; ptr = &mGlobalPosTRD2[i]; - mParam->Slice2Global(iSec, sp.getX() + mCfgH.xAdd + 4.5f, sp.getY() + 1.5f * sp.getDy(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSec, sp.getX() + mCfgH.xAdd + 4.5f, sp.getY() + 1.5f * sp.getDy(), sp.getZ(), &ptr->x, &ptr->y, &ptr->z); ptr->z += ptr->z > 0 ? 
trdZoffset : -trdZoffset; if (fabsf(ptr->z) > maxClusterZ) { maxClusterZ = fabsf(ptr->z); @@ -212,7 +212,7 @@ void GPUDisplay::DrawGLScene_updateEventData() mMaxClusterZ = tbb::parallel_reduce(tbb::blocked_range(0, mCurrentClustersTOF, 32), float(mMaxClusterZ), [&](const tbb::blocked_range& r, float maxClusterZ) { for (int32_t i = r.begin(); i < r.end(); i++) { float4* ptr = &mGlobalPosTOF[i]; - mParam->Slice2Global(mIOPtrs->tofClusters[i].getSector(), mIOPtrs->tofClusters[i].getX() + mCfgH.xAdd, mIOPtrs->tofClusters[i].getY(), mIOPtrs->tofClusters[i].getZ(), &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(mIOPtrs->tofClusters[i].getSector(), mIOPtrs->tofClusters[i].getX() + mCfgH.xAdd, mIOPtrs->tofClusters[i].getY(), mIOPtrs->tofClusters[i].getZ(), &ptr->x, &ptr->y, &ptr->z); float ZOffset = 0; if (mParam->par.continuousTracking) { float tofTime = mIOPtrs->tofClusters[i].getTime() * 1e-3 / o2::constants::lhc::LHCBunchSpacingNS / o2::tpc::constants::LHCBCPERTIMEBIN; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 4085bebee08c4..c84a1be8e6890 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -15,11 +15,11 @@ o2_gpu_kernel_file_list(ERRORS GPUErrors.cxx) o2_gpu_kernel_file_list(TPCTRACKER ERRORS GPUTPCTrackParam.cxx GPUTPCTrack.cxx GPUTPCGrid.cxx GPUTPCRow.cxx GPUTPCTracker.cxx) o2_gpu_kernel_file_list(TPCTRACKLETCONS GPUTPCTrackletConstructor.cxx) -o2_gpu_kernel_file_list(TPCSLICEDATA TPCTRACKER GPUTPCSliceData.cxx) +o2_gpu_kernel_file_list(TPCSECTORDATA TPCTRACKER GPUTPCTrackingData.cxx) o2_gpu_kernel_file_list(TPCOCCUPANCY GPUTPCClusterOccupancyMap.cxx) o2_gpu_kernel_file_list(TPCDEDX GPUdEdx.cxx) o2_gpu_kernel_file_list(MATLUT MatLayerCylSet.cxx MatLayerCyl.cxx Ray.cxx) -o2_gpu_kernel_file_list(TPCMERGER ERRORS GPUTPCGMMerger.cxx GPUTPCGMSliceTrack.cxx GPUTPCGMTrackParam.cxx GPUTPCGMPhysicalTrackModel.cxx GPUTPCGMPropagator.cxx) +o2_gpu_kernel_file_list(TPCMERGER ERRORS GPUTPCGMMerger.cxx 
GPUTPCGMSectorTrack.cxx GPUTPCGMTrackParam.cxx GPUTPCGMPhysicalTrackModel.cxx GPUTPCGMPropagator.cxx) o2_gpu_kernel_file_list(O2PROPAGATOR TrackParametrization.cxx TrackParametrizationWithError.cxx Propagator.cxx TrackLTIntegral.cxx) o2_gpu_kernel_file_list(TPCCOMPRESSION GPUTPCCompressionTrackModel.cxx) o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) @@ -31,17 +31,17 @@ o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRAC o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSlice" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSlices" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB single) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB both) o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO single int32_t n) o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) -o2_gpu_add_kernel("GPUTPCCreateSliceData" "= TPCTRACKER TPCSLICEDATA" LB single) +o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sliceTracks" "= TPCTRACKER" NO single) 
+o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER" NO single) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO single int8_t parameter) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO single int8_t parameter) o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO single int8_t parameter) @@ -51,9 +51,9 @@ o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCU o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB simple int32_t mode) o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice) -o2_gpu_add_kernel("GPUTPCGMMergerSliceRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB simple int32_t iSlice) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB simple int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t id) o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) @@ -62,10 +62,10 @@ o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMM o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t useOrigTrackParam int8_t mergeAll) 
o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t output) o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeSlicesPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t border0 int32_t border1 int8_t useOrigTrackParam) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice int8_t withinSlice int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t iSlice int8_t withinSlice int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSlice int8_t withinSlice int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t border0 int32_t border1 int8_t useOrigTrackParam) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector int8_t mergeMode) o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO simple gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB simple) o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB simple) @@ -98,7 +98,7 @@ o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCom o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" 
"GPUTPCCompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB simple int32_t trackStart int32_t trackEnd) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB simple int32_t sliceStart int32_t nSlices) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB simple int32_t sectorStart int32_t nSectors) o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB simple) o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB simple) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 015159fee24d7..edb6c6e762a65 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -36,7 +36,7 @@ #include "GPUQA.h" #include "GPUTPCDef.h" -#include "GPUTPCSliceData.h" +#include "GPUTPCTrackingData.h" #include "GPUChainTracking.h" #include "GPUTPCTrack.h" #include "GPUTPCTracker.h" @@ -1027,8 +1027,8 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } else if (mTracking->GetParam().par.earlyTpcTransform) { comp = fabsf(trks[i].GetParam().GetZ() + trks[i].GetParam().GetTZOffset()) < fabsf(trks[revLabel].GetParam().GetZ() + trks[revLabel].GetParam().GetTZOffset()); } else { - float shift1 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[i].CSide() * GPUChainTracking::NSLICES / 2, trks[i].GetParam().GetTZOffset()); - float shift2 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[revLabel].CSide() * GPUChainTracking::NSLICES / 2, trks[revLabel].GetParam().GetTZOffset()); + float shift1 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[i].CSide() * GPUChainTracking::NSECTORS 
/ 2, trks[i].GetParam().GetTZOffset()); + float shift2 = mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(trks[revLabel].CSide() * GPUChainTracking::NSECTORS / 2, trks[revLabel].GetParam().GetTZOffset()); comp = fabsf(trks[i].GetParam().GetZ() + shift1) < fabsf(trks[revLabel].GetParam().GetZ() + shift2); } if (revLabel == -1 || !trks[revLabel].OK() || (trks[i].OK() && comp)) { @@ -1362,7 +1362,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } #ifdef GPUCA_TPC_GEOMETRY_O2 if (!mParam->par.earlyTpcTransform) { - float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSLICES / 2, param.GetTZOffset() - mc1.t0); + float shift = side == 2 ? 0 : mTracking->GetTPCTransformHelper()->getCorrMap()->convDeltaTimeToDeltaZinTimeFrame(side * GPUChainTracking::NSECTORS / 2, param.GetTZOffset() - mc1.t0); return param.GetZ() + shift - mc1.z; } #endif @@ -1664,13 +1664,13 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx mNCl->Fill(track.NClustersFitted()); } if (mClNative && mTracking && mTracking->GetTPCTransformHelper()) { - for (uint32_t i = 0; i < GPUChainTracking::NSLICES; i++) { + for (uint32_t i = 0; i < GPUChainTracking::NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for (uint32_t k = 0; k < mClNative->nClusters[i][j]; k++) { const auto& cl = mClNative->clusters[i][j][k]; float x, y, z; GPUTPCConvertImpl::convert(*mTracking->GetTPCTransformHelper()->getCorrMap(), mTracking->GetParam(), i, j, cl.getPad(), cl.getTime(), x, y, z); - mTracking->GetParam().Slice2Global(i, x, y, z, &x, &y, &z); + mTracking->GetParam().Sector2Global(i, x, y, z, &x, &y, &z); mClXY->Fill(x, y); } } @@ -1759,7 +1759,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx throw std::runtime_error("Cannot dump non o2::tpc::clusterNative clusters, need also hit attachmend and GPU tracks"); } uint32_t clid = 0; - for (uint32_t 
i = 0; i < GPUChainTracking::NSLICES; i++) { + for (uint32_t i = 0; i < GPUChainTracking::NSECTORS; i++) { for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { for (uint32_t k = 0; k < mClNative->nClusters[i][j]; k++) { const auto& cl = mClNative->clusters[i][j][k]; @@ -1769,7 +1769,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx uint32_t track = attach & gputpcgmmergertypes::attachTrackMask; const auto& trk = mTracking->mIOPtrs.mergedTracks[track]; mTracking->GetTPCTransformHelper()->Transform(i, j, cl.getPad(), cl.getTime(), x, y, z, trk.GetParam().GetTZOffset()); - mTracking->GetParam().Slice2Global(i, x, y, z, &x, &y, &z); + mTracking->GetParam().Sector2Global(i, x, y, z, &x, &y, &z); } uint32_t extState = mTracking->mIOPtrs.mergedTrackHitStates ? mTracking->mIOPtrs.mergedTrackHitStates[clid] : 0; diff --git a/GPU/GPUTracking/qa/genEvents.cxx b/GPU/GPUTracking/qa/genEvents.cxx index 3bd4779dd13f0..627cfc5f9909a 100644 --- a/GPU/GPUTracking/qa/genEvents.cxx +++ b/GPU/GPUTracking/qa/genEvents.cxx @@ -47,10 +47,10 @@ namespace o2::gpu extern GPUSettingsStandalone configStandalone; } -int32_t genEvents::GetSlice(double GlobalPhi) +int32_t genEvents::GetSector(double GlobalPhi) { double phi = GlobalPhi; - // std::cout<<" GetSlice: phi = "<Fill(sigmaY); @@ -286,7 +286,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) // std::cout< 0.5 ) sigmaY = 0.5; // if( sigmaZ > 0.5 ) sigmaZ = 0.5; - c.sector = (t.GetZ() >= 0.) ? iSlice : iSlice + 18; + c.sector = (t.GetZ() >= 0.) ? iSector : iSector + 18; c.row = iRow; c.mcID = itr; c.x = t.GetX(); @@ -299,9 +299,9 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) std::vector labels; - std::unique_ptr clSlices[GPUChainTracking::NSLICES]; + std::unique_ptr clSectors[GPUChainTracking::NSECTORS]; - for (int32_t iSector = 0; iSector < (int32_t)GPUChainTracking::NSLICES; iSector++) // HLT Sector numbering, sectors go from 0 to 35, all spanning all rows from 0 to 158. 
+ for (int32_t iSector = 0; iSector < (int32_t)GPUChainTracking::NSECTORS; iSector++) // HLT Sector numbering, sectors go from 0 to 35, all spanning all rows from 0 to 158. { int32_t nNumberOfHits = 0; for (uint32_t i = 0; i < vClusters.size(); i++) { @@ -313,7 +313,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) mRec->mIOPtrs.nClusterData[iSector] = nNumberOfHits; GPUTPCClusterData* clusters = new GPUTPCClusterData[nNumberOfHits]; - clSlices[iSector].reset(clusters); + clSectors[iSector].reset(clusters); int32_t icl = 0; for (uint32_t i = 0; i < vClusters.size(); i++) { GenCluster& c = vClusters[i]; @@ -338,7 +338,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) mRec->mIOPtrs.clusterData[iSector] = clusters; } - // Create vector with cluster MC labels, clusters are counter from 0 to clusterId in the order they have been written above. No separation in slices. + // Create vector with cluster MC labels, clusters are counter from 0 to clusterId in the order they have been written above. No separation in sectors. 
mRec->mIOPtrs.nMCLabelsTPC = labels.size(); mRec->mIOPtrs.mcLabelsTPC = labels.data(); diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index 43c091099bcf0..93a3a3e512ac7 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -30,7 +30,7 @@ class genEvents public: genEvents(GPUChainTracking* rec) {} void InitEventGenerator() {} - int32_t GenerateEvent(const GPUParam& sliceParam, char* filename) { return 1; } + int32_t GenerateEvent(const GPUParam& sectorParam, char* filename) { return 1; } void FinishEventGenerator() {} static void RunEventGenerator(GPUChainTracking* rec){}; @@ -43,16 +43,16 @@ class genEvents public: genEvents(GPUChainTracking* rec) : mRec(rec) {} void InitEventGenerator(); - int32_t GenerateEvent(const GPUParam& sliceParam, char* filename); + int32_t GenerateEvent(const GPUParam& sectorParam, char* filename); void FinishEventGenerator(); static void RunEventGenerator(GPUChainTracking* rec); private: - int32_t GetSlice(double GlobalPhi); - int32_t GetDSlice(double LocalPhi); - double GetSliceAngle(int32_t iSlice); - int32_t RecalculateSlice(GPUTPCGMPhysicalTrackModel& t, int32_t& iSlice); + int32_t GetSector(double GlobalPhi); + int32_t GetDSector(double LocalPhi); + double GetSectorAngle(int32_t iSector); + int32_t RecalculateSector(GPUTPCGMPhysicalTrackModel& t, int32_t& iSector); double GetGaus(double sigma); TH1F* mClusterError[3][2] = {{nullptr, nullptr}, {nullptr, nullptr}, {nullptr, nullptr}}; @@ -68,8 +68,8 @@ class genEvents }; const double mTwoPi = 2 * M_PI; - const double mSliceDAngle = mTwoPi / 18.; - const double mSliceAngleOffset = mSliceDAngle / 2; + const double mSectorDAngle = mTwoPi / 18.; + const double mSectorAngleOffset = mSectorDAngle / 2; GPUChainTracking* mRec; }; diff --git a/GPU/Workflow/src/GPUWorkflowInternal.h b/GPU/Workflow/src/GPUWorkflowInternal.h index 2e30adbd0130f..7ac9c60048e20 100644 --- a/GPU/Workflow/src/GPUWorkflowInternal.h +++ 
b/GPU/Workflow/src/GPUWorkflowInternal.h @@ -29,10 +29,10 @@ namespace gpurecoworkflow_internals { struct GPURecoWorkflowSpec_TPCZSBuffers { - std::vector Pointers[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - std::vector Sizes[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - const void** Pointers2[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; - const uint32_t* Sizes2[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; + std::vector Pointers[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + std::vector Sizes[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + const void** Pointers2[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; + const uint32_t* Sizes2[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; }; struct GPURecoWorkflow_QueueObject { diff --git a/GPU/Workflow/src/GPUWorkflowPipeline.cxx b/GPU/Workflow/src/GPUWorkflowPipeline.cxx index fb23680266ae2..5aca7502d8e91 100644 --- a/GPU/Workflow/src/GPUWorkflowPipeline.cxx +++ b/GPU/Workflow/src/GPUWorkflowPipeline.cxx @@ -53,7 +53,7 @@ struct pipelinePrepareMessage { size_t magicWord = MAGIC_WORD; DataProcessingHeader::StartTime timeSliceId; GPUSettingsTF tfSettings; - size_t pointerCounts[GPUTrackingInOutZS::NSLICES][GPUTrackingInOutZS::NENDPOINTS]; + size_t pointerCounts[GPUTrackingInOutZS::NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; size_t pointersTotal; bool flagEndOfStream; }; @@ -181,12 +181,12 @@ int32_t GPURecoWorkflowSpec::handlePipeline(ProcessingContext& pc, GPUTrackingIn size_t ptrsTotal = 0; const void* firstPtr = nullptr; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - if (firstPtr == nullptr && ptrs.tpcZS->slice[i].count[j]) { - firstPtr = ptrs.tpcZS->slice[i].zsPtr[j][0]; + if (firstPtr == nullptr && ptrs.tpcZS->sector[i].count[j]) { + 
firstPtr = ptrs.tpcZS->sector[i].zsPtr[j][0]; } - ptrsTotal += ptrs.tpcZS->slice[i].count[j]; + ptrsTotal += ptrs.tpcZS->sector[i].count[j]; } } @@ -202,11 +202,11 @@ int32_t GPURecoWorkflowSpec::handlePipeline(ProcessingContext& pc, GPUTrackingIn size_t* ptrBuffer = messageBuffer.data() + sizeof(preMessage) / sizeof(size_t); size_t ptrsCopied = 0; int32_t lastRegion = -1; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - preMessage.pointerCounts[i][j] = ptrs.tpcZS->slice[i].count[j]; - for (uint32_t k = 0; k < ptrs.tpcZS->slice[i].count[j]; k++) { - const void* curPtr = ptrs.tpcZS->slice[i].zsPtr[j][k]; + preMessage.pointerCounts[i][j] = ptrs.tpcZS->sector[i].count[j]; + for (uint32_t k = 0; k < ptrs.tpcZS->sector[i].count[j]; k++) { + const void* curPtr = ptrs.tpcZS->sector[i].zsPtr[j][k]; bool regionFound = lastRegion != -1 && (size_t)curPtr >= (size_t)mRegionInfos[lastRegion].ptr && (size_t)curPtr < (size_t)mRegionInfos[lastRegion].ptr + mRegionInfos[lastRegion].size; if (!regionFound) { for (uint32_t l = 0; l < mRegionInfos.size(); l++) { @@ -221,11 +221,11 @@ int32_t GPURecoWorkflowSpec::handlePipeline(ProcessingContext& pc, GPUTrackingIn LOG(fatal) << "Found a TPC ZS pointer outside of shared memory"; } ptrBuffer[ptrsCopied + k] = (size_t)curPtr - (size_t)mRegionInfos[lastRegion].ptr; - ptrBuffer[ptrsTotal + ptrsCopied + k] = ptrs.tpcZS->slice[i].nZSPtr[j][k]; + ptrBuffer[ptrsTotal + ptrsCopied + k] = ptrs.tpcZS->sector[i].nZSPtr[j][k]; ptrBuffer[2 * ptrsTotal + ptrsCopied + k] = mRegionInfos[lastRegion].managed; ptrBuffer[3 * ptrsTotal + ptrsCopied + k] = mRegionInfos[lastRegion].id; } - ptrsCopied += ptrs.tpcZS->slice[i].count[j]; + ptrsCopied += ptrs.tpcZS->sector[i].count[j]; } } @@ -353,10 +353,10 @@ void GPURecoWorkflowSpec::RunReceiveThread() context->tpcZSmeta.Pointers[0][0].resize(m->pointersTotal); 
context->tpcZSmeta.Sizes[0][0].resize(m->pointersTotal); int32_t lastRegion = -1; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - context->tpcZS.slice[i].count[j] = m->pointerCounts[i][j]; - for (uint32_t k = 0; k < context->tpcZS.slice[i].count[j]; k++) { + context->tpcZS.sector[i].count[j] = m->pointerCounts[i][j]; + for (uint32_t k = 0; k < context->tpcZS.sector[i].count[j]; k++) { bool regionManaged = ptrBuffer[2 * m->pointersTotal + ptrsCopied + k]; size_t regionId = ptrBuffer[3 * m->pointersTotal + ptrsCopied + k]; bool regionFound = lastRegion != -1 && mRegionInfos[lastRegion].managed == regionManaged && mRegionInfos[lastRegion].id == regionId; @@ -375,9 +375,9 @@ void GPURecoWorkflowSpec::RunReceiveThread() context->tpcZSmeta.Pointers[0][0][ptrsCopied + k] = (void*)(ptrBuffer[ptrsCopied + k] + (size_t)mRegionInfos[lastRegion].ptr); context->tpcZSmeta.Sizes[0][0][ptrsCopied + k] = ptrBuffer[m->pointersTotal + ptrsCopied + k]; } - context->tpcZS.slice[i].zsPtr[j] = context->tpcZSmeta.Pointers[0][0].data() + ptrsCopied; - context->tpcZS.slice[i].nZSPtr[j] = context->tpcZSmeta.Sizes[0][0].data() + ptrsCopied; - ptrsCopied += context->tpcZS.slice[i].count[j]; + context->tpcZS.sector[i].zsPtr[j] = context->tpcZSmeta.Pointers[0][0].data() + ptrsCopied; + context->tpcZS.sector[i].nZSPtr[j] = context->tpcZSmeta.Sizes[0][0].data() + ptrsCopied; + ptrsCopied += context->tpcZS.sector[i].count[j]; } } context->ptrs.tpcZS = &context->tpcZS; diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx b/GPU/Workflow/src/GPUWorkflowSpec.cxx index 06942eab476c6..aa4f3cfca1289 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -194,7 +194,7 @@ void GPURecoWorkflowSpec::init(InitContext& ic) // Configure the "GPU workflow" i.e. 
which steps we run on the GPU (or CPU) if (mSpecConfig.outputTracks || mSpecConfig.outputCompClusters || mSpecConfig.outputCompClustersFlat) { mConfig->configWorkflow.steps.set(GPUDataTypes::RecoStep::TPCConversion, - GPUDataTypes::RecoStep::TPCSliceTracking, + GPUDataTypes::RecoStep::TPCSectorTracking, GPUDataTypes::RecoStep::TPCMerging); mConfig->configWorkflow.outputs.set(GPUDataTypes::InOutType::TPCMergedTracks); mConfig->configWorkflow.steps.setBits(GPUDataTypes::RecoStep::TPCdEdx, mConfParam->rundEdx == -1 ? !mConfParam->synchronousProcessing : mConfParam->rundEdx); @@ -396,7 +396,7 @@ void GPURecoWorkflowSpec::processInputs(ProcessingContext& pc, D& tpcZSmeta, E& constexpr static size_t NEndpoints = o2::gpu::GPUTrackingInOutZS::NENDPOINTS; if (mSpecConfig.zsOnTheFly || mSpecConfig.zsDecoder) { - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { tpcZSmeta.Pointers[i][j].clear(); tpcZSmeta.Sizes[i][j].clear(); @@ -473,13 +473,13 @@ void GPURecoWorkflowSpec::processInputs(ProcessingContext& pc, D& tpcZSmeta, E& } int32_t totalCount = 0; - for (uint32_t i = 0; i < GPUTrackingInOutZS::NSLICES; i++) { + for (uint32_t i = 0; i < GPUTrackingInOutZS::NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { tpcZSmeta.Pointers2[i][j] = tpcZSmeta.Pointers[i][j].data(); tpcZSmeta.Sizes2[i][j] = tpcZSmeta.Sizes[i][j].data(); - tpcZS.slice[i].zsPtr[j] = tpcZSmeta.Pointers2[i][j]; - tpcZS.slice[i].nZSPtr[j] = tpcZSmeta.Sizes2[i][j]; - tpcZS.slice[i].count[j] = tpcZSmeta.Pointers[i][j].size(); + tpcZS.sector[i].zsPtr[j] = tpcZSmeta.Pointers2[i][j]; + tpcZS.sector[i].nZSPtr[j] = tpcZSmeta.Sizes2[i][j]; + tpcZS.sector[i].count[j] = tpcZSmeta.Pointers[i][j].size(); totalCount += tpcZSmeta.Pointers[i][j].size(); } } @@ -640,9 +640,9 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc) if (!(mTPCSectorMask & (1ul << 
i))) { if (ptrs.tpcZS) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - tpcZS.slice[i].zsPtr[j] = nullptr; - tpcZS.slice[i].nZSPtr[j] = nullptr; - tpcZS.slice[i].count[j] = 0; + tpcZS.sector[i].zsPtr[j] = nullptr; + tpcZS.sector[i].nZSPtr[j] = nullptr; + tpcZS.sector[i].count[j] = 0; } } } From 796a0f80772f332ed754f66bb8c23b172839f79c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 16:26:39 +0100 Subject: [PATCH 0106/1914] GPU: Switch to modern C++ nested namespace style --- GPU/GPUTracking/Base/GPUConstantMem.h | 14 +++------- GPU/GPUTracking/Base/GPUGeneralKernels.h | 7 ++--- GPU/GPUTracking/Base/GPUKernelDebugOutput.h | 7 ++--- GPU/GPUTracking/Base/GPUMemoryResource.h | 7 ++--- GPU/GPUTracking/Base/GPUParam.h | 7 ++--- GPU/GPUTracking/Base/GPUParam.inc | 7 ++--- GPU/GPUTracking/Base/GPUParamRTC.h | 7 ++--- GPU/GPUTracking/Base/GPUProcessor.h | 7 ++--- GPU/GPUTracking/Base/GPUReconstruction.cxx | 7 ++--- GPU/GPUTracking/Base/GPUReconstruction.h | 14 +++------- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 7 ++--- .../Base/GPUReconstructionConvert.h | 7 ++--- .../Base/GPUReconstructionDeviceBase.h | 7 ++--- ...ReconstructionIncludesDeviceAll.template.h | 7 ++--- .../Base/GPUReconstructionKernels.h | 7 ++--- .../Base/GPUReconstructionTimeframe.h | 7 ++--- GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h | 14 +++------- .../Base/cuda/GPUReconstructionCUDA.h | 7 ++--- .../cuda/GPUReconstructionCUDAInternals.h | 8 ++---- .../DataCompression/GPUTPCClusterRejection.h | 7 ++--- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 20 +++---------- GPU/GPUTracking/DataTypes/GPUHostDataTypes.h | 7 ++--- GPU/GPUTracking/DataTypes/GPUNewCalibValues.h | 7 ++--- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 7 ++--- GPU/GPUTracking/DataTypes/GPUSettings.h | 7 ++--- .../DataTypes/GPUTPCGMMergedTrackHit.h | 7 ++--- .../DataTypes/GPUTPCGMPolynomialField.h | 7 ++--- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 7 ++--- GPU/GPUTracking/DataTypes/GPUTRDDef.h | 
19 +++++-------- .../DataTypes/GPUTRDInterfaceO2Track.h | 14 +++------- GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx | 7 ++--- GPU/GPUTracking/DataTypes/GPUTRDTrack.h | 18 ++++-------- GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h | 7 ++--- GPU/GPUTracking/DataTypes/GPUdEdxInfo.h | 7 ++--- GPU/GPUTracking/Debug/GPUROOTDump.h | 7 ++--- GPU/GPUTracking/Debug/GPUROOTDumpCore.h | 7 ++--- GPU/GPUTracking/Global/GPUChain.h | 7 ++--- GPU/GPUTracking/Global/GPUChainTracking.h | 28 ++++++------------- GPU/GPUTracking/Global/GPUChainTrackingDefs.h | 7 ++--- GPU/GPUTracking/Global/GPUErrors.h | 7 ++--- .../Global/GPUTrackingInputProvider.h | 14 +++------- GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h | 7 ++--- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 7 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 14 +++------- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h | 7 ++--- GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h | 10 ++----- GPU/GPUTracking/Merger/GPUTPCGMO2Output.h | 7 ++--- .../Merger/GPUTPCGMPhysicalTrackModel.h | 7 ++--- .../Merger/GPUTPCGMPolynomialFieldManager.h | 7 ++--- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 14 +++------- GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h | 7 ++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h | 7 ++--- .../SectorTracker/GPUTPCBaseTrackParam.h | 7 ++--- .../SectorTracker/GPUTPCClusterData.h | 7 ++--- .../SectorTracker/GPUTPCCreateTrackingData.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCDef.h | 7 ++--- .../GPUTPCExtrapolationTracking.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCGrid.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCHit.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCHitId.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h | 7 ++--- .../SectorTracker/GPUTPCNeighboursCleaner.h | 7 ++--- .../SectorTracker/GPUTPCNeighboursFinder.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCRow.h | 7 ++--- .../SectorTracker/GPUTPCSectorOutCluster.h | 7 ++--- .../SectorTracker/GPUTPCSectorOutput.h | 7 ++--- 
.../SectorTracker/GPUTPCStartHitsFinder.h | 7 ++--- .../SectorTracker/GPUTPCStartHitsSorter.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCTrack.h | 7 ++--- .../SectorTracker/GPUTPCTrackLinearisation.h | 7 ++--- .../SectorTracker/GPUTPCTrackParam.h | 7 ++--- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 7 ++--- .../SectorTracker/GPUTPCTrackingData.h | 7 ++--- .../SectorTracker/GPUTPCTracklet.h | 9 ++---- .../SectorTracker/GPUTPCTrackletConstructor.h | 7 ++--- .../SectorTracker/GPUTPCTrackletSelector.h | 7 ++--- GPU/GPUTracking/TPCClusterFinder/CfConsts.h | 10 ++----- GPU/GPUTracking/TPCClusterFinder/ChargePos.h | 7 ++--- .../TPCClusterFinder/GPUTPCCFChainContext.h | 7 ++--- GPU/GPUTracking/TPCConvert/GPUTPCConvert.h | 7 ++--- .../TPCConvert/GPUTPCConvertImpl.h | 7 ++--- .../TPCConvert/GPUTPCConvertKernel.h | 7 ++--- GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h | 7 ++--- .../TRDTracking/GPUTRDInterfaces.h | 21 ++++---------- .../TRDTracking/GPUTRDSpacePoint.h | 14 +++------- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 7 ++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 7 ++--- .../TRDTracking/GPUTRDTrackerDebug.h | 7 ++--- .../TRDTracking/GPUTRDTrackerKernels.h | 7 ++--- .../TRDTracking/GPUTRDTrackletLabels.h | 7 ++--- .../TRDTracking/GPUTRDTrackletWord.h | 14 +++------- GPU/GPUTracking/dEdx/GPUdEdx.h | 7 ++--- GPU/GPUTracking/display/GPUDisplay.h | 7 ++--- GPU/GPUTracking/display/GPUDisplayInterface.h | 7 ++--- .../display/shaders/GPUDisplayShaders.h | 7 ++--- GPU/GPUTracking/qa/GPUQA.h | 7 ++--- GPU/GPUTracking/qa/GPUQAHelper.h | 6 ++-- GPU/GPUTracking/qa/genEvents.h | 7 ++--- 98 files changed, 237 insertions(+), 591 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 378b9a9be2ead..4f83fa48a64e0 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -34,9 +34,7 @@ #include "GPUKernelDebugOutput.h" #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu 
{ struct GPUConstantMem { GPUParam param; @@ -90,14 +88,11 @@ union GPUConstantMemCopyable { #if defined(GPUCA_GPUCODE) static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1); #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM -namespace o2 -{ -namespace gpu +namespace o2::gpu { // Must be placed here, to avoid circular header dependency @@ -120,7 +115,6 @@ GPUdi() void GPUProcessor::raiseError(uint32_t code, uint32_t param1, uint32_t p GetConstantMem()->errorCodes.raiseError(code, param1, param2, param3); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index e11f818297770..71980d38fdc9e 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -32,9 +32,7 @@ #define GPUCA_CUB cub #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUConstantMem; @@ -110,8 +108,7 @@ class GPUitoa : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #undef GPUCA_CUB diff --git a/GPU/GPUTracking/Base/GPUKernelDebugOutput.h b/GPU/GPUTracking/Base/GPUKernelDebugOutput.h index dc60014718677..9e58ae332dcd4 100644 --- a/GPU/GPUTracking/Base/GPUKernelDebugOutput.h +++ b/GPU/GPUTracking/Base/GPUKernelDebugOutput.h @@ -19,9 +19,7 @@ #include "GPUProcessor.h" #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUKernelDebugOutput : public GPUProcessor @@ -75,8 +73,7 @@ 
class GPUKernelDebugOutput : public GPUProcessor mutable int32_t* mDebugOutMemory; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif #endif diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 5037e7800d4d9..3bb2c363db2a9 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -18,9 +18,7 @@ #include "GPUCommonDef.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUMemoryReuse { @@ -103,7 +101,6 @@ class GPUMemoryResource int32_t mReuse; MemoryType mType; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 279e1f9142231..9bdf705dfeb59 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -31,9 +31,7 @@ using Propagator = PropagatorImpl; } // namespace o2::base #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUSettingsRec; struct GPUSettingsGTP; @@ -114,7 +112,6 @@ struct GPUParam : public internal::GPUParam_t GPUd() bool rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 1c26a6d56664b..19dc1fc4a3578 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -19,9 +19,7 @@ #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCClusterOccupancyMap.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { GPUdi() void GPUParam::Sector2Global(int32_t iSector, float x, float y, float z, float* X, float* Y, float* Z) const @@ -223,7 +221,6 @@ GPUdi() bool GPUParam::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, fl return CAMath::Abs(uncorrectedY) > (tpcGeometry.NPads(iRow) - 1) * 0.5f * tpcGeometry.PadWidth(iRow) + 
rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUParamRTC.h b/GPU/GPUTracking/Base/GPUParamRTC.h index dd9cf73e38a1e..2377666ee7b07 100644 --- a/GPU/GPUTracking/Base/GPUParamRTC.h +++ b/GPU/GPUTracking/Base/GPUParamRTC.h @@ -18,9 +18,7 @@ #include "GPUParam.h" #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { namespace gpu_rtc { @@ -38,7 +36,6 @@ struct GPUParamRTC : public internal::GPUParam_t #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTrackingInOutPointers; class GPUReconstruction; @@ -157,7 +155,6 @@ class GPUProcessor friend class GPUTPCNeighboursFinder; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 1bae5a12f2ee7..c8d1bfc1dfe51 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -44,9 +44,7 @@ #include "GPUReconstructionIncludesITS.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUReconstructionPipelineQueue { uint32_t op = 0; // For now, 0 = process, 1 = terminate @@ -63,8 +61,7 @@ struct GPUReconstructionPipelineContext { std::condition_variable cond; bool terminate = false; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index f0526777a8bcb..de4e94a886a3b 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -34,19 +34,14 @@ #include "GPUTPCSectorOutput.h" #include "GPULogging.h" -namespace o2 -{ -namespace its +namespace o2::its { class TrackerTraits; class VertexerTraits; class TimeFrame; -} // namespace its -} // namespace o2 +} // namespace o2::its -namespace o2 -{ -namespace gpu 
+namespace o2::gpu { class GPUChain; struct GPUMemorySizeScalers; @@ -483,7 +478,6 @@ inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate) } } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 7d50a564fedf8..b6225999c68a0 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -24,9 +24,7 @@ #include "GPUReconstructionKernelIncludes.h" #include "GPUReconstructionKernels.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUReconstructionCPUBackend : public GPUReconstructionProcessing @@ -197,7 +195,6 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args return retVal; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.h b/GPU/GPUTracking/Base/GPUReconstructionConvert.h index 28e4552ba3849..a24eb52a3a47c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.h +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.h @@ -37,9 +37,7 @@ class RawFileWriter; struct AliHLTTPCRawCluster; -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUParam; struct GPUTPCClusterData; @@ -63,7 +61,6 @@ class GPUReconstructionConvert static std::function&, const void*, uint32_t, uint32_t)> GetDecoder(int32_t version, const GPUParam* param); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index a279c6c0c2508..6cd3813ff1431 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -20,9 +20,7 @@ #include "GPUChain.h" #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if !(defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) extern 
template class GPUReconstructionKernels; @@ -87,7 +85,6 @@ inline size_t GPUReconstructionDeviceBase::GPUMemCpyAlways(bool onGpu, void* dst return GPUReconstructionCPU::GPUMemCpyAlways(false, dst, src, size, stream, toGPU, ev, evList, nEvents); } } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h index 02008ed0ff78f..38a9780376d16 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludesDeviceAll.template.h @@ -17,12 +17,9 @@ #include "GPUDef.h" -namespace o2 +namespace o2::gpu { -namespace gpu -{ -} -} // namespace o2 +} // namespace o2::gpu using namespace o2::gpu; // clang-format off diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index 3ed2ef1a95109..e95a59df6cfd5 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -17,9 +17,7 @@ #include "GPUReconstruction.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { namespace gpu_reconstruction_kernels @@ -113,7 +111,6 @@ class GPUReconstructionKernels : public T #undef GPUCA_KRNL }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h index 9592f549bcc4b..47cbfa0a1a5b6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.h @@ -27,9 +27,7 @@ namespace o2::tpc struct ClusterNative; } // namespace o2::tpc -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct ClusterNativeAccess; @@ -73,7 +71,6 @@ class GPUReconstructionTimeframe std::vector mEventUsed; std::vector> mShiftedEvents; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif 
diff --git a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h index 99399f505d552..fdc5c16d91f35 100644 --- a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h +++ b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h @@ -19,9 +19,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { class ThrustVolatileAsyncAllocator @@ -38,14 +36,11 @@ class ThrustVolatileAsyncAllocator GPUReconstruction* mRec; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #ifndef __HIPCC__ // Override synchronize call at end of thrust algorithm running on stream, just don't run cudaStreamSynchronize -namespace thrust -{ -namespace cuda_cub +namespace thrust::cuda_cub { typedef thrust::cuda_cub::execution_policy thrustStreamPolicy; @@ -60,8 +55,7 @@ __host__ __device__ inline cudaError_t synchronize(thrustStr #endif } -} // namespace cuda_cub -} // namespace thrust +} // namespace thrust::cuda_cub #endif // __HIPCC__ #endif // GPU_CUDATHRUSTHELPERS_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 4d1865c0fd0e7..f14696a92a5b0 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -25,9 +25,7 @@ extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_C extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend& cfg); #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUReconstructionCUDAInternals; @@ -104,7 +102,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { + #define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) #define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) @@ -85,7 +84,6 @@ class GPUDebugTiming static_assert(std::is_convertible::value, "CUDA event type incompatible to deviceEvent"); -} // 
namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h index fcdfcfc9cc49a..5c25813e75d29 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterRejection.h @@ -17,9 +17,7 @@ #include "GPUTPCGMMergerTypes.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCClusterRejection { template @@ -67,7 +65,6 @@ struct GPUTPCClusterRejection { return GetProtectionStatus(attach, physics, protect); } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 1faee5f28907d..51b5c0b101537 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -27,9 +27,7 @@ struct AliHLTTPCClusterMCLabel; struct AliHLTTPCRawCluster; -namespace o2 -{ -namespace tpc +namespace o2::tpc { struct ClusterNativeAccess; struct CompressedClustersFlat; @@ -38,8 +36,7 @@ class TrackTPC; namespace constants { } // namespace constants -} // namespace tpc -} // namespace o2 +} // namespace o2::tpc namespace o2 { @@ -91,21 +88,13 @@ class CalibdEdxContainer; } // namespace tpc } // namespace o2 -namespace o2 -{ -namespace gpu +namespace o2::gpu { class CorrectionMapsHelper; class TPCFastTransform; struct TPCPadGainCalib; struct TPCZSLinkMapping; -} // namespace gpu -} // namespace o2 -namespace o2 -{ -namespace gpu -{ #include "utils/bitfield.h" #define ENUM_CLASS class #define ENUM_UINT : uint32_t @@ -325,7 +314,6 @@ struct GPUTrackingInOutPointers { #undef ENUM_CLASS #undef ENUM_UINT -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h b/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h index 0788b445416b4..fe6d05cef202b 100644 --- 
a/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUHostDataTypes.h @@ -32,9 +32,7 @@ #include "SimulationDataFormat/ConstMCTruthContainer.h" #include "SimulationDataFormat/MCCompLabel.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCDigitsMCInput { @@ -55,7 +53,6 @@ struct GPUTPCLinearLabels { std::vector data; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h index e16fde9614911..b6e176f468995 100644 --- a/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h +++ b/GPU/GPUTracking/DataTypes/GPUNewCalibValues.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUNewCalibValues { @@ -33,7 +31,6 @@ struct GPUNewCalibValues { void updateFrom(const GPUNewCalibValues* from); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index cad554f355c8e..799fd25330ab4 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -20,9 +20,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { // This defines an output region. ptrBase points to a memory buffer, which should have a proper alignment. 
@@ -83,7 +81,6 @@ struct GPUTrackingOutputs { static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index afde8d4128bab..05888770ef9e5 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -23,9 +23,7 @@ #include #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUDisplayFrontendInterface; class GPUReconstruction; @@ -80,8 +78,7 @@ struct GPUSettingsDeviceBackend { GPUReconstruction* master = nullptr; // GPUReconstruction master object }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #ifdef GPUCA_GPUCODE_DEVICE #define QCONFIG_GPU diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index 029d0bdea1397..3c86dbfcd8d18 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCGMMergedTrackHit { uint32_t num; @@ -49,7 +47,6 @@ struct GPUTPCGMMergedTrackHitXYZ { #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h index 13940cecc86de..6417e47352339 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMPolynomialField.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGMPolynomialField @@ -289,7 +287,6 @@ GPUdi() float GPUTPCGMPolynomialField::GetFieldItsBz(float x, float y, float z) return bz; } -} // namespace gpu -} // namespace o2 +} // 
namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index cb022bf891f4b..461ac9366ca23 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -26,9 +26,7 @@ #endif #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { // Copy of TPC constants from AliRoot:TPCGeometry / O2:TPC/Base/Mapper // Should be unified, but cannot take the contants from the official headers for now, since we want it to be constexpr @@ -144,6 +142,5 @@ class GPUTPCGeometry // TODO: Make values constexpr return (250.f - v) * FACTOR_Z2T; // Used in compression, must remain constant at 250cm } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDDef.h b/GPU/GPUTracking/DataTypes/GPUTRDDef.h index 4340e854cd67d..bb0cf3652a7e7 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDDef.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDDef.h @@ -17,23 +17,19 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace track +namespace o2::track { template class TrackParametrizationWithError; -} // namespace track -namespace base +} // namespace o2::track + +namespace o2::base { template class PropagatorImpl; -} // namespace base -} // namespace o2 +} // namespace o2::base -namespace o2 -{ -namespace gpu +namespace o2::gpu { typedef o2::track::TrackParametrizationWithError TRDBaseTrack; @@ -62,7 +58,6 @@ class GPUTRDTracker_t; typedef GPUTRDTracker_t GPUTRDTracker; typedef GPUTRDTracker_t GPUTRDTrackerGPU; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDDEF_H diff --git a/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h b/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h index 6b37afbde12d7..0a269c05869e4 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDInterfaceO2Track.h @@ -17,9 +17,7 @@ // This is the interface for the GPUTRDTrack based on 
the O2 track type #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template class trackInterface; @@ -28,8 +26,7 @@ namespace gputpcgmmergertypes { struct GPUTPCOuterParam; } // namespace gputpcgmmergertypes -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #include "ReconstructionDataFormats/Track.h" #include "ReconstructionDataFormats/TrackTPCITS.h" @@ -38,9 +35,7 @@ struct GPUTPCOuterParam; #include "ReconstructionDataFormats/TrackLTIntegral.h" #include "CommonConstants/LHCConstants.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template <> @@ -94,7 +89,6 @@ class trackInterface : public o2::track::TrackParCov ClassDefNV(trackInterface, 1); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx index 54f28ec21d30e..369d3838b646f 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.cxx @@ -19,14 +19,11 @@ using namespace o2::gpu; #include "GPUTRDTrack.inc" #if !defined(GPUCA_GPUCODE) -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if !defined(GPUCA_O2_LIB) // Instantiate O2 track version, for O2 this happens in GPUTRDTrackO2.cxx template class GPUTRDTrack_t>; #endif template class GPUTRDTrack_t>; // Always instatiate GM track version -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 66cf31fc8e8d0..18f7c61e01fc3 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -22,25 +22,20 @@ struct GPUTRDTrackDataRecord; class AliHLTExternalTrackParam; -namespace o2 -{ -namespace tpc +namespace o2::tpc { class TrackTPC; -} // namespace tpc -namespace dataformats +} // namespace o2::tpc +namespace o2::dataformats { class TrackTPCITS; class GlobalTrackID; -} // namespace dataformats -} // 
namespace o2 +} // namespace o2::dataformats //_____________________________________________________________________________ #include "GPUTRDInterfaceO2Track.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -127,7 +122,6 @@ class GPUTRDTrack_t : public T #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDTRACK_H diff --git a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h index 01b61928be922..41ed5d0859efe 100644 --- a/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h +++ b/GPU/GPUTracking/DataTypes/GPUTriggerOutputs.h @@ -21,9 +21,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTriggerOutputs { @@ -52,7 +50,6 @@ struct GPUTriggerOutputs { static_assert(sizeof(o2::tpc::TriggerInfoDLBZS) % sizeof(uint32_t) == 0); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h index f3a7f4b2a0724..cbb8e2f161fa7 100644 --- a/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h +++ b/GPU/GPUTracking/DataTypes/GPUdEdxInfo.h @@ -17,12 +17,9 @@ #include "DataFormatsTPC/dEdxInfo.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { using GPUdEdxInfo = o2::tpc::dEdxInfo; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Debug/GPUROOTDump.h b/GPU/GPUTracking/Debug/GPUROOTDump.h index f8f8950a06ae0..d4f034fd7c60f 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDump.h +++ b/GPU/GPUTracking/Debug/GPUROOTDump.h @@ -28,9 +28,7 @@ class TNtuple; #include #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if !defined(GPUCA_NO_ROOT) && !defined(GPUCA_GPUCODE) namespace @@ -172,7 +170,6 @@ class GPUROOTDump } }; #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Debug/GPUROOTDumpCore.h b/GPU/GPUTracking/Debug/GPUROOTDumpCore.h index 
f3f7890ebfa5b..08e88eddb377e 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDumpCore.h +++ b/GPU/GPUTracking/Debug/GPUROOTDumpCore.h @@ -21,9 +21,7 @@ class TFile; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUROOTDumpCore; @@ -61,7 +59,6 @@ class GPUROOTDumpCore std::vector mBranches; #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index d899523217dbc..6f23487c1c4b9 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -17,9 +17,7 @@ #include "GPUReconstructionCPU.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUChain { @@ -294,7 +292,6 @@ int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) return false; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index e998baf8e45e8..4a2778851e517 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -24,35 +24,24 @@ #include #include -namespace o2 -{ -namespace trd +namespace o2::trd { class GeometryFlat; -} -} // namespace o2 +} // namespace o2::trd -namespace o2 -{ -namespace tpc +namespace o2::tpc { struct ClusterNativeAccess; struct ClusterNative; class CalibdEdxContainer; -} // namespace tpc -} // namespace o2 +} // namespace o2::tpc -namespace o2 -{ -namespace base +namespace o2::base { class MatLayerCylSet; -} -} // namespace o2 +} // namespace o2::base -namespace o2 -{ -namespace gpu +namespace o2::gpu { //class GPUTRDTrackerGPU; class GPUTPCGPUTracker; @@ -320,7 +309,6 @@ class GPUChainTracking : public GPUChain int32_t OutputStream() const { return mRec->NStreams() - 2; } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h 
index 31ef86bcd6f70..dc1a665e6052c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h @@ -18,9 +18,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUChainTrackingFinalContext { GPUReconstruction* rec = nullptr; @@ -28,7 +26,6 @@ struct GPUChainTrackingFinalContext { std::condition_variable cond; bool ready = false; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUErrors.h b/GPU/GPUTracking/Global/GPUErrors.h index 377736a5dacb9..cd86390bc1b01 100644 --- a/GPU/GPUTracking/Global/GPUErrors.h +++ b/GPU/GPUTracking/Global/GPUErrors.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUErrors @@ -44,7 +42,6 @@ class GPUErrors GPUglobalref() uint32_t* mErrors; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.h b/GPU/GPUTracking/Global/GPUTrackingInputProvider.h index 751c9f0229f3d..910e87fd02126 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.h +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.h @@ -18,18 +18,13 @@ #include "GPUDef.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace tpc +namespace o2::tpc { struct ClusterNative; struct ClusterNativeAccess; -} // namespace tpc -} // namespace o2 +} // namespace o2::tpc -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTrackingInOutZS; @@ -90,7 +85,6 @@ class GPUTrackingInputProvider : public GPUProcessor uint32_t* mErrorCodes = nullptr; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h index 77a6f262f03e0..d59b8fea28f08 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMBorderTrack.h @@ -18,9 +18,7 @@ #include "GPUCommonDef.h" 
#include "GPUCommonMath.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGMBorderTrack @@ -127,7 +125,6 @@ class GPUTPCGMBorderTrack ClassDefNV(GPUTPCGMBorderTrack, 1); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 00d4b1822bdc1..2d73279cf1fe7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -18,9 +18,7 @@ #include "GPUTPCGMTrackParam.h" #include "GPUTPCGMMergedTrackHit.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGMMergedTrack @@ -125,7 +123,6 @@ class GPUTPCGMMergedTrack ClassDefNV(GPUTPCGMMergedTrack, 0); #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index c04b3eb1a1703..f821a543af0a9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -30,21 +30,16 @@ #include #endif // GPUCA_GPUCODE -namespace o2 -{ -namespace base +namespace o2::base { class MatLayerCylSet; } -namespace tpc +namespace o2::tpc { struct ClusterNative; } -} // namespace o2 -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCSectorTrack; class GPUTPCSectorOutput; @@ -291,7 +286,6 @@ class GPUTPCGMMerger : public GPUProcessor uint32_t* mRetryRefitIds; GPUTPCGMLoopData* mLoopData; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCGMMERGER_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h index 63592eb09eb3d..bda00822bac6a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.h @@ -19,9 +19,7 @@ #include "GPUConstantMem.h" #include "GPUTPCGMMergerTypes.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class 
GPUTPCGMMergerGeneral : public GPUKernelTemplate { @@ -182,7 +180,6 @@ class GPUTPCGMMergerMergeLoopers : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 3c8f21420a14f..4e225a61661c2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -18,11 +18,7 @@ #include "GPUTPCDef.h" #include "GPUGeneralKernels.h" -namespace o2 -{ -namespace gpu -{ -namespace gputpcgmmergertypes +namespace o2::gpu::gputpcgmmergertypes { enum attachTypes { attachAttached = 0x40000000, @@ -59,8 +55,6 @@ struct GPUTPCOuterParam { float C[15]; }; -} // namespace gputpcgmmergertypes -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu::gputpcgmmergertypes #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h index a5a9869c2061a..8f7a91ad69269 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.h @@ -18,9 +18,7 @@ #include "GPUTPCDef.h" #include "GPUTPCGMMergerGPU.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMO2Output : public GPUTPCGMMergerGeneral @@ -34,7 +32,6 @@ class GPUTPCGMO2Output : public GPUTPCGMMergerGeneral GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& merger); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h index d77cb861affa3..eac86a5598644 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h +++ 
b/GPU/GPUTracking/Merger/GPUTPCGMPhysicalTrackModel.h @@ -27,9 +27,7 @@ * */ -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMPhysicalTrackModel { @@ -272,7 +270,6 @@ GPUdi() void GPUTPCGMPhysicalTrackModel::Rotate(float alpha) RotateLight(alpha); UpdateValues(); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index 15f2bd880e351..88f0882a79f03 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -18,13 +18,10 @@ #include "GPUCommonDef.h" class AliMagF; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMPolynomialField; -} -} // namespace o2 +} // namespace o2::gpu /** * @class GPUTPCGMPolynomialFieldManager diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index eaff9be4f5e46..a2369bafc9751 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -20,18 +20,13 @@ #include "GPUTPCGMPolynomialField.h" #include "GPUCommonMath.h" -namespace o2 -{ -namespace base +namespace o2::base { struct MatBudget; class MatLayerCylSet; -} // namespace base -} // namespace o2 +} // namespace o2::base -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMTrackParam; struct GPUParam; @@ -275,7 +270,6 @@ GPUdi() float GPUTPCGMPropagator::getGlobalY(float X, float Y) const return getGlobalY(mCosAlpha, mSinAlpha, X, Y); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h index 924100c435fd9..27e4a89300ca4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.h @@ -20,9 +20,7 @@ #include "GPUCommonMath.h" #include "GPUO2DataTypes.h" 
-namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGMSectorTrack @@ -142,7 +140,6 @@ class GPUTPCGMSectorTrack ClassDefNV(GPUTPCGMSectorTrack, 1); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h index 1b2468b51000e..e3a5b2f7c1d01 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.h @@ -27,9 +27,7 @@ class AliExternalTrackParam; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCGMMerger; class GPUTPCGMBorderTrack; @@ -285,7 +283,6 @@ GPUdi() float GPUTPCGMTrackParam::GetMirroredY(float Bz) const } return GetY() - 2.f * CAMath::Sqrt(cosPhi2) / qptBz; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h b/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h index 7f30d0b568517..74ff251af4247 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCBaseTrackParam.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTrackParam; @@ -80,7 +78,6 @@ struct GPUTPCBaseTrackParam { float mZOffset; // z offset float mP[5]; // 'active' track parameters: Y, Z, SinPhi, DzDs, q/Pt }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h b/GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h index 1961ffabd791c..cf269a27bd6a5 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCClusterData.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCClusterData { int32_t id; @@ -37,7 +35,6 @@ struct GPUTPCClusterData { float sigmaTime2; #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu 
#endif // CLUSTERDATA_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h index 99bfc2d8fa804..9327699c9404b 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -42,7 +40,6 @@ class GPUTPCCreateTrackingData : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCCREATESECTORDATA_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCDef.h b/GPU/GPUTracking/SectorTracker/GPUTPCDef.h index 3b53c3e66875a..84ea8e836007c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCDef.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCDef.h @@ -21,9 +21,7 @@ #define CALINK_INVAL ((calink) -1) #define CALINK_DEAD_CHANNEL ((calink) -2) -namespace o2 -{ -namespace gpu +namespace o2::gpu { #if defined(GPUCA_O2_LIB) || defined(GPUCA_O2_INTERFACE) typedef uint32_t calink; @@ -33,8 +31,7 @@ typedef uint32_t calink; typedef uint32_t cahit; #endif struct cahit2 { cahit x, y; }; -} -} // o2::GPU +} // namespace o2::GPU #ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME // Needs full clusterdata #define GPUCA_FULL_CLUSTERDATA diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h index 593bc172303fe..2d2b275d06399 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h @@ -18,9 +18,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -61,7 
+59,6 @@ class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker, int32_t n); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLETCONSTRUCTOR_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h index df8706d8c65b4..ebb6f9e52500e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCGrid.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCGrid @@ -66,7 +64,6 @@ class GPUTPCGrid float mStepYInv; //* inverse bin size in Y float mStepZInv; //* inverse bin size in Z }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCGRID_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCHit.h b/GPU/GPUTracking/SectorTracker/GPUTPCHit.h index 34a59b2f08dd2..e064441a1dba3 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCHit.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCHit.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCHit @@ -43,7 +41,6 @@ class GPUTPCHit private: friend class GPUTPCNeighboursFinder; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCHIT_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCHitId.h b/GPU/GPUTracking/SectorTracker/GPUTPCHitId.h index 19cfde1d76f4b..51c72969da8ae 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCHitId.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCHitId.h @@ -15,9 +15,7 @@ #ifndef GPUTPCHITID_H #define GPUTPCHITID_H -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCHitId { @@ -29,7 +27,6 @@ class GPUTPCHitId private: int32_t mId; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCHITID_H diff --git 
a/GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h b/GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h index 13f2753db6c93..ffd95cd807413 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCMCInfo.h @@ -15,9 +15,7 @@ #ifndef GPUTPCMCINFO_H #define GPUTPCMCINFO_H -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCMCInfo { int32_t charge; @@ -39,7 +37,6 @@ struct GPUTPCMCInfoCol { uint32_t first; uint32_t num; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h index e1a7437f695b4..7af6e8eb1a582 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h @@ -19,9 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -48,7 +46,6 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate template GPUd() static void Thread(int32_t /*nBlocks*/, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCNEIGHBOURSCLEANER_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index cc61eaacf994f..54dc0876f8a55 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -59,7 +57,6 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& 
tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCNEIGHBOURSFINDER_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCRow.h b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h index c7e5b3ebc7217..d16343b4360c2 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCRow.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h @@ -18,9 +18,7 @@ #include "GPUTPCDef.h" #include "GPUTPCGrid.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCRow @@ -78,7 +76,6 @@ class GPUTPCRow // offset in GPUTPCTrackingData::LinkUp/DownData/HitDataY/... uint32_t mFirstHitInBinOffset; // offset in Tracker::mRowData to find the FirstHitInBin }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCROW_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h index 5a51f160576eb..2c62a2ca184b2 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutCluster.h @@ -17,9 +17,7 @@ #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCSectorOutCluster @@ -63,7 +61,6 @@ class GPUTPCSectorOutCluster float mTime; #endif }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h index 4d294dadc7974..cc02206dc09a7 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h @@ -18,9 +18,7 @@ #include "GPUTPCDef.h" #include "GPUTPCTrack.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUOutputControl; @@ -77,6 +75,5 @@ class GPUTPCSectorOutput uint32_t mNTrackClusters; // total number of track clusters size_t mMemorySize; // Amount of memory really used }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git 
a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h index 5005e4f5e15d0..5e620180570c8 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -48,7 +46,6 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCSTARTHITSFINDER_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h index 2e40fdc549d32..b0349d660dbc1 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -48,7 +46,6 @@ class GPUTPCStartHitsSorter : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCSTARTHITSSORTER_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h index 8c88e89e92b25..225f5f0e2c7ad 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h @@ -19,9 +19,7 @@ #include "GPUTPCDef.h" #include "GPUTPCSectorOutCluster.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTrack @@ -67,7 +65,6 @@ class GPUTPCTrack private: }; -} // 
namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACK_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h index c9ab6158179bd..06ead3ce00f6c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackLinearisation.h @@ -17,9 +17,7 @@ #include "GPUTPCTrackParam.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTrackLinearisation @@ -87,7 +85,6 @@ GPUdi() void GPUTPCTrackLinearisation::Set(float SinPhi1, float CosPhi1, float D SetDzDs(DzDs1); SetQPt(QPt1); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLINEARISATION_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h index ae86ad97e41c9..e31abe338d2b8 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackParam.h @@ -19,9 +19,7 @@ #include "GPUTPCDef.h" #include "GPUCommonMath.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTrackLinearisation; @@ -181,7 +179,6 @@ GPUdi() void GPUTPCTrackParam::InitParam() SetCov(14, 1000.f); SetZOffset(0); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKPARAM_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index ba5d95e1cc53e..a92614e7fef5e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -27,9 +27,7 @@ #include "GPUTPCTracklet.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCSectorOutput; struct GPUTPCClusterData; @@ -276,7 +274,6 @@ class GPUTPCTracker : public GPUProcessor static int32_t StarthitSortComparison(const void* a, const void* b); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // 
GPUTPCTRACKER_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h index 656bb1c5e68f8..d7d5e76bc9d44 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h @@ -21,9 +21,7 @@ #include "GPUParam.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCClusterData; class GPUTPCHit; @@ -190,7 +188,6 @@ GPUdi() void GPUTPCTrackingData::SetHitWeight(const GPUTPCRow& row, uint32_t hit } GPUdi() int32_t GPUTPCTrackingData::HitWeight(const GPUTPCRow& row, uint32_t hitIndex) const { return mHitWeights[row.mHitNumberOffset + hitIndex]; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCSECTORDATA_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 6d6d466c903b5..10ff0a32aeaf3 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -18,9 +18,7 @@ #include "GPUTPCBaseTrackParam.h" #include "GPUTPCDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTracklet @@ -32,7 +30,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; #endif //! 
GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } @@ -54,7 +52,6 @@ class GPUTPCTracklet int32_t mHitWeight; // Hit Weight of Tracklet uint32_t mFirstHit; // first hit in row hit array }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLET_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index a98fe9af0e74c..8757ed87072da 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { /** * @class GPUTPCTrackletConstructor @@ -108,7 +106,6 @@ class GPUTPCTrackletConstructor GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLETCONSTRUCTOR_H diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index 52ebbf2af1eeb..bb969d866ef29 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -20,9 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; @@ -52,7 +50,6 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& tracker); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTPCTRACKLETSELECTOR_H diff --git a/GPU/GPUTracking/TPCClusterFinder/CfConsts.h b/GPU/GPUTracking/TPCClusterFinder/CfConsts.h index a53f73ed69e26..62695f2ae30a5 
100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfConsts.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfConsts.h @@ -17,11 +17,7 @@ #include "clusterFinderDefs.h" -namespace o2 -{ -namespace gpu -{ -namespace cfconsts +namespace o2::gpu::cfconsts { GPUconstexpr() tpccf::Delta2 InnerNeighbors[8] = @@ -190,8 +186,6 @@ GPUconstexpr() uint32_t NoiseSuppressionMinima[NOISE_SUPPRESSION_NEIGHBOR_NUM] = (1 << 24), (1 << 24) | (1 << 25)}; -} // namespace cfconsts -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu::cfconsts #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h index 10e375ee6f4bd..b4a4752b0f932 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h @@ -17,9 +17,7 @@ #include "clusterFinderDefs.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { #define INVALID_TIME_BIN (-GPUCF_PADDING_TIME - 1) @@ -59,7 +57,6 @@ struct ChargePos { inline constexpr ChargePos INVALID_CHARGE_POS{255, 255, INVALID_TIME_BIN}; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h index 0dc691ae6044a..2344c089a4436 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChainContext.h @@ -22,9 +22,7 @@ #include #include -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTPCCFChainContext { @@ -83,7 +81,6 @@ struct GPUTPCCFChainContext { } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h index 52beb195d7c83..9bf40417192b6 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.h @@ -18,9 +18,7 @@ #include "GPUDef.h" #include "GPUProcessor.h" -namespace o2 -{ -namespace gpu +namespace 
o2::gpu { struct GPUTPCClusterData; @@ -53,7 +51,6 @@ class GPUTPCConvert : public GPUProcessor int16_t mMemoryResOutput = -1; int16_t mMemoryResMemory = -1; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h index 4a142debcaafa..dd9a74f9b9131 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertImpl.h @@ -20,9 +20,7 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCConvertImpl @@ -46,7 +44,6 @@ class GPUTPCConvertImpl } }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h index 085260dc48067..d62e10e682a4b 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvertKernel.h @@ -17,9 +17,7 @@ #include "GPUGeneralKernels.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCConvertKernel : public GPUKernelTemplate { @@ -28,7 +26,6 @@ class GPUTPCConvertKernel : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h index 2af6fbf922ed4..a99cc5f4a7a2d 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDGeometry.h @@ -24,9 +24,7 @@ class TObjArray; #include "DataFormatsTRD/Constants.h" #include "GPUCommonTransform3D.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDpadPlane : private o2::trd::PadPlane @@ -78,7 +76,6 @@ class GPUTRDGeometry : 
private o2::trd::GeometryFlat static constexpr int32_t kNstack = o2::trd::constants::NSTACK; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDGEOMETRY_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 7aed063f05ad9..24624e60ceba7 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -23,23 +23,18 @@ #include "GPUTPCGMTrackParam.h" #include "GPUTRDDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template class trackInterface; template class propagatorInterface; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #include "DetectorsBase/Propagator.h" #include "GPUTRDInterfaceO2Track.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { GPUdi() trackInterface::trackInterface(const GPUTPCGMMergedTrack& trk) { set(trk.OuterParam().X, trk.OuterParam().alpha, trk.OuterParam().P, trk.OuterParam().C); } @@ -87,8 +82,7 @@ class propagatorInterface const o2::base::Propagator* mProp; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #include "GPUTPCGMPropagator.h" #include "GPUParam.h" @@ -96,9 +90,7 @@ class propagatorInterface #include "DataFormatsTPC/TrackTPC.h" #include "ReconstructionDataFormats/TrackTPCITS.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { template <> @@ -235,7 +227,6 @@ class propagatorInterface : public GPUTPCGMPropagator trackInterface* mTrack; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDINTERFACES_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h index f7e89169cde24..c11e60bed26db 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDSpacePoint.h @@ -18,9 +18,7 @@ #ifndef GPUCA_TPC_GEOMETRY_O2 // compatibility to Run 2 data types -namespace o2 -{ -namespace gpu +namespace o2::gpu { // class to hold the 
information on the space points @@ -44,16 +42,13 @@ class GPUTRDSpacePoint float mDy; // deflection over drift length }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #else // compatibility with Run 3 data types #include "DataFormatsTRD/CalibratedTracklet.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDSpacePoint : public o2::trd::CalibratedTracklet @@ -62,8 +57,7 @@ class GPUTRDSpacePoint : public o2::trd::CalibratedTracklet static_assert(sizeof(GPUTRDSpacePoint) == sizeof(o2::trd::CalibratedTracklet), "Incorrect memory layout"); -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUCA_TPC_GEOMETRY_O2 diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index 4e8fcd13e0801..1bd2eca769913 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -1105,12 +1105,9 @@ GPUd() bool GPUTRDTracker_t::IsGeoFindable(const TRDTRK* t, const #ifndef GPUCA_GPUCODE -namespace o2 -{ -namespace gpu +namespace o2::gpu { template class GPUTRDTracker_t; template class GPUTRDTracker_t; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 274dfd6668eaf..29a9b529b0558 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -30,9 +30,7 @@ #include #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackletWord; @@ -196,7 +194,6 @@ class GPUTRDTracker_t : public GPUProcessor float mTPCTDriftOffset; // TPC drift time additive offset GPUTRDTrackerDebug* mDebug; // debug output }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDTRACKER_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h index 45b083a4cca66..4f2199792b2b4 100644 --- 
a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerDebug.h @@ -23,9 +23,7 @@ #else -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -65,8 +63,7 @@ template class GPUTRDTrackerDebug; template class GPUTRDTrackerDebug; #endif #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif #endif // GPUTRDTRACKERDEBUG_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 8745eabb02473..70b525420f294 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -17,9 +17,7 @@ #include "GPUGeneralKernels.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackerKernels : public GPUKernelTemplate @@ -32,7 +30,6 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDTRACKERKERNELSCA_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h index d56ee1cbbba5e..20e1df7c72212 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletLabels.h @@ -17,15 +17,12 @@ #ifndef GPUTRDTRACKLETLABELS_H #define GPUTRDTRACKLETLABELS_H -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUTRDTrackletLabels { int32_t mLabel[3]; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUTRDTRACKLETLABELS_H diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index 83acbcda8e3a1..fc874070ec9b8 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ 
b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -24,9 +24,7 @@ class AliTRDtrackletWord; class AliTRDtrackletMCM; -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackletWord @@ -72,16 +70,13 @@ class GPUTRDTrackletWord uint32_t mTrackletWord; // tracklet word: PID | Z | deflection length | Y // bits: 8 4 7 13 }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #else // compatibility with Run 3 data types #include "DataFormatsTRD/Tracklet64.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTRDTrackletWord : private o2::trd::Tracklet64 @@ -108,8 +103,7 @@ class GPUTRDTrackletWord : private o2::trd::Tracklet64 static_assert(sizeof(GPUTRDTrackletWord) == sizeof(o2::trd::Tracklet64), "Incorrect memory layout"); -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUCA_TPC_GEOMETRY_O2 diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 286409ef146b1..6c0a96d3adb75 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -24,9 +24,7 @@ #include "CalibdEdxContainer.h" #include "GPUDebugStreamer.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUdEdx @@ -198,7 +196,6 @@ GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestri mNSubThresh++; } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 8f5808eb41261..73f65b6b24241 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -29,9 +29,7 @@ #include "utils/timer.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUTPCTracker; struct GPUParam; @@ -321,7 +319,6 @@ class GPUDisplay : public GPUDisplayInterface float mYFactor = 1.0f; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h 
b/GPU/GPUTracking/display/GPUDisplayInterface.h index 0f2aabffbc5af..3c6928c78e5a1 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -17,9 +17,7 @@ #include "GPUSettings.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUChainTracking; @@ -61,7 +59,6 @@ class GPUDisplayFrontendInterface GPUDisplayFrontendInterface(); }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUDISPLAYINTERFACE_H diff --git a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h index 23d382466ba22..88162ef29fda3 100644 --- a/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h +++ b/GPU/GPUTracking/display/shaders/GPUDisplayShaders.h @@ -16,9 +16,7 @@ #define GPUDISPLAYSHADERS_H #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { struct GPUDisplayShaders { @@ -468,7 +466,6 @@ void main() { } )"; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index 76774f740477f..679a2d7aa1a28 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -33,9 +33,7 @@ typedef int16_t Color_t; #if !defined(GPUCA_BUILD_QA) || defined(GPUCA_GPUCODE) -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUQA { @@ -59,8 +57,7 @@ class GPUQA static bool IsInitialized() { return false; } void UpdateChain(GPUChainTracking* chain) {} }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #else diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index 92da6bbac94e8..5cfe854ca1f6a 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -26,11 +26,9 @@ struct AliHLTTPCClusterMCWeight; struct AliHLTTPCClusterMCLabel; -namespace o2 +namespace o2::gpu { class MCCompLabel; -namespace gpu -{ namespace internal { @@ -167,6 +165,6 @@ static inline auto 
GPUTPCTrkLbl(const AliHLTTPCClusterMCLabel* x, Args... args) } } // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index 93a3a3e512ac7..fb3c5f22d61ef 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUChainTracking; struct GPUParam; @@ -75,7 +73,6 @@ class genEvents }; #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif From 484c1d23ee5b4ccd770fb6b54cc20e6a73a22b76 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 17:30:04 +0100 Subject: [PATCH 0107/1914] GPU: Better hiding of internal structures --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 15 ++++-- .../Base/GPUReconstructionConvert.cxx | 32 +++++++------ .../Base/GPUReconstructionProcessing.cxx | 6 +++ .../GPUTPCClusterStatistics.cxx | 7 ++- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 48 +++++++++++++++---- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 7 ++- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 11 +++-- .../frontend/GPUDisplayFrontendWayland.cxx | 25 +++++----- .../display/frontend/GPUDisplayGUIWrapper.cxx | 6 +-- .../display/frontend/GPUDisplayGUIWrapper.h | 5 +- GPU/GPUTracking/qa/GPUQA.cxx | 6 +-- GPU/GPUTracking/qa/GPUQA.h | 5 +- GPU/GPUTracking/qa/GPUQAHelper.h | 6 ++- 13 files changed, 122 insertions(+), 57 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index c8d1bfc1dfe51..158aa94127a4d 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -46,6 +46,8 @@ namespace o2::gpu { +namespace // anonymous +{ struct GPUReconstructionPipelineQueue { uint32_t op = 0; // For now, 0 = process, 1 = terminate GPUChain* chain = nullptr; @@ -54,6 +56,7 @@ struct GPUReconstructionPipelineQueue { bool done = false; int32_t retVal = 
0; }; +} // namespace struct GPUReconstructionPipelineContext { std::queue queue; @@ -951,8 +954,12 @@ int32_t GPUReconstruction::unregisterMemoryForGPU(const void* ptr) return 1; } +namespace o2::gpu::internal +{ +namespace // anonymous +{ template -static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* err = "Invalid step num") +constexpr static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* err = "Invalid step num") { static_assert(sizeof(step) == sizeof(uint32_t), "Invalid step enum size"); int32_t retVal = 8 * sizeof(uint32_t) - 1 - CAMath::Clz((uint32_t)step); @@ -964,9 +971,11 @@ static inline int32_t getStepNum(T step, bool validCheck, int32_t N, const char* } return retVal; } +} // anonymous namespace +} // namespace o2::gpu::internal -int32_t GPUReconstruction::getRecoStepNum(RecoStep step, bool validCheck) { return getStepNum(step, validCheck, GPUDataTypes::N_RECO_STEPS, "Invalid Reco Step"); } -int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) { return getStepNum(step, validCheck, GPUDataTypes::N_GENERAL_STEPS, "Invalid General Step"); } +int32_t GPUReconstruction::getRecoStepNum(RecoStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_RECO_STEPS, "Invalid Reco Step"); } +int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) { return internal::getStepNum(step, validCheck, GPUDataTypes::N_GENERAL_STEPS, "Invalid General Step"); } void GPUReconstruction::RunPipelineWorker() { diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index c1a0a78dce6fe..8f5cab6807050 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -174,6 +174,8 @@ int32_t GPUReconstructionConvert::GetMaxTimeBin(const GPUTrackingInOutZS& zspage // ------------------------------------------------- TPC ZS 
------------------------------------------------- #ifdef GPUCA_TPC_GEOMETRY_O2 +namespace o2::gpu +{ namespace // anonymous { @@ -1306,19 +1308,9 @@ size_t zsEncoderRun::compare(std::vector* buffer, std::vector void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr* outBuffer, uint32_t* outSizes, o2::raw::RawFileWriter* raw, const o2::InteractionRecord* ir, const GPUParam& param, int32_t version, bool verify, float threshold, bool padding, std::function&)> digitsFilter) { @@ -1329,7 +1321,14 @@ void GPUReconstructionConvert::RunZSEncoder(const S& in, std::unique_ptr buffer[NSECTORS][GPUTrackingInOutZS::NENDPOINTS]; - auto reduced = tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS), o2::gpu::internal::tmpReductionResult(), [&](const auto range, auto red) { + struct tmpReductionResult { + uint32_t totalPages = 0; + size_t totalSize = 0; + size_t nErrors = 0; + size_t digitsInput = 0; + size_t digitsEncoded = 0; + }; + auto reduced = tbb::parallel_reduce(tbb::blocked_range(0, NSECTORS), tmpReductionResult(), [&](const auto range, auto red) { for (uint32_t i = range.begin(); i < range.end(); i++) { std::vector tmpBuffer; red.digitsInput += ZSEncoderGetNDigits(in, i); @@ -1455,6 +1454,8 @@ void GPUReconstructionConvert::RunZSFilter(std::unique_ptr* bu } #ifdef GPUCA_O2_LIB +namespace o2::gpu::internal +{ template static inline auto GetDecoder_internal(const GPUParam* param, int32_t version) { @@ -1480,15 +1481,16 @@ static inline auto GetDecoder_internal(const GPUParam* param, int32_t version) enc->decodePage(outBuffer, (const zsPage*)page, endpoint, firstTfOrbit, triggerBC); }; } +} // namespace o2::gpu::internal std::function&, const void*, uint32_t, uint32_t)> GPUReconstructionConvert::GetDecoder(int32_t version, const GPUParam* param) { if (version >= o2::tpc::ZSVersion::ZSVersionRowBased10BitADC && version <= o2::tpc::ZSVersion::ZSVersionRowBased12BitADC) { - return GetDecoder_internal(param, version); + return 
o2::gpu::internal::GetDecoder_internal(param, version); } else if (version == o2::tpc::ZSVersion::ZSVersionLinkBasedWithMeta) { - return GetDecoder_internal(param, version); + return o2::gpu::internal::GetDecoder_internal(param, version); } else if (version >= o2::tpc::ZSVersion::ZSVersionDenseLinkBased && version <= o2::tpc::ZSVersion::ZSVersionDenseLinkBasedV2) { - return GetDecoder_internal(param, version); + return o2::gpu::internal::GetDecoder_internal(param, version); } else { throw std::runtime_error("Invalid ZS version "s + std::to_string(version) + ", cannot create decoder"s); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 7909bc0720430..18662870ed45e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -50,7 +50,13 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr }); } +namespace o2::gpu +{ +namespace // anonymous +{ static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation +} // anonymous namespace +} // namespace o2::gpu GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step) { diff --git a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx index 254bf1797bdd6..b11a3b13d7132 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCClusterStatistics.cxx @@ -23,7 +23,9 @@ using namespace o2::gpu; // Small helper to compute Huffman probabilities -namespace +namespace o2::gpu +{ +namespace // anonymous { typedef std::vector HuffCode; typedef std::map HuffCodeMap; @@ -101,7 +103,8 @@ void GenerateCodes(const INode* node, const HuffCode& 
prefix, HuffCodeMap& outCo GenerateCodes(in->right, rightPrefix, outCodes); } } -} // namespace +} // anonymous namespace +} // namespace o2::gpu void GPUTPCClusterStatistics::RunStatistics(const o2::tpc::ClusterNativeAccess* clustersNative, const o2::tpc::CompressedClusters* clustersCompressed, const GPUParam& param) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 43a214cf37d3e..2ee472ac4ef2f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -58,14 +58,18 @@ #include "SimulationDataFormat/MCCompLabel.h" #endif +namespace o2::gpu::internal +{ +} using namespace o2::gpu; +using namespace o2::gpu::internal; using namespace o2::tpc; using namespace gputpcgmmergertypes; static constexpr int32_t kMaxParts = 400; static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; -namespace o2::gpu +namespace o2::gpu::internal { struct MergeLooperParam { float refz; @@ -73,7 +77,7 @@ struct MergeLooperParam { float y; uint32_t id; }; -} // namespace o2::gpu +} // namespace o2::gpu::internal #ifndef GPUCA_GPUCODE @@ -741,6 +745,10 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea } #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3> +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct MergeBorderTracks_compMax { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { @@ -761,6 +769,8 @@ struct MergeBorderTracks_compMin { #endif } }; +} // anonymous namespace +} // namespace o2::gpu::internal template <> inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) @@ -1436,6 +1446,10 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i // for (int32_t i = 0;i < 
mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks } +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct GPUTPCGMMerger_CompareClusterIdsLooper { struct clcomparestruct { uint8_t leg; @@ -1489,6 +1503,8 @@ struct GPUTPCGMMerger_CompareClusterIds { #endif } }; +} // anonymous namespace +} // namespace o2::gpu::internal GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { @@ -1803,6 +1819,10 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr } #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct GPUTPCGMMergerSortTracks_comp { const GPUTPCGMMergedTrack* const mCmp; GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} @@ -1833,14 +1853,6 @@ struct GPUTPCGMMergerSortTracks_comp { } }; -template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAsyncAllocator alloc(this); - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); -} - struct GPUTPCGMMergerSortTracksQPt_comp { const GPUTPCGMMergedTrack* const mCmp; GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} @@ -1861,6 +1873,16 @@ struct GPUTPCGMMergerSortTracksQPt_comp { #endif } }; +} // anonymous namespace +} // namespace o2::gpu::internal + +template <> +inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + 
thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); + ThrustVolatileAsyncAllocator alloc(this); + thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); +} template <> inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) @@ -2074,12 +2096,18 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, } #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt +namespace o2::gpu::internal +{ +namespace // anonymous +{ struct GPUTPCGMMergerMergeLoopers_comp { GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); } }; +} // anonymous namespace +} // namespace o2::gpu::internal template <> inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index f821a543af0a9..338df5d2604cd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -48,7 +48,10 @@ class GPUTPCTracker; class GPUChainTracking; class GPUTPCGMPolynomialField; struct GPUTPCGMLoopData; +namespace internal +{ struct MergeLooperParam; +} // namespace internal /** * @class GPUTPCGMMerger @@ -126,7 +129,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() uint2* ClusRefTmp() { return mClusRefTmp; } GPUhdi() uint32_t* TrackSort() { return mTrackSort; } GPUhdi() tmpSort* TrackSortO2() { return mTrackSortO2; } - GPUhdi() MergeLooperParam* LooperCandidates() { return mLooperCandidates; } + GPUhdi() internal::MergeLooperParam* LooperCandidates() { return mLooperCandidates; } GPUhdi() 
GPUAtomic(uint32_t) * SharedCount() { return mSharedCount; } GPUhdi() gputpcgmmergertypes::GPUTPCGMBorderRange* BorderRange(int32_t i) { return mBorderRange[i]; } GPUhdi() const gputpcgmmergertypes::GPUTPCGMBorderRange* BorderRange(int32_t i) const { return mBorderRange[i]; } @@ -267,7 +270,7 @@ class GPUTPCGMMerger : public GPUProcessor o2::tpc::TrackTPC* mOutputTracksTPCO2; uint32_t* mOutputClusRefsTPCO2; o2::MCCompLabel* mOutputTracksTPCO2MC; - MergeLooperParam* mLooperCandidates; + internal::MergeLooperParam* mLooperCandidates; uint32_t* mTrackOrderAttach; uint32_t* mTrackOrderProcess; diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 542b05c1220c0..9d10d40107b8f 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -63,7 +63,9 @@ void GPUTrackingRefitProcessor::SetMaxData(const GPUTrackingInOutPointers& io) } #endif -namespace +namespace o2::gpu::internal +{ +namespace // anonymous { template struct refitTrackTypes; @@ -76,6 +78,7 @@ struct refitTrackTypes { using propagator = const Propagator*; }; } // anonymous namespace +} // namespace o2::gpu::internal template <> GPUd() void GPUTrackingRefit::initProp(GPUTPCGMPropagator& prop) // FIXME: GPUgeneric() needed to make the clang spirv output link correctly @@ -210,10 +213,10 @@ template GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov) { CADEBUG(int32_t ii; printf("\nRefitting track\n")); - typename refitTrackTypes::propagator prop; + typename internal::refitTrackTypes::propagator prop; S trk; float TrackParCovChi2 = 0.f; - convertTrack::propagator>(trk, trkX, prop, &TrackParCovChi2); + convertTrack::propagator>(trk, trkX, prop, &TrackParCovChi2); int32_t begin = 0, count; float tOffset; if constexpr (std::is_same_v) { @@ -417,7 +420,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov static_assert("Invalid template"); } - 
convertTrack::propagator>(trkX, trk, prop, &TrackParCovChi2); + convertTrack::propagator>(trkX, trk, prop, &TrackParCovChi2); return nFitted; } diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx index ed0d08fb24add..ad3b620ba8f55 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx @@ -39,16 +39,8 @@ using namespace o2::gpu; -GPUDisplayFrontendWayland::GPUDisplayFrontendWayland() -{ - mFrontendType = TYPE_WAYLAND; - mFrontendName = "Wayland"; -} - -void GPUDisplayFrontendWayland::OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton) +namespace o2::gpu::internal { -} - template struct CCallWrapper { std::function func; @@ -58,6 +50,17 @@ struct CCallWrapper { return funcwrap->func(std::forward(args)...); } }; +} // namespace o2::gpu::internal + +GPUDisplayFrontendWayland::GPUDisplayFrontendWayland() +{ + mFrontendType = TYPE_WAYLAND; + mFrontendName = "Wayland"; +} + +void GPUDisplayFrontendWayland::OpenGLPrint(const char* s, float x, float y, float r, float g, float b, float a, bool fromBotton) +{ +} int32_t GPUDisplayFrontendWayland::GetKey(uint32_t key, uint32_t state) { @@ -283,7 +286,7 @@ int32_t GPUDisplayFrontendWayland::FrontendMain() wl_keyboard_add_listener(mKeyboard, &keyboard_listener, this); } }; - auto seat_capabilities_c = CCallWrapper{[seat_capabilities](wl_seat* seat, uint32_t capabilities) { seat_capabilities(seat, capabilities); }}; + auto seat_capabilities_c = internal::CCallWrapper{[seat_capabilities](wl_seat* seat, uint32_t capabilities) { seat_capabilities(seat, capabilities); }}; auto seat_name = [](void* data, struct wl_seat* seat, const char* name) { if (((GPUDisplayFrontendWayland*)data)->mDisplay->param()->par.debugLevel >= 2) { @@ -317,7 +320,7 @@ int32_t GPUDisplayFrontendWayland::FrontendMain() } }; - auto 
registry_global_c = CCallWrapper{[registry_global](wl_registry* registry, uint32_t name, const char* interface, uint32_t version) { registry_global(registry, name, interface, version); }}; + auto registry_global_c = internal::CCallWrapper{[registry_global](wl_registry* registry, uint32_t name, const char* interface, uint32_t version) { registry_global(registry, name, interface, version); }}; auto registry_global_remove = [](void* a, wl_registry* b, uint32_t c) {}; const wl_registry_listener registry_listener = {.global = ®istry_global_c.callback, .global_remove = registry_global_remove}; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx index 69d24538123c6..ff7763ea62948 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.cxx @@ -24,7 +24,7 @@ using namespace o2::gpu; -namespace o2::gpu +namespace o2::gpu::internal { struct GPUDisplayGUIWrapperObjects { std::unique_ptr app; @@ -39,7 +39,7 @@ struct GPUDisplayGUIWrapperObjects { std::mutex mutex, mutexRet; std::condition_variable signal, signalRet; }; -} // namespace o2::gpu +} // namespace o2::gpu::internal GPUDisplayGUIWrapper::GPUDisplayGUIWrapper() { @@ -52,7 +52,7 @@ GPUDisplayGUIWrapper::GPUDisplayGUIWrapper() first = true; } } - mO.reset(new GPUDisplayGUIWrapperObjects); + mO.reset(new internal::GPUDisplayGUIWrapperObjects); mO->t = std::thread(&GPUDisplayGUIWrapper::guiThread, this); } GPUDisplayGUIWrapper::~GPUDisplayGUIWrapper() diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h index 00542321d6a19..4bf88b4726532 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayGUIWrapper.h @@ -20,7 +20,10 @@ namespace o2::gpu { +namespace internal +{ struct GPUDisplayGUIWrapperObjects; +} // namespace internal class 
GPUDisplayGUIWrapper { @@ -35,7 +38,7 @@ class GPUDisplayGUIWrapper int32_t focus(); private: - std::unique_ptr mO; + std::unique_ptr mO; void guiThread(); }; diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index edb6c6e762a65..552c82f1bd299 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -315,12 +315,12 @@ void GPUQA::createHist(T*& h, const char* name, Args... args) p.second->emplace_back(&h); } -namespace o2::gpu +namespace o2::gpu::internal { struct GPUQAGarbageCollection { std::tuple>, std::vector>, std::vector>, std::vector>, std::vector>> v; }; -} // namespace o2::gpu +} // namespace o2::gpu::internal template T* GPUQA::createGarbageCollected(Args... args) @@ -335,7 +335,7 @@ void GPUQA::clearGarbagageCollector() std::apply([](auto&&... args) { ((args.clear()), ...); }, mGarbageCollector->v); } -GPUQA::GPUQA(GPUChainTracking* chain, const GPUSettingsQA* config, const GPUParam* param) : mTracking(chain), mConfig(config ? *config : GPUQA_GetConfig(chain)), mParam(param ? param : &chain->GetParam()), mGarbageCollector(std::make_unique()) +GPUQA::GPUQA(GPUChainTracking* chain, const GPUSettingsQA* config, const GPUParam* param) : mTracking(chain), mConfig(config ? *config : GPUQA_GetConfig(chain)), mParam(param ? param : &chain->GetParam()), mGarbageCollector(std::make_unique()) { mMCEventOffset.resize(1, 0); } diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index 679a2d7aa1a28..32b0553700f90 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -86,7 +86,10 @@ namespace o2::gpu class GPUChainTracking; struct GPUParam; struct GPUTPCMCInfo; +namespace internal +{ struct GPUQAGarbageCollection; +} // namespace internal class GPUQA { @@ -321,7 +324,7 @@ class GPUQA template void createHist(T*& h, const char* name, Args... args); - std::unique_ptr mGarbageCollector; + std::unique_ptr mGarbageCollector; template T* createGarbageCollected(Args... 
args); void clearGarbagageCollector(); diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index 5cfe854ca1f6a..92da6bbac94e8 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -26,9 +26,11 @@ struct AliHLTTPCClusterMCWeight; struct AliHLTTPCClusterMCLabel; -namespace o2::gpu +namespace o2 { class MCCompLabel; +namespace gpu +{ namespace internal { @@ -165,6 +167,6 @@ static inline auto GPUTPCTrkLbl(const AliHLTTPCClusterMCLabel* x, Args... args) } } // namespace gpu -} // namespace o2::gpu +} // namespace o2 #endif From ebdc0f4988e7b58f76ae344227a453dec6b12ac0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 17:50:56 +0100 Subject: [PATCH 0108/1914] GPU Multithreading TBB: Isolate outer-loop thread from other inner loops --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 7d03b908b86de..d4312580a4141 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -72,12 +72,14 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS if (mProcessingSettings.debugLevel >= 5) { printf("Running %d Threads\n", nThreads); } - mThreading->activeThreads->execute([&] { - tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { - typename T::GPUSharedMemory smem; - for (uint32_t iB = r.begin(); iB < r.end(); iB++) { - T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); - } + tbb::this_task_arena::isolate([&] { + mThreading->activeThreads->execute([&] { + tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { + typename T::GPUSharedMemory smem; + for (uint32_t iB = r.begin(); iB < r.end(); iB++) { + T::template 
Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); + } + }); }); }); } else { From d0c88c4e2985e621215f004542a2474674bca9be Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 18:06:21 +0100 Subject: [PATCH 0109/1914] GPU: Switch some of the older classes to using C++11 style initialization --- GPU/GPUTracking/CMakeLists.txt | 1 - GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 3 - GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 82 +++++++++---------- GPU/GPUTracking/SectorTracker/GPUTPCRow.cxx | 24 ------ GPU/GPUTracking/SectorTracker/GPUTPCRow.h | 7 +- .../SectorTracker/GPUTPCTracker.cxx | 5 -- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 50 +++++------ GPU/GPUTracking/kernels.cmake | 2 +- 8 files changed, 68 insertions(+), 106 deletions(-) delete mode 100644 GPU/GPUTracking/SectorTracker/GPUTPCRow.cxx diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 308122a2c5a59..b65674a68e6aa 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -44,7 +44,6 @@ set(SRCS SectorTracker/GPUTPCNeighboursFinder.cxx SectorTracker/GPUTPCGrid.cxx SectorTracker/GPUTPCTrackletSelector.cxx - SectorTracker/GPUTPCRow.cxx SectorTracker/GPUTPCExtrapolationTracking.cxx SectorTracker/GPUTPCCreateTrackingData.cxx Merger/GPUTPCGMMerger.cxx diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 2ee472ac4ef2f..f6a50565bac52 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -85,10 +85,7 @@ struct MergeLooperParam { #include "GPUMemorySizeScalers.h" GPUTPCGMMerger::GPUTPCGMMerger() - : mTrackLinks(nullptr), mNTotalSectorTracks(0), mNMaxTracks(0), mNMaxSingleSectorTracks(0), mNMaxOutputTrackClusters(0), mNMaxClusters(0), mMemoryResMemory(-1), mNClusters(0), mOutputTracks(nullptr), mSectorTrackInfos(nullptr), mSectorTrackInfoIndex(nullptr), mClusters(nullptr), mClustersXYZ(nullptr), 
mClusterAttachment(nullptr), mOutputTracksTPCO2(nullptr), mOutputClusRefsTPCO2(nullptr), mOutputTracksTPCO2MC(nullptr), mTrackOrderAttach(nullptr), mTrackOrderProcess(nullptr), mBorderMemory(nullptr), mBorderRangeMemory(nullptr), mMemory(nullptr), mRetryRefitIds(nullptr), mLoopData(nullptr) { - //* constructor - for (int32_t iSector = 0; iSector < NSECTORS; iSector++) { mNextSectorInd[iSector] = iSector + 1; mPrevSectorInd[iSector] = iSector - 1; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 338df5d2604cd..ae6a2582d833a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -241,53 +241,53 @@ class GPUTPCGMMerger : public GPUProcessor int32_t mNextSectorInd[NSECTORS]; int32_t mPrevSectorInd[NSECTORS]; - int32_t* mTrackLinks; + int32_t* mTrackLinks = nullptr; int32_t* mTrackCCRoots; // root of the connected component of this track - uint32_t mNTotalSectorTracks; // maximum number of incoming sector tracks - uint32_t mNMaxTracks; // maximum number of output tracks - uint32_t mNMaxSingleSectorTracks; // max N tracks in one sector - uint32_t mNMaxOutputTrackClusters; // max number of clusters in output tracks (double-counting shared clusters) - uint32_t mNMaxClusters; // max total unique clusters (in event) - uint32_t mNMaxLooperMatches; // Maximum number of candidate pairs for looper matching + uint32_t mNTotalSectorTracks = 0; // maximum number of incoming sector tracks + uint32_t mNMaxTracks = 0; // maximum number of output tracks + uint32_t mNMaxSingleSectorTracks = 0; // max N tracks in one sector + uint32_t mNMaxOutputTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) + uint32_t mNMaxClusters = 0; // max total unique clusters (in event) + uint32_t mNMaxLooperMatches = 0; // Maximum number of candidate pairs for looper matching - uint16_t mMemoryResMemory; - uint16_t mMemoryResOutput; - uint16_t mMemoryResOutputState; - 
uint16_t mMemoryResOutputO2; - uint16_t mMemoryResOutputO2Clus; - uint16_t mMemoryResOutputO2MC; - uint16_t mMemoryResOutputO2Scratch; + uint16_t mMemoryResMemory = (uint16_t)-1; + uint16_t mMemoryResOutput = (uint16_t)-1; + uint16_t mMemoryResOutputState = (uint16_t)-1; + uint16_t mMemoryResOutputO2 = (uint16_t)-1; + uint16_t mMemoryResOutputO2Clus = (uint16_t)-1; + uint16_t mMemoryResOutputO2MC = (uint16_t)-1; + uint16_t mMemoryResOutputO2Scratch = (uint16_t)-1; - int32_t mNClusters; // Total number of incoming clusters (from sector tracks) - GPUTPCGMMergedTrack* mOutputTracks; //* array of output merged tracks - GPUdEdxInfo* mOutputTracksdEdx; //* dEdx information - GPUTPCGMSectorTrack* mSectorTrackInfos; //* additional information for sector tracks - int32_t* mSectorTrackInfoIndex; - GPUTPCGMMergedTrackHit* mClusters; - GPUTPCGMMergedTrackHitXYZ* mClustersXYZ; - GPUAtomic(uint32_t) * mClusterAttachment; - o2::tpc::TrackTPC* mOutputTracksTPCO2; - uint32_t* mOutputClusRefsTPCO2; - o2::MCCompLabel* mOutputTracksTPCO2MC; - internal::MergeLooperParam* mLooperCandidates; + int32_t mNClusters = 0; // Total number of incoming clusters (from sector tracks) + GPUTPCGMMergedTrack* mOutputTracks = nullptr; //* array of output merged tracks + GPUdEdxInfo* mOutputTracksdEdx = nullptr; //* dEdx information + GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks + int32_t* mSectorTrackInfoIndex = nullptr; + GPUTPCGMMergedTrackHit* mClusters = nullptr; + GPUTPCGMMergedTrackHitXYZ* mClustersXYZ = nullptr; + GPUAtomic(uint32_t) * mClusterAttachment = nullptr; + o2::tpc::TrackTPC* mOutputTracksTPCO2 = nullptr; + uint32_t* mOutputClusRefsTPCO2 = nullptr; + o2::MCCompLabel* mOutputTracksTPCO2MC = nullptr; + internal::MergeLooperParam* mLooperCandidates = nullptr; - uint32_t* mTrackOrderAttach; - uint32_t* mTrackOrderProcess; - uint8_t* mClusterStateExt; - uint2* mClusRefTmp; - int32_t* mTrackIDs; - int32_t* mTmpSortMemory; - uint32_t* 
mTrackSort; - tmpSort* mTrackSortO2; - GPUAtomic(uint32_t) * mSharedCount; // Must be uint32_t unfortunately for atomic support - GPUTPCGMBorderTrack* mBorderMemory; // memory for border tracks + uint32_t* mTrackOrderAttach = nullptr; + uint32_t* mTrackOrderProcess = nullptr; + uint8_t* mClusterStateExt = nullptr; + uint2* mClusRefTmp = nullptr; + int32_t* mTrackIDs = nullptr; + int32_t* mTmpSortMemory = nullptr; + uint32_t* mTrackSort = nullptr; + tmpSort* mTrackSortO2 = nullptr; + GPUAtomic(uint32_t) * mSharedCount = nullptr; // Must be uint32_t unfortunately for atomic support + GPUTPCGMBorderTrack* mBorderMemory = nullptr; // memory for border tracks GPUTPCGMBorderTrack* mBorder[2 * NSECTORS]; - gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRangeMemory; // memory for border tracks - gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRange[NSECTORS]; // memory for border tracks - memory* mMemory; - uint32_t* mRetryRefitIds; - GPUTPCGMLoopData* mLoopData; + gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRangeMemory = nullptr; // memory for border tracks + gputpcgmmergertypes::GPUTPCGMBorderRange* mBorderRange[NSECTORS]; // memory for border tracks + memory* mMemory = nullptr; + uint32_t* mRetryRefitIds = nullptr; + GPUTPCGMLoopData* mLoopData = nullptr; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCRow.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCRow.cxx deleted file mode 100644 index 3d0102f2938e6..0000000000000 --- a/GPU/GPUTracking/SectorTracker/GPUTPCRow.cxx +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCRow.cxx -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#include "GPUTPCRow.h" -using namespace o2::gpu; - -#if !defined(GPUCA_GPUCODE) -GPUTPCRow::GPUTPCRow() : mNHits(0), mX(0), mMaxY(0), mGrid(), mHy0(0), mHz0(0), mHstepY(0), mHstepZ(0), mHstepYi(0), mHstepZi(0), mHitNumberOffset(0), mFirstHitInBinOffset(0) -{ - // dummy constructor -} - -#endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCRow.h b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h index d16343b4360c2..eb787f99ea336 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCRow.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCRow.h @@ -32,10 +32,6 @@ class GPUTPCRow friend class GPUTPCTrackingData; public: -#if !defined(GPUCA_GPUCODE) - GPUTPCRow(); -#endif //! GPUCA_GPUCODE - GPUhd() int32_t NHits() const { return mNHits; @@ -72,8 +68,7 @@ class GPUTPCRow float mHstepYi; // inverse step size float mHstepZi; // inverse step size - int32_t mHitNumberOffset; // index of the first hit in the hit array, used as - // offset in GPUTPCTrackingData::LinkUp/DownData/HitDataY/... + int32_t mHitNumberOffset; // index of the first hit in the hit array, used as offset in GPUTPCTrackingData::LinkUp/DownData/HitDataY/... 
uint32_t mFirstHitInBinOffset; // offset in Tracker::mRowData to find the FirstHitInBin }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index bb49548163ff8..63c64f78cc095 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -39,11 +39,6 @@ using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) -GPUTPCTracker::GPUTPCTracker() - : GPUProcessor(), mLinkTmpMemory(nullptr), mISector(-1), mData(), mNMaxStartHits(0), mNMaxRowStartHits(0), mNMaxTracklets(0), mNMaxRowHits(0), mNMaxTracks(0), mNMaxTrackHits(0), mMemoryResLinks(-1), mMemoryResScratchHost(-1), mMemoryResCommon(-1), mMemoryResTracklets(-1), mMemoryResOutput(-1), mMemoryResSectorScratch(-1), mRowStartHitCountOffset(nullptr), mTrackletTmpStartHits(nullptr), mGPUTrackletTemp(nullptr), mGPUParametersConst(), mCommonMem(nullptr), mTrackletStartHits(nullptr), mTracklets(nullptr), mTrackletRowHits(nullptr), mTracks(nullptr), mTrackHits(nullptr), mOutput(nullptr), mOutputMemory(nullptr) -{ -} - GPUTPCTracker::~GPUTPCTracker() { if (mOutputMemory) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index a92614e7fef5e..4a789b5adf6bf 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -40,7 +40,7 @@ class GPUTPCTracker : public GPUProcessor { public: #ifndef GPUCA_GPUCODE_DEVICE - GPUTPCTracker(); + GPUTPCTracker() = default; ~GPUTPCTracker(); GPUTPCTracker(const GPUTPCTracker&) = delete; GPUTPCTracker& operator=(const GPUTPCTracker&) = delete; @@ -233,40 +233,40 @@ class GPUTPCTracker : public GPUProcessor friend class GPUTPCNeighboursFinder; friend class GPUTPCStartHitsSorter; friend class GPUTPCStartHitsFinder; - char* mLinkTmpMemory; // tmp memory for hits after neighbours finder + char* mLinkTmpMemory = nullptr; // tmp memory for hits after 
neighbours finder - int32_t mISector; // Number of sector + int32_t mISector = -1; // Number of sector GPUTPCTrackingData mData; // The TrackingData object. It is used to encapsulate the storage in memory from the access - uint32_t mNMaxStartHits; - uint32_t mNMaxRowStartHits; - uint32_t mNMaxTracklets; - uint32_t mNMaxRowHits; - uint32_t mNMaxTracks; - uint32_t mNMaxTrackHits; - int16_t mMemoryResLinks; - int16_t mMemoryResScratch; - int16_t mMemoryResScratchHost; - int16_t mMemoryResCommon; - int16_t mMemoryResTracklets; - int16_t mMemoryResOutput; - int16_t mMemoryResSectorScratch; + uint32_t mNMaxStartHits = 0; + uint32_t mNMaxRowStartHits = 0; + uint32_t mNMaxTracklets = 0; + uint32_t mNMaxRowHits = 0; + uint32_t mNMaxTracks = 0; + uint32_t mNMaxTrackHits = 0; + uint16_t mMemoryResLinks = (uint16_t)-1; + uint16_t mMemoryResScratch = (uint16_t)-1; + uint16_t mMemoryResScratchHost = (uint16_t)-1; + uint16_t mMemoryResCommon = (uint16_t)-1; + uint16_t mMemoryResTracklets = (uint16_t)-1; + uint16_t mMemoryResOutput = (uint16_t)-1; + uint16_t mMemoryResSectorScratch = (uint16_t)-1; // GPU Temp Arrays - GPUglobalref() int32_t* mRowStartHitCountOffset; // Offset, length and new offset of start hits in row - GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits; // Unsorted start hits - GPUglobalref() char* mGPUTrackletTemp; // Temp Memory for GPU Tracklet Constructor + GPUglobalref() int32_t* mRowStartHitCountOffset = nullptr; // Offset, length and new offset of start hits in row + GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits = nullptr; // Unsorted start hits + GPUglobalref() char* mGPUTrackletTemp = nullptr; // Temp Memory for GPU Tracklet Constructor StructGPUParametersConst mGPUParametersConst; // Parameters for GPU if this is a GPU tracker // event - GPUglobalref() commonMemoryStruct* mCommonMem; // common event memory - GPUglobalref() GPUTPCHitId* mTrackletStartHits; // start hits for the tracklets - GPUglobalref() GPUTPCTracklet* mTracklets; // tracklets - 
GPUglobalref() calink* mTrackletRowHits; // Hits for each Tracklet in each row - GPUglobalref() GPUTPCTrack* mTracks; // reconstructed tracks - GPUglobalref() GPUTPCHitId* mTrackHits; // array of track hit numbers + GPUglobalref() commonMemoryStruct* mCommonMem = nullptr; // common event memory + GPUglobalref() GPUTPCHitId* mTrackletStartHits = nullptr; // start hits for the tracklets + GPUglobalref() GPUTPCTracklet* mTracklets = nullptr; // tracklets + GPUglobalref() calink* mTrackletRowHits = nullptr; // Hits for each Tracklet in each row + GPUglobalref() GPUTPCTrack* mTracks = nullptr; // reconstructed tracks + GPUglobalref() GPUTPCHitId* mTrackHits = nullptr; // array of track hit numbers // output GPUglobalref() GPUTPCSectorOutput* mOutput; // address of pointer pointing to SectorOutput Object diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index c84a1be8e6890..c973264bfde2a 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -13,7 +13,7 @@ # author David Rohr o2_gpu_kernel_file_list(ERRORS GPUErrors.cxx) -o2_gpu_kernel_file_list(TPCTRACKER ERRORS GPUTPCTrackParam.cxx GPUTPCTrack.cxx GPUTPCGrid.cxx GPUTPCRow.cxx GPUTPCTracker.cxx) +o2_gpu_kernel_file_list(TPCTRACKER ERRORS GPUTPCTrackParam.cxx GPUTPCTrack.cxx GPUTPCGrid.cxx GPUTPCTracker.cxx) o2_gpu_kernel_file_list(TPCTRACKLETCONS GPUTPCTrackletConstructor.cxx) o2_gpu_kernel_file_list(TPCSECTORDATA TPCTRACKER GPUTPCTrackingData.cxx) o2_gpu_kernel_file_list(TPCOCCUPANCY GPUTPCClusterOccupancyMap.cxx) From 0e75f9202e8feaac8a237c0980f649fa762fc13c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 18:31:52 +0100 Subject: [PATCH 0110/1914] GPU multi-threading: Tuned values for number of CPU threads for clusterizer and sector-tracking with TBB --- GPU/GPUTracking/Global/GPUChainTracking.cxx | 2 +- GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git 
a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index d80cf5c09f355..9de8b2174a182 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -718,7 +718,7 @@ int32_t GPUChainTracking::RunChain() } if (GetProcessingSettings().autoAdjustHostThreads && !mRec->IsGPU() && mIOPtrs.clustersNative) { - mRec->SetNActiveThreads(mIOPtrs.clustersNative->nClustersTotal / 5000); + mRec->SetNActiveThreads(mIOPtrs.clustersNative->nClustersTotal / 1500); } if (mIOPtrs.clustersNative && runRecoStep(RecoStep::TPCConversion, &GPUChainTracking::ConvertNativeToClusterData)) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 282a04c99c056..121d60873324f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -577,7 +577,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) return 1; } if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) { - mRec->SetNActiveThreads(mRec->MemoryScalers()->nTPCdigits / 20000); + mRec->SetNActiveThreads(mRec->MemoryScalers()->nTPCdigits / 6000); } mRec->MemoryScalers()->nTPCHits = mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits); @@ -1067,6 +1067,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) mPipelineNotifyCtx = nullptr; } + if (GetProcessingSettings().autoAdjustHostThreads && !doGPU) { + mRec->SetNActiveThreads(-1); + } + #endif return 0; } From 563b6dcd77f8599d394210aa1a301632fa4c0b93 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Feb 2025 19:16:24 +0100 Subject: [PATCH 0111/1914] GPU: Add optional cpu-time measurement per recoStep --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 3 ++ GPU/GPUTracking/Base/GPUReconstruction.h | 1 + GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 34 ++++++++++++------- 
.../Base/GPUReconstructionProcessing.h | 25 +++++++------- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Global/GPUChain.h | 16 ++++++--- GPU/GPUTracking/utils/timer.h | 2 -- 7 files changed, 51 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 158aa94127a4d..6ce07f67c3645 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -249,6 +249,9 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.debugLevel < 1) { mProcessingSettings.deviceTimers = false; } + if (mProcessingSettings.debugLevel > 0) { + mProcessingSettings.recoTaskTiming = true; + } if (mProcessingSettings.deterministicGPUReconstruction == -1) { mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index de4e94a886a3b..529cce2bd087f 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -332,6 +332,7 @@ class GPUReconstruction uint32_t mNEventsProcessed = 0; double mStatKernelTime = 0.; double mStatWallTime = 0.; + double mStatCPUTime = 0.; std::shared_ptr mROOTDump; std::vector>* mOutputErrorCodes = nullptr; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index d4312580a4141..d95a57c8f2063 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -33,6 +33,7 @@ #include "GPUConstantMem.h" #include "GPUMemorySizeScalers.h" #include +#include #define GPUCA_LOGGING_PRINTF #include "GPULogging.h" @@ -220,7 +221,8 @@ int32_t GPUReconstructionCPU::RunChains() mStatNEvents++; mNEventsProcessed++; - timerTotal.Start(); + mTimerTotal.Start(); + const std::clock_t cpuTimerStart = std::clock(); if (mProcessingSettings.doublePipeline) { 
int32_t retVal = EnqueuePipeline(); if (retVal) { @@ -237,17 +239,18 @@ int32_t GPUReconstructionCPU::RunChains() } } } - timerTotal.Stop(); + mTimerTotal.Stop(); + mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; - mStatWallTime = (timerTotal.GetElapsedTime() * 1000000. / mStatNEvents); + mStatWallTime = (mTimerTotal.GetElapsedTime() * 1000000. / mStatNEvents); std::string nEventReport; if (GetProcessingSettings().debugLevel >= 0 && mStatNEvents > 1) { nEventReport += " (avergage of " + std::to_string(mStatNEvents) + " runs)"; } - if (GetProcessingSettings().debugLevel >= 1) { - double kernelTotal = 0; - std::vector kernelStepTimes(GPUDataTypes::N_RECO_STEPS); + double kernelTotal = 0; + std::vector kernelStepTimes(GPUDataTypes::N_RECO_STEPS, 0.); + if (GetProcessingSettings().debugLevel >= 1) { for (uint32_t i = 0; i < mTimers.size(); i++) { double time = 0; if (mTimers[i] == nullptr) { @@ -277,9 +280,12 @@ int32_t GPUReconstructionCPU::RunChains() mTimers[i]->memSize = 0; } } + } + if (GetProcessingSettings().recoTaskTiming) { for (int32_t i = 0; i < GPUDataTypes::N_RECO_STEPS; i++) { if (kernelStepTimes[i] != 0. || mTimersRecoSteps[i].timerTotal.GetElapsedTime() != 0.) 
{ - printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us)\n", "Tasks", GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents); + printf("Execution Time: Step : %11s %38s Time: %'10.0f us %64s ( Total Time : %'14.0f us, CPU Time : %'14.0f us, %'7.2fx )\n", "Tasks", + GPUDataTypes::RECO_STEP_NAMES[i], kernelStepTimes[i] * 1000000 / mStatNEvents, "", mTimersRecoSteps[i].timerTotal.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU * 1000000 / mStatNEvents, mTimersRecoSteps[i].timerCPU / mTimersRecoSteps[i].timerTotal.GetElapsedTime()); } if (mTimersRecoSteps[i].bytesToGPU) { printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToGPU, "DMA to GPU", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToGPU.GetElapsedTime() * 1000000 / mStatNEvents, @@ -294,6 +300,7 @@ int32_t GPUReconstructionCPU::RunChains() mTimersRecoSteps[i].timerToGPU.Reset(); mTimersRecoSteps[i].timerToHost.Reset(); mTimersRecoSteps[i].timerTotal.Reset(); + mTimersRecoSteps[i].timerCPU = 0; mTimersRecoSteps[i].countToGPU = 0; mTimersRecoSteps[i].countToHost = 0; } @@ -303,15 +310,18 @@ int32_t GPUReconstructionCPU::RunChains() printf("Execution Time: General Step : %50s Time: %'10.0f us\n", GPUDataTypes::GENERAL_STEP_NAMES[i], mTimersGeneralSteps[i].GetElapsedTime() * 1000000 / mStatNEvents); } } - mStatKernelTime = kernelTotal * 1000000 / mStatNEvents; - printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Kernel", mStatKernelTime, nEventReport.c_str()); - printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", "Total Wall", mStatWallTime, nEventReport.c_str()); + if (GetProcessingSettings().debugLevel >= 1) { + mStatKernelTime = kernelTotal * 1000000 / mStatNEvents; + printf("Execution Time: Total : %50s Time: %'10.0f us%s\n", 
"Total Kernel", mStatKernelTime, nEventReport.c_str()); + } + printf("Execution Time: Total : %50s Time: %'10.0f us ( CPU Time : %'10.0f us, %7.2fx ) %s\n", "Total Wall", mStatWallTime, mStatCPUTime * 1000000 / mStatNEvents, mStatCPUTime / mTimerTotal.GetElapsedTime(), nEventReport.c_str()); } else if (GetProcessingSettings().debugLevel >= 0) { - GPUInfo("Total Wall Time: %lu us%s", (uint64_t)mStatWallTime, nEventReport.c_str()); + GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str()); } if (mProcessingSettings.resetTimers) { mStatNEvents = 0; - timerTotal.Reset(); + mStatCPUTime = 0; + mTimerTotal.Reset(); } return 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 62ad99f7fa606..4ccfb9ff10311 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -77,7 +77,7 @@ class GPUReconstructionProcessing : public GPUReconstruction constexpr static const char* GetKernelName(); // Public queries for timers - HighResTimer& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)].timerTotal; } + auto& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)]; } HighResTimer& getGeneralStepTimer(GeneralStep step) { return mTimersGeneralSteps[getGeneralStepNum(step)]; } template @@ -85,6 +85,17 @@ class GPUReconstructionProcessing : public GPUReconstruction virtual std::unique_ptr GetThreadContext() override; + struct RecoStepTimerMeta { + HighResTimer timerToGPU; + HighResTimer timerToHost; + HighResTimer timerTotal; + double timerCPU = 0.; + size_t bytesToGPU = 0; + size_t bytesToHost = 0; + uint32_t countToGPU = 0; + uint32_t countToHost = 0; + }; + protected: GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} using deviceEvent = gpu_reconstruction_kernels::deviceEvent; @@ -105,21 +116,11 @@ class GPUReconstructionProcessing : 
public GPUReconstruction size_t memSize; // Memory size for memory bandwidth computation }; - struct RecoStepTimerMeta { - HighResTimer timerToGPU; - HighResTimer timerToHost; - HighResTimer timerTotal; - size_t bytesToGPU = 0; - size_t bytesToHost = 0; - uint32_t countToGPU = 0; - uint32_t countToHost = 0; - }; - HighResTimer mTimersGeneralSteps[GPUDataTypes::N_GENERAL_STEPS]; std::vector> mTimers; RecoStepTimerMeta mTimersRecoSteps[GPUDataTypes::N_RECO_STEPS]; - HighResTimer timerTotal; + HighResTimer mTimerTotal; template HighResTimer& getKernelTimer(RecoStep step, int32_t num = 0, size_t addMemorySize = 0, bool increment = true); template diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index d0447f189e40e..98e0be1bdb1e5 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -234,6 +234,7 @@ AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") +AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6") AddOption(showOutputStat, bool, false, "", 0, "Print some track output statistics") AddOption(runCompressionStatistics, bool, false, "compressionStat", 0, "Run statistics and verification for cluster compression") diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 6f23487c1c4b9..a7c582b79d964 100644 --- 
a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -17,6 +17,8 @@ #include "GPUReconstructionCPU.h" +#include + namespace o2::gpu { class GPUChain @@ -280,16 +282,20 @@ template int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) { if (GetRecoSteps().isSet(step)) { - if (GetProcessingSettings().debugLevel >= 1) { - mRec->getRecoStepTimer(step).Start(); + auto* timer = GetProcessingSettings().recoTaskTiming ? &mRec->getRecoStepTimer(step) : nullptr; + std::clock_t c; + if (timer) { + timer->timerTotal.Start(); + c = std::clock(); } int32_t retVal = (reinterpret_cast(this)->*func)(args...); - if (GetProcessingSettings().debugLevel >= 1) { - mRec->getRecoStepTimer(step).Stop(); + if (timer) { + timer->timerTotal.Stop(); + timer->timerCPU += (double)(std::clock() - c) / CLOCKS_PER_SEC; } return retVal; } - return false; + return 0; } } // namespace o2::gpu diff --git a/GPU/GPUTracking/utils/timer.h b/GPU/GPUTracking/utils/timer.h index 6365a63263cfe..44a01b04747cb 100644 --- a/GPU/GPUTracking/utils/timer.h +++ b/GPU/GPUTracking/utils/timer.h @@ -40,9 +40,7 @@ class HighResTimer static double GetFrequency(); static double GetTime(); -#ifndef GPUCODE static double Frequency; -#endif }; #endif From 7560127d5dc9e81d78213c6a4b8345d3c6bd0c9a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Feb 2025 08:53:57 +0100 Subject: [PATCH 0112/1914] Fix some whitespace and copyright headers of macros --- GPU/GPUTracking/Merger/macros/checkPropagation.C | 11 +++++++++++ GPU/GPUTracking/Merger/macros/fitPolynomialFieldIts.C | 11 +++++++++++ GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C | 11 +++++++++++ GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C | 11 +++++++++++ GPU/GPUTracking/display/3rdparty/GL/glcorearb.h | 2 +- GPU/GPUTracking/display/filterMacros/TRDCandidate.C | 11 +++++++++++ GPU/GPUTracking/display/filterMacros/filterGPUTrack.C | 11 +++++++++++ GPU/GPUTracking/display/filterMacros/filterTPCTrack.C | 11 
+++++++++++ GPU/GPUTracking/display/filterMacros/hasTRD.C | 11 +++++++++++ 9 files changed, 89 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/macros/checkPropagation.C b/GPU/GPUTracking/Merger/macros/checkPropagation.C index d3b1c80b55bb1..fd32cf0bc3a85 100644 --- a/GPU/GPUTracking/Merger/macros/checkPropagation.C +++ b/GPU/GPUTracking/Merger/macros/checkPropagation.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #if !defined(__CLING__) || defined(__ROOTCLING__) #include "Riostream.h" #include "TFile.h" diff --git a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldIts.C b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldIts.C index 937d5a58dfc2d..2b17011db1d60 100644 --- a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldIts.C +++ b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldIts.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ int32_t fitPolynomialFieldIts() { gSystem->Load("libAliHLTTPC"); diff --git a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C index 6ffa2bbe4babe..e18f0f703b0d5 100644 --- a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C +++ b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTpc.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + int32_t fitPolynomialFieldTpc() { gSystem->Load("libAliHLTTPC"); diff --git a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C index bc515e1fa5849..67eea34110ab8 100644 --- a/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C +++ b/GPU/GPUTracking/Merger/macros/fitPolynomialFieldTrd.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ int32_t fitPolynomialFieldTrd() { gSystem->Load("libAliHLTTPC"); diff --git a/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h b/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h index ce1fbba68838a..fd3ab61ab0323 100644 --- a/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h +++ b/GPU/GPUTracking/display/3rdparty/GL/glcorearb.h @@ -56,7 +56,7 @@ extern "C" { ** included as . ** ** glcorearb.h includes only APIs in the latest OpenGL core profile -** implementation together with APIs in newer ARB extensions which +** implementation together with APIs in newer ARB extensions which ** can be supported by the core profile. It does not, and never will ** include functionality removed from the core profile, such as ** fixed-function vertex and fragment processing. diff --git a/GPU/GPUTracking/display/filterMacros/TRDCandidate.C b/GPU/GPUTracking/display/filterMacros/TRDCandidate.C index f00681d0ca335..4bbab658c31c4 100644 --- a/GPU/GPUTracking/display/filterMacros/TRDCandidate.C +++ b/GPU/GPUTracking/display/filterMacros/TRDCandidate.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ #include "GPUO2Interface.h" #include "GPUConstantMem.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C b/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C index 886ed29611553..a27d988e84e43 100644 --- a/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C +++ b/GPU/GPUTracking/display/filterMacros/filterGPUTrack.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #include "GPUO2Interface.h" #include "GPUTPCGMMergedTrack.h" diff --git a/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C b/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C index 636cdd0319011..484fff3e7d4ef 100644 --- a/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C +++ b/GPU/GPUTracking/display/filterMacros/filterTPCTrack.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ #include "GPUO2Interface.h" #if !defined(__CLING__) || defined(__ROOTCLING__) #include "DataFormatsTPC/TrackTPC.h" diff --git a/GPU/GPUTracking/display/filterMacros/hasTRD.C b/GPU/GPUTracking/display/filterMacros/hasTRD.C index cd98fb2fe349b..2392442c4a961 100644 --- a/GPU/GPUTracking/display/filterMacros/hasTRD.C +++ b/GPU/GPUTracking/display/filterMacros/hasTRD.C @@ -1,3 +1,14 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #include "GPUO2Interface.h" using namespace o2::gpu; From 3957037234a111fa282b9adb06da2ce565f4a3b0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Feb 2025 11:23:54 +0100 Subject: [PATCH 0113/1914] GPU: Do not try use OpenCL platforms where device query fails or which have 0 devices --- .../Base/opencl/GPUReconstructionOCL.cxx | 101 +++++++++--------- .../Base/opencl/GPUReconstructionOCL.h | 1 - 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 30a8fc193774b..03f123e97fb78 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -110,6 +110,45 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } bool found = false; + char platform_profile[256] = {}, platform_version[256] = {}, platform_name[256] = {}, platform_vendor[256] = {}; + auto queryPlatforms = [&platform_profile, &platform_version, &platform_name, &platform_vendor](auto 
platform) { + clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof(platform_profile), platform_profile, nullptr); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); + clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_name), platform_name, nullptr); + clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); + }; + auto checkPlatform = [&](auto platform) { + cl_uint tmp; + if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &tmp) != CL_SUCCESS || tmp == 0) { + return false; + } + + queryPlatforms(platform); + float ver1 = 0; + sscanf(platform_version, "OpenCL %f", &ver1); + if (ver1 >= 2.2f) { + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("OpenCL 2.2 capable platform found"); + } + return true; + } + + if (strcmp(platform_vendor, "Advanced Micro Devices, Inc.") == 0 && ver1 >= 2.0f) { + float ver2 = 0; + const char* pos = strchr(platform_version, '('); + if (pos) { + sscanf(pos, "(%f)", &ver2); + } + if ((ver1 >= 2.f && ver2 >= 2000.f) || ver1 >= 2.1f) { + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo("AMD ROCm OpenCL Platform found"); + } + return true; + } + } + return false; + }; + if (mProcessingSettings.platformNum >= 0) { if (mProcessingSettings.platformNum >= (int32_t)num_platforms) { quit("Invalid platform specified"); @@ -117,22 +156,14 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mInternals->platform = mInternals->platforms[mProcessingSettings.platformNum]; found = true; if (mProcessingSettings.debugLevel >= 2) { - char platform_profile[256] = {}, platform_version[256] = {}, platform_name[256] = {}, platform_vendor[256] = {}; - clGetPlatformInfo(mInternals->platform, CL_PLATFORM_PROFILE, sizeof(platform_profile), platform_profile, nullptr); - clGetPlatformInfo(mInternals->platform, CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); - clGetPlatformInfo(mInternals->platform, 
CL_PLATFORM_NAME, sizeof(platform_name), platform_name, nullptr); - clGetPlatformInfo(mInternals->platform, CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); + queryPlatforms(mInternals->platform); GPUInfo("Selected Platform %d: (%s %s) %s %s", mProcessingSettings.platformNum, platform_profile, platform_version, platform_vendor, platform_name); } } else { for (uint32_t i_platform = 0; i_platform < num_platforms; i_platform++) { - char platform_profile[256] = {}, platform_version[256] = {}, platform_name[256] = {}, platform_vendor[256] = {}; - clGetPlatformInfo(mInternals->platforms[i_platform], CL_PLATFORM_PROFILE, sizeof(platform_profile), platform_profile, nullptr); - clGetPlatformInfo(mInternals->platforms[i_platform], CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); - clGetPlatformInfo(mInternals->platforms[i_platform], CL_PLATFORM_NAME, sizeof(platform_name), platform_name, nullptr); - clGetPlatformInfo(mInternals->platforms[i_platform], CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); + queryPlatforms(mInternals->platforms[i_platform]); const char* platformUsageInfo = ""; - if (!found && CheckPlatform(i_platform)) { + if (!found && checkPlatform(mInternals->platforms[i_platform])) { found = true; mInternals->platform = mInternals->platforms[i_platform]; if (mProcessingSettings.debugLevel >= 2) { @@ -149,14 +180,14 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() quit("Did not find compatible OpenCL Platform"); } - cl_uint count, bestDevice = (cl_uint)-1; - if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &count))) { + cl_uint deviceCount, bestDevice = (cl_uint)-1; + if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount))) { quit("Error getting OPENCL Device Count"); } // Query devices - mInternals->devices.reset(new cl_device_id[count]); - if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, 
CL_DEVICE_TYPE_ALL, count, mInternals->devices.get(), nullptr))) { + mInternals->devices.reset(new cl_device_id[deviceCount]); + if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, deviceCount, mInternals->devices.get(), nullptr))) { quit("Error getting OpenCL devices"); } @@ -167,8 +198,8 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() if (mProcessingSettings.debugLevel >= 2) { GPUInfo("Available OPENCL devices:"); } - std::vector devicesOK(count, false); - for (uint32_t i = 0; i < count; i++) { + std::vector devicesOK(deviceCount, false); + for (uint32_t i = 0; i < deviceCount; i++) { if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Examining device %d", i); } @@ -215,11 +246,11 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } } if (bestDevice == (cl_uint)-1) { - quit("No %sOPENCL Device available, aborting OPENCL Initialisation", count ? "appropriate " : ""); + quit("No %sOPENCL Device available, aborting OPENCL Initialisation", deviceCount ? "appropriate " : ""); } if (mProcessingSettings.deviceNum > -1) { - if (mProcessingSettings.deviceNum >= (signed)count) { + if (mProcessingSettings.deviceNum >= (signed)deviceCount) { quit("Requested device ID %d does not exist", mProcessingSettings.deviceNum); } else if (!devicesOK[mProcessingSettings.deviceNum]) { quit("Unsupported device requested (%d)", mProcessingSettings.deviceNum); @@ -269,7 +300,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mWarpSize = 32; mMaxBackendThreads = std::max(mMaxBackendThreads, maxWorkGroup * mBlockCount); - mInternals->context = clCreateContext(nullptr, ContextForAllPlatforms() ? count : 1, ContextForAllPlatforms() ? mInternals->devices.get() : &mInternals->device, nullptr, nullptr, &ocl_error); + mInternals->context = clCreateContext(nullptr, ContextForAllPlatforms() ? deviceCount : 1, ContextForAllPlatforms() ? 
mInternals->devices.get() : &mInternals->device, nullptr, nullptr, &ocl_error); if (GPUFailedMsgI(ocl_error)) { quit("Could not create OPENCL Device Context!"); } @@ -608,33 +639,3 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() return 0; } - -bool GPUReconstructionOCLBackend::CheckPlatform(uint32_t i) -{ - char platform_version[64] = {}, platform_vendor[64] = {}; - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); - clGetPlatformInfo(mInternals->platforms[i], CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); - float ver1 = 0; - sscanf(platform_version, "OpenCL %f", &ver1); - if (ver1 >= 2.2f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("OpenCL 2.2 capable platform found"); - } - return true; - } - - if (strcmp(platform_vendor, "Advanced Micro Devices, Inc.") == 0 && ver1 >= 2.0f) { - float ver2 = 0; - const char* pos = strchr(platform_version, '('); - if (pos) { - sscanf(pos, "(%f)", &ver2); - } - if ((ver1 >= 2.f && ver2 >= 2000.f) || ver1 >= 2.1f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("AMD ROCm OpenCL Platform found"); - } - return true; - } - } - return false; -} diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 245e9674801f6..d052ba53dfac5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -74,7 +74,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase S& getKernelObject(); int32_t GetOCLPrograms(); - bool CheckPlatform(uint32_t i); }; using GPUReconstructionOCL = GPUReconstructionKernels; From 58dfa1ecc6c1821529603117193c35d2fe7b192d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Feb 2025 23:01:07 +0100 Subject: [PATCH 0114/1914] GPU OpenCL: Improve device detection and do not consider platforms/devices, which do not support SPIR-V --- 
GPU/GPUTracking/Base/opencl/CMakeLists.txt | 2 +- .../Base/opencl/GPUReconstructionOCL.cxx | 382 +++++++++--------- .../Base/opencl/GPUReconstructionOCL.h | 3 +- .../opencl/GPUReconstructionOCLInternals.h | 2 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 3 +- 5 files changed, 187 insertions(+), 205 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 1aa3739b0b44a..2a361356283a8 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -106,4 +106,4 @@ endif() if(OPENCL_ENABLED_SPIRV) target_compile_definitions(${targetName} PRIVATE OPENCL_ENABLED_SPIRV) endif() -target_compile_definitions(${targetName} PRIVATE OCL_FLAGS=$) +target_compile_definitions(${targetName} PRIVATE GPUCA_OCL_BUILD_FLAGS=$) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 03f123e97fb78..6b918fe501330 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -26,10 +26,10 @@ using namespace o2::gpu; #include #include -#define quit(...) \ - { \ - GPUError(__VA_ARGS__); \ - return (1); \ +#define GPUErrorReturn(...) \ + { \ + GPUError(__VA_ARGS__); \ + return (1); \ } #define GPUCA_KRNL(x_class, x_attributes, ...) 
GPUCA_KRNL_PROP(x_class, x_attributes) @@ -94,215 +94,205 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() cl_int ocl_error; cl_uint num_platforms; if (GPUFailedMsgI(clGetPlatformIDs(0, nullptr, &num_platforms))) { - quit("Error getting OpenCL Platform Count"); + GPUErrorReturn("Error getting OpenCL Platform Count"); } if (num_platforms == 0) { - quit("No OpenCL Platform found"); + GPUErrorReturn("No OpenCL Platform found"); } if (mProcessingSettings.debugLevel >= 2) { GPUInfo("%d OpenCL Platforms found", num_platforms); } - // Query platforms - mInternals->platforms.reset(new cl_platform_id[num_platforms]); - if (GPUFailedMsgI(clGetPlatformIDs(num_platforms, mInternals->platforms.get(), nullptr))) { - quit("Error getting OpenCL Platforms"); + // Query platforms and devices + std::unique_ptr platforms; + platforms.reset(new cl_platform_id[num_platforms]); + if (GPUFailedMsgI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { + GPUErrorReturn("Error getting OpenCL Platforms"); } - bool found = false; - char platform_profile[256] = {}, platform_version[256] = {}, platform_name[256] = {}, platform_vendor[256] = {}; - auto queryPlatforms = [&platform_profile, &platform_version, &platform_name, &platform_vendor](auto platform) { - clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof(platform_profile), platform_profile, nullptr); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr); - clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_name), platform_name, nullptr); - clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(platform_vendor), platform_vendor, nullptr); + auto query = [&](auto func, auto obj, auto var) { + size_t size; + func(obj, var, 0, nullptr, &size); + std::string retVal(size - 1, ' '); + func(obj, var, size, retVal.data(), nullptr); + return retVal; }; - auto checkPlatform = [&](auto platform) { - cl_uint tmp; - if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, 
nullptr, &tmp) != CL_SUCCESS || tmp == 0) { - return false; - } - queryPlatforms(platform); - float ver1 = 0; - sscanf(platform_version, "OpenCL %f", &ver1); - if (ver1 >= 2.2f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("OpenCL 2.2 capable platform found"); - } - return true; - } + std::string platform_profile, platform_version, platform_name, platform_vendor; + float platform_version_f; + auto queryPlatform = [&](auto platform) { + platform_profile = query(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE); + platform_version = query(clGetPlatformInfo, platform, CL_PLATFORM_VERSION); + platform_name = query(clGetPlatformInfo, platform, CL_PLATFORM_NAME); + platform_vendor = query(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR); + sscanf(platform_version.c_str(), "OpenCL %f", &platform_version_f); + }; - if (strcmp(platform_vendor, "Advanced Micro Devices, Inc.") == 0 && ver1 >= 2.0f) { - float ver2 = 0; - const char* pos = strchr(platform_version, '('); - if (pos) { - sscanf(pos, "(%f)", &ver2); - } - if ((ver1 >= 2.f && ver2 >= 2000.f) || ver1 >= 2.1f) { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("AMD ROCm OpenCL Platform found"); - } - return true; - } - } - return false; + std::vector devices; + std::string device_vendor, device_name, device_il_version; + cl_device_type device_type; + cl_uint device_freq, device_shaders, device_nbits; + cl_bool device_endian; + auto queryDevice = [&](auto device) { + platform_name = query(clGetDeviceInfo, device, CL_DEVICE_NAME); + device_vendor = query(clGetDeviceInfo, device, CL_DEVICE_VENDOR); + device_il_version = query(clGetDeviceInfo, device, CL_DEVICE_IL_VERSION); + clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type), &device_type, nullptr); + clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(device_freq), &device_freq, nullptr); + clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(device_shaders), &device_shaders, nullptr); + clGetDeviceInfo(device, 
CL_DEVICE_ADDRESS_BITS, sizeof(device_nbits), &device_nbits, nullptr); + clGetDeviceInfo(device, CL_DEVICE_ENDIAN_LITTLE, sizeof(device_endian), &device_endian, nullptr); }; - if (mProcessingSettings.platformNum >= 0) { - if (mProcessingSettings.platformNum >= (int32_t)num_platforms) { - quit("Invalid platform specified"); - } - mInternals->platform = mInternals->platforms[mProcessingSettings.platformNum]; - found = true; - if (mProcessingSettings.debugLevel >= 2) { - queryPlatforms(mInternals->platform); - GPUInfo("Selected Platform %d: (%s %s) %s %s", mProcessingSettings.platformNum, platform_profile, platform_version, platform_vendor, platform_name); + cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1; + for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) { + if (mProcessingSettings.oclPlatformNum >= 0) { + if (mProcessingSettings.oclPlatformNum >= (int32_t)num_platforms) { + GPUErrorReturn("Invalid platform specified"); + } + iPlatform = mProcessingSettings.oclPlatformNum; } - } else { - for (uint32_t i_platform = 0; i_platform < num_platforms; i_platform++) { - queryPlatforms(mInternals->platforms[i_platform]); - const char* platformUsageInfo = ""; - if (!found && checkPlatform(mInternals->platforms[i_platform])) { - found = true; - mInternals->platform = mInternals->platforms[i_platform]; - if (mProcessingSettings.debugLevel >= 2) { - platformUsageInfo = " !!! 
Using this platform !!!"; - } + std::string platformUsageInfo; + bool platformCompatible = false; + queryPlatform(platforms[iPlatform]); + if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount) != CL_SUCCESS) { + if (mProcessingSettings.oclPlatformNum >= 0) { + GPUErrorReturn("No device in requested platform or error obtaining device count"); } - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Available Platform %d: (%s %s) %s %s%s", i_platform, platform_profile, platform_version, platform_vendor, platform_name, platformUsageInfo); + platformUsageInfo += " - no devices"; + } else { + if (platform_version_f >= 2.1f) { + platformUsageInfo += " - OpenCL 2.2 capable"; + platformCompatible = true; } } - } - - if (found == false) { - quit("Did not find compatible OpenCL Platform"); - } - - cl_uint deviceCount, bestDevice = (cl_uint)-1; - if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount))) { - quit("Error getting OPENCL Device Count"); - } - // Query devices - mInternals->devices.reset(new cl_device_id[deviceCount]); - if (GPUFailedMsgI(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, deviceCount, mInternals->devices.get(), nullptr))) { - quit("Error getting OpenCL devices"); - } + if (mProcessingSettings.oclPlatformNum >= 0 || mProcessingSettings.debugLevel >= 2) { + GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", mProcessingSettings.oclPlatformNum >= 0 ? "Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", mProcessingSettings.debugLevel >= 2 ? 
platformUsageInfo.c_str() : ""); + } - char device_vendor[64], device_name[64]; - cl_device_type device_type; - cl_uint freq, shaders; + if (platformCompatible || mProcessingSettings.oclPlatformNum >= 0 || (mProcessingSettings.oclPlatformNum == -2 && deviceCount)) { + if (deviceCount > devices.size()) { + devices.resize(deviceCount); + } + if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(), nullptr) != CL_SUCCESS) { + if (mProcessingSettings.oclPlatformNum >= 0) { + GPUErrorReturn("Error getting OpenCL devices"); + } + continue; + } - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Available OPENCL devices:"); - } - std::vector devicesOK(deviceCount, false); - for (uint32_t i = 0; i < deviceCount; i++) { - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Examining device %d", i); - } - cl_uint nbits; - cl_bool endian; - - clGetDeviceInfo(mInternals->devices[i], CL_DEVICE_NAME, 64, device_name, nullptr); - clGetDeviceInfo(mInternals->devices[i], CL_DEVICE_VENDOR, 64, device_vendor, nullptr); - clGetDeviceInfo(mInternals->devices[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, nullptr); - clGetDeviceInfo(mInternals->devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(freq), &freq, nullptr); - clGetDeviceInfo(mInternals->devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(shaders), &shaders, nullptr); - clGetDeviceInfo(mInternals->devices[i], CL_DEVICE_ADDRESS_BITS, sizeof(nbits), &nbits, nullptr); - clGetDeviceInfo(mInternals->devices[i], CL_DEVICE_ENDIAN_LITTLE, sizeof(endian), &endian, nullptr); - int32_t deviceOK = true; - const char* deviceFailure = ""; - if (mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) { - deviceOK = false; - deviceFailure = "No GPU device"; - } - if (nbits / 8 != sizeof(void*)) { - deviceOK = false; - deviceFailure = "No 64 bit device"; - } - if (!endian) { - deviceOK = false; - deviceFailure = "No Little Endian Mode"; - } + 
for (uint32_t i = 0; i < deviceCount; i++) { + if (mProcessingSettings.deviceNum >= 0) { + if (mProcessingSettings.deviceNum >= (signed)deviceCount) { + GPUErrorReturn("Requested device ID %d does not exist", mProcessingSettings.deviceNum); + } + i = mProcessingSettings.deviceNum; + } + bool deviceOK = true; + queryDevice(devices[i]); + std::string deviceFailure; + if (mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) { + deviceOK = false; + deviceFailure += " - No GPU device"; + } + if (device_nbits / 8 != sizeof(void*)) { + deviceOK = false; + deviceFailure += " - No 64 bit device"; + } + if (!device_endian) { + deviceOK = false; + deviceFailure += " - No Little Endian Mode"; + } + if (!GetProcessingSettings().oclCompileFromSources) { + size_t pos = 0; + while ((pos = device_il_version.find("SPIR-V", pos)) != std::string::npos) { + float spirvVersion; + sscanf(device_il_version.c_str() + pos, "SPIR-V_%f", &spirvVersion); + if (spirvVersion >= 1.2) { + break; + } + pos += strlen("SPIR-V_0.0"); + } + if (pos == std::string::npos) { + deviceOK = false; + deviceFailure += " - No SPIR-V 1.6 (" + device_il_version + ")"; + } + } - double bestDeviceSpeed = -1, deviceSpeed = (double)freq * (double)shaders; - if (mProcessingSettings.debugLevel >= 2) { - GPUImportant("Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor, device_name, (int32_t)freq, (int32_t)shaders, (int32_t)nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? 
"" : deviceFailure); - } - if (!deviceOK) { - continue; - } - devicesOK[i] = true; - if (deviceSpeed > bestDeviceSpeed) { - bestDevice = i; - bestDeviceSpeed = deviceSpeed; - } else { - if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Skipping: Speed %f < %f", deviceSpeed, bestDeviceSpeed); + double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (double)device_shaders; + if (mProcessingSettings.debugLevel >= 2) { + GPUInfo(" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? "" : deviceFailure.c_str()); + } + if (!deviceOK) { + if (mProcessingSettings.deviceNum >= 0) { + GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, mProcessingSettings.deviceNum); + break; + } + continue; + } + if (deviceSpeed > bestDeviceSpeed) { + bestDevice = i; + bestPlatform = iPlatform; + bestDeviceSpeed = deviceSpeed; + mOclVersion = platform_version_f; + } + if (mProcessingSettings.deviceNum >= 0) { + break; + } } } - } - if (bestDevice == (cl_uint)-1) { - quit("No %sOPENCL Device available, aborting OPENCL Initialisation", deviceCount ? 
"appropriate " : ""); - } - - if (mProcessingSettings.deviceNum > -1) { - if (mProcessingSettings.deviceNum >= (signed)deviceCount) { - quit("Requested device ID %d does not exist", mProcessingSettings.deviceNum); - } else if (!devicesOK[mProcessingSettings.deviceNum]) { - quit("Unsupported device requested (%d)", mProcessingSettings.deviceNum); - } else { - bestDevice = mProcessingSettings.deviceNum; + if (mProcessingSettings.oclPlatformNum >= 0) { + break; } } - mInternals->device = mInternals->devices[bestDevice]; - - cl_ulong constantBuffer, globalMem, localMem; - char deviceVersion[64]; - size_t maxWorkGroup, maxWorkItems[3]; - clGetDeviceInfo(mInternals->device, CL_DEVICE_NAME, 64, device_name, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_VENDOR, 64, device_vendor, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(freq), &freq, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(shaders), &shaders, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalMem), &globalMem, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(constantBuffer), &constantBuffer, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(localMem), &localMem, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_VERSION, sizeof(deviceVersion) - 1, deviceVersion, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(maxWorkGroup), &maxWorkGroup, nullptr); - clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(maxWorkItems), maxWorkItems, nullptr); + + if (bestDevice == (cl_uint)-1) { + GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation"); + } + mInternals->platform = platforms[bestPlatform]; + 
GPUFailedMsg(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); + mInternals->device = devices[bestDevice]; + queryDevice(mInternals->device); + + cl_ulong deviceConstantBuffer, deviceGlobalMem, deviceLocalMem; + std::string deviceVersion; + size_t deviceMaxWorkGroup, deviceMaxWorkItems[3]; + clGetDeviceInfo(mInternals->device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(deviceGlobalMem), &deviceGlobalMem, nullptr); + clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(deviceConstantBuffer), &deviceConstantBuffer, nullptr); + clGetDeviceInfo(mInternals->device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(deviceLocalMem), &deviceLocalMem, nullptr); + clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(deviceMaxWorkGroup), &deviceMaxWorkGroup, nullptr); + clGetDeviceInfo(mInternals->device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(deviceMaxWorkItems), deviceMaxWorkItems, nullptr); + deviceVersion = query(clGetDeviceInfo, mInternals->device, CL_DEVICE_VERSION); int versionMajor, versionMinor; - sscanf(deviceVersion, "OpenCL %d.%d", &versionMajor, &versionMinor); + sscanf(deviceVersion.c_str(), "OpenCL %d.%d", &versionMajor, &versionMinor); if (mProcessingSettings.debugLevel >= 2) { - GPUInfo("Using OpenCL device %d: %s %s with properties:", bestDevice, device_vendor, device_name); + GPUInfo("Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), device_name.c_str()); GPUInfo("\tVersion = %s", deviceVersion); - GPUInfo("\tFrequency = %d", (int32_t)freq); - GPUInfo("\tShaders = %d", (int32_t)shaders); - GPUInfo("\tGLobalMemory = %ld", (int64_t)globalMem); - GPUInfo("\tContantMemoryBuffer = %ld", (int64_t)constantBuffer); - GPUInfo("\tLocalMemory = %ld", (int64_t)localMem); - GPUInfo("\tmaxThreadsPerBlock = %ld", (int64_t)maxWorkGroup); - GPUInfo("\tmaxThreadsDim = %ld %ld %ld", (int64_t)maxWorkItems[0], (int64_t)maxWorkItems[1], 
(int64_t)maxWorkItems[2]); + GPUInfo("\tFrequency = %d", (int32_t)device_freq); + GPUInfo("\tShaders = %d", (int32_t)device_shaders); + GPUInfo("\tGLobalMemory = %ld", (int64_t)deviceGlobalMem); + GPUInfo("\tContantMemoryBuffer = %ld", (int64_t)deviceConstantBuffer); + GPUInfo("\tLocalMemory = %ld", (int64_t)deviceLocalMem); + GPUInfo("\tmaxThreadsPerBlock = %ld", (int64_t)deviceMaxWorkGroup); + GPUInfo("\tmaxThreadsDim = %ld %ld %ld", (int64_t)deviceMaxWorkItems[0], (int64_t)deviceMaxWorkItems[1], (int64_t)deviceMaxWorkItems[2]); GPUInfo(" "); } #ifndef GPUCA_NO_CONSTANT_MEMORY - if (gGPUConstantMemBufferSize > constantBuffer) { - quit("Insufficient constant memory available on GPU %d < %d!", (int32_t)constantBuffer, (int32_t)gGPUConstantMemBufferSize); + if (gGPUConstantMemBufferSize > deviceConstantBuffer) { + GPUErrorReturn("Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceConstantBuffer, (int32_t)gGPUConstantMemBufferSize); } #endif - mDeviceName = device_name; + mDeviceName = device_name.c_str(); mDeviceName += " (OpenCL)"; - mBlockCount = shaders; + mBlockCount = device_shaders; mWarpSize = 32; - mMaxBackendThreads = std::max(mMaxBackendThreads, maxWorkGroup * mBlockCount); + mMaxBackendThreads = std::max(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount); - mInternals->context = clCreateContext(nullptr, ContextForAllPlatforms() ? deviceCount : 1, ContextForAllPlatforms() ? 
mInternals->devices.get() : &mInternals->device, nullptr, nullptr, &ocl_error); + mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error); if (GPUFailedMsgI(ocl_error)) { - quit("Could not create OPENCL Device Context!"); + GPUErrorReturn("Could not create OPENCL Device Context!"); } if (GetOCLPrograms()) { @@ -316,14 +306,14 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, &ocl_error); if (GPUFailedMsgI(ocl_error)) { clReleaseContext(mInternals->context); - quit("OPENCL Memory Allocation Error"); + GPUErrorReturn("OPENCL Memory Allocation Error"); } mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error); if (GPUFailedMsgI(ocl_error)) { clReleaseMemObject(mInternals->mem_gpu); clReleaseContext(mInternals->context); - quit("OPENCL Constant Memory Allocation Error"); + GPUErrorReturn("OPENCL Constant Memory Allocation Error"); } if (device_type & CL_DEVICE_TYPE_CPU) { @@ -349,36 +339,36 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error); #endif if (GPUFailedMsgI(ocl_error)) { - quit("Error creating OpenCL command queue"); + GPUErrorReturn("Error creating OpenCL command queue"); } } if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { - quit("Error migrating buffer"); + GPUErrorReturn("Error migrating buffer"); } if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { - quit("Error migrating buffer"); + GPUErrorReturn("Error migrating buffer"); } mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, 
&ocl_error); if (GPUFailedMsgI(ocl_error)) { - quit("Error allocating pinned host memory"); + GPUErrorReturn("Error allocating pinned host memory"); } const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, __global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}"; cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error); if (GPUFailedMsgI(ocl_error)) { - quit("Error creating program object"); + GPUErrorReturn("Error creating program object"); } ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr); if (GPUFailedMsgI(ocl_error)) { char build_log[16384]; clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr); GPUImportant("Build Log:\n\n%s\n\n", build_log); - quit("Error compiling program"); + GPUErrorReturn("Error compiling program"); } cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error); if (GPUFailedMsgI(ocl_error)) { - quit("Error creating kernel"); + GPUErrorReturn("Error creating kernel"); } if (GPUFailedMsgI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || @@ -386,7 +376,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUFailedMsgI(clFinish(mInternals->command_queue[0])) || GPUFailedMsgI(clReleaseKernel(kernel)) || GPUFailedMsgI(clReleaseProgram(program))) { - quit("Error obtaining device memory ptr"); + GPUErrorReturn("Error obtaining device memory ptr"); } if (mProcessingSettings.debugLevel >= 2) { @@ -394,7 +384,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error); if (GPUFailedMsgI(ocl_error)) { - quit("Error allocating Page Locked 
Host Memory"); + GPUErrorReturn("Error allocating Page Locked Host Memory"); } mDeviceMemoryBase = ((void**)mHostMemoryBase)[0]; @@ -405,8 +395,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() memset(mHostMemoryBase, 0xDD, mHostMemorySize); } - GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)freq, (int32_t)shaders, (int64_t)mDeviceMemorySize, - (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize); + GPUInfo("OPENCL Initialisation successfull (%d: %s %s (Frequency %d, Shaders %d), %ld / %ld bytes host / global memory, Stack frame %d, Constant memory %ld)", bestDevice, device_vendor, device_name, (int32_t)device_freq, (int32_t)device_shaders, (int64_t)mDeviceMemorySize, (int64_t)mHostMemorySize, -1, (int64_t)gGPUConstantMemBufferSize); } else { GPUReconstructionOCL* master = dynamic_cast(mMaster); mBlockCount = master->mBlockCount; @@ -510,7 +499,7 @@ int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEve } if (tmp != CL_COMPLETE) { mGPUStuck = 1; - quit("GPU Stuck, future processing in this component is disabled, skipping event (GPU Event State %d)", (int32_t)tmp); + GPUErrorReturn("GPU Stuck, future processing in this component is disabled, skipping event (GPU Event State %d)", (int32_t)tmp); } } else { clFinish(mInternals->command_queue[stream]); @@ -581,24 +570,19 @@ S& GPUReconstructionOCLBackend::getKernelObject() int32_t GPUReconstructionOCLBackend::GetOCLPrograms() { - char platform_version[256] = {}; - GPUFailedMsg(clGetPlatformInfo(mInternals->platform, CL_PLATFORM_VERSION, sizeof(platform_version), platform_version, nullptr)); - float ver = 0; - sscanf(platform_version, "OpenCL %f", &ver); - cl_int ocl_error; - const char* ocl_flags = GPUCA_M_STR(OCL_FLAGS); + const char* oclBuildFlags = GetProcessingSettings().oclOverrideSourceBuildFlags != "" ? 
GetProcessingSettings().oclOverrideSourceBuildFlags.c_str() : GPUCA_M_STR(GPUCA_OCL_BUILD_FLAGS); #ifdef OPENCL_ENABLED_SPIRV // clang-format off - if (ver >= 2.2f && !GetProcessingSettings().oclCompileFromSources) { - GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", ver); + if (mOclVersion >= 2.1f && !GetProcessingSettings().oclCompileFromSources) { + GPUInfo("Reading OpenCL program from SPIR-V IL (Platform version %4.2f)", mOclVersion); mInternals->program = clCreateProgramWithIL(mInternals->context, _binary_GPUReconstructionOCLCode_spirv_start, _binary_GPUReconstructionOCLCode_spirv_len, &ocl_error); - ocl_flags = ""; + oclBuildFlags = ""; } else #endif // clang-format on { - GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", ver); + GPUInfo("Compiling OpenCL program from sources (Platform version %4.2f)", mOclVersion); size_t program_sizes[1] = {_binary_GPUReconstructionOCLCode_src_len}; char* programs_sources[1] = {_binary_GPUReconstructionOCLCode_src_start}; mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error); @@ -609,7 +593,7 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() return 1; } - if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, ocl_flags, nullptr, nullptr))) { + if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) { cl_build_status status; if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { size_t log_size; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index d052ba53dfac5..fadb393277758 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -55,8 +55,6 @@ class 
GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase void ReleaseEvent(deviceEvent ev) override; void RecordMarker(deviceEvent* ev, int32_t stream) override; - virtual bool ContextForAllPlatforms() { return false; } - template int32_t AddKernel(bool multi = false); template @@ -67,6 +65,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); GPUReconstructionOCLInternals* mInternals; + float mOclVersion; template int32_t runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h index 1020ce85563c2..b47c612b192d7 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h @@ -160,8 +160,6 @@ static inline int64_t clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, s struct GPUReconstructionOCLInternals { cl_platform_id platform; cl_device_id device; - std::unique_ptr platforms; - std::unique_ptr devices; cl_context context; cl_command_queue command_queue[GPUCA_MAX_STREAMS]; cl_mem mem_gpu; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 98e0be1bdb1e5..25419f3483dd6 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -223,7 +223,6 @@ AddHelp("help", 'h') EndConfig() BeginSubConfig(GPUSettingsProcessing, proc, configStandalone, "PROC", 0, "Processing settings", proc) -AddOption(platformNum, int32_t, -1, "", 0, "Platform to use, in case the backend provides multiple platforms (OpenCL only, -1 = auto-select)") AddOption(deviceNum, int32_t, -1, "gpuDevice", 0, "Set GPU device to use (-1: automatic, -2: for round-robin usage in timeslice-pipeline)") AddOption(gpuDeviceOnly, bool, false, "", 0, "Use only GPU as device (i.e. 
no CPU for OpenCL)") AddOption(globalInitMutex, bool, false, "", 0, "Use global mutex to synchronize initialization of multiple GPU instances") @@ -296,7 +295,9 @@ AddOption(tpcApplyDebugClusterFilter, bool, false, "", 0, "Apply custom cluster AddOption(RTCcacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored") AddOption(RTCprependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(RTCoverrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") +AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the backend provides multiple platforms (OpenCL only, -1 = auto-select, -2 query all platforms (also incompatible))") AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included spirv code") +AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) From f259510367bcdcab17a1e0976b56bc989e56ea68 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Feb 2025 23:05:10 +0100 Subject: [PATCH 0115/1914] GPU OpenCL: Make SPIR-V version settable in CMake --- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 8 ++++++-- GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 2a361356283a8..d6aa945fc77b7 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -39,6 +39,10 @@ set(OCL_DEFINECL "-D$= 17 @@ -47,7 +51,7 @@ if(OPENCL_ENABLED_SPIRV) # BUILD OpenCL intermediate code for 
SPIR-V target OUTPUT ${CL_BIN}.spirv COMMAND ${CMAKE_COMMAND} -E env "PATH=${TMP_LLVM_SPIRV_PATH}:\$$PATH" ${LLVM_CLANG} -O0 - --target=spirv64 + --target=spirv64v${GPUCA_OCL_SPIRV_VERSION} -fno-integrated-objemitter -ferror-limit=1000 -Wno-invalid-constexpr -Wno-unused-command-line-argument ${OCL_FLAGS} @@ -106,4 +110,4 @@ endif() if(OPENCL_ENABLED_SPIRV) target_compile_definitions(${targetName} PRIVATE OPENCL_ENABLED_SPIRV) endif() -target_compile_definitions(${targetName} PRIVATE GPUCA_OCL_BUILD_FLAGS=$) +target_compile_definitions(${targetName} PRIVATE GPUCA_OCL_BUILD_FLAGS=$ GPUCA_OCL_SPIRV_VERSION=${GPUCA_OCL_SPIRV_VERSION}) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 6b918fe501330..5fac46a214815 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -209,14 +209,14 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() while ((pos = device_il_version.find("SPIR-V", pos)) != std::string::npos) { float spirvVersion; sscanf(device_il_version.c_str() + pos, "SPIR-V_%f", &spirvVersion); - if (spirvVersion >= 1.2) { + if (spirvVersion >= GPUCA_OCL_SPIRV_VERSION) { break; } pos += strlen("SPIR-V_0.0"); } if (pos == std::string::npos) { deviceOK = false; - deviceFailure += " - No SPIR-V 1.6 (" + device_il_version + ")"; + deviceFailure += " - No SPIR-V " + std::to_string(GPUCA_OCL_SPIRV_VERSION) + " (" + device_il_version + ")"; } } From d626aa7dc7d4fca94fa421e7357bf871f11be507 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Tue, 25 Feb 2025 14:09:52 +0100 Subject: [PATCH 0116/1914] Adding default value for FST_BFIELD as "ccdb" --- prodtests/full_system_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/prodtests/full_system_test.sh b/prodtests/full_system_test.sh index bc15c9b119f23..5c7157f93c652 100755 --- a/prodtests/full_system_test.sh +++ b/prodtests/full_system_test.sh @@ -59,6 +59,7 
@@ FIRSTSAMPLEDORBIT=${FIRSTSAMPLEDORBIT:-0} OBLIGATORYSOR=${OBLIGATORYSOR:-false} FST_TPC_ZSVERSION=${FST_TPC_ZSVERSION:-4} TPC_SLOW_REALISITC_FULL_SIM=${TPC_SLOW_REALISITC_FULL_SIM:-0} +FST_BFIELD="${FST_BFIELD:-}ccdb" if [[ $BEAMTYPE == "PbPb" ]]; then FST_GENERATOR=${FST_GENERATOR:-pythia8hi} FST_COLRATE=${FST_COLRATE:-50000} From 13082a5398911cedb19bb084faa9f02699c99ca7 Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Tue, 25 Feb 2025 23:39:46 +0100 Subject: [PATCH 0117/1914] [MCH] fixes to the ST1 pads mapping (#13996) The relation between pad positions and electronics channels was wrong for seven DS boards on the edge of the bending cathodes, all corresponding to the motif type "1BG". For more details see https://its.cern.ch/jira/browse/MCH-4 --- .../Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx index 0e4f0bc6c5db1..dfdfb99e2d790 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType0.cxx @@ -269,10 +269,10 @@ CathodeSegmentation* createSegType0(bool isBendingPlane) /* 1BD */ {4, 16, {61, 9, 63, 59, 14, 7, 8, 16, 58, 62, 21, 57, 54, 19, 60, 10, 5, 12, 56, 52, 2, 6, 55, 1, 44, 51, 0, 48, 11, 4, 46, 3, 35, 36, 42, 13, 17, 33, 15, 34, 23, 32, 20, 18, 24, 43, 22, 37, 49, 25, 45, 38, 27, 28, 47, 26, 53, 41, 29, 50, 30, 31, 40, 39}}, /* 1BE */ {3, 16, {61, 9, 63, 14, 7, 8, 58, 62, 21, 54, 19, 60, 5, 12, 56, 2, 6, 55, 44, 51, 0, 11, 4, 46, 35, 36, 42, 17, 33, 15, 23, 32, 20, 24, 43, 22, 49, 25, 45, 27, 28, 47, 53, 41, 29, 30, 31, 40}}, /* 1BF */ {3, 21, {19, 9, 60, 14, 62, 59, 58, 63, 56, 6, 7, 57, 54, 8, 10, 5, 61, 52, 51, 16, 0, 2, 12, 48, 44, 55, 3, 4, 1, 42, 36, 46, 13, 35, 11, 15, 17, 34, 18, 23, 
32, 33, 38, 20, 37, 43, 25, 22, 47, 26, 45, 49, 28, 24, 27, 39, 50, 41, 40, 29, 31, 30, 53}}, - /* 1BG */ {3, 16, {61, 9, 63, 14, 7, 8, 58, 62, 21, 54, 19, 60, 5, 12, 56, 2, 6, 55, 44, 51, 0, 11, 4, 46, 35, 36, 42, 17, 33, 15, 23, 32, 20, 24, 43, 22, 49, 25, 45, 27, 28, 47, 53, 41, 29, 30, 31, 40}}, + /* 1BG */ {3, 16, {7, 9, 63, 61, 62, 60, 19, 21, 16, 58, 56, 57, 12, 6, 55, 54, 52, 0, 51, 48, 46, 2, 4, 42, 44, 13, 35, 11, 34, 33, 17, 32, 20, 23, 22, 43, 25, 47, 45, 49, 50, 27, 31, 29, 53, 40, 30, 39}}, /* 1BH */ {3, 16, {7, 9, 63, 61, 62, 60, 19, 21, 16, 58, 56, 57, 12, 6, 55, 54, 52, 0, 51, 48, 46, 2, 4, 42, 44, 13, 35, 11, 34, 33, 17, 32, 20, 23, 22, 43, 25, 47, 45, 49, 50, 27, 31, 29, -1, 40, 30, -1}}, /* 1BI */ {2, 16, {7, 9, 61, 62, 19, 21, 58, 56, 12, 6, 54, 52, 51, 48, 2, 4, 44, 13, 11, 34, 17, 32, 23, 22, 25, 47, 49, 50, 31, 29, 40, 30}}, - /* 1BG */ {1, 16, {59, 16, 57, 10, 52, 1, 48, 3, 13, 34, 18, 37, 38, 26, 50, 39}}}, + /* 1BG */ {1, 16, {8, 59, 14, 10, 5, 1, 3, 36, 15, 18, 37, 38, 24, 26, 28, 41}}}, /* PS */ {{0.63, 0.42}, {0.84, 0.42}, From 930d71c7dfe23e31690f098355559cf53163529a Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Tue, 25 Feb 2025 23:40:18 +0100 Subject: [PATCH 0118/1914] [MCH] fixes to the ST2 pads mapping (#13938) The relation between pad positions and electronics channels was wrong for five consecutive motif types on the bending side of ST2 quadrants (types "2Bv1" to "2Bv5"). 
The code is generated from the changes applied in https://github.com/mrrtf/alo/pull/74 For more details see https://its.cern.ch/jira/browse/MCH-5 --- .../src/GenCathodeSegmentationCreatorForSegType1.cxx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx index 48e7cf98469fa..d5cdd2a5f4624 100644 --- a/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx +++ b/Detectors/MUON/MCH/Mapping/Impl4/src/GenCathodeSegmentationCreatorForSegType1.cxx @@ -290,11 +290,11 @@ CathodeSegmentation* createSegType1(bool isBendingPlane) /* 2Bp1 */ {5, 16, {-1, -1, -1, 28, 27, -1, -1, 40, 30, 26, -1, 53, 29, 39, 47, -1, 49, 50, 31, 22, -1, 45, 25, 41, 38, -1, 20, 43, 24, 37, -1, 32, 18, 23, 15, -1, 34, 33, 17, 11, -1, 13, 35, 36, 4, -1, 3, 42, 2, 46, -1, 44, 1, 48, 5, -1, 0, 6, 55, 51, -1, 52, 12, 57, 54, 10, 16, 19, 8, 14, 56, 60, 21, 62, 58, 7, 61, 9, 63, 59}}, /* 2Bt1 */ {18, 8, {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45, 53, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 32, 22, 27, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 42, 11, 17, 43, 50, 40, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 51, 1, 35, 33, 38, 49, 39, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 0, 3, 34, 18, 24, 28, -1, -1, -1, -1, -1, -1, -1, -1, 10, 12, 61, 56, 52, 44, 13, 20, 25, 29, -1, -1, -1, -1, -1, -1, -1, 8, 63, 7, 62, 58, 55, 46, 15, 23, 26, 30, 31, 16, 19, 21, 9, 60, 59, 57, 14, 5, 54, 48, 2, 4, 36, 37, 47, 41, -1}}, /* 2Bu1 */ {4, 15, {20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}, - /* 2Bv1 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 
33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv2 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv3 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv4 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}, - /* 2Bv5 */ {4, 16, {26, 27, 29, 43, 47, 28, 40, 20, 22, 30, 50, 32, 15, 39, 53, 33, 38, 31, 25, 13, 37, 41, 49, 35, 4, 24, 45, 0, 11, 23, 18, 42, 46, 17, 34, 1, 5, 36, 3, 6, 51, 2, 44, 10, 54, 48, 52, 16, 14, 55, 12, 19, 58, 57, 56, 60, 59, 8, 21, 61, 7, 62, 63, 9}}}, + /* 2Bv1 */ {4, 16, {43, 29, 27, 26, 20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}, + /* 2Bv2 */ {4, 16, {43, 29, 27, 26, 20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}, + /* 2Bv3 */ {4, 16, {43, 29, 27, 26, 20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 
16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}, + /* 2Bv4 */ {4, 16, {43, 29, 27, 26, 20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}, + /* 2Bv5 */ {4, 16, {43, 29, 27, 26, 20, 40, 28, 47, 32, 50, 30, 22, 33, 53, 39, 15, 13, 25, 31, 38, 35, 49, 41, 37, 0, 45, 24, 4, 42, 18, 23, 11, 1, 34, 17, 46, 6, 3, 36, 5, 10, 44, 2, 51, 16, 52, 48, 54, 19, 12, 55, 14, 60, 56, 57, 58, 61, 21, 8, 59, 9, 63, 62, 7}}}, /* PS */ {{0.75, 0.5}, {1.5, 0.5}, From 7f30b03c0e393cfe48e0dd2fd399d9cf66437d58 Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Tue, 25 Feb 2025 23:41:12 +0100 Subject: [PATCH 0119/1914] MCH: introduce digit modifier in filtering workflow (#13924) * [MCH] introduce digit modifier in filtering workflow The digit modifier allows to change the contents of the digits in the filtering step. It is introduced in order to correct some mapping issues in the CTFs already collected, but the interface is general, and in the future it might be used for any kind of digit manipulation, if needed. * [MCH] added pad remapping function for ST1 The remapping function corrects the pads mapping for seven DS boards on the edge of the bending cathodes, all corresponding to the motif type "1BG". * [MCH] added pad remapping function for ST2 The remapping function corrects the pads mapping for five consecutive motif types on the bending side of ST2 quadrants (types "2Bv1" to "2Bv5"). 
--- .../MUON/MCH/DigitFiltering/CMakeLists.txt | 4 +- .../include/MCHDigitFiltering/DigitModifier.h | 28 ++ .../MCHDigitFiltering/DigitModifierParam.h | 35 ++ .../DigitFiltering/src/DigitFilteringSpec.cxx | 36 +- .../MCH/DigitFiltering/src/DigitModifier.cxx | 325 ++++++++++++++++++ .../DigitFiltering/src/DigitModifierParam.cxx | 15 + .../src/MCHDigitFilteringLinkDef.h | 3 + 7 files changed, 441 insertions(+), 5 deletions(-) create mode 100644 Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifier.h create mode 100644 Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifierParam.h create mode 100644 Detectors/MUON/MCH/DigitFiltering/src/DigitModifier.cxx create mode 100644 Detectors/MUON/MCH/DigitFiltering/src/DigitModifierParam.cxx diff --git a/Detectors/MUON/MCH/DigitFiltering/CMakeLists.txt b/Detectors/MUON/MCH/DigitFiltering/CMakeLists.txt index f6d7fbd03701d..14e920debd441 100644 --- a/Detectors/MUON/MCH/DigitFiltering/CMakeLists.txt +++ b/Detectors/MUON/MCH/DigitFiltering/CMakeLists.txt @@ -13,6 +13,8 @@ o2_add_library(MCHDigitFiltering SOURCES src/DigitFilter.cxx src/DigitFilterParam.cxx + src/DigitModifier.cxx + src/DigitModifierParam.cxx src/DigitFilteringSpec.cxx PUBLIC_LINK_LIBRARIES O2::Framework @@ -27,4 +29,4 @@ o2_add_executable( COMPONENT_NAME mch PUBLIC_LINK_LIBRARIES O2::MCHDigitFiltering) -o2_target_root_dictionary(MCHDigitFiltering HEADERS include/MCHDigitFiltering/DigitFilterParam.h) +o2_target_root_dictionary(MCHDigitFiltering HEADERS include/MCHDigitFiltering/DigitFilterParam.h include/MCHDigitFiltering/DigitModifierParam.h) diff --git a/Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifier.h b/Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifier.h new file mode 100644 index 0000000000000..0177ea134ab1d --- /dev/null +++ b/Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifier.h @@ -0,0 +1,28 @@ +// Copyright 2019-2020 CERN and copyright holders of 
ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef O2_MCH_DIGITFILTERING_DIGITMODIFIER_H_ +#define O2_MCH_DIGITFILTERING_DIGITMODIFIER_H_ + +#include "DataFormatsMCH/Digit.h" +#include + +namespace o2::mch +{ +typedef std::function DigitModifier; + +DigitModifier createDigitModifier(int runNumber, + bool updateST1, + bool updateST2); + +} // namespace o2::mch + +#endif diff --git a/Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifierParam.h b/Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifierParam.h new file mode 100644 index 0000000000000..dc95396835f33 --- /dev/null +++ b/Detectors/MUON/MCH/DigitFiltering/include/MCHDigitFiltering/DigitModifierParam.h @@ -0,0 +1,35 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef O2_MCH_DIGITFILTERING_DIGIT_MODIFIER_PARAM_H_ +#define O2_MCH_DIGITFILTERING_DIGIT_MODIFIER_PARAM_H_ + +#include "CommonUtils/ConfigurableParam.h" +#include "CommonUtils/ConfigurableParamHelper.h" + +namespace o2::mch +{ + +/** + * @class DigitModifierParam + * @brief Configurable parameters for the digit updating + */ +struct DigitModifierParam : public o2::conf::ConfigurableParamHelper { + + bool updateST1 = false; ///< whether or not to modify ST1 digits + bool updateST2 = false; ///< whether or not to modify ST2 digits + + O2ParamDef(DigitModifierParam, "MCHDigitModifier"); +}; + +} // namespace o2::mch + +#endif diff --git a/Detectors/MUON/MCH/DigitFiltering/src/DigitFilteringSpec.cxx b/Detectors/MUON/MCH/DigitFiltering/src/DigitFilteringSpec.cxx index fe40659bc9265..f43b04369bc6e 100644 --- a/Detectors/MUON/MCH/DigitFiltering/src/DigitFilteringSpec.cxx +++ b/Detectors/MUON/MCH/DigitFiltering/src/DigitFilteringSpec.cxx @@ -23,6 +23,8 @@ #include "MCHStatus/StatusMap.h" #include "MCHDigitFiltering/DigitFilter.h" #include "MCHDigitFiltering/DigitFilterParam.h" +#include "MCHDigitFiltering/DigitModifier.h" +#include "MCHDigitFiltering/DigitModifierParam.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" #include @@ -48,6 +50,10 @@ class DigitFilteringTask mRejectBackground = DigitFilterParam::Instance().rejectBackground; mStatusMask = DigitFilterParam::Instance().statusMask; mTimeCalib = DigitFilterParam::Instance().timeOffset; + + mUpdateDigitsST1 = DigitModifierParam::Instance().updateST1; + mUpdateDigitsST2 = DigitModifierParam::Instance().updateST2; + auto stop = [this]() { LOG(info) << "digit filtering duration = " << std::chrono::duration(mElapsedTime).count() << " ms"; @@ -82,6 +88,11 @@ class DigitFilteringTask auto tStart = std::chrono::high_resolution_clock::now(); + const auto& tinfo = pc.services().get(); + if (tinfo.runNumber != 0) { + mRunNumber = tinfo.runNumber; + } + if (mSanityCheck) { 
LOGP(info, "performing sanity checks"); auto error = sanityCheck(iRofs, iDigits); @@ -101,8 +112,12 @@ class DigitFilteringTask auto oLabels = mUseMC ? &pc.outputs().make>(OutputRef{"labels"}) : nullptr; if (!abort) { - bool selectSignal = false; + mDigitModifier = createDigitModifier(mRunNumber, + mUpdateDigitsST1, + mUpdateDigitsST2); + + bool selectSignal = false; mIsGoodDigit = createDigitFilter(mMinADC, mRejectBackground, selectSignal, @@ -114,20 +129,29 @@ class DigitFilteringTask // the clustering resolution will suffer. // That's why we only apply the "reject background" filter, which // is a loose background cut that does not penalize the signal + int cursor{0}; for (const auto& irof : iRofs) { const auto digits = iDigits.subspan(irof.getFirstIdx(), irof.getNEntries()); // filter the digits from the current ROF for (auto i = 0; i < digits.size(); i++) { - const auto& d = digits[i]; - if (mIsGoodDigit(d)) { - oDigits.emplace_back(d); + auto digit = digits[i]; + + // modify the digit if needed + if (mDigitModifier) { + mDigitModifier(digit); + } + + // check the digit quality + if (mIsGoodDigit(digit)) { + oDigits.emplace_back(digit); if (iLabels) { oLabels->addElements(oLabels->getIndexedSize(), iLabels->getLabels(i + irof.getFirstIdx())); } } } + int nofGoodDigits = oDigits.size() - cursor; if (nofGoodDigits > 0) { // we create an ouput ROF only if at least one digit from @@ -160,6 +184,7 @@ class DigitFilteringTask } private: + int mRunNumber{0}; bool mRejectBackground{false}; bool mSanityCheck{false}; bool mUseMC{false}; @@ -167,7 +192,10 @@ class DigitFilteringTask int mMinADC{1}; int32_t mTimeCalib{0}; uint32_t mStatusMask{0}; + bool mUpdateDigitsST1{false}; + bool mUpdateDigitsST2{false}; DigitFilter mIsGoodDigit; + DigitModifier mDigitModifier; std::chrono::duration mElapsedTime{}; }; diff --git a/Detectors/MUON/MCH/DigitFiltering/src/DigitModifier.cxx b/Detectors/MUON/MCH/DigitFiltering/src/DigitModifier.cxx new file mode 100644 index 
0000000000000..0aa885e053dc9 --- /dev/null +++ b/Detectors/MUON/MCH/DigitFiltering/src/DigitModifier.cxx @@ -0,0 +1,325 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "MCHDigitFiltering/DigitModifier.h" + +#include "DataFormatsMCH/Digit.h" +#include "MCHMappingInterface/Segmentation.h" +#include +#include +#include +#include +#include +#include +#include + +namespace +{ +using PadRemappingTable = std::unordered_map; +using PadRemappingTableWithLimits = std::pair>; +using PadRemappingTablesForDE = std::vector; +using PadRemappingTables = std::unordered_map; + +// utility function that updates a digit with a given pad remapping table +bool updateDigitMapping(o2::mch::Digit& digit, const PadRemappingTables& padsRemapping) +{ + int deId = digit.getDetID(); + // check if the current DE is included in the pad remapping table + auto padsRemappingForDE = padsRemapping.find(deId); + if (padsRemappingForDE == padsRemapping.end()) { + return false; + } + + // find the remapping table that contains this padId, if existing + int padId = digit.getPadID(); + for (auto& padsRemappingForDS : padsRemappingForDE->second) { + if (padId < padsRemappingForDS.second.first || padId > padsRemappingForDS.second.second) { + continue; + } + + auto padIDRemapped = padsRemappingForDS.first.find(digit.getPadID()); + if (padIDRemapped == padsRemappingForDS.first.end()) { + continue; + } + + // update the digit + digit.setPadID(padIDRemapped->second); + return true; + } + return false; 
+} + +/** Initialization of the pad remapping table for Station 1 DEs + * See https://its.cern.ch/jira/browse/MCH-4 for details + */ +void initST1PadsRemappingTable(PadRemappingTables& fullTable) +{ + std::array deToRemap{100, 101, 102, 103, 200, 201, 202, 203}; + std::array dsToRemap{1, 27, 53, 79, 105, 131, 157}; + + std::vector newToOld(64); + newToOld[0] = 55; + newToOld[1] = 1; + newToOld[2] = 11; + newToOld[3] = 48; + newToOld[4] = 4; + newToOld[5] = 52; + newToOld[6] = 12; + newToOld[7] = 61; + newToOld[8] = 59; + newToOld[9] = 9; + newToOld[10] = 10; + newToOld[11] = 17; + newToOld[12] = 5; + newToOld[13] = 36; + newToOld[14] = 57; + newToOld[15] = 13; + newToOld[16] = 21; + newToOld[17] = 23; + newToOld[18] = 34; + newToOld[19] = 58; + newToOld[20] = 20; + newToOld[21] = 62; + newToOld[22] = 43; + newToOld[23] = 24; + newToOld[24] = 38; + newToOld[25] = 49; + newToOld[26] = 26; + newToOld[27] = 47; + newToOld[28] = 50; + newToOld[29] = 41; + newToOld[30] = 31; + newToOld[31] = 53; + newToOld[32] = 32; + newToOld[33] = 15; + newToOld[34] = 33; + newToOld[35] = 42; + newToOld[36] = 3; + newToOld[37] = 18; + newToOld[38] = 37; + newToOld[39] = 40; + newToOld[40] = 30; + newToOld[41] = 39; + newToOld[42] = 46; + newToOld[43] = 22; + newToOld[44] = 35; + newToOld[45] = 45; + newToOld[46] = 0; + newToOld[47] = 25; + newToOld[48] = 51; + newToOld[49] = 27; + newToOld[50] = 28; + newToOld[51] = 44; + newToOld[52] = 6; + newToOld[53] = 29; + newToOld[54] = 2; + newToOld[55] = 56; + newToOld[56] = 19; + newToOld[57] = 60; + newToOld[58] = 54; + newToOld[59] = 16; + newToOld[60] = 8; + newToOld[61] = 14; + newToOld[62] = 7; + newToOld[63] = 63; + + for (auto deId : deToRemap) { + + // create an empty table, or reset the existing one + fullTable[deId] = PadRemappingTablesForDE(); + // get a reference to the table for the current DE + auto& tableForDE = fullTable[deId]; + + const o2::mch::mapping::Segmentation& segment = o2::mch::mapping::segmentation(deId); + for (auto 
dsId : dsToRemap) { + // add an empty table for the current DS board + auto& tableForDSWithLimits = tableForDE.emplace_back(); + auto& tableForDS = tableForDSWithLimits.first; + + int padIdMin = std::numeric_limits::max(); + int padIdMax = -1; + for (int channel = 0; channel < 64; channel++) { + // get the pad ID associated to the channel in the new mapping + // this IS NOT the pad that originally fired + int padId = segment.findPadByFEE(dsId, channel); + // get the corresponding channel number in the old mapping + // this IS the electronic channel that originally fired + int channelInOldMapping = newToOld[channel]; + // get the pad ID associated to the fired channel in the new mapping + int padIdRemapped = segment.findPadByFEE(dsId, channelInOldMapping); + // update the pad remapping table + tableForDS[padId] = padIdRemapped; + + padIdMin = std::min(padIdMin, padId); + padIdMax = std::max(padIdMax, padId); + } + + tableForDSWithLimits.second.first = padIdMin; + tableForDSWithLimits.second.second = padIdMax; + } + } +} + +o2::mch::DigitModifier createST1MappingCorrector(int runNumber) +{ + static PadRemappingTables padsRemapping; + + constexpr int lastRunToBeFixed = 560402; + // ST1 mapping needs to be corrected only for data collected up to the end of 2024 Pb-Pb + if (runNumber > lastRunToBeFixed) { + // do not modify digits collected after 2024 Pb-Pb + return {}; + } + + if (padsRemapping.empty()) { + initST1PadsRemappingTable(padsRemapping); + } + + return [](o2::mch::Digit& digit) { + updateDigitMapping(digit, padsRemapping); + }; +} + +/** Initialization of the pad remapping table for Station 2 DEs + * See https://its.cern.ch/jira/browse/MCH-5 for details + */ +void initST2PadsRemappingTable(PadRemappingTables& fullTable) +{ + // Remapping of ST2 DS boards near the rounded part + std::array deToRemap{300, 301, 302, 303, 400, 401, 402, 403}; + std::array dsToRemap{99, 100, 101, 102, 103}; + + for (auto deId : deToRemap) { + + // create an empty table, or reset 
the existing one + fullTable[deId] = PadRemappingTablesForDE(); + // get a reference to the table for the current DE + auto& tableForDE = fullTable[deId]; + + const o2::mch::mapping::Segmentation& segment = o2::mch::mapping::segmentation(deId); + for (auto dsId : dsToRemap) { + + auto& tableForDSWithLimits = tableForDE.emplace_back(); + auto& tableForDS = tableForDSWithLimits.first; + + // double loop on DS channels + // 1. find the minimum pad index of the DS board + int padIdMin = -1; + int channelForPadIdMin = -1; + for (int channel = 0; channel < 64; channel++) { + auto padId = segment.findPadByFEE(dsId, int(channel)); + if (padId < 0) { + // this should never occur in this specific case, as all channels of this group of boards + // are connected to pads, hence we raise an exception + throw std::out_of_range(fmt::format("Unknown padId for DE{} DS{} channel {}", deId, dsId, channel)); + } + if (padIdMin < 0 || padId < padIdMin) { + padIdMin = padId; + channelForPadIdMin = channel; + } + } + + int padIdMax = -1; + // 2. build the re-mapping table + for (int channel = 0; channel < 64; channel++) { + auto padId = segment.findPadByFEE(dsId, int(channel)); + if (padId < padIdMin) { + // something is wrong here... 
+ continue; + } + + // update maximum padId value + padIdMax = std::max(padIdMax, padId); + + int padIdInDS = padId - padIdMin; + int padColumn = padIdInDS / 16; + int padRow = padIdInDS % 16; + + int padIdRemapped = -1; + + switch (padColumn) { + case 0: + // shift right by 3 columns + padIdRemapped = padId + 16 * 3; + break; + case 1: + // shift right by 1 column + padIdRemapped = padId + 16; + break; + case 2: + // shift left by 1 column + padIdRemapped = padId - 16; + break; + case 3: + // shift left by 3 columns + padIdRemapped = padId - 16 * 3; + break; + } + + // padsRemapping[deId][padId] = padIdRemapped; + tableForDS[padId] = padIdRemapped; + } + + tableForDSWithLimits.second.first = padIdMin; + tableForDSWithLimits.second.second = padIdMax; + } + } +} + +o2::mch::DigitModifier createST2MappingCorrector(int runNumber) +{ + // static std::unordered_map> padsRemapping; + static PadRemappingTables padsRemapping; + + constexpr int lastRunToBeFixed = 560402; + // ST2 mapping needs to be corrected only for data collected up to the end of 2024 Pb-Pb + if (runNumber > lastRunToBeFixed) { + // do not modify digits collected after 2024 Pb-Pb + return {}; + } + + if (padsRemapping.empty()) { + initST2PadsRemappingTable(padsRemapping); + } + + return [](o2::mch::Digit& digit) { + updateDigitMapping(digit, padsRemapping); + }; +} +} // namespace + +namespace o2::mch +{ +DigitModifier createDigitModifier(int runNumber, + bool updateST1, + bool updateST2) +{ + DigitModifier modifierST1 = updateST1 ? createST1MappingCorrector(runNumber) : DigitModifier{}; + DigitModifier modifierST2 = updateST2 ? 
createST2MappingCorrector(runNumber) : DigitModifier{}; + + if (modifierST1 || modifierST2) { + return [modifierST1, modifierST2](Digit& digit) { + // the ST1/ST2 modifiers are mutually exclusive, depending on the DeID associated to the digit + auto detID = digit.getDetID(); + if (modifierST1 && detID >= 100 && detID < 300) { + modifierST1(digit); + } + if (modifierST2 && detID >= 300 && detID < 500) { + modifierST2(digit); + } + }; + } else { + // return an empty function if none of the modifiers is set + return {}; + } +} + +} // namespace o2::mch diff --git a/Detectors/MUON/MCH/DigitFiltering/src/DigitModifierParam.cxx b/Detectors/MUON/MCH/DigitFiltering/src/DigitModifierParam.cxx new file mode 100644 index 0000000000000..c10a8a87d6bd7 --- /dev/null +++ b/Detectors/MUON/MCH/DigitFiltering/src/DigitModifierParam.cxx @@ -0,0 +1,15 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "MCHDigitFiltering/DigitModifierParam.h" +#include "CommonUtils/ConfigurableParam.h" + +O2ParamImpl(o2::mch::DigitModifierParam) diff --git a/Detectors/MUON/MCH/DigitFiltering/src/MCHDigitFilteringLinkDef.h b/Detectors/MUON/MCH/DigitFiltering/src/MCHDigitFilteringLinkDef.h index 1182d37654c37..c4de20393fbe0 100644 --- a/Detectors/MUON/MCH/DigitFiltering/src/MCHDigitFilteringLinkDef.h +++ b/Detectors/MUON/MCH/DigitFiltering/src/MCHDigitFilteringLinkDef.h @@ -18,4 +18,7 @@ #pragma link C++ class o2::mch::DigitFilterParam + ; #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::mch::DigitFilterParam> + ; +#pragma link C++ class o2::mch::DigitModifierParam + ; +#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::mch::DigitModifierParam> + ; + #endif From ac4411206217e01c803c0f4877f4c644fd408f95 Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Tue, 25 Feb 2025 23:41:31 +0100 Subject: [PATCH 0120/1914] [MCH] fix for the DE600 electronics mapping (#13921) The readout cables for the TB2 and TN2 groups are swapped for DE600, since the beginning of Run3. The mapping is corrected to reflect this swap. 
For details see https://its.cern.ch/jira/browse/MCH-11 --- Detectors/MUON/MCH/Raw/ElecMap/src/CH6R.cxx | 18 +++++++++--------- Detectors/MUON/MCH/Raw/ElecMap/src/fec.map | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Detectors/MUON/MCH/Raw/ElecMap/src/CH6R.cxx b/Detectors/MUON/MCH/Raw/ElecMap/src/CH6R.cxx index ff6d6863d2f02..21d15e484dfb3 100644 --- a/Detectors/MUON/MCH/Raw/ElecMap/src/CH6R.cxx +++ b/Detectors/MUON/MCH/Raw/ElecMap/src/CH6R.cxx @@ -286,11 +286,11 @@ void fillElec2DetCH6R(std::map& e2d) add(e2d, 600, 3, 218, 0, 0); add(e2d, 600, 2, 218, 0, 1); add(e2d, 600, 1, 218, 0, 2); - add(e2d, 600, 10, 218, 2, 0); - add(e2d, 600, 9, 218, 2, 1); - add(e2d, 600, 8, 218, 2, 2); - add(e2d, 600, 7, 218, 2, 3); - add(e2d, 600, 6, 218, 2, 4); + add(e2d, 600, 10, 219, 2, 0); + add(e2d, 600, 9, 219, 2, 1); + add(e2d, 600, 8, 219, 2, 2); + add(e2d, 600, 7, 219, 2, 3); + add(e2d, 600, 6, 219, 2, 4); add(e2d, 600, 15, 218, 4, 0); add(e2d, 600, 14, 218, 4, 1); add(e2d, 600, 13, 218, 4, 2); @@ -308,9 +308,9 @@ void fillElec2DetCH6R(std::map& e2d) add(e2d, 600, 104, 218, 3, 4); add(e2d, 600, 1028, 219, 0, 0); add(e2d, 600, 1029, 219, 0, 1); - add(e2d, 600, 1044, 219, 2, 0); - add(e2d, 600, 1045, 219, 2, 1); - add(e2d, 600, 1046, 219, 2, 2); + add(e2d, 600, 1044, 218, 2, 0); + add(e2d, 600, 1045, 218, 2, 1); + add(e2d, 600, 1046, 218, 2, 2); add(e2d, 600, 1040, 219, 4, 0); add(e2d, 600, 1041, 219, 4, 1); add(e2d, 600, 1042, 219, 4, 2); @@ -659,4 +659,4 @@ void fillSolar2FeeLinkCH6R(std::map& s2f) add_cru(s2f, 29, 8, 410); add_cru(s2f, 29, 9, 411); add_cru(s2f, 29, 10, 412); -} \ No newline at end of file +} diff --git a/Detectors/MUON/MCH/Raw/ElecMap/src/fec.map b/Detectors/MUON/MCH/Raw/ElecMap/src/fec.map index 41dbb5c1eca36..58a6971df900b 100644 --- a/Detectors/MUON/MCH/Raw/ElecMap/src/fec.map +++ b/Detectors/MUON/MCH/Raw/ElecMap/src/fec.map @@ -658,12 +658,12 @@ 217 4 601 1159 1158 1157 1156 0 218 0 600 3 2 1 0 0 218 1 600 116 115 114 113 112 -218 
2 600 10 9 8 7 6 +219 2 600 10 9 8 7 6 218 3 600 108 107 106 105 104 218 4 600 15 14 13 12 11 219 0 600 1028 1029 0 0 0 219 1 600 1133 1134 1135 0 0 -219 2 600 1044 1045 1046 0 0 +218 2 600 1044 1045 1046 0 0 219 3 600 1125 1126 1127 0 0 219 4 600 1040 1041 1042 1043 0 220 0 600 304 305 306 307 0 From 4c9c69757766a9c474d1fbbedae3f80bbe0fdb2b Mon Sep 17 00:00:00 2001 From: pillot Date: Tue, 25 Feb 2025 23:48:18 +0100 Subject: [PATCH 0121/1914] remove duplicates before uploading (#14003) --- .../MUON/MCH/include/DataFormatsMCH/DsChannelId.h | 8 ++++++++ Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/DataFormats/Detectors/MUON/MCH/include/DataFormatsMCH/DsChannelId.h b/DataFormats/Detectors/MUON/MCH/include/DataFormatsMCH/DsChannelId.h index 6485186b66463..cd11d4b71389c 100644 --- a/DataFormats/Detectors/MUON/MCH/include/DataFormatsMCH/DsChannelId.h +++ b/DataFormats/Detectors/MUON/MCH/include/DataFormatsMCH/DsChannelId.h @@ -61,5 +61,13 @@ class DsChannelId ClassDefNV(DsChannelId, 1); // class for MCH readout channel }; + +inline bool operator==(const DsChannelId& a, const DsChannelId& b) { return a.value() == b.value(); } +inline bool operator!=(const DsChannelId& a, const DsChannelId& b) { return !(a == b); } +inline bool operator<(const DsChannelId& a, const DsChannelId& b) { return a.value() < b.value(); } +inline bool operator>(const DsChannelId& a, const DsChannelId& b) { return b < a; } +inline bool operator<=(const DsChannelId& a, const DsChannelId& b) { return !(a > b); } +inline bool operator>=(const DsChannelId& a, const DsChannelId& b) { return !(a < b); } + } // namespace o2::mch #endif diff --git a/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx index d453277aa644c..d355a209329ca 100644 --- a/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx 
@@ -10,6 +10,7 @@ // or submit itself to any jurisdiction. #include +#include #include #include #include @@ -209,9 +210,12 @@ void uploadBadChannels(const std::string ccdbUrl, const std::string badChannelType, uint64_t startTimestamp, uint64_t endTimestamp, - const BadChannelsVector& bv, + BadChannelsVector& bv, bool makeDefault) { + std::sort(bv.begin(), bv.end()); + bv.erase(std::unique(bv.begin(), bv.end()), bv.end()); + std::cout << std::endl; o2::ccdb::CcdbApi api; api.init(ccdbUrl); From 7c2439ab343144d6707d1f0b1ec56cceb36c82e6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Feb 2025 23:56:17 +0100 Subject: [PATCH 0122/1914] GPU OpenCL: Improve device detection and do not consider platforms/devices, which do not support SPIR-V --- GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 5fac46a214815..078df45145d14 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -457,7 +457,11 @@ size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t if (stream == -1) { SynchronizeGPU(); } - if (toGPU == -2) { + if (size == 0) { + if (ev || nEvents) { // Workaround for OCL runtimes, which can throw an error in case size = 0 + GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); + } + } else if (toGPU == -2) { GPUFailedMsg(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); } else if (toGPU) { GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 
0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); From ee696bbfd9dc6b2d856578867ee5b5f57d4646cf Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Feb 2025 00:52:25 +0100 Subject: [PATCH 0123/1914] GPU: runKernel* functions do anyway return always 0, so remove return value --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 14 ++++----- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 29 +++++++++---------- .../Base/GPUReconstructionKernels.h | 4 +-- .../Base/cuda/GPUReconstructionCUDA.h | 2 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 7 ++--- .../Base/opencl/GPUReconstructionOCL.cxx | 4 +-- .../Base/opencl/GPUReconstructionOCL.h | 4 +-- .../opencl/GPUReconstructionOCLInternals.h | 3 +- GPU/GPUTracking/Global/GPUChain.h | 2 +- 9 files changed, 32 insertions(+), 37 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index d95a57c8f2063..969dd06d6297e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -56,7 +56,7 @@ GPUReconstructionCPU::~GPUReconstructionCPU() } template -inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args) { auto& x = _xyz.x; auto& y = _xyz.y; @@ -90,11 +90,10 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS } } } - return 0; } template <> -inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { int32_t nnThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); if (nnThreads > 1) { @@ -112,13 +111,12 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal -int32_t GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) { - return std::apply([this, &args](auto&... vals) { return runKernelBackendInternal(args.s, vals...); }, args.v); + std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); } template @@ -127,8 +125,8 @@ krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend() return krnlProperties{1, 1}; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - template int32_t GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ + template void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ template krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend(); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index b6225999c68a0..7901c34866c66 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -35,9 +35,9 @@ class GPUReconstructionCPUBackend : public GPUReconstructionProcessing 
protected: GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing(cfg) {} template - int32_t runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); + void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); template - int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args); + void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); }; @@ -53,7 +53,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels - int32_t runKernel(krnlSetup&& setup, Args&&... args); + void runKernel(krnlSetup&& setup, Args&&... args); template const gpu_reconstruction_kernels::krnlProperties getKernelProperties() { @@ -77,14 +77,14 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ - { \ - if (cpuFallback) { \ - return GPUReconstructionCPU::runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ - } else { \ - return runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ - } \ +#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \ + inline void runKernelImplWrapper(gpu_reconstruction_kernels::classArgument, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ + { \ + if (cpuFallback) { \ + GPUReconstructionCPU::runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ + } else { \ + runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ + } \ } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -131,7 +131,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels -inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& 
setup, Args&&... args) +inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) { HighResTimer* t = nullptr; GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep(); @@ -164,7 +164,7 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args GPUInfo("Running kernel %s (Stream %d, Range %d/%d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.start, setup.y.num, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()); } if (nThreads == 0 || nBlocks == 0) { - return 0; + return; } if (mProcessingSettings.debugLevel >= 1) { t = &getKernelTimer(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream); @@ -173,7 +173,7 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args } } double deviceTimerTime = 0.; - int32_t retVal = runKernelImplWrapper(gpu_reconstruction_kernels::classArgument(), cpuFallback, deviceTimerTime, std::forward(setup), std::forward(args)...); + runKernelImplWrapper(gpu_reconstruction_kernels::classArgument(), cpuFallback, deviceTimerTime, std::forward(setup), std::forward(args)...); if (GPUDebug(GetKernelName(), stream, mProcessingSettings.serializeGPU & 1)) { throw std::runtime_error("kernel failure"); } @@ -192,7 +192,6 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... 
args throw std::runtime_error("kernel error code"); } } - return retVal; } } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index e95a59df6cfd5..d541e36a06af9 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -99,9 +99,9 @@ class GPUReconstructionKernels : public T using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; #define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \ - virtual int32_t runKernelImpl(const krnlSetupArgs& args) \ + virtual void runKernelImpl(const krnlSetupArgs& args) \ { \ - return T::template runKernelBackend(args); \ + T::template runKernelBackend(args); \ } \ virtual gpu_reconstruction_kernels::krnlProperties getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument) \ { \ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index f14696a92a5b0..ee2f069028d74 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -42,7 +42,7 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase void PrintKernelOccupancies() override; template - int32_t runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupArgs& args); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); template diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 2e695b49ebb6c..70834d03992d0 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -67,7 +67,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet } template -int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args) { auto& x = args.s.x; auto& z = args.s.z; @@ -84,7 +84,6 @@ int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgsStreams[x.stream])); } - return 0; } #undef GPUCA_KRNL_REG @@ -93,7 +92,7 @@ int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); + template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #else #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 #define GPUCA_KRNL_DEFONLY @@ -102,7 +101,7 @@ int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); + template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #ifndef __HIPCC__ // CUDA version #define GPUCA_KRNL_CALL_single(x_class, ...) 
\ GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.start, args...); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 078df45145d14..6571583d27a3a 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -559,10 +559,10 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, } template -int32_t GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) { cl_kernel k = args.s.y.num > 1 ? getKernelObject() : getKernelObject(); - return std::apply([this, &args, &k](auto&... vals) { return runKernelBackendInternal(args.s, k, vals...); }, args.v); + std::apply([this, &args, &k](auto&... vals) { runKernelBackendInternal(args.s, k, vals...); }, args.v); } template diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index fadb393277758..9216f1c6f7b76 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -60,7 +60,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase template uint32_t FindKernel(int32_t num); template - int32_t runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... args); + void runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... 
args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); @@ -68,7 +68,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase float mOclVersion; template - int32_t runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupArgs& args); template S& getKernelObject(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h index b47c612b192d7..83217fd8bb682 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h @@ -171,7 +171,7 @@ struct GPUReconstructionOCLInternals { }; template -inline int32_t GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... args) +inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... args) { auto& x = _xyz.x; auto& y = _xyz.y; @@ -202,7 +202,6 @@ inline int32_t GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlS GPUFailedMsg(clReleaseEvent(ev)); } } - return 0; } template diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index a7c582b79d964..e017d9b60a269 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -171,7 +171,7 @@ class GPUChain mRec->ReadStructFromFile(file, obj); } template - inline int32_t runKernel(gpu_reconstruction_kernels::krnlSetup&& setup, Args&&... args) + inline void runKernel(gpu_reconstruction_kernels::krnlSetup&& setup, Args&&... 
args) { return mRec->runKernel(std::forward(setup), std::forward(args)...); } From 72173760d217d10991ce1aa8aa4c0039094b7e6e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Feb 2025 00:55:04 +0100 Subject: [PATCH 0124/1914] GPU OpenCL: Reorganize OpenCL code and remove obsolete internals header --- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 2 +- .../Base/opencl/GPUReconstructionOCL.cxx | 231 +++++++++++++++- .../Base/opencl/GPUReconstructionOCL.h | 4 +- .../opencl/GPUReconstructionOCLInternals.h | 246 ------------------ 4 files changed, 231 insertions(+), 252 deletions(-) delete mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index d6aa945fc77b7..9bd1da298e9d6 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -37,7 +37,7 @@ set(OCL_DEFINECL "-D$ +#include +#include +#include +#include +#include "GPULogging.h" + #include "GPUReconstructionOCL.h" -#include "GPUReconstructionOCLInternals.h" #include "GPUReconstructionIncludes.h" using namespace o2::gpu; @@ -26,6 +33,154 @@ using namespace o2::gpu; #include #include +namespace o2::gpu +{ + +static const char* opencl_error_string(int32_t errorcode) +{ + switch (errorcode) { + case CL_SUCCESS: + return "Success!"; + case CL_DEVICE_NOT_FOUND: + return "Device not found."; + case CL_DEVICE_NOT_AVAILABLE: + return "Device not available"; + case CL_COMPILER_NOT_AVAILABLE: + return "Compiler not available"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + return "Memory object allocation failure"; + case CL_OUT_OF_RESOURCES: + return "Out of resources"; + case CL_OUT_OF_HOST_MEMORY: + return "Out of host memory"; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return "Profiling information not available"; + case CL_MEM_COPY_OVERLAP: + return "Memory copy overlap"; + case CL_IMAGE_FORMAT_MISMATCH: + return "Image format mismatch"; + case 
CL_IMAGE_FORMAT_NOT_SUPPORTED: + return "Image format not supported"; + case CL_BUILD_PROGRAM_FAILURE: + return "Program build failure"; + case CL_MAP_FAILURE: + return "Map failure"; + case CL_INVALID_VALUE: + return "Invalid value"; + case CL_INVALID_DEVICE_TYPE: + return "Invalid device type"; + case CL_INVALID_PLATFORM: + return "Invalid platform"; + case CL_INVALID_DEVICE: + return "Invalid device"; + case CL_INVALID_CONTEXT: + return "Invalid context"; + case CL_INVALID_QUEUE_PROPERTIES: + return "Invalid queue properties"; + case CL_INVALID_COMMAND_QUEUE: + return "Invalid command queue"; + case CL_INVALID_HOST_PTR: + return "Invalid host pointer"; + case CL_INVALID_MEM_OBJECT: + return "Invalid memory object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "Invalid image format descriptor"; + case CL_INVALID_IMAGE_SIZE: + return "Invalid image size"; + case CL_INVALID_SAMPLER: + return "Invalid sampler"; + case CL_INVALID_BINARY: + return "Invalid binary"; + case CL_INVALID_BUILD_OPTIONS: + return "Invalid build options"; + case CL_INVALID_PROGRAM: + return "Invalid program"; + case CL_INVALID_PROGRAM_EXECUTABLE: + return "Invalid program executable"; + case CL_INVALID_KERNEL_NAME: + return "Invalid kernel name"; + case CL_INVALID_KERNEL_DEFINITION: + return "Invalid kernel definition"; + case CL_INVALID_KERNEL: + return "Invalid kernel"; + case CL_INVALID_ARG_INDEX: + return "Invalid argument index"; + case CL_INVALID_ARG_VALUE: + return "Invalid argument value"; + case CL_INVALID_ARG_SIZE: + return "Invalid argument size"; + case CL_INVALID_KERNEL_ARGS: + return "Invalid kernel arguments"; + case CL_INVALID_WORK_DIMENSION: + return "Invalid work dimension"; + case CL_INVALID_WORK_GROUP_SIZE: + return "Invalid work group size"; + case CL_INVALID_WORK_ITEM_SIZE: + return "Invalid work item size"; + case CL_INVALID_GLOBAL_OFFSET: + return "Invalid global offset"; + case CL_INVALID_EVENT_WAIT_LIST: + return "Invalid event wait list"; + case 
CL_INVALID_EVENT: + return "Invalid event"; + case CL_INVALID_OPERATION: + return "Invalid operation"; + case CL_INVALID_GL_OBJECT: + return "Invalid OpenGL object"; + case CL_INVALID_BUFFER_SIZE: + return "Invalid buffer size"; + case CL_INVALID_MIP_LEVEL: + return "Invalid mip-map level"; + default: + return "Unknown Errorcode"; + } +} + +#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) +#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) + +static inline int64_t OCLsetKernelParameters_helper(cl_kernel& k, int32_t i) +{ + return 0; +} + +template +static inline int64_t OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters) +{ + int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter); + if (retVal) { + return retVal; + } + return OCLsetKernelParameters_helper(kernel, i + 1, restOfParameters...); +} + +template +static inline int64_t OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) +{ + return OCLsetKernelParameters_helper(kernel, 0, args...); +} + +static inline int64_t clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait = nullptr, cl_int nWaitEvents = 1) +{ + return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 0 : nWaitEvents, wait, pEvent); +} + +struct GPUReconstructionOCLInternals { + cl_platform_id platform; + cl_device_id device; + cl_context context; + cl_command_queue command_queue[GPUCA_MAX_STREAMS]; + cl_mem mem_gpu; + cl_mem mem_constant; + cl_mem mem_host; + cl_program program; + + std::vector> kernels; +}; + +static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); +} // namespace o2::gpu + #define GPUErrorReturn(...) 
\ { \ GPUError(__VA_ARGS__); \ @@ -43,6 +198,77 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_src); QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv); #endif +template +inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +{ + cl_kernel k = _xyz.y.num > 1 ? getKernelObject() : getKernelObject(); + auto& x = _xyz.x; + auto& y = _xyz.y; + auto& z = _xyz.z; + if (y.num <= 1) { + GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...)); + } else { + GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...)); + } + + cl_event ev; + cl_event* evr; + bool tmpEvent = false; + if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + evr = &ev; + tmpEvent = true; + } else { + evr = (cl_event*)z.ev; + } + GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); + if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + cl_ulong time_start, time_end; + GPUFailedMsg(clWaitForEvents(1, evr)); + GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); + GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); + _xyz.t = (time_end - time_start) * 1.e-9f; + if (tmpEvent) { + GPUFailedMsg(clReleaseEvent(ev)); + } + } +} + +template +int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) +{ + std::string name(GetKernelName()); + if (multi) { + name += "_multi"; + } + std::string kname("krnl_" + name); + + cl_int ocl_error; + cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); + if (GPUFailedMsgI(ocl_error)) { + GPUError("Error creating OPENCL Kernel: %s", name.c_str()); + return 1; + } + 
mInternals->kernels.emplace_back(krnl, name); + return 0; +} + +template +inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) +{ + std::string name(GetKernelName()); + if (num > 1) { + name += "_multi"; + } + + for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { + if (mInternals->kernels[k].second == name) { + return (k); + } + } + GPUError("Could not find OpenCL kernel %s", name.c_str()); + throw ::std::runtime_error("Requested unsupported OpenCL kernel"); +} + GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL(cfg); } GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) @@ -561,8 +787,7 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) { - cl_kernel k = args.s.y.num > 1 ? getKernelObject() : getKernelObject(); - std::apply([this, &args, &k](auto&... vals) { runKernelBackendInternal(args.s, k, vals...); }, args.v); + std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); } template diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 9216f1c6f7b76..cd8ffe8bccaa9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -59,8 +59,8 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t AddKernel(bool multi = false); template uint32_t FindKernel(int32_t num); - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... args); + template + void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h deleted file mode 100644 index 83217fd8bb682..0000000000000 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLInternals.h +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionOCLInternals.h -/// \author David Rohr, Sergey Gorbunov - -// All OpenCL-header related stuff goes here, so we can run CING over GPUReconstructionOCL - -#ifndef GPUTPCGPUTRACKEROPENCLINTERNALS_H -#define GPUTPCGPUTRACKEROPENCLINTERNALS_H - -#define CL_TARGET_OPENCL_VERSION 220 -#include -#include -#include -#include -#include -#include "GPULogging.h" - -namespace o2::gpu -{ - -static const char* opencl_error_string(int32_t errorcode) -{ - switch (errorcode) { - case CL_SUCCESS: - return "Success!"; - case CL_DEVICE_NOT_FOUND: - return "Device not found."; - case CL_DEVICE_NOT_AVAILABLE: - return "Device not available"; - case CL_COMPILER_NOT_AVAILABLE: - return "Compiler not available"; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - return "Memory object allocation failure"; - case CL_OUT_OF_RESOURCES: - return "Out of resources"; - case CL_OUT_OF_HOST_MEMORY: - return "Out of host memory"; - case CL_PROFILING_INFO_NOT_AVAILABLE: - return "Profiling information not available"; - case CL_MEM_COPY_OVERLAP: - return "Memory 
copy overlap"; - case CL_IMAGE_FORMAT_MISMATCH: - return "Image format mismatch"; - case CL_IMAGE_FORMAT_NOT_SUPPORTED: - return "Image format not supported"; - case CL_BUILD_PROGRAM_FAILURE: - return "Program build failure"; - case CL_MAP_FAILURE: - return "Map failure"; - case CL_INVALID_VALUE: - return "Invalid value"; - case CL_INVALID_DEVICE_TYPE: - return "Invalid device type"; - case CL_INVALID_PLATFORM: - return "Invalid platform"; - case CL_INVALID_DEVICE: - return "Invalid device"; - case CL_INVALID_CONTEXT: - return "Invalid context"; - case CL_INVALID_QUEUE_PROPERTIES: - return "Invalid queue properties"; - case CL_INVALID_COMMAND_QUEUE: - return "Invalid command queue"; - case CL_INVALID_HOST_PTR: - return "Invalid host pointer"; - case CL_INVALID_MEM_OBJECT: - return "Invalid memory object"; - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return "Invalid image format descriptor"; - case CL_INVALID_IMAGE_SIZE: - return "Invalid image size"; - case CL_INVALID_SAMPLER: - return "Invalid sampler"; - case CL_INVALID_BINARY: - return "Invalid binary"; - case CL_INVALID_BUILD_OPTIONS: - return "Invalid build options"; - case CL_INVALID_PROGRAM: - return "Invalid program"; - case CL_INVALID_PROGRAM_EXECUTABLE: - return "Invalid program executable"; - case CL_INVALID_KERNEL_NAME: - return "Invalid kernel name"; - case CL_INVALID_KERNEL_DEFINITION: - return "Invalid kernel definition"; - case CL_INVALID_KERNEL: - return "Invalid kernel"; - case CL_INVALID_ARG_INDEX: - return "Invalid argument index"; - case CL_INVALID_ARG_VALUE: - return "Invalid argument value"; - case CL_INVALID_ARG_SIZE: - return "Invalid argument size"; - case CL_INVALID_KERNEL_ARGS: - return "Invalid kernel arguments"; - case CL_INVALID_WORK_DIMENSION: - return "Invalid work dimension"; - case CL_INVALID_WORK_GROUP_SIZE: - return "Invalid work group size"; - case CL_INVALID_WORK_ITEM_SIZE: - return "Invalid work item size"; - case CL_INVALID_GLOBAL_OFFSET: - return "Invalid global offset"; - 
case CL_INVALID_EVENT_WAIT_LIST: - return "Invalid event wait list"; - case CL_INVALID_EVENT: - return "Invalid event"; - case CL_INVALID_OPERATION: - return "Invalid operation"; - case CL_INVALID_GL_OBJECT: - return "Invalid OpenGL object"; - case CL_INVALID_BUFFER_SIZE: - return "Invalid buffer size"; - case CL_INVALID_MIP_LEVEL: - return "Invalid mip-map level"; - default: - return "Unknown Errorcode"; - } -} - -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - -static inline int64_t OCLsetKernelParameters_helper(cl_kernel& k, int32_t i) -{ - return 0; -} - -template -static inline int64_t OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters) -{ - int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter); - if (retVal) { - return retVal; - } - return OCLsetKernelParameters_helper(kernel, i + 1, restOfParameters...); -} - -template -static inline int64_t OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) -{ - return OCLsetKernelParameters_helper(kernel, 0, args...); -} - -static inline int64_t clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait = nullptr, cl_int nWaitEvents = 1) -{ - return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 0 : nWaitEvents, wait, pEvent); -} - -struct GPUReconstructionOCLInternals { - cl_platform_id platform; - cl_device_id device; - cl_context context; - cl_command_queue command_queue[GPUCA_MAX_STREAMS]; - cl_mem mem_gpu; - cl_mem mem_constant; - cl_mem mem_host; - cl_program program; - - std::vector> kernels; -}; - -template -inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, K& k, const Args&... 
args) -{ - auto& x = _xyz.x; - auto& y = _xyz.y; - auto& z = _xyz.z; - if (y.num <= 1) { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...)); - } else { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...)); - } - - cl_event ev; - cl_event* evr; - bool tmpEvent = false; - if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { - evr = &ev; - tmpEvent = true; - } else { - evr = (cl_event*)z.ev; - } - GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); - if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { - cl_ulong time_start, time_end; - GPUFailedMsg(clWaitForEvents(1, evr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); - _xyz.t = (time_end - time_start) * 1.e-9f; - if (tmpEvent) { - GPUFailedMsg(clReleaseEvent(ev)); - } - } -} - -template -int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) -{ - std::string name(GetKernelName()); - if (multi) { - name += "_multi"; - } - std::string kname("krnl_" + name); - - cl_int ocl_error; - cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); - if (GPUFailedMsgI(ocl_error)) { - GPUError("Error creating OPENCL Kernel: %s", name.c_str()); - return 1; - } - mInternals->kernels.emplace_back(krnl, name); - return 0; -} - -template -inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) -{ - std::string name(GetKernelName()); - if (num > 1) { - name += "_multi"; - } - - for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { - if (mInternals->kernels[k].second == name) { - return (k); - } - } - GPUError("Could not find OpenCL 
kernel %s", name.c_str()); - throw ::std::runtime_error("Requested unsupported OpenCL kernel"); -} - -static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); -} // namespace o2::gpu - -#endif From 19acaa2e420f87db2e8cad8f3e7b57f51f9695bf Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Feb 2025 01:43:52 +0100 Subject: [PATCH 0125/1914] GPU: For spawning a single task, no need to go through TBB --- GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 18662870ed45e..51da17fe58628 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -45,9 +45,14 @@ void GPUReconstructionProcessing::SetNActiveThreads(int32_t n) void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function lambda) { - tbb::task_arena(SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads)).execute([&] { - tbb::parallel_for(0, nThreads, lambda, tbb::simple_partitioner()); - }); + uint32_t nThreadsAdjusted = SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads); + if (nThreadsAdjusted > 1) { + tbb::task_arena(nThreadsAdjusted).execute([&] { + tbb::parallel_for(0, nThreads, lambda, tbb::simple_partitioner()); + }); + } else { + lambda(0); + } } namespace o2::gpu From 9c90527e88af036a6aa5a4c2871c01606d4f9268 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Feb 2025 01:44:05 +0100 Subject: [PATCH 0126/1914] GPU OpenCL: Add specialization for memclean --- GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index b10f7302c2c8e..0704dc379a27e 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx 
+++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -181,6 +181,13 @@ struct GPUReconstructionOCLInternals { static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); } // namespace o2::gpu +template <> +inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +{ + cl_int4 val0 = {0, 0, 0, 0}; + GPUFailedMsg(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); +} + #define GPUErrorReturn(...) \ { \ GPUError(__VA_ARGS__); \ From 31026278261c7fef7f43ff1212d63bf32fee93b6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Feb 2025 10:55:27 +0100 Subject: [PATCH 0127/1914] GPU: Reorganize some files, split OCL code in kernel and non-kernel related parts --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 4 +- .../Base/cuda/GPUReconstructionCUDA.cu | 3 +- .../Base/cuda/GPUReconstructionCUDADef.h | 2 +- .../GPUReconstructionCUDAExternalProvider.cu | 3 +- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 +- ....h => GPUReconstructionCUDAIncludesHost.h} | 4 + .../Base/cuda/GPUReconstructionCUDAKernels.cu | 3 +- .../GPUReconstructionCUDAkernel.template.cu | 3 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 6 +- ...s.h => GPUReconstructionHIPIncludesHost.h} | 6 +- .../GPUReconstructionHIPkernel.template.hip | 3 +- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 4 +- .../Base/opencl/GPUReconstructionOCL.cxx | 380 +++++------------- .../Base/opencl/GPUReconstructionOCL.h | 9 + .../opencl/GPUReconstructionOCLIncludesHost.h | 82 ++++ .../opencl/GPUReconstructionOCLKernels.cxx | 133 ++++++ 16 files changed, 351 insertions(+), 298 deletions(-) rename GPU/GPUTracking/Base/cuda/{GPUReconstructionCUDAIncludes.h => 
GPUReconstructionCUDAIncludesHost.h} (94%) rename GPU/GPUTracking/Base/hip/{GPUReconstructionHIPIncludes.h => GPUReconstructionHIPIncludesHost.h} (89%) create mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h create mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 5bc1e6e4e6783..a33234db49a27 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -18,7 +18,7 @@ endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) -set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludes.h CUDAThrustHelpers.h) +set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) if(ALIGPU_BUILD_TYPE STREQUAL "O2") @@ -67,7 +67,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cat ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludes.h > ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesHost.h ${GPU_RTC_BIN}.src COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index b195b375b4503..3c118f402dc4f 100644 --- 
a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -13,8 +13,7 @@ /// \author David Rohr #define GPUCA_GPUCODE_HOSTONLY -#include "GPUReconstructionCUDADef.h" -#include "GPUReconstructionCUDAIncludes.h" +#include "GPUReconstructionCUDAIncludesHost.h" #include diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h index 845ccc9ec09b1..7f77925ca3aaa 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUReconstructionCUDDef.h +/// \file GPUReconstructionCUDADef.h /// \author David Rohr #ifndef O2_GPU_GPURECONSTRUCTIONCUDADEF_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 3862a3a476324..6bcafe565e930 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -12,8 +12,7 @@ /// \file GPUReconstructionCUDAExternalProvider.cu /// \author David Rohr -#include "GPUReconstructionCUDADef.h" -#include "GPUReconstructionCUDAIncludes.h" +#include "GPUReconstructionCUDAIncludesHost.h" #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 3bd3afc0ffc23..1a4721035818e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -32,7 +32,9 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch); int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { - std::string 
rtcparam = std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); + std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + + std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludes.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesHost.h similarity index 94% rename from GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludes.h rename to GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesHost.h index ae79494ded496..e3e26e6482fc4 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludes.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesHost.h @@ -32,4 +32,8 @@ #include #include +#ifndef GPUCA_RTC_CODE +#include "GPUReconstructionCUDADef.h" +#endif + #endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 70834d03992d0..c22aff4aab28c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -12,8 +12,7 @@ /// \file GPUReconstructionCUDAKernels.cu /// \author David Rohr -#include "GPUReconstructionCUDADef.h" -#include "GPUReconstructionCUDAIncludes.h" +#include "GPUReconstructionCUDAIncludesHost.h" #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu index c22b873961e09..bcf61eb07383f 100644 --- 
a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu @@ -13,8 +13,7 @@ /// \author David Rohr #define GPUCA_GPUCODE_COMPILEKERNELS -#include "GPUReconstructionCUDAIncludes.h" -#include "GPUReconstructionCUDADef.h" +#include "GPUReconstructionCUDAIncludesHost.h" #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) #define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) #define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__); diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 10fbfa8d21ddf..f6e420d5b9656 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -25,7 +25,7 @@ if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) - set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludes.h) + set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesHost.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) get_filename_component(ABS_CUDA_SORUCE ../cuda/${file} ABSOLUTE) @@ -63,7 +63,7 @@ endif() set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) -set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h 
${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludes.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) +set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) @@ -104,7 +104,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cat ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludes.h > ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesHost.h ${GPU_RTC_BIN}.src COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludes.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h similarity index 89% rename from GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludes.h rename to GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h index 94d3e46b8f462..7117dd0c718c6 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludes.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUReconstructionHIPInclude.h +/// \file GPUReconstructionHIPIncludesHost.h /// \author David Rohr #ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDES_H @@ -27,4 +27,8 @@ #include #pragma GCC diagnostic pop +#ifndef GPUCA_RTC_CODE +#include "GPUReconstructionHIPDef.h" +#endif + #endif diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip index 0ecaf7a83b18c..ddbc9285763a9 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip @@ -13,8 +13,7 @@ /// \author David Rohr #define GPUCA_GPUCODE_COMPILEKERNELS -#include "GPUReconstructionHIPIncludes.h" -#include "GPUReconstructionHIPDef.h" +#include "GPUReconstructionHIPIncludesHost.h" #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) #define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) #define GPUCA_KRNL_LOAD_single(...) 
GPUCA_KRNLGPU_SINGLE(__VA_ARGS__); diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 9bd1da298e9d6..89d2f386f768f 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -36,8 +36,8 @@ set(OCL_DEFINECL "-D$ -#include -#include -#include -#include -#include "GPULogging.h" - -#include "GPUReconstructionOCL.h" -#include "GPUReconstructionIncludes.h" - -using namespace o2::gpu; - -#include -#include -#include -#include - -namespace o2::gpu -{ - -static const char* opencl_error_string(int32_t errorcode) -{ - switch (errorcode) { - case CL_SUCCESS: - return "Success!"; - case CL_DEVICE_NOT_FOUND: - return "Device not found."; - case CL_DEVICE_NOT_AVAILABLE: - return "Device not available"; - case CL_COMPILER_NOT_AVAILABLE: - return "Compiler not available"; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - return "Memory object allocation failure"; - case CL_OUT_OF_RESOURCES: - return "Out of resources"; - case CL_OUT_OF_HOST_MEMORY: - return "Out of host memory"; - case CL_PROFILING_INFO_NOT_AVAILABLE: - return "Profiling information not available"; - case CL_MEM_COPY_OVERLAP: - return "Memory copy overlap"; - case CL_IMAGE_FORMAT_MISMATCH: - return "Image format mismatch"; - case CL_IMAGE_FORMAT_NOT_SUPPORTED: - return "Image format not supported"; - case CL_BUILD_PROGRAM_FAILURE: - return "Program build failure"; - case CL_MAP_FAILURE: - return "Map failure"; - case CL_INVALID_VALUE: - return "Invalid value"; - case CL_INVALID_DEVICE_TYPE: - return "Invalid device type"; - case CL_INVALID_PLATFORM: - return "Invalid platform"; - case CL_INVALID_DEVICE: - return "Invalid device"; - case CL_INVALID_CONTEXT: - return "Invalid context"; - case CL_INVALID_QUEUE_PROPERTIES: - return "Invalid queue properties"; - case CL_INVALID_COMMAND_QUEUE: - return "Invalid command queue"; - case CL_INVALID_HOST_PTR: - return "Invalid host pointer"; - case CL_INVALID_MEM_OBJECT: - return 
"Invalid memory object"; - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return "Invalid image format descriptor"; - case CL_INVALID_IMAGE_SIZE: - return "Invalid image size"; - case CL_INVALID_SAMPLER: - return "Invalid sampler"; - case CL_INVALID_BINARY: - return "Invalid binary"; - case CL_INVALID_BUILD_OPTIONS: - return "Invalid build options"; - case CL_INVALID_PROGRAM: - return "Invalid program"; - case CL_INVALID_PROGRAM_EXECUTABLE: - return "Invalid program executable"; - case CL_INVALID_KERNEL_NAME: - return "Invalid kernel name"; - case CL_INVALID_KERNEL_DEFINITION: - return "Invalid kernel definition"; - case CL_INVALID_KERNEL: - return "Invalid kernel"; - case CL_INVALID_ARG_INDEX: - return "Invalid argument index"; - case CL_INVALID_ARG_VALUE: - return "Invalid argument value"; - case CL_INVALID_ARG_SIZE: - return "Invalid argument size"; - case CL_INVALID_KERNEL_ARGS: - return "Invalid kernel arguments"; - case CL_INVALID_WORK_DIMENSION: - return "Invalid work dimension"; - case CL_INVALID_WORK_GROUP_SIZE: - return "Invalid work group size"; - case CL_INVALID_WORK_ITEM_SIZE: - return "Invalid work item size"; - case CL_INVALID_GLOBAL_OFFSET: - return "Invalid global offset"; - case CL_INVALID_EVENT_WAIT_LIST: - return "Invalid event wait list"; - case CL_INVALID_EVENT: - return "Invalid event"; - case CL_INVALID_OPERATION: - return "Invalid operation"; - case CL_INVALID_GL_OBJECT: - return "Invalid OpenGL object"; - case CL_INVALID_BUFFER_SIZE: - return "Invalid buffer size"; - case CL_INVALID_MIP_LEVEL: - return "Invalid mip-map level"; - default: - return "Unknown Errorcode"; - } -} - -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - -static inline int64_t OCLsetKernelParameters_helper(cl_kernel& k, int32_t i) -{ - return 0; -} - -template -static inline int64_t OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... 
restOfParameters) -{ - int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter); - if (retVal) { - return retVal; - } - return OCLsetKernelParameters_helper(kernel, i + 1, restOfParameters...); -} - -template -static inline int64_t OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) -{ - return OCLsetKernelParameters_helper(kernel, 0, args...); -} - -static inline int64_t clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait = nullptr, cl_int nWaitEvents = 1) -{ - return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 0 : nWaitEvents, wait, pEvent); -} - -struct GPUReconstructionOCLInternals { - cl_platform_id platform; - cl_device_id device; - cl_context context; - cl_command_queue command_queue[GPUCA_MAX_STREAMS]; - cl_mem mem_gpu; - cl_mem mem_constant; - cl_mem mem_host; - cl_program program; - - std::vector> kernels; -}; +#include "GPUReconstructionOCLIncludesHost.h" static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); -} // namespace o2::gpu - -template <> -inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) -{ - cl_int4 val0 = {0, 0, 0, 0}; - GPUFailedMsg(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); -} #define GPUErrorReturn(...) \ { \ @@ -194,88 +22,12 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal -inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) -{ - cl_kernel k = _xyz.y.num > 1 ? 
getKernelObject() : getKernelObject(); - auto& x = _xyz.x; - auto& y = _xyz.y; - auto& z = _xyz.z; - if (y.num <= 1) { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...)); - } else { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...)); - } - - cl_event ev; - cl_event* evr; - bool tmpEvent = false; - if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { - evr = &ev; - tmpEvent = true; - } else { - evr = (cl_event*)z.ev; - } - GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); - if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { - cl_ulong time_start, time_end; - GPUFailedMsg(clWaitForEvents(1, evr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); - _xyz.t = (time_end - time_start) * 1.e-9f; - if (tmpEvent) { - GPUFailedMsg(clReleaseEvent(ev)); - } - } -} - -template -int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) -{ - std::string name(GetKernelName()); - if (multi) { - name += "_multi"; - } - std::string kname("krnl_" + name); - - cl_int ocl_error; - cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); - if (GPUFailedMsgI(ocl_error)) { - GPUError("Error creating OPENCL Kernel: %s", name.c_str()); - return 1; - } - mInternals->kernels.emplace_back(krnl, name); - return 0; -} - -template -inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) -{ - std::string name(GetKernelName()); - if (num > 1) { - name += "_multi"; - } - - for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { - if (mInternals->kernels[k].second == name) { - return (k); - } - } - 
GPUError("Could not find OpenCL kernel %s", name.c_str()); - throw ::std::runtime_error("Requested unsupported OpenCL kernel"); -} - GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL(cfg); } GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) @@ -300,7 +52,7 @@ int32_t GPUReconstructionOCLBackend::GPUFailedMsgAI(const int64_t error, const c if (error == CL_SUCCESS) { return (0); } - GPUError("OCL Error: %ld / %s (%s:%d)", error, opencl_error_string(error), file, line); + GPUError("OCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); return 1; } @@ -791,19 +543,6 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, return (0); } -template -void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) -{ - std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); -} - -template -S& GPUReconstructionOCLBackend::getKernelObject() -{ - static uint32_t krnl = FindKernel(MULTI ? 2 : 1); - return mInternals->kernels[krnl].first; -} - int32_t GPUReconstructionOCLBackend::GetOCLPrograms() { cl_int ocl_error; @@ -842,20 +581,105 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() return 1; } -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ - if (AddKernel(false)) { \ - return 1; \ - } -#define GPUCA_KRNL_LOAD_multi(x_class, ...) 
\ - if (AddKernel(true)) { \ - return 1; \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi + return AddKernels(); +} - return 0; +const char* GPUReconstructionOCLBackend::convertErrorToString(int32_t errorcode) +{ + switch (errorcode) { + case CL_SUCCESS: + return "Success!"; + case CL_DEVICE_NOT_FOUND: + return "Device not found."; + case CL_DEVICE_NOT_AVAILABLE: + return "Device not available"; + case CL_COMPILER_NOT_AVAILABLE: + return "Compiler not available"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: + return "Memory object allocation failure"; + case CL_OUT_OF_RESOURCES: + return "Out of resources"; + case CL_OUT_OF_HOST_MEMORY: + return "Out of host memory"; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return "Profiling information not available"; + case CL_MEM_COPY_OVERLAP: + return "Memory copy overlap"; + case CL_IMAGE_FORMAT_MISMATCH: + return "Image format mismatch"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return "Image format not supported"; + case CL_BUILD_PROGRAM_FAILURE: + return "Program build failure"; + case CL_MAP_FAILURE: + return "Map failure"; + case CL_INVALID_VALUE: + return "Invalid value"; + case CL_INVALID_DEVICE_TYPE: + return "Invalid device type"; + case CL_INVALID_PLATFORM: + return "Invalid platform"; + case CL_INVALID_DEVICE: + return "Invalid device"; + case CL_INVALID_CONTEXT: + return "Invalid context"; + case CL_INVALID_QUEUE_PROPERTIES: + return "Invalid queue properties"; + case CL_INVALID_COMMAND_QUEUE: + return "Invalid command queue"; + case CL_INVALID_HOST_PTR: + return "Invalid host pointer"; + case CL_INVALID_MEM_OBJECT: + return "Invalid memory object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "Invalid image format descriptor"; + case CL_INVALID_IMAGE_SIZE: + return "Invalid image size"; + case CL_INVALID_SAMPLER: + return "Invalid sampler"; + case CL_INVALID_BINARY: + return "Invalid binary"; + case CL_INVALID_BUILD_OPTIONS: + return 
"Invalid build options"; + case CL_INVALID_PROGRAM: + return "Invalid program"; + case CL_INVALID_PROGRAM_EXECUTABLE: + return "Invalid program executable"; + case CL_INVALID_KERNEL_NAME: + return "Invalid kernel name"; + case CL_INVALID_KERNEL_DEFINITION: + return "Invalid kernel definition"; + case CL_INVALID_KERNEL: + return "Invalid kernel"; + case CL_INVALID_ARG_INDEX: + return "Invalid argument index"; + case CL_INVALID_ARG_VALUE: + return "Invalid argument value"; + case CL_INVALID_ARG_SIZE: + return "Invalid argument size"; + case CL_INVALID_KERNEL_ARGS: + return "Invalid kernel arguments"; + case CL_INVALID_WORK_DIMENSION: + return "Invalid work dimension"; + case CL_INVALID_WORK_GROUP_SIZE: + return "Invalid work group size"; + case CL_INVALID_WORK_ITEM_SIZE: + return "Invalid work item size"; + case CL_INVALID_GLOBAL_OFFSET: + return "Invalid global offset"; + case CL_INVALID_EVENT_WAIT_LIST: + return "Invalid event wait list"; + case CL_INVALID_EVENT: + return "Invalid event"; + case CL_INVALID_OPERATION: + return "Invalid operation"; + case CL_INVALID_GL_OBJECT: + return "Invalid OpenGL object"; + case CL_INVALID_BUFFER_SIZE: + return "Invalid buffer size"; + case CL_INVALID_MIP_LEVEL: + return "Invalid mip-map level"; + default: + return "Unknown Errorcode"; + } } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index cd8ffe8bccaa9..15015cdcb43c5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -73,6 +73,15 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase S& getKernelObject(); int32_t GetOCLPrograms(); + + private: + static const char* convertErrorToString(int32_t errorCode); + template + static inline int64_t OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... 
restOfParameters); + template + static int64_t OCLsetKernelParameters(cl_kernel& kernel, const Args&... args); + static int64_t clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent = nullptr, cl_event* wait = nullptr, cl_int nWaitEvents = 1); + int32_t AddKernels(); }; using GPUReconstructionOCL = GPUReconstructionKernels; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h new file mode 100644 index 0000000000000..aec5708a80f3c --- /dev/null +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -0,0 +1,82 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionOCLIncludesHost.h +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONOCLINCLUDESHOST_H +#define GPURECONSTRUCTIONOCLINCLUDESHOST_H + +#define GPUCA_GPUTYPE_OPENCL +#define __OPENCL_HOST__ + +#define CL_TARGET_OPENCL_VERSION 220 +#include +#include +#include +#include +#include +#include "GPULogging.h" + +#include "GPUReconstructionOCL.h" +#include "GPUReconstructionIncludes.h" + +using namespace o2::gpu; + +#include +#include +#include +#include + +#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) +#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) + +namespace o2::gpu +{ +struct GPUReconstructionOCLInternals { + cl_platform_id platform; + cl_device_id device; + cl_context context; + cl_command_queue command_queue[GPUCA_MAX_STREAMS]; + cl_mem mem_gpu; + cl_mem mem_constant; + cl_mem mem_host; + cl_program program; + + std::vector> kernels; +}; +} // namespace o2::gpu + +template +inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters) +{ + int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter); + if (retVal) { + return retVal; + } + if constexpr (sizeof...(restOfParameters) > 0) { + return OCLsetKernelParameters_helper(kernel, i + 1, restOfParameters...); + } + return 0; +} + +template +inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) +{ + return OCLsetKernelParameters_helper(kernel, 0, args...); +} + +inline int64_t GPUReconstructionOCLBackend::clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait, cl_int nWaitEvents) +{ + return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 
0 : nWaitEvents, wait, pEvent); +} + +#endif // GPURECONSTRUCTIONOCLINCLUDESHOST_H diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx new file mode 100644 index 0000000000000..8a1c8a6525c0d --- /dev/null +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -0,0 +1,133 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionOCLKernels.cxx +/// \author David Rohr + +#include "GPUReconstructionOCLIncludesHost.h" + +template <> +inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +{ + cl_int4 val0 = {0, 0, 0, 0}; + GPUFailedMsg(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); +} + +template +inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +{ + cl_kernel k = _xyz.y.num > 1 ? 
getKernelObject() : getKernelObject(); + auto& x = _xyz.x; + auto& y = _xyz.y; + auto& z = _xyz.z; + if (y.num <= 1) { + GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...)); + } else { + GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...)); + } + + cl_event ev; + cl_event* evr; + bool tmpEvent = false; + if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + evr = &ev; + tmpEvent = true; + } else { + evr = (cl_event*)z.ev; + } + GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); + if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + cl_ulong time_start, time_end; + GPUFailedMsg(clWaitForEvents(1, evr)); + GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); + GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); + _xyz.t = (time_end - time_start) * 1.e-9f; + if (tmpEvent) { + GPUFailedMsg(clReleaseEvent(ev)); + } + } +} + +template +void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) +{ + std::apply([this, &args](auto&... 
vals) { runKernelBackendInternal(args.s, vals...); }, args.v); +} + +template +inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) +{ + std::string name(GetKernelName()); + if (num > 1) { + name += "_multi"; + } + + for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { + if (mInternals->kernels[k].second == name) { + return (k); + } + } + GPUError("Could not find OpenCL kernel %s", name.c_str()); + throw ::std::runtime_error("Requested unsupported OpenCL kernel"); +} + +template +int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) +{ + std::string name(GetKernelName()); + if (multi) { + name += "_multi"; + } + std::string kname("krnl_" + name); + + cl_int ocl_error; + cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); + if (GPUFailedMsgI(ocl_error)) { + GPUError("Error creating OPENCL Kernel: %s", name.c_str()); + return 1; + } + mInternals->kernels.emplace_back(krnl, name); + return 0; +} + +template +S& GPUReconstructionOCLBackend::getKernelObject() +{ + static uint32_t krnl = FindKernel(MULTI ? 2 : 1); + return mInternals->kernels[krnl].first; +} + +int32_t GPUReconstructionOCLBackend::AddKernels() +{ +#define GPUCA_KRNL(...) \ + GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) +#define GPUCA_KRNL_LOAD_single(x_class, ...) \ + if (AddKernel(false)) { \ + return 1; \ + } +#define GPUCA_KRNL_LOAD_multi(x_class, ...) 
\ + if (AddKernel(true)) { \ + return 1; \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL +#undef GPUCA_KRNL_LOAD_single +#undef GPUCA_KRNL_LOAD_multi + + return 0; +} + +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ + GPUCA_KRNL_PROP(x_class, x_attributes) \ + template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); +#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL From 31aa182f8bc782be688facc263cbc39bc21e3d92 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Feb 2025 11:02:17 +0100 Subject: [PATCH 0128/1914] GPU OpenCL: Add more human readable error codes, shorten code --- .../Base/opencl/GPUReconstructionOCL.cxx | 164 ++++++++---------- 1 file changed, 68 insertions(+), 96 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index a50e7358e56e3..c7a8be62a12ea 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -14,6 +14,8 @@ #include "GPUReconstructionOCLIncludesHost.h" +#include + static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); #define GPUErrorReturn(...) 
\ @@ -586,100 +588,70 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() const char* GPUReconstructionOCLBackend::convertErrorToString(int32_t errorcode) { - switch (errorcode) { - case CL_SUCCESS: - return "Success!"; - case CL_DEVICE_NOT_FOUND: - return "Device not found."; - case CL_DEVICE_NOT_AVAILABLE: - return "Device not available"; - case CL_COMPILER_NOT_AVAILABLE: - return "Compiler not available"; - case CL_MEM_OBJECT_ALLOCATION_FAILURE: - return "Memory object allocation failure"; - case CL_OUT_OF_RESOURCES: - return "Out of resources"; - case CL_OUT_OF_HOST_MEMORY: - return "Out of host memory"; - case CL_PROFILING_INFO_NOT_AVAILABLE: - return "Profiling information not available"; - case CL_MEM_COPY_OVERLAP: - return "Memory copy overlap"; - case CL_IMAGE_FORMAT_MISMATCH: - return "Image format mismatch"; - case CL_IMAGE_FORMAT_NOT_SUPPORTED: - return "Image format not supported"; - case CL_BUILD_PROGRAM_FAILURE: - return "Program build failure"; - case CL_MAP_FAILURE: - return "Map failure"; - case CL_INVALID_VALUE: - return "Invalid value"; - case CL_INVALID_DEVICE_TYPE: - return "Invalid device type"; - case CL_INVALID_PLATFORM: - return "Invalid platform"; - case CL_INVALID_DEVICE: - return "Invalid device"; - case CL_INVALID_CONTEXT: - return "Invalid context"; - case CL_INVALID_QUEUE_PROPERTIES: - return "Invalid queue properties"; - case CL_INVALID_COMMAND_QUEUE: - return "Invalid command queue"; - case CL_INVALID_HOST_PTR: - return "Invalid host pointer"; - case CL_INVALID_MEM_OBJECT: - return "Invalid memory object"; - case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: - return "Invalid image format descriptor"; - case CL_INVALID_IMAGE_SIZE: - return "Invalid image size"; - case CL_INVALID_SAMPLER: - return "Invalid sampler"; - case CL_INVALID_BINARY: - return "Invalid binary"; - case CL_INVALID_BUILD_OPTIONS: - return "Invalid build options"; - case CL_INVALID_PROGRAM: - return "Invalid program"; - case CL_INVALID_PROGRAM_EXECUTABLE: - return 
"Invalid program executable"; - case CL_INVALID_KERNEL_NAME: - return "Invalid kernel name"; - case CL_INVALID_KERNEL_DEFINITION: - return "Invalid kernel definition"; - case CL_INVALID_KERNEL: - return "Invalid kernel"; - case CL_INVALID_ARG_INDEX: - return "Invalid argument index"; - case CL_INVALID_ARG_VALUE: - return "Invalid argument value"; - case CL_INVALID_ARG_SIZE: - return "Invalid argument size"; - case CL_INVALID_KERNEL_ARGS: - return "Invalid kernel arguments"; - case CL_INVALID_WORK_DIMENSION: - return "Invalid work dimension"; - case CL_INVALID_WORK_GROUP_SIZE: - return "Invalid work group size"; - case CL_INVALID_WORK_ITEM_SIZE: - return "Invalid work item size"; - case CL_INVALID_GLOBAL_OFFSET: - return "Invalid global offset"; - case CL_INVALID_EVENT_WAIT_LIST: - return "Invalid event wait list"; - case CL_INVALID_EVENT: - return "Invalid event"; - case CL_INVALID_OPERATION: - return "Invalid operation"; - case CL_INVALID_GL_OBJECT: - return "Invalid OpenGL object"; - case CL_INVALID_BUFFER_SIZE: - return "Invalid buffer size"; - case CL_INVALID_MIP_LEVEL: - return "Invalid mip-map level"; - default: - return "Unknown Errorcode"; - } + static const std::map error_map = { + {CL_SUCCESS, "CL_SUCCESS"}, + {CL_DEVICE_NOT_FOUND, "CL_DEVICE_NOT_FOUND"}, + {CL_DEVICE_NOT_AVAILABLE, "CL_DEVICE_NOT_AVAILABLE"}, + {CL_COMPILER_NOT_AVAILABLE, "CL_COMPILER_NOT_AVAILABLE"}, + {CL_MEM_OBJECT_ALLOCATION_FAILURE, "CL_MEM_OBJECT_ALLOCATION_FAILURE"}, + {CL_OUT_OF_RESOURCES, "CL_OUT_OF_RESOURCES"}, + {CL_OUT_OF_HOST_MEMORY, "CL_OUT_OF_HOST_MEMORY"}, + {CL_PROFILING_INFO_NOT_AVAILABLE, "CL_PROFILING_INFO_NOT_AVAILABLE"}, + {CL_MEM_COPY_OVERLAP, "CL_MEM_COPY_OVERLAP"}, + {CL_IMAGE_FORMAT_MISMATCH, "CL_IMAGE_FORMAT_MISMATCH"}, + {CL_IMAGE_FORMAT_NOT_SUPPORTED, "CL_IMAGE_FORMAT_NOT_SUPPORTED"}, + {CL_BUILD_PROGRAM_FAILURE, "CL_BUILD_PROGRAM_FAILURE"}, + {CL_MAP_FAILURE, "CL_MAP_FAILURE"}, + {CL_MISALIGNED_SUB_BUFFER_OFFSET, "CL_MISALIGNED_SUB_BUFFER_OFFSET"}, + 
{CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"}, + {CL_COMPILE_PROGRAM_FAILURE, "CL_COMPILE_PROGRAM_FAILURE"}, + {CL_LINKER_NOT_AVAILABLE, "CL_LINKER_NOT_AVAILABLE"}, + {CL_LINK_PROGRAM_FAILURE, "CL_LINK_PROGRAM_FAILURE"}, + {CL_DEVICE_PARTITION_FAILED, "CL_DEVICE_PARTITION_FAILED"}, + {CL_KERNEL_ARG_INFO_NOT_AVAILABLE, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"}, + {CL_INVALID_VALUE, "CL_INVALID_VALUE"}, + {CL_INVALID_DEVICE_TYPE, "CL_INVALID_DEVICE_TYPE"}, + {CL_INVALID_PLATFORM, "CL_INVALID_PLATFORM"}, + {CL_INVALID_DEVICE, "CL_INVALID_DEVICE"}, + {CL_INVALID_CONTEXT, "CL_INVALID_CONTEXT"}, + {CL_INVALID_QUEUE_PROPERTIES, "CL_INVALID_QUEUE_PROPERTIES"}, + {CL_INVALID_COMMAND_QUEUE, "CL_INVALID_COMMAND_QUEUE"}, + {CL_INVALID_HOST_PTR, "CL_INVALID_HOST_PTR"}, + {CL_INVALID_MEM_OBJECT, "CL_INVALID_MEM_OBJECT"}, + {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"}, + {CL_INVALID_IMAGE_SIZE, "CL_INVALID_IMAGE_SIZE"}, + {CL_INVALID_SAMPLER, "CL_INVALID_SAMPLER"}, + {CL_INVALID_BINARY, "CL_INVALID_BINARY"}, + {CL_INVALID_BUILD_OPTIONS, "CL_INVALID_BUILD_OPTIONS"}, + {CL_INVALID_PROGRAM, "CL_INVALID_PROGRAM"}, + {CL_INVALID_PROGRAM_EXECUTABLE, "CL_INVALID_PROGRAM_EXECUTABLE"}, + {CL_INVALID_KERNEL_NAME, "CL_INVALID_KERNEL_NAME"}, + {CL_INVALID_KERNEL_DEFINITION, "CL_INVALID_KERNEL_DEFINITION"}, + {CL_INVALID_KERNEL, "CL_INVALID_KERNEL"}, + {CL_INVALID_ARG_INDEX, "CL_INVALID_ARG_INDEX"}, + {CL_INVALID_ARG_VALUE, "CL_INVALID_ARG_VALUE"}, + {CL_INVALID_ARG_SIZE, "CL_INVALID_ARG_SIZE"}, + {CL_INVALID_KERNEL_ARGS, "CL_INVALID_KERNEL_ARGS"}, + {CL_INVALID_WORK_DIMENSION, "CL_INVALID_WORK_DIMENSION"}, + {CL_INVALID_WORK_GROUP_SIZE, "CL_INVALID_WORK_GROUP_SIZE"}, + {CL_INVALID_WORK_ITEM_SIZE, "CL_INVALID_WORK_ITEM_SIZE"}, + {CL_INVALID_GLOBAL_OFFSET, "CL_INVALID_GLOBAL_OFFSET"}, + {CL_INVALID_EVENT_WAIT_LIST, "CL_INVALID_EVENT_WAIT_LIST"}, + {CL_INVALID_EVENT, "CL_INVALID_EVENT"}, + {CL_INVALID_OPERATION, 
"CL_INVALID_OPERATION"}, + {CL_INVALID_GL_OBJECT, "CL_INVALID_GL_OBJECT"}, + {CL_INVALID_BUFFER_SIZE, "CL_INVALID_BUFFER_SIZE"}, + {CL_INVALID_MIP_LEVEL, "CL_INVALID_MIP_LEVEL"}, + {CL_INVALID_GLOBAL_WORK_SIZE, "CL_INVALID_GLOBAL_WORK_SIZE"}, + {CL_INVALID_PROPERTY, "CL_INVALID_PROPERTY"}, + {CL_INVALID_IMAGE_DESCRIPTOR, "CL_INVALID_IMAGE_DESCRIPTOR"}, + {CL_INVALID_COMPILER_OPTIONS, "CL_INVALID_COMPILER_OPTIONS"}, + {CL_INVALID_LINKER_OPTIONS, "CL_INVALID_LINKER_OPTIONS"}, + {CL_INVALID_DEVICE_PARTITION_COUNT, "CL_INVALID_DEVICE_PARTITION_COUNT"}, + {CL_INVALID_PIPE_SIZE, "CL_INVALID_PIPE_SIZE"}, + {CL_INVALID_DEVICE_QUEUE, "CL_INVALID_DEVICE_QUEUE"}, + {CL_INVALID_SPEC_ID, "CL_INVALID_SPEC_ID"}, + {CL_MAX_SIZE_RESTRICTION_EXCEEDED, "CL_MAX_SIZE_RESTRICTION_EXCEEDED"}}; + auto entry = error_map.find(errorcode); + return (entry != error_map.end()) ? entry->second : "Unknown Errorcode"; } From ebe17268904f2f099e0cf5f2c7d54c40aebd65c3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Feb 2025 22:19:50 +0100 Subject: [PATCH 0129/1914] GPU: Fix typo in debug message --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 6ce07f67c3645..a7e0c2cb827f1 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -568,7 +568,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, memorypool = (void*)((char*)memorypool + GPUProcessor::getAlignment(memorypool)); } if (memorypoolend ? (memorypool > memorypoolend) : ((size_t)ptrDiff(memorypool, memorybase) > memorysize)) { - std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? 
(memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " < " << memorysize << "\n"; + std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " > " << memorysize << "\n"; throw std::bad_alloc(); } if (mProcessingSettings.allocDebugLevel >= 2) { From 20cd4cd58f0ae64abb04f539c3a1530927d6bf45 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Feb 2025 22:37:59 +0100 Subject: [PATCH 0130/1914] GPU Standalone: Add sanity check for benchmark to warn about incompatible options --- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 58866224943c0..682e6913d58d4 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -223,6 +223,10 @@ int32_t ReadConfiguration(int argc, char** argv) } } if (configStandalone.setO2Settings) { + if (!(configStandalone.inputcontrolmem && configStandalone.outputcontrolmem)) { + printf("setO2Settings requires the usage of --inputMemory and --outputMemory as in O2\n"); + return 1; + } if (configStandalone.runGPU) { configStandalone.proc.forceHostMemoryPoolSize = 1024 * 1024 * 1024; } From de1f9ba3fc7505df130ff7de5ab0000a484e530d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Feb 2025 22:40:42 +0100 Subject: [PATCH 0131/1914] GPU TBB: Fix threading in case nThreadsAdjusted becomes 1 --- GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 51da17fe58628..51c48ebbfc0b2 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ 
b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -51,7 +51,9 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr tbb::parallel_for(0, nThreads, lambda, tbb::simple_partitioner()); }); } else { - lambda(0); + for (uint32_t i = 0; i < nThreads; i++) { + lambda(i); + } } } From ba8d53e9c4a59f5212b7043f7240641b315929b5 Mon Sep 17 00:00:00 2001 From: wiechula Date: Tue, 18 Feb 2025 14:10:53 +0100 Subject: [PATCH 0132/1914] Add DCA cut --- .../TPC/workflow/src/MIPTrackFilterSpec.cxx | 63 ++++++++++++++++--- 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx index e3970012d1373..b8b95090d8534 100644 --- a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx +++ b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx @@ -26,7 +26,9 @@ #include "DataFormatsTPC/TrackCuts.h" #include "DetectorsCalibration/Utils.h" #include "Framework/Logger.h" +#include "DetectorsBase/GRPGeomHelper.h" #include "Framework/Task.h" +#include "DetectorsBase/Propagator.h" #include "Framework/DataProcessorSpec.h" #include "Framework/ConfigParamRegistry.h" #include "TPCWorkflow/ProcessingHelpers.h" @@ -40,20 +42,27 @@ namespace o2::tpc class MIPTrackFilterDevice : public Task { public: + MIPTrackFilterDevice(std::shared_ptr gr) : mGRPGeomRequest(gr) {} + void init(framework::InitContext& ic) final; void run(ProcessingContext& pc) final; void endOfStream(EndOfStreamContext& eos) final; + void finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) final; private: void sendOutput(DataAllocator& output); + std::shared_ptr mGRPGeomRequest; TrackCuts mCuts{}; ///< Tracks cuts object std::vector mMIPTracks; ///< Filtered MIP tracks unsigned int mProcessEveryNthTF{1}; ///< process every Nth TF only int mMaxTracksPerTF{-1}; ///< max number of MIP tracks processed per TF uint32_t mTFCounter{0}; ///< counter to keep track of the TFs int mProcessNFirstTFs{0}; ///< 
number of first TFs which are not sampled + float mDCACut{-1}; ///< DCA cut bool mSendDummy{false}; ///< send empty data in case TF is skipped + + bool acceptDCA(const TrackTPC& track); }; void MIPTrackFilterDevice::init(framework::InitContext& ic) @@ -89,6 +98,10 @@ void MIPTrackFilterDevice::init(framework::InitContext& ic) mCuts.setdEdxMin(mindEdx); mCuts.setdEdxMax(maxdEdx); mCuts.setCutLooper(cutLoopers); + + mDCACut = ic.options().get("dca-cut"); + + o2::base::GRPGeomHelper::instance().setRequest(mGRPGeomRequest); } void MIPTrackFilterDevice::run(ProcessingContext& pc) @@ -102,6 +115,7 @@ void MIPTrackFilterDevice::run(ProcessingContext& pc) } return; } + o2::base::GRPGeomHelper::instance().checkUpdates(pc); const auto tracks = pc.inputs().get>("tracks"); const auto nTracks = tracks.size(); @@ -111,7 +125,7 @@ void MIPTrackFilterDevice::run(ProcessingContext& pc) std::vector indices; indices.reserve(nTracks); for (size_t i = 0; i < nTracks; ++i) { - if (mCuts.goodTrack(tracks[i])) { + if (mCuts.goodTrack(tracks[i]) && acceptDCA(tracks[i])) { indices.emplace_back(i); } } @@ -135,7 +149,7 @@ void MIPTrackFilterDevice::run(ProcessingContext& pc) mMIPTracks.emplace_back(tracks[indices[i]]); } } else { - std::copy_if(tracks.begin(), tracks.end(), std::back_inserter(mMIPTracks), [this](const auto& track) { return mCuts.goodTrack(track); }); + std::copy_if(tracks.begin(), tracks.end(), std::back_inserter(mMIPTracks), [this](const auto& track) { return mCuts.goodTrack(track) && acceptDCA(track); }); } LOGP(info, "Filtered {} MIP tracks out of {} total tpc tracks", mMIPTracks.size(), tracks.size()); @@ -143,6 +157,13 @@ void MIPTrackFilterDevice::run(ProcessingContext& pc) mMIPTracks.clear(); } +void MIPTrackFilterDevice::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) +{ + if (o2::base::GRPGeomHelper::instance().finaliseCCDB(matcher, obj)) { + return; + } +} + void MIPTrackFilterDevice::sendOutput(DataAllocator& output) { 
output.snapshot(Output{header::gDataOriginTPC, "MIPS", 0}, mMIPTracks); } void MIPTrackFilterDevice::endOfStream(EndOfStreamContext& eos) @@ -150,18 +171,44 @@ void MIPTrackFilterDevice::endOfStream(EndOfStreamContext& eos) LOG(info) << "Finalizig MIP Tracks filter"; } +bool MIPTrackFilterDevice::acceptDCA(const TrackTPC& track) +{ + if (mDCACut < 0) { + return true; + } + + auto propagator = o2::base::Propagator::Instance(); + o2::gpu::gpustd::array dca; + const o2::math_utils::Point3D refPoint{0, 0, 0}; + o2::track::TrackPar propTrack(track); + const auto ok = propagator->propagateToDCABxByBz(refPoint, propTrack, 2., o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca); + const auto dcar = std::abs(dca[0]); + + return ok && (dcar < mDCACut); +} + DataProcessorSpec getMIPTrackFilterSpec() { std::vector outputs; outputs.emplace_back(header::gDataOriginTPC, "MIPS", 0, Lifetime::Sporadic); + std::vector inputs; + inputs.emplace_back("tracks", "TPC", "TRACKS"); + + auto ggRequest = std::make_shared(false, // orbitResetTime + true, // GRPECS=true + false, // GRPLHCIF + true, // GRPMagField + true, // askMatLUT + o2::base::GRPGeomRequest::Aligned, // geometry + inputs, + true); + return DataProcessorSpec{ "tpc-miptrack-filter", - Inputs{ - InputSpec{"tracks", "TPC", "TRACKS"}, - }, + inputs, outputs, - adaptFromTask(), + adaptFromTask(ggRequest), Options{ {"min-momentum", VariantType::Double, 0.35, {"minimum momentum cut"}}, {"max-momentum", VariantType::Double, 0.55, {"maximum momentum cut"}}, @@ -172,7 +219,9 @@ DataProcessorSpec getMIPTrackFilterSpec() {"maxTracksPerTF", VariantType::Int, -1, {"Maximum number of processed tracks per TF (-1 for processing all tracks)"}}, {"process-first-n-TFs", VariantType::Int, 1, {"Number of first TFs which are not sampled"}}, {"send-dummy-data", VariantType::Bool, false, {"Send empty data in case TF is skipped"}}, - {"dont-cut-loopers", VariantType::Bool, false, {"Do not cut loopers by comparing zout-zin"}}}}; + 
{"dont-cut-loopers", VariantType::Bool, false, {"Do not cut loopers by comparing zout-zin"}}, + {"dca-cut", VariantType::Float, 3.f, {"DCA cut in cm, < 0 to disable"}}, + }}; } } // namespace o2::tpc From cc2174dc71af206e8bf12ef2320519c47f214375 Mon Sep 17 00:00:00 2001 From: wiechula Date: Tue, 25 Feb 2025 18:17:34 +0100 Subject: [PATCH 0133/1914] Improve logging --- Detectors/TPC/calibration/src/CalibdEdx.cxx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Detectors/TPC/calibration/src/CalibdEdx.cxx b/Detectors/TPC/calibration/src/CalibdEdx.cxx index f53749ba8621b..2e20eacb94324 100644 --- a/Detectors/TPC/calibration/src/CalibdEdx.cxx +++ b/Detectors/TPC/calibration/src/CalibdEdx.cxx @@ -549,7 +549,8 @@ void CalibdEdx::finalize(const bool useGausFits) fitter.SetFormula("1"); mCalib.setDims(0); } - LOGP(info, "Fitting {}D dE/dx correction for GEM stacks with gaussian fits {}", mCalib.getDims(), useGausFits); + LOGP(info, "Fitting {}D dE/dx correction for GEM stacks with gaussian fits {}, minStackEntries {}, m2DThreshold {}, m1DThreshold {}, mFitSnp {}", + mCalib.getDims(), useGausFits, entries, m2DThreshold, m1DThreshold, mFitSnp); // if entries below minimum sector threshold, integrate all sectors if (mCalib.getDims() == 0 || entries >= mSectorThreshold) { From 5a7fba3fb395e7d12c0bd2777b44170d6d49ad61 Mon Sep 17 00:00:00 2001 From: wiechula Date: Tue, 25 Feb 2025 18:20:02 +0100 Subject: [PATCH 0134/1914] Restructure pedestal file creation, add DCS CCDB publishing --- .../base/include/TPCBase/CRUCalibHelpers.h | 15 +++- Detectors/TPC/base/src/CRUCalibHelpers.cxx | 50 +++++++++++- .../calibration/macro/preparePedestalFiles.C | 71 ++--------------- .../TPCWorkflow/CalDetMergerPublisherSpec.h | 2 +- .../src/CalDetMergerPublisherSpec.cxx | 76 +++++++++++++++++-- .../TPC/workflow/src/tpc-calib-pad-raw.cxx | 4 +- 6 files changed, 140 insertions(+), 78 deletions(-) diff --git a/Detectors/TPC/base/include/TPCBase/CRUCalibHelpers.h 
b/Detectors/TPC/base/include/TPCBase/CRUCalibHelpers.h index 818a9a41d0dcc..0d6351a326742 100644 --- a/Detectors/TPC/base/include/TPCBase/CRUCalibHelpers.h +++ b/Detectors/TPC/base/include/TPCBase/CRUCalibHelpers.h @@ -93,10 +93,8 @@ constexpr float fixedSizeToFloat(uint32_t value) /// write values of map to fileName /// template -void writeValues(const std::string_view fileName, const DataMap& map, bool onlyFilled = false) +void writeValues(std::ostream& str, const DataMap& map, bool onlyFilled = false) { - std::ofstream str(fileName.data(), std::ofstream::out); - for (const auto& [linkInfo, data] : map) { if (onlyFilled) { if (!std::accumulate(data.begin(), data.end(), uint32_t(0))) { @@ -117,6 +115,13 @@ void writeValues(const std::string_view fileName, const DataMap& map, bool onlyF } } +template +void writeValues(const std::string_view fileName, const DataMap& map, bool onlyFilled = false) +{ + std::ofstream str(fileName.data(), std::ofstream::out); + writeValues(str, map, onlyFilled); +} + template struct is_map { static constexpr bool value = false; @@ -126,7 +131,8 @@ template struct is_map> { static constexpr bool value = true; }; -/// fill cal pad object from HV data map + +/// fill cal pad object from HW data map /// TODO: Function to be tested template typename std::enable_if_t::value, void> @@ -251,6 +257,7 @@ o2::tpc::CalDet getCalPad(const std::string_view fileName, const std::str /// \param minADCROCType can be either one value for all ROC types, or {IROC, OROC}, or {IROC, OROC1, OROC2, OROC3} std::unordered_map preparePedestalFiles(const CalPad& pedestals, const CalPad& noise, std::vector sigmaNoiseROCType = {3, 3, 3, 3}, std::vector minADCROCType = {2, 2, 2, 2}, float pedestalOffset = 0, bool onlyFilled = false, bool maskBad = true, float noisyChannelThreshold = 1.5, float sigmaNoiseNoisyChannels = 4, float badChannelThreshold = 6, bool fixedSize = false); +DataMapU32 getDataMap(const CalPad& calPad); } // namespace o2::tpc::cru_calib_helpers 
#endif diff --git a/Detectors/TPC/base/src/CRUCalibHelpers.cxx b/Detectors/TPC/base/src/CRUCalibHelpers.cxx index f18baa6571f66..fe6023028f6fe 100644 --- a/Detectors/TPC/base/src/CRUCalibHelpers.cxx +++ b/Detectors/TPC/base/src/CRUCalibHelpers.cxx @@ -130,9 +130,6 @@ std::unordered_map cru_calib_helpers::preparePedestalFiles( pedestalsThreshold["PedestalsPhys"] = CalPad("Pedestals"); pedestalsThreshold["ThresholdMapPhys"] = CalPad("ThresholdMap"); - auto& pedestalsCRU = pedestalsThreshold["Pedestals"]; - auto& thresholdCRU = pedestalsThreshold["ThresholdMap"]; - // ===| prepare values |=== for (size_t iroc = 0; iroc < pedestals.getData().size(); ++iroc) { const ROC roc(iroc); @@ -179,7 +176,7 @@ std::unordered_map cru_calib_helpers::preparePedestalFiles( } float noise = std::abs(rocNoise.getValue(ipad)); // it seems with the new fitting procedure, the noise can also be negative, since in gaus sigma is quadratic - float noiseCorr = noise - (0.847601 + 0.031514 * traceLength); + const float noiseCorr = noise - (0.847601 + 0.031514 * traceLength); if ((pedestal <= 0) || (pedestal > 150) || (noise <= 0) || (noise > 50)) { LOGP(info, "Bad pedestal or noise value in ROC {:2}, CRU {:3}, fec in CRU: {:2}, SAMPA: {}, channel: {:2}, pedestal: {:.4f}, noise {:.4f}", iroc, cruID, fecInPartition, sampa, sampaChannel, pedestal, noise); if (maskBad) { @@ -230,3 +227,48 @@ std::unordered_map cru_calib_helpers::preparePedestalFiles( return pedestalsThreshold; } + +cru_calib_helpers::DataMapU32 cru_calib_helpers::getDataMap(const CalPad& calPad) +{ + const auto& mapper = Mapper::instance(); + + DataMapU32 dataMap; + + for (size_t iroc = 0; iroc < calPad.getData().size(); ++iroc) { + const ROC roc(iroc); + + const auto& calRoc = calPad.getCalArray(iroc); + + const int padOffset = roc.isOROC() ? 
mapper.getPadsInIROC() : 0; + + // skip empty ROCs + if (!(std::abs(calRoc.getSum()) > 0)) { + continue; + } + + // loop over pads + for (size_t ipad = 0; ipad < calRoc.getData().size(); ++ipad) { + const int globalPad = ipad + padOffset; + const FECInfo& fecInfo = mapper.fecInfo(globalPad); + const CRU cru = mapper.getCRU(roc.getSector(), globalPad); + const uint32_t region = cru.region(); + const int cruID = cru.number(); + const int sampa = fecInfo.getSampaChip(); + const int sampaChannel = fecInfo.getSampaChannel(); + + const PartitionInfo& partInfo = mapper.getMapPartitionInfo()[cru.partition()]; + const int nFECs = partInfo.getNumberOfFECs(); + const int fecOffset = (nFECs + 1) / 2; + const int fecInPartition = fecInfo.getIndex() - partInfo.getSectorFECOffset(); + const int dataWrapperID = fecInPartition >= fecOffset; + const int globalLinkID = (fecInPartition % fecOffset) + dataWrapperID * 12; + + const int hwChannel = getHWChannel(sampa, sampaChannel, region % 2); + + const auto value = calRoc.getValue(ipad); + dataMap[LinkInfo(cruID, globalLinkID)][hwChannel] = floatToFixedSize(value); + } + } + + return dataMap; +} diff --git a/Detectors/TPC/calibration/macro/preparePedestalFiles.C b/Detectors/TPC/calibration/macro/preparePedestalFiles.C index 0dcd02b64551a..92bc1456e48d7 100644 --- a/Detectors/TPC/calibration/macro/preparePedestalFiles.C +++ b/Detectors/TPC/calibration/macro/preparePedestalFiles.C @@ -60,72 +60,17 @@ void preparePedestalFiles(const std::string_view pedestalFile, std::string outpu f.GetObject("Noise", calNoise); } - DataMapU32 pedestalValues; - DataMapU32 thresholdlValues; - DataMapU32 pedestalValuesPhysics; - DataMapU32 thresholdlValuesPhysics; - auto pedestalsThreshold = preparePedestalFiles(*calPedestal, *calNoise, sigmaNoiseROCType, minADCROCType, pedestalOffset, onlyFilled, maskBad, noisyChannelThreshold, sigmaNoiseNoisyChannels, badChannelThreshold); - // ===| prepare values |=== - for (size_t iroc = 0; iroc < 
calPedestal->getData().size(); ++iroc) { - const ROC roc(iroc); - - const auto& rocPedestal = calPedestal->getCalArray(iroc); - const auto& rocNoise = calNoise->getCalArray(iroc); - auto& rocOut = output.getCalArray(iroc); + const auto& pedestals = pedestalsThreshold["Pedestals"]; + const auto& thresholds = pedestalsThreshold["ThresholdMap"]; + const auto& pedestalsPhys = pedestalsThreshold["PedestalsPhys"]; + const auto& thresholdsPhys = pedestalsThreshold["ThresholdMapPhys"]; - const int padOffset = roc.isOROC() ? mapper.getPadsInIROC() : 0; - - // skip empty - if (!(std::abs(rocPedestal.getSum() + rocNoise.getSum()) > 0)) { - continue; - } - - // loop over pads - for (size_t ipad = 0; ipad < rocPedestal.getData().size(); ++ipad) { - const int globalPad = ipad + padOffset; - const FECInfo& fecInfo = mapper.fecInfo(globalPad); - const CRU cru = mapper.getCRU(roc.getSector(), globalPad); - const uint32_t region = cru.region(); - const int cruID = cru.number(); - const int sampa = fecInfo.getSampaChip(); - const int sampaChannel = fecInfo.getSampaChannel(); - // int globalLinkID = fecInfo.getIndex(); - - const PartitionInfo& partInfo = mapper.getMapPartitionInfo()[cru.partition()]; - const int nFECs = partInfo.getNumberOfFECs(); - const int fecOffset = (nFECs + 1) / 2; - const int fecInPartition = fecInfo.getIndex() - partInfo.getSectorFECOffset(); - const int dataWrapperID = fecInPartition >= fecOffset; - const int globalLinkID = (fecInPartition % fecOffset) + dataWrapperID * 12; - - const auto pedestal = pedestalsThreshold["Pedestals"].getCalArray(iroc).getValue(ipad); - const auto threshold = pedestalsThreshold["ThresholdMap"].getCalArray(iroc).getValue(ipad); - const auto pedestalHighNoise = pedestalsThreshold["PedestalsPhys"].getCalArray(iroc).getValue(ipad); - const auto thresholdHighNoise = pedestalsThreshold["ThresholdMapPhys"].getCalArray(iroc).getValue(ipad); - - const int hwChannel = getHWChannel(sampa, sampaChannel, region % 2); - // for debugging - // 
printf("%4d %4d %4d %4d %4d: %u\n", cru.number(), globalLinkID, hwChannel, fecInfo.getSampaChip(), fecInfo.getSampaChannel(), getADCValue(pedestal)); - - // default thresholds - const auto adcPedestal = floatToFixedSize(pedestal); - const auto adcThreshold = floatToFixedSize(threshold); - pedestalValues[LinkInfo(cruID, globalLinkID)][hwChannel] = adcPedestal; - thresholdlValues[LinkInfo(cruID, globalLinkID)][hwChannel] = adcThreshold; - - // higher thresholds for physics data taking - const auto adcPedestalPhysics = floatToFixedSize(pedestalHighNoise); - const auto adcThresholdPhysics = floatToFixedSize(thresholdHighNoise); - pedestalValuesPhysics[LinkInfo(cruID, globalLinkID)][hwChannel] = adcPedestalPhysics; - thresholdlValuesPhysics[LinkInfo(cruID, globalLinkID)][hwChannel] = adcThresholdPhysics; - // for debugging - // if(!(std::abs(pedestal - fixedSizeToFloat(adcPedestal)) <= 0.5 * 0.25)) { - // printf("%4d %4d %4d %4d %4d: %u %.2f %.4f %.4f\n", cru.number(), globalLinkID, hwChannel, sampa, sampaChannel, adcPedestal, fixedSizeToFloat(adcPedestal), pedestal, pedestal - fixedSizeToFloat(adcPedestal)); - //} - } - } + auto pedestalValues = getDataMap(pedestals); + auto thresholdlValues = getDataMap(thresholds); + auto pedestalValuesPhysics = getDataMap(pedestalsPhys); + auto thresholdlValuesPhysics = getDataMap(thresholdsPhys); // text files const auto outFilePedestalTXT(outputDir + "/pedestal_values.txt"); diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/CalDetMergerPublisherSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/CalDetMergerPublisherSpec.h index 9d365700582b3..b16ef8777193a 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/CalDetMergerPublisherSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/CalDetMergerPublisherSpec.h @@ -23,7 +23,7 @@ namespace o2 namespace tpc { -o2::framework::DataProcessorSpec getCalDetMergerPublisherSpec(uint32_t lanes, bool skipCCDB, bool dumpAfterComplete = false); +o2::framework::DataProcessorSpec 
getCalDetMergerPublisherSpec(uint32_t lanes, bool skipCCDB, bool sendToDCS, bool dumpAfterComplete = false); } // namespace tpc } // namespace o2 diff --git a/Detectors/TPC/workflow/src/CalDetMergerPublisherSpec.cxx b/Detectors/TPC/workflow/src/CalDetMergerPublisherSpec.cxx index 9b1e08de521a2..a504ffa606b84 100644 --- a/Detectors/TPC/workflow/src/CalDetMergerPublisherSpec.cxx +++ b/Detectors/TPC/workflow/src/CalDetMergerPublisherSpec.cxx @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -37,6 +38,7 @@ #include "CCDB/CcdbObjectInfo.h" #include "TPCBase/CDBInterface.h" #include "TPCBase/CalDet.h" +#include "TPCBase/CRUCalibHelpers.h" #include "TPCWorkflow/CalibRawPartInfo.h" #include "TPCWorkflow/CalDetMergerPublisherSpec.h" #include "TPCWorkflow/ProcessingHelpers.h" @@ -52,7 +54,7 @@ class CalDetMergerPublisherSpec : public o2::framework::Task using CcdbObjectInfo = o2::ccdb::CcdbObjectInfo; public: - CalDetMergerPublisherSpec(uint32_t lanes, bool skipCCDB, bool dumpAfterComplete = false) : mLanesToExpect(lanes), mCalibInfos(lanes), mSkipCCDB(skipCCDB), mPublishAfterComplete(dumpAfterComplete) {} + CalDetMergerPublisherSpec(uint32_t lanes, bool skipCCDB, bool sendToDCS, bool dumpAfterComplete = false) : mLanesToExpect(lanes), mCalibInfos(lanes), mSkipCCDB(skipCCDB), mSendToDCS(sendToDCS), mPublishAfterComplete(dumpAfterComplete) {} void init(o2::framework::InitContext& ic) final { @@ -154,10 +156,12 @@ class CalDetMergerPublisherSpec : public o2::framework::Task CDBType mCalDetMapType; ///< calibration type of CalDetMap object uint64_t mRunNumber{0}; ///< processed run number uint32_t mLanesToExpect{0}; ///< number of expected lanes sending data + uint32_t mDCSSpecOffset{32768}; ///< offset for DCS specs bool mForceQuit{false}; ///< for quit after processing finished bool mDirectFileDump{false}; ///< directly dump the calibration data to file bool mPublishAfterComplete{false}; ///< dump calibration directly after data from all lanes received bool 
mSkipCCDB{false}; ///< skip sending of calibration data + bool mSendToDCS{false}; ///< skip sending of calibration data bool mCheckCalibInfos{false}; ///< check calib infos //____________________________________________________________________________ @@ -170,7 +174,6 @@ class CalDetMergerPublisherSpec : public o2::framework::Task } // perhaps should be changed to time of the run - const auto now = std::chrono::system_clock::now(); const long timeStart = mCalibInfos[0].tfIDInfo.creation + mCalibInfos[0].publishCycle; const long timeEnd = o2::ccdb::CcdbObjectInfo::INFINITE_TIMESTAMP; @@ -193,6 +196,11 @@ class CalDetMergerPublisherSpec : public o2::framework::Task o2::header::DataHeader::SubSpecificationType subSpec{(o2::header::DataHeader::SubSpecificationType)mCalDetMapType}; output.snapshot(Output{clbUtils::gDataOriginCDBPayload, "TPC_CALIB", subSpec}, *image.get()); output.snapshot(Output{clbUtils::gDataOriginCDBWrapper, "TPC_CALIB", subSpec}, w); + + // for pedestal calibration send to DCS if requested + if (mSendToDCS && (mCalDetMapType == CDBType::CalPedestalNoise)) { + sendPedestalNoiseToDCS(output); + } } for (auto& [type, object] : mMergedCalDets) { @@ -238,9 +246,62 @@ class CalDetMergerPublisherSpec : public o2::framework::Task } } } + + void sendPedestalNoiseToDCS(DataAllocator& output) + { + auto sendObject = [this, &output](const CalPad& data, const std::string& path, const std::string& fileNameBase = "") { + const long timeStart = mCalibInfos[0].tfIDInfo.creation + mCalibInfos[0].publishCycle; + const long timeEnd = o2::ccdb::CcdbObjectInfo::INFINITE_TIMESTAMP; + + const auto dataMap = cru_calib_helpers::getDataMap(data); + std::ostringstream dataStr; + cru_calib_helpers::writeValues(dataStr, dataMap); + + std::vector dataVec; + const auto& str = dataStr.str(); + std::copy(str.begin(), str.end(), std::back_inserter(dataVec)); + + o2::ccdb::CcdbObjectInfo w; + + w.setPath(path); + w.setFileName(fmt::format("{}_{}_{}.txt", fileNameBase, mRunNumber, 
timeStart)); + w.setStartValidityTimestamp(timeStart); + w.setEndValidityTimestamp(timeEnd); + + auto md = w.getMetaData(); + md[o2::base::NameConf::CCDBRunTag.data()] = std::to_string(mRunNumber); + w.setMetaData(md); + + LOGP(info, "Sending object to DCS DB {}/{} of size {} ({}) bytes, valid for {} : {}", w.getPath(), w.getFileName(), dataVec.size(), dataStr.str().size(), w.getStartValidityTimestamp(), w.getEndValidityTimestamp()); + + o2::header::DataHeader::SubSpecificationType subSpec{(o2::header::DataHeader::SubSpecificationType)mCalDetMapType + mDCSSpecOffset}; + output.snapshot(Output{clbUtils::gDataOriginCDBPayload, "TPC_CALIB_DCS", subSpec}, dataVec); + output.snapshot(Output{clbUtils::gDataOriginCDBWrapper, "TPC_CALIB_DCS", subSpec}, w); + }; + + const auto& pedestals = mMergedCalDetsMap.at("Pedestals"); + const auto& noise = mMergedCalDetsMap.at("Noise"); + + bool first = true; + for (auto threshold : {2.5f, 3.f, 3.5f}) { + auto pedestalsThreshold = cru_calib_helpers::preparePedestalFiles(pedestals, noise, {threshold}); + + // pedestals don't depend on threshold, publish on first iteration only + if (first) { + const auto& pedestalsPhys = pedestalsThreshold["PedestalsPhys"]; + sendObject(pedestalsPhys, "TPC/Calib/PedestalsPhys", "Pedestals"); + } + + const auto& thresholdsPhys = pedestalsThreshold["ThresholdMapPhys"]; + const auto fileNameBase = fmt::format("ThresholdsPhys-{:.0f}", threshold * 10); + sendObject(thresholdsPhys, "TPC/Calib/" + fileNameBase, fileNameBase); + + first = false; + } + } }; -o2::framework::DataProcessorSpec o2::tpc::getCalDetMergerPublisherSpec(uint32_t lanes, bool skipCCDB, bool dumpAfterComplete) +o2::framework::DataProcessorSpec o2::tpc::getCalDetMergerPublisherSpec(uint32_t lanes, bool skipCCDB, bool sendToDCS, bool dumpAfterComplete) { std::vector outputs; if (!skipCCDB) { @@ -248,6 +309,11 @@ o2::framework::DataProcessorSpec o2::tpc::getCalDetMergerPublisherSpec(uint32_t 
outputs.emplace_back(ConcreteDataTypeMatcher{clbUtils::gDataOriginCDBWrapper, "TPC_CALIB"}, Lifetime::Sporadic); } + if (sendToDCS) { + outputs.emplace_back(ConcreteDataTypeMatcher{clbUtils::gDataOriginCDBPayload, "TPC_CALIB_DCS"}, Lifetime::Sporadic); + outputs.emplace_back(ConcreteDataTypeMatcher{clbUtils::gDataOriginCDBWrapper, "TPC_CALIB_DCS"}, Lifetime::Sporadic); + } + std::vector inputs; inputs.emplace_back("clbPayload", ConcreteDataTypeMatcher{gDataOriginTPC, "CLBPART"}, Lifetime::Sporadic); inputs.emplace_back("clbInfo", ConcreteDataTypeMatcher{gDataOriginTPC, "CLBPARTINFO"}, Lifetime::Sporadic); @@ -258,11 +324,11 @@ o2::framework::DataProcessorSpec o2::tpc::getCalDetMergerPublisherSpec(uint32_t id.data(), inputs, outputs, - AlgorithmSpec{adaptFromTask(lanes, skipCCDB, dumpAfterComplete)}, + AlgorithmSpec{adaptFromTask(lanes, skipCCDB, sendToDCS, dumpAfterComplete)}, Options{ {"force-quit", VariantType::Bool, false, {"force quit after max-events have been reached"}}, {"direct-file-dump", VariantType::Bool, false, {"directly dump calibration to file"}}, {"check-calib-infos", VariantType::Bool, false, {"make consistency check of calib infos"}}, } // end Options - }; // end DataProcessorSpec + }; // end DataProcessorSpec } diff --git a/Detectors/TPC/workflow/src/tpc-calib-pad-raw.cxx b/Detectors/TPC/workflow/src/tpc-calib-pad-raw.cxx index b58a5f5f84a51..9130e70ace157 100644 --- a/Detectors/TPC/workflow/src/tpc-calib-pad-raw.cxx +++ b/Detectors/TPC/workflow/src/tpc-calib-pad-raw.cxx @@ -61,6 +61,7 @@ void customize(std::vector& workflowOptions) {"configFile", VariantType::String, "", {"configuration file for configurable parameters"}}, {"calib-type", VariantType::String, "pedestal", {"Calibration type to run: pedestal, pulser, ce"}}, {"no-write-ccdb", VariantType::Bool, false, {"skip sending the calibration output to CCDB"}}, + {"send-to-dcs-ccdb", VariantType::Bool, false, {"Send values to DCS DB"}}, {"lanes", VariantType::Int, defaultlanes, {"Number of 
parallel processing lanes."}}, {"sectors", VariantType::String, sectorDefault.c_str(), {"List of TPC sectors, comma separated ranges, e.g. 0-3,7,9-15"}}, }; @@ -83,6 +84,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) std::string inputSpec = config.options().get("input-spec"); const auto skipCCDB = config.options().get("no-write-ccdb"); + const auto sendToDCS = config.options().get("send-to-dcs-ccdb"); const auto publishAfterTFs = config.options().get("publish-after-tfs"); const auto tpcsectors = o2::RangeTokenizer::tokenize(config.options().get("sectors")); @@ -121,7 +123,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) workflow.emplace_back(getTPCCalibPadRawSpec(inputSpec, ilane, range, publishAfterTFs, rawType)); } - workflow.emplace_back(getCalDetMergerPublisherSpec(nLanes, skipCCDB, publishAfterTFs > 0)); + workflow.emplace_back(getCalDetMergerPublisherSpec(nLanes, skipCCDB, sendToDCS, publishAfterTFs > 0)); return workflow; } From 5cecce865b06547ec792c39bf0222d6476bb0798 Mon Sep 17 00:00:00 2001 From: aferrero2707 Date: Fri, 28 Feb 2025 09:49:43 +0100 Subject: [PATCH 0135/1914] [MCH] enable mapping fixes in ASYNC mode The MCHDigitModifier.updateST1=true and MCHDigitModifier.updateST2=true options are added to the MCH reco workflow for the ASYNC processing of real data. This enables the code that fixes the readout mapping in existing CTFs for some parts of ST1 and ST2 detectors. The mapping has also been corrected in the O2 code. Therefore the remapping is disabled for SYNC processing and MC simulations, since they already apply the correct mapping to generate the digits. 
--- prodtests/full-system-test/dpl-workflow.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index ef09bb76c9658..0f5083dbcdebb 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -142,6 +142,10 @@ else elif [[ $BEAMTYPE == "PbPb" ]]; then ITS_CONFIG_KEY+="ITSVertexerParam.lowMultBeamDistCut=0;ITSCATrackerParam.nROFsPerIterations=12;ITSCATrackerParam.perPrimaryVertexProcessing=true;" fi + + if [[ $IS_SIMULATED_DATA == 0 && $CTFINPUT == 1 ]]; then # Enable fixes to the MCH readout mapping for async processing of real data + MCH_CONFIG_KEY+="MCHDigitModifier.updateST1=true;MCHDigitModifier.updateST2=true;" + fi fi [[ $CTFINPUT == 1 ]] && GPU_CONFIG_KEY+="GPU_proc.tpcInputWithClusterRejection=1;" [[ ! -z $NTRDTRKTHREADS ]] && TRD_CONFIG_KEY+="GPU_proc.ompThreads=$NTRDTRKTHREADS;" From 7c97d71be2bfb26275a35e7acdc01c5abc8eb19c Mon Sep 17 00:00:00 2001 From: nivram-phy Date: Sat, 1 Mar 2025 17:16:52 +0100 Subject: [PATCH 0136/1914] Merge noise branch (#13992) * Adding functionalities to produced merged noise map objects * Adding functionalities to produced merged noise map objects * Clang format * Fixing typo * fixing typos * Please consider the following formatting changes * fixing typos * Please consider the following formatting changes * fixing typos in copyright issue notice * making small changes to make code more stable * fixing typos * Please consider the following formatting changes * modifying checks on validtime * modifying checks on validtime * Adding ccdbmerged object to the right call function * fixing space issues * fixing space issues * fixing space issues * fixing space issues * copyright header issue * clang format issue * Update NoiseCalibratorSpec.cxx --------- Co-authored-by: Niveditha Ramasubramanian Co-authored-by: ALICE Action Bot --- Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx | 4 
++++ 1 file changed, 4 insertions(+) diff --git a/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx b/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx index 8c79dcfebd8db..617e02ebb9d9c 100644 --- a/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx +++ b/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx @@ -83,6 +83,8 @@ void NoiseCalibratorSpec::run(ProcessingContext& pc) } else { LOG(info) << "Sending an object to Production-CCDB and DCS-CCDB"; sendOutputCcdbDcs(pc.outputs()); + LOG(info) << "Sending an object to Production-CCDBMerge"; + sendOutputCcdbMerge(pc.outputs()); } pc.services().get().readyToQuit(mStopMeOnly ? QuitRequest::Me : QuitRequest::All); } @@ -105,6 +107,8 @@ void NoiseCalibratorSpec::run(ProcessingContext& pc) } else { LOG(info) << "Sending an object to Production-CCDB and DCS-CCDB"; sendOutputCcdbDcs(pc.outputs()); + LOG(info) << "Sending an object to Production-CCDBMerge"; + sendOutputCcdbMerge(pc.outputs()); } pc.services().get().readyToQuit(mStopMeOnly ? 
QuitRequest::Me : QuitRequest::All); } From 9a3eb3ee117bb6bca491923dfd8534451de36d72 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 19 Feb 2025 14:28:21 +0100 Subject: [PATCH 0137/1914] DPL: improve flat file support for RNTuple --- Framework/AnalysisSupport/src/Plugin.cxx | 40 ++++++++++++++++++++---- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/Framework/AnalysisSupport/src/Plugin.cxx b/Framework/AnalysisSupport/src/Plugin.cxx index e3a39761e8049..033adc461c600 100644 --- a/Framework/AnalysisSupport/src/Plugin.cxx +++ b/Framework/AnalysisSupport/src/Plugin.cxx @@ -85,20 +85,48 @@ std::vector getListOfTables(std::unique_ptr& f) { std::vector r; TList* keyList = f->GetListOfKeys(); + // We should handle two cases, one where the list of tables in a TDirectory, + // the other one where the dataframe number is just a prefix + std::string first = ""; for (auto key : *keyList) { - if (!std::string_view(key->GetName()).starts_with("DF_")) { + if (!std::string_view(key->GetName()).starts_with("DF_") && !std::string_view(key->GetName()).starts_with("/DF_")) { continue; } - auto* d = (TDirectory*)f->Get(key->GetName()); - TList* branchList = d->GetListOfKeys(); - for (auto b : *branchList) { - r.emplace_back(b->GetName()); + auto* d = (TDirectory*)f->GetObjectChecked(key->GetName(), TClass::GetClass("TDirectory")); + // Objects are in a folder, list it. + if (d) { + TList* branchList = d->GetListOfKeys(); + for (auto b : *branchList) { + r.emplace_back(b->GetName()); + } + break; + } + + void* v = f->GetObjectChecked(key->GetName(), TClass::GetClass("ROOT::Experimental::RNTuple")); + if (v) { + std::string s = key->GetName(); + size_t pos = s.find('-'); + // Check if '-' is found + // Skip metaData and parentFiles + if (pos == std::string::npos) { + continue; + } + std::string t = s.substr(pos + 1); + // If we find a duplicate table name, it means we are in the next DF and we can stop. 
+ if (t == first) { + break; + } + if (first.empty()) { + first = t; + } + // Create a new string starting after the '-' + r.emplace_back(t); } - break; } return r; } + auto readMetadata(std::unique_ptr& currentFile) -> std::vector { // Get the metadata, if any From 8c306c388a25893d5064fafb3ede56cae7d110da Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 19 Feb 2025 14:28:21 +0100 Subject: [PATCH 0138/1914] DPL: Move DataInputDirector to arrow::Dataset API --- .../src/AODJAlienReaderHelpers.cxx | 16 +- .../AnalysisSupport/src/DataInputDirector.cxx | 192 ++++++++++++------ .../AnalysisSupport/src/DataInputDirector.h | 20 +- Framework/AnalysisSupport/src/TTreePlugin.cxx | 4 + .../TestWorkflows/src/o2TestHistograms.cxx | 4 +- 5 files changed, 155 insertions(+), 81 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx index 9c19de85739ce..f8a9705e4eb62 100644 --- a/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx @@ -10,10 +10,12 @@ // or submit itself to any jurisdiction. 
#include "AODJAlienReaderHelpers.h" +#include #include "Framework/TableTreeHelpers.h" #include "Framework/AnalysisHelpers.h" #include "Framework/DataProcessingStats.h" #include "Framework/RootTableBuilderHelpers.h" +#include "Framework/RootArrowFilesystem.h" #include "Framework/AlgorithmSpec.h" #include "Framework/ConfigParamRegistry.h" #include "Framework/ControlService.h" @@ -41,6 +43,8 @@ #include #include #include +#include +#include using namespace o2; using namespace o2::aod; @@ -272,11 +276,13 @@ AlgorithmSpec AODJAlienReaderHelpers::rootFileReaderCallback(ConfigContext const // Origin file name for derived output map auto o2 = Output(TFFileNameHeader); auto fileAndFolder = didir->getFileFolder(dh, fcnt, ntf); - std::string currentFilename(fileAndFolder.file->GetName()); - if (strcmp(fileAndFolder.file->GetEndpointUrl()->GetProtocol(), "file") == 0 && fileAndFolder.file->GetEndpointUrl()->GetFile()[0] != '/') { + auto rootFS = std::dynamic_pointer_cast(fileAndFolder.filesystem()); + auto* f = dynamic_cast(rootFS->GetFile()); + std::string currentFilename(f->GetFile()->GetName()); + if (strcmp(f->GetEndpointUrl()->GetProtocol(), "file") == 0 && f->GetEndpointUrl()->GetFile()[0] != '/') { // This is not an absolute local path. Make it absolute. static std::string pwd = gSystem->pwd() + std::string("/"); - currentFilename = pwd + std::string(fileAndFolder.file->GetName()); + currentFilename = pwd + std::string(f->GetName()); } outputs.make(o2) = currentFilename; } @@ -312,7 +318,9 @@ AlgorithmSpec AODJAlienReaderHelpers::rootFileReaderCallback(ConfigContext const auto concrete = DataSpecUtils::asConcreteDataMatcher(firstRoute.matcher); auto dh = header::DataHeader(concrete.description, concrete.origin, concrete.subSpec); auto fileAndFolder = didir->getFileFolder(dh, fcnt, ntf); - if (!fileAndFolder.file) { + + // In case the filesource is empty, move to the next one. 
+ if (fileAndFolder.path().empty()) { fcnt += 1; ntf = 0; if (didir->atEnd(fcnt)) { diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 172ecd66c0e64..dd0238af8ddc0 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -11,6 +11,8 @@ #include "DataInputDirector.h" #include "Framework/DataDescriptorQueryBuilder.h" #include "Framework/Logger.h" +#include "Framework/PluginManager.h" +#include "Framework/RootArrowFilesystem.h" #include "Framework/AnalysisDataModelHelpers.h" #include "Framework/Output.h" #include "Headers/DataHeader.h" @@ -26,8 +28,12 @@ #include "TGrid.h" #include "TObjString.h" #include "TMap.h" +#include "TFile.h" +#include +#include #include +#include #if __has_include() #include @@ -47,12 +53,27 @@ FileNameHolder* makeFileNameHolder(std::string fileName) return fileNameHolder; } -DataInputDescriptor::DataInputDescriptor(bool alienSupport, int level, o2::monitoring::Monitoring* monitoring, int allowedParentLevel, std::string parentFileReplacement) : mAlienSupport(alienSupport), - mMonitoring(monitoring), - mAllowedParentLevel(allowedParentLevel), - mParentFileReplacement(std::move(parentFileReplacement)), - mLevel(level) +DataInputDescriptor::DataInputDescriptor(bool alienSupport, int level, o2::monitoring::Monitoring* monitoring, int allowedParentLevel, std::string parentFileReplacement) + : mAlienSupport(alienSupport), + mMonitoring(monitoring), + mAllowedParentLevel(allowedParentLevel), + mParentFileReplacement(std::move(parentFileReplacement)), + mLevel(level) { + std::vector capabilitiesSpecs = { + "O2Framework:RNTupleObjectReadingCapability", + "O2Framework:TTreeObjectReadingCapability", + }; + + std::vector plugins; + for (auto spec : capabilitiesSpecs) { + auto morePlugins = PluginManager::parsePluginSpecString(spec); + for (auto& extra : morePlugins) { + plugins.push_back(extra); + } + } + + 
PluginManager::loadFromPlugin(plugins, mFactory.capabilities); } void DataInputDescriptor::printOut() @@ -108,20 +129,22 @@ bool DataInputDescriptor::setFile(int counter) // open file auto filename = mfilenames[counter]->fileName; - if (mcurrentFile) { - if (mcurrentFile->GetName() == filename) { + auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); + if (rootFS.get()) { + if (rootFS->GetFile()->GetName() == filename) { return true; } closeInputFile(); } - mcurrentFile = TFile::Open(filename.c_str()); - if (!mcurrentFile) { + + mCurrentFilesystem = std::make_shared(TFile::Open(filename.c_str()), 50 * 1024 * 1024, mFactory); + if (!mCurrentFilesystem.get()) { throw std::runtime_error(fmt::format("Couldn't open file \"{}\"!", filename)); } - mcurrentFile->SetReadaheadSize(50 * 1024 * 1024); + rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); // get the parent file map if exists - mParentFileMap = (TMap*)mcurrentFile->Get("parentFiles"); // folder name (DF_XXX) --> parent file (absolute path) + mParentFileMap = (TMap*)rootFS->GetFile()->Get("parentFiles"); // folder name (DF_XXX) --> parent file (absolute path) if (mParentFileMap && !mParentFileReplacement.empty()) { auto pos = mParentFileReplacement.find(';'); if (pos == std::string::npos) { @@ -140,16 +163,28 @@ bool DataInputDescriptor::setFile(int counter) // get the directory names if (mfilenames[counter]->numberOfTimeFrames <= 0) { - std::regex TFRegex = std::regex("DF_[0-9]+"); - TList* keyList = mcurrentFile->GetListOfKeys(); + const std::regex TFRegex = std::regex("/?DF_([0-9]+)(|-.*)$"); + TList* keyList = rootFS->GetFile()->GetListOfKeys(); + std::vector finalList; // extract TF numbers and sort accordingly + // We use an extra seen set to make sure we preserve the order in which + // we instert things in the final list and to make sure we do not have duplicates. 
+ // Multiple folder numbers can happen if we use a flat structure /DF_- + std::unordered_set seen; for (auto key : *keyList) { - if (std::regex_match(((TObjString*)key)->GetString().Data(), TFRegex)) { - auto folderNumber = std::stoul(std::string(((TObjString*)key)->GetString().Data()).substr(3)); - mfilenames[counter]->listOfTimeFrameNumbers.emplace_back(folderNumber); + std::smatch matchResult; + std::string keyName = ((TObjString*)key)->GetString().Data(); + bool match = std::regex_match(keyName, matchResult, TFRegex); + if (match) { + auto folderNumber = std::stoul(matchResult[1].str()); + if (seen.find(folderNumber) == seen.end()) { + seen.insert(folderNumber); + mfilenames[counter]->listOfTimeFrameNumbers.emplace_back(folderNumber); + } } } + if (mParentFileMap != nullptr) { // If we have a parent map, we should not process in DF alphabetical order but according to parent file to avoid swapping between files std::sort(mfilenames[counter]->listOfTimeFrameNumbers.begin(), mfilenames[counter]->listOfTimeFrameNumbers.end(), @@ -162,12 +197,8 @@ bool DataInputDescriptor::setFile(int counter) std::sort(mfilenames[counter]->listOfTimeFrameNumbers.begin(), mfilenames[counter]->listOfTimeFrameNumbers.end()); } - for (auto folderNumber : mfilenames[counter]->listOfTimeFrameNumbers) { - auto folderName = "DF_" + std::to_string(folderNumber); - mfilenames[counter]->listOfTimeFrameKeys.emplace_back(folderName); - mfilenames[counter]->alreadyRead.emplace_back(false); - } - mfilenames[counter]->numberOfTimeFrames = mfilenames[counter]->listOfTimeFrameKeys.size(); + mfilenames[counter]->alreadyRead.resize(mfilenames[counter]->alreadyRead.size() + mfilenames[counter]->listOfTimeFrameNumbers.size(), false); + mfilenames[counter]->numberOfTimeFrames = mfilenames[counter]->listOfTimeFrameNumbers.size(); } mCurrentFileID = counter; @@ -193,26 +224,21 @@ uint64_t DataInputDescriptor::getTimeFrameNumber(int counter, int numTF) return 
(mfilenames[counter]->listOfTimeFrameNumbers)[numTF]; } -FileAndFolder DataInputDescriptor::getFileFolder(int counter, int numTF) +arrow::dataset::FileSource DataInputDescriptor::getFileFolder(int counter, int numTF) { - FileAndFolder fileAndFolder; - // open file if (!setFile(counter)) { - return fileAndFolder; + return {}; } // no TF left if (mfilenames[counter]->numberOfTimeFrames > 0 && numTF >= mfilenames[counter]->numberOfTimeFrames) { - return fileAndFolder; + return {}; } - fileAndFolder.file = mcurrentFile; - fileAndFolder.folderName = (mfilenames[counter]->listOfTimeFrameKeys)[numTF]; - mfilenames[counter]->alreadyRead[numTF] = true; - return fileAndFolder; + return {fmt::format("DF_{}", mfilenames[counter]->listOfTimeFrameNumbers[numTF]), mCurrentFilesystem}; } DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, std::string treename) @@ -221,17 +247,19 @@ DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, // This file has no parent map return nullptr; } - auto folderName = (mfilenames[counter]->listOfTimeFrameKeys)[numTF]; + auto folderName = fmt::format("DF_{}", mfilenames[counter]->listOfTimeFrameNumbers[numTF]); auto parentFileName = (TObjString*)mParentFileMap->GetValue(folderName.c_str()); + // The current DF is not found in the parent map (this should not happen and is a fatal error) + auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); if (!parentFileName) { - // The current DF is not found in the parent map (this should not happen and is a fatal error) - throw std::runtime_error(fmt::format(R"(parent file map exists but does not contain the current DF "{}" in file "{}")", folderName.c_str(), mcurrentFile->GetName())); + throw std::runtime_error(fmt::format(R"(parent file map exists but does not contain the current DF "{}" in file "{}")", folderName.c_str(), rootFS->GetFile()->GetName())); return nullptr; } if (mParentFile) { // Is this still the corresponding to the correct file? 
- if (parentFileName->GetString().CompareTo(mParentFile->mcurrentFile->GetName()) == 0) { + auto parentRootFS = std::dynamic_pointer_cast(mParentFile->mCurrentFilesystem); + if (parentFileName->GetString().CompareTo(parentRootFS->GetFile()->GetName()) == 0) { return mParentFile; } else { mParentFile->closeInputFile(); @@ -241,7 +269,8 @@ DataInputDescriptor* DataInputDescriptor::getParentFile(int counter, int numTF, } if (mLevel == mAllowedParentLevel) { - throw std::runtime_error(fmt::format(R"(while looking for tree "{}", the parent file was requested but we are already at level {} of maximal allowed level {} for DF "{}" in file "{}")", treename.c_str(), mLevel, mAllowedParentLevel, folderName.c_str(), mcurrentFile->GetName())); + throw std::runtime_error(fmt::format(R"(while looking for tree "{}", the parent file was requested but we are already at level {} of maximal allowed level {} for DF "{}" in file "{}")", treename.c_str(), mLevel, mAllowedParentLevel, folderName.c_str(), + rootFS->GetFile()->GetName())); } LOGP(info, "Opening parent file {} for DF {}", parentFileName->GetString().Data(), folderName.c_str()); @@ -270,11 +299,13 @@ void DataInputDescriptor::printFileStatistics() if (wait_time < 0) { wait_time = 0; } - std::string monitoringInfo(fmt::format("lfn={},size={},total_df={},read_df={},read_bytes={},read_calls={},io_time={:.1f},wait_time={:.1f},level={}", mcurrentFile->GetName(), - mcurrentFile->GetSize(), getTimeFramesInFile(mCurrentFileID), getReadTimeFramesInFile(mCurrentFileID), mcurrentFile->GetBytesRead(), mcurrentFile->GetReadCalls(), + auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); + auto f = dynamic_cast(rootFS->GetFile()); + std::string monitoringInfo(fmt::format("lfn={},size={},total_df={},read_df={},read_bytes={},read_calls={},io_time={:.1f},wait_time={:.1f},level={}", f->GetName(), + f->GetSize(), getTimeFramesInFile(mCurrentFileID), getReadTimeFramesInFile(mCurrentFileID), f->GetBytesRead(), f->GetReadCalls(), 
((float)mIOTime / 1e9), ((float)wait_time / 1e9), mLevel)); #if __has_include() - auto alienFile = dynamic_cast(mcurrentFile); + auto alienFile = dynamic_cast(f); if (alienFile) { monitoringInfo += fmt::format(",se={},open_time={:.1f}", alienFile->GetSE(), alienFile->GetElapsed()); } @@ -285,7 +316,7 @@ void DataInputDescriptor::printFileStatistics() void DataInputDescriptor::closeInputFile() { - if (mcurrentFile) { + if (mCurrentFilesystem.get()) { if (mParentFile) { mParentFile->closeInputFile(); delete mParentFile; @@ -296,9 +327,7 @@ void DataInputDescriptor::closeInputFile() mParentFileMap = nullptr; printFileStatistics(); - mcurrentFile->Close(); - delete mcurrentFile; - mcurrentFile = nullptr; + mCurrentFilesystem.reset(); } } @@ -346,8 +375,8 @@ int DataInputDescriptor::fillInputfiles() int DataInputDescriptor::findDFNumber(int file, std::string dfName) { - auto dfList = mfilenames[file]->listOfTimeFrameKeys; - auto it = std::find(dfList.begin(), dfList.end(), dfName); + auto dfList = mfilenames[file]->listOfTimeFrameNumbers; + auto it = std::find_if(dfList.begin(), dfList.end(), [dfName](size_t i) { return fmt::format("DF_{}", i) == dfName; }); if (it == dfList.end()) { return -1; } @@ -358,40 +387,75 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh { auto ioStart = uv_hrtime(); - auto fileAndFolder = getFileFolder(counter, numTF); - if (!fileAndFolder.file) { + auto folder = getFileFolder(counter, numTF); + if (!folder.filesystem()) { return false; } - auto fullpath = fileAndFolder.folderName + "/" + treename; - auto tree = (TTree*)fileAndFolder.file->Get(fullpath.c_str()); + auto rootFS = std::dynamic_pointer_cast(folder.filesystem()); + + if (!rootFS) { + throw std::runtime_error(fmt::format(R"(Not a TFile filesystem!)")); + } + // FIXME: Ugly. We should detect the format from the treename, good enough for now. 
+ std::shared_ptr format; + + auto fullpath = arrow::dataset::FileSource{folder.path() + "/" + treename, folder.filesystem()}; + + for (auto& capability : mFactory.capabilities) { + auto objectPath = capability.lfn2objectPath(fullpath.path()); + void* handle = capability.getHandle(rootFS, objectPath); + if (handle) { + format = capability.factory().format(); + break; + } + } + + if (!format) { + throw std::runtime_error(fmt::format(R"(Cannot find a viable format for object {}!)", fullpath.path())); + } + + auto schemaOpt = format->Inspect(fullpath); + auto physicalSchema = schemaOpt; + std::vector> fields; + for (auto& original : (*schemaOpt)->fields()) { + if (original->name().ends_with("_size")) { + continue; + } + fields.push_back(original); + } + auto datasetSchema = std::make_shared(fields); + + auto fragment = format->MakeFragment(fullpath, {}, *physicalSchema); - if (!tree) { - LOGP(debug, "Could not find tree {}. Trying in parent file.", fullpath.c_str()); + if (!fragment.ok()) { + LOGP(debug, "Could not find tree {}. 
Trying in parent file.", fullpath.path()); auto parentFile = getParentFile(counter, numTF, treename); if (parentFile != nullptr) { - int parentNumTF = parentFile->findDFNumber(0, fileAndFolder.folderName); + int parentNumTF = parentFile->findDFNumber(0, folder.path()); if (parentNumTF == -1) { - throw std::runtime_error(fmt::format(R"(DF {} listed in parent file map but not found in the corresponding file "{}")", fileAndFolder.folderName, parentFile->mcurrentFile->GetName())); + auto parentRootFS = std::dynamic_pointer_cast(parentFile->mCurrentFilesystem); + throw std::runtime_error(fmt::format(R"(DF {} listed in parent file map but not found in the corresponding file "{}")", folder.path(), parentRootFS->GetFile()->GetName())); } // first argument is 0 as the parent file object contains only 1 file return parentFile->readTree(outputs, dh, 0, parentNumTF, treename, totalSizeCompressed, totalSizeUncompressed); } - throw std::runtime_error(fmt::format(R"(Couldn't get TTree "{}" from "{}". Please check https://aliceo2group.github.io/analysis-framework/docs/troubleshooting/#tree-not-found for more information.)", fileAndFolder.folderName + "/" + treename, fileAndFolder.file->GetName())); + auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); + throw std::runtime_error(fmt::format(R"(Couldn't get TTree "{}" from "{}". Please check https://aliceo2group.github.io/analysis-framework/docs/troubleshooting/#tree-not-found for more information.)", fullpath.path(), rootFS->GetFile()->GetName())); } // create table output auto o = Output(dh); - auto t2t = outputs.make(o); - // add branches to read - // fill the table - t2t->setLabel(tree->GetName()); - totalSizeCompressed += tree->GetZipBytes(); - totalSizeUncompressed += tree->GetTotBytes(); - t2t->addAllColumns(tree); - t2t->fill(tree); - delete tree; + // FIXME: This should allow me to create a memory pool + // which I can then use to scan the dataset. 
+ // + auto f2b = outputs.make(o); + + //// add branches to read + //// fill the table + f2b->setLabel(treename.c_str()); + f2b->fill(*fragment, datasetSchema, format); mIOTime += (uv_hrtime() - ioStart); @@ -693,7 +757,7 @@ DataInputDescriptor* DataInputDirector::getDataInputDescriptor(header::DataHeade return result; } -FileAndFolder DataInputDirector::getFileFolder(header::DataHeader dh, int counter, int numTF) +arrow::dataset::FileSource DataInputDirector::getFileFolder(header::DataHeader dh, int counter, int numTF) { auto didesc = getDataInputDescriptor(dh); // if NOT match then use defaultDataInputDescriptor diff --git a/Framework/AnalysisSupport/src/DataInputDirector.h b/Framework/AnalysisSupport/src/DataInputDirector.h index eca0ef195d111..9bab29db3ff24 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.h +++ b/Framework/AnalysisSupport/src/DataInputDirector.h @@ -15,6 +15,10 @@ #include "Framework/DataDescriptorMatcher.h" #include "Framework/DataAllocator.h" +#include "Framework/RootArrowFilesystem.h" + +#include +#include #include #include "rapidjson/fwd.h" @@ -31,16 +35,10 @@ struct FileNameHolder { std::string fileName; int numberOfTimeFrames = 0; std::vector listOfTimeFrameNumbers; - std::vector listOfTimeFrameKeys; std::vector alreadyRead; }; FileNameHolder* makeFileNameHolder(std::string fileName); -struct FileAndFolder { - TFile* file = nullptr; - std::string folderName = ""; -}; - class DataInputDescriptor { /// Holds information concerning the reading of an aod table. 
@@ -52,7 +50,6 @@ class DataInputDescriptor std::string treename = ""; std::unique_ptr matcher; - DataInputDescriptor() = default; DataInputDescriptor(bool alienSupport, int level, o2::monitoring::Monitoring* monitoring = nullptr, int allowedParentLevel = 0, std::string parentFileReplacement = ""); void printOut(); @@ -78,7 +75,7 @@ class DataInputDescriptor int findDFNumber(int file, std::string dfName); uint64_t getTimeFrameNumber(int counter, int numTF); - FileAndFolder getFileFolder(int counter, int numTF); + arrow::dataset::FileSource getFileFolder(int counter, int numTF); DataInputDescriptor* getParentFile(int counter, int numTF, std::string treename); int getTimeFramesInFile(int counter); int getReadTimeFramesInFile(int counter); @@ -90,6 +87,7 @@ class DataInputDescriptor bool isAlienSupportOn() { return mAlienSupport; } private: + o2::framework::RootObjectReadingFactory mFactory; std::string minputfilesFile = ""; std::string* minputfilesFilePtr = nullptr; std::string mFilenameRegex = ""; @@ -98,7 +96,7 @@ class DataInputDescriptor std::string mParentFileReplacement; std::vector mfilenames; std::vector* mdefaultFilenamesPtr = nullptr; - TFile* mcurrentFile = nullptr; + std::shared_ptr mCurrentFilesystem; int mCurrentFileID = -1; bool mAlienSupport = false; @@ -127,7 +125,6 @@ class DataInputDirector ~DataInputDirector(); void reset(); - void createDefaultDataInputDescriptor(); void printOut(); bool atEnd(int counter); @@ -140,10 +137,11 @@ class DataInputDirector // getters DataInputDescriptor* getDataInputDescriptor(header::DataHeader dh); int getNumberInputDescriptors() { return mdataInputDescriptors.size(); } + void createDefaultDataInputDescriptor(); bool readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, size_t& totalSizeCompressed, size_t& totalSizeUncompressed); uint64_t getTimeFrameNumber(header::DataHeader dh, int counter, int numTF); - FileAndFolder getFileFolder(header::DataHeader dh, int counter, int numTF); + 
arrow::dataset::FileSource getFileFolder(header::DataHeader dh, int counter, int numTF); int getTimeFramesInFile(header::DataHeader dh, int counter); uint64_t getTotalSizeCompressed(); diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index 881f7d6edc117..f36f309404699 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -633,6 +633,10 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( mappings.push_back({physicalFieldIdx, physicalFieldIdx - 1, fi}); opsCount += 2; } else { + if (physicalFieldIdx > 1) { + O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Field %{public}s previous field is %{public}s.", dataset_field->name().c_str(), + physical_schema->field(physicalFieldIdx - 1)->name().c_str()); + } mappings.push_back({physicalFieldIdx, -1, fi}); opsCount++; } diff --git a/Framework/TestWorkflows/src/o2TestHistograms.cxx b/Framework/TestWorkflows/src/o2TestHistograms.cxx index efac16f6da4f0..2ec268130267b 100644 --- a/Framework/TestWorkflows/src/o2TestHistograms.cxx +++ b/Framework/TestWorkflows/src/o2TestHistograms.cxx @@ -40,7 +40,7 @@ struct EtaAndClsHistogramsSimple { OutputObj etaClsH{TH2F("eta_vs_pt", "#eta vs pT", 102, -2.01, 2.01, 100, 0, 10)}; Produces skimEx; - void process(aod::Tracks const& tracks) + void process(aod::Tracks const& tracks, aod::FT0s const&) { LOGP(info, "Invoking the simple one"); for (auto& track : tracks) { @@ -54,7 +54,7 @@ struct EtaAndClsHistogramsIUSimple { OutputObj etaClsH{TH2F("eta_vs_pt", "#eta vs pT", 102, -2.01, 2.01, 100, 0, 10)}; Produces skimEx; - void process(aod::TracksIU const& tracks) + void process(aod::TracksIU const& tracks, aod::FT0s const&) { LOGP(info, "Invoking the simple one"); for (auto& track : tracks) { From 5d76679553e51fd05bbb9200a0e76faf5f615ea8 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 19 Feb 2025 14:28:21 +0100 Subject: [PATCH 
0139/1914] DPL: add support for decompressing directly to shared memory This PR postpones the read operations which would usually populate an intermediate RecordBatch and it performs them directly on its subsequent shared memory serialization. Doing so avoids having the intermediate representation allocate most of the memory. For the moment this is only done for TTree. RNtuple support will come in a subsequent PR. --- .../AnalysisSupport/src/DataInputDirector.cxx | 7 ++- .../Core/include/Framework/TableTreeHelpers.h | 16 +++++- Framework/Core/src/DataAllocator.cxx | 57 ++++++++----------- Framework/Core/src/TableTreeHelpers.cxx | 14 +++-- 4 files changed, 50 insertions(+), 44 deletions(-) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index dd0238af8ddc0..1daab029b3e8e 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -399,6 +399,7 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh } // FIXME: Ugly. We should detect the format from the treename, good enough for now. std::shared_ptr format; + FragmentToBatch::StreamerCreator creator = nullptr; auto fullpath = arrow::dataset::FileSource{folder.path() + "/" + treename, folder.filesystem()}; @@ -407,6 +408,7 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh void* handle = capability.getHandle(rootFS, objectPath); if (handle) { format = capability.factory().format(); + creator = capability.factory().deferredOutputStreamer; break; } } @@ -449,13 +451,12 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh // FIXME: This should allow me to create a memory pool // which I can then use to scan the dataset. 
- // - auto f2b = outputs.make(o); + auto f2b = outputs.make(o, creator, *fragment); //// add branches to read //// fill the table f2b->setLabel(treename.c_str()); - f2b->fill(*fragment, datasetSchema, format); + f2b->fill(datasetSchema, format); mIOTime += (uv_hrtime() - ioStart); diff --git a/Framework/Core/include/Framework/TableTreeHelpers.h b/Framework/Core/include/Framework/TableTreeHelpers.h index c6a769e579fb7..92725d186ee33 100644 --- a/Framework/Core/include/Framework/TableTreeHelpers.h +++ b/Framework/Core/include/Framework/TableTreeHelpers.h @@ -11,6 +11,8 @@ #ifndef O2_FRAMEWORK_TABLETREEHELPERS_H_ #define O2_FRAMEWORK_TABLETREEHELPERS_H_ +#include +#include #include #include "TFile.h" #include "TTreeReader.h" @@ -146,15 +148,25 @@ class TreeToTable class FragmentToBatch { public: - FragmentToBatch(arrow::MemoryPool* pool = arrow::default_memory_pool()); + // The function to be used to create the required stream. + using StreamerCreator = std::function(std::shared_ptr, const std::shared_ptr& buffer)>; + + FragmentToBatch(StreamerCreator, std::shared_ptr, arrow::MemoryPool* pool = arrow::default_memory_pool()); void setLabel(const char* label); - void fill(std::shared_ptr, std::shared_ptr dataSetSchema, std::shared_ptr); + void fill(std::shared_ptr dataSetSchema, std::shared_ptr); std::shared_ptr finalize(); + std::shared_ptr streamer(std::shared_ptr buffer) + { + return mCreator(mFragment, buffer); + } + private: + std::shared_ptr mFragment; arrow::MemoryPool* mArrowMemoryPool = nullptr; std::string mTableLabel; std::shared_ptr mRecordBatch; + StreamerCreator mCreator; }; // ----------------------------------------------------------------------------- diff --git a/Framework/Core/src/DataAllocator.cxx b/Framework/Core/src/DataAllocator.cxx index c310892c4c490..b735eee1f3308 100644 --- a/Framework/Core/src/DataAllocator.cxx +++ b/Framework/Core/src/DataAllocator.cxx @@ -211,34 +211,6 @@ void doWriteTable(std::shared_ptr b, arrow::Table* table) } } -void 
doWriteBatch(std::shared_ptr b, arrow::RecordBatch* batch) -{ - auto mock = std::make_shared(); - int64_t expectedSize = 0; - auto mockWriter = arrow::ipc::MakeStreamWriter(mock.get(), batch->schema()); - arrow::Status outStatus = mockWriter.ValueOrDie()->WriteRecordBatch(*batch); - - expectedSize = mock->Tell().ValueOrDie(); - auto reserve = b->Reserve(expectedSize); - if (reserve.ok() == false) { - throw std::runtime_error("Unable to reserve memory for table"); - } - - auto stream = std::make_shared(b); - // This is a copy maybe we can finally get rid of it by having using the - // dataset API? - auto outBatch = arrow::ipc::MakeStreamWriter(stream.get(), batch->schema()); - if (outBatch.ok() == false) { - throw ::std::runtime_error("Unable to create batch writer"); - } - - outStatus = outBatch.ValueOrDie()->WriteRecordBatch(*batch); - - if (outStatus.ok() == false) { - throw std::runtime_error("Unable to Write batch"); - } -} - void DataAllocator::adopt(const Output& spec, LifetimeHolder& tb) { auto& timingInfo = mRegistry.get(); @@ -318,16 +290,35 @@ void DataAllocator::adopt(const Output& spec, LifetimeHolder& f // Serialization happens in here, so that we can // get rid of the intermediate tree 2 table object, saving memory. 
auto batch = source.finalize(); - doWriteBatch(buffer, batch.get()); + auto mock = std::make_shared(); + int64_t expectedSize = 0; + auto mockWriter = arrow::ipc::MakeStreamWriter(mock.get(), batch->schema()); + arrow::Status outStatus = mockWriter.ValueOrDie()->WriteRecordBatch(*batch); + + expectedSize = mock->Tell().ValueOrDie(); + auto reserve = buffer->Reserve(expectedSize); + if (reserve.ok() == false) { + throw std::runtime_error("Unable to reserve memory for table"); + } + + auto deferredWriterStream = source.streamer(buffer); + + auto outBatch = arrow::ipc::MakeStreamWriter(deferredWriterStream, batch->schema()); + if (outBatch.ok() == false) { + throw ::std::runtime_error("Unable to create batch writer"); + } + + outStatus = outBatch.ValueOrDie()->WriteRecordBatch(*batch); + + if (outStatus.ok() == false) { + throw std::runtime_error("Unable to Write batch"); + } // deletion happens in the caller }; - /// To finalise this we write the table to the buffer. - /// FIXME: most likely not a great idea. We should probably write to the buffer - /// directly in the TableBuilder, incrementally. auto finalizer = [](std::shared_ptr b) -> void { // This is empty because we already serialised the object when - // the LifetimeHolder goes out of scope. + // the LifetimeHolder goes out of scope. See code above. 
}; context.addBuffer(std::move(header), buffer, std::move(finalizer), routeIndex); diff --git a/Framework/Core/src/TableTreeHelpers.cxx b/Framework/Core/src/TableTreeHelpers.cxx index 2f23c07aea451..84d4ff171bc39 100644 --- a/Framework/Core/src/TableTreeHelpers.cxx +++ b/Framework/Core/src/TableTreeHelpers.cxx @@ -13,7 +13,6 @@ #include "Framework/Endian.h" #include "Framework/Signpost.h" -#include "arrow/type_traits.h" #include #include #include @@ -533,7 +532,7 @@ void TreeToTable::setLabel(const char* label) mTableLabel = label; } -void TreeToTable::fill(TTree*tree) +void TreeToTable::fill(TTree* tree) { std::vector> columns; std::vector> fields; @@ -569,8 +568,10 @@ std::shared_ptr TreeToTable::finalize() return mTable; } -FragmentToBatch::FragmentToBatch(arrow::MemoryPool* pool) - : mArrowMemoryPool{pool} +FragmentToBatch::FragmentToBatch(StreamerCreator creator, std::shared_ptr fragment, arrow::MemoryPool* pool) + : mFragment{std::move(fragment)}, + mArrowMemoryPool{pool}, + mCreator{std::move(creator)} { } @@ -579,13 +580,14 @@ void FragmentToBatch::setLabel(const char* label) mTableLabel = label; } -void FragmentToBatch::fill(std::shared_ptr fragment, std::shared_ptr schema, std::shared_ptr format) +void FragmentToBatch::fill(std::shared_ptr schema, std::shared_ptr format) { auto options = std::make_shared(); options->dataset_schema = schema; - auto scanner = format->ScanBatchesAsync(options, fragment); + auto scanner = format->ScanBatchesAsync(options, mFragment); auto batch = (*scanner)(); mRecordBatch = *batch.result(); + // Notice that up to here the buffer was not yet filled. 
} std::shared_ptr FragmentToBatch::finalize() From 34384e0a06a9a2bc8610d8f2a6a4122eaaa36abb Mon Sep 17 00:00:00 2001 From: iravasen Date: Mon, 3 Mar 2025 11:42:50 +0100 Subject: [PATCH 0140/1914] ITS Calib: ROOT trees for VCASN and ITHR scan + new features (#14008) * ROOT tree for vcasn and ithr scan + processing of tot_full and ps_2d scans row by row instead of EOR * allow calculation of 2D params in finalize() for pulse shape 2D scan --- .../ITSWorkflow/ThresholdCalibratorSpec.h | 1 - .../workflow/src/ThresholdCalibratorSpec.cxx | 114 ++++++++++-------- 2 files changed, 65 insertions(+), 50 deletions(-) diff --git a/Detectors/ITSMFT/ITS/workflow/include/ITSWorkflow/ThresholdCalibratorSpec.h b/Detectors/ITSMFT/ITS/workflow/include/ITSWorkflow/ThresholdCalibratorSpec.h index 028ce3d0687fb..2a139f7997dfb 100644 --- a/Detectors/ITSMFT/ITS/workflow/include/ITSWorkflow/ThresholdCalibratorSpec.h +++ b/Detectors/ITSMFT/ITS/workflow/include/ITSWorkflow/ThresholdCalibratorSpec.h @@ -148,7 +148,6 @@ class ITSThresholdCalibrator : public Task // Hash tables to store the hit and threshold information per pixel std::map>>>> mPixelHits; - std::map> mForbiddenRows; // Unordered map for saving sum of values (thr/ithr/vcasn) for avg calculation std::map> mThresholds; // Map including PixID for noisy pixels diff --git a/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx index e5e4ea016e26e..075a4869210e1 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx @@ -371,8 +371,10 @@ void ITSThresholdCalibrator::initThresholdTree(bool recreate /*=true*/) mThresholdTree = new TTree("ITS_calib_tree", "ITS_calib_tree"); mThresholdTree->Branch("chipid", &vChipid, "vChipID[1024]/S"); mThresholdTree->Branch("row", &vRow, "vRow[1024]/S"); - if (mScanType == 'T') { - mThresholdTree->Branch("thr", &vThreshold, "vThreshold[1024]/S"); + 
if (mScanType == 'T' || mScanType == 'V' || mScanType == 'I') { + std::string bName = mScanType == 'T' ? "thr" : mScanType == 'V' ? "vcasn" + : "ithr"; + mThresholdTree->Branch(bName.c_str(), &vThreshold, "vThreshold[1024]/S"); mThresholdTree->Branch("noise", &vNoise, "vNoise[1024]/F"); mThresholdTree->Branch("spoints", &vPoints, "vPoints[1024]/b"); mThresholdTree->Branch("success", &vSuccess, "vSuccess[1024]/O"); @@ -384,7 +386,7 @@ void ITSThresholdCalibrator::initThresholdTree(bool recreate /*=true*/) } else if (mScanType == 'P') { mThresholdTree->Branch("n_hits", &vThreshold, "vThreshold[1024]/S"); mThresholdTree->Branch("strobedel", &vMixData, "vMixData[1024]/S"); - } else if (mScanType == 'p') { + } else if (mScanType == 'p' || mScanType == 't') { mThresholdTree->Branch("n_hits", &vThreshold, "vThreshold[1024]/S"); mThresholdTree->Branch("strobedel", &vMixData, "vMixData[1024]/S"); mThresholdTree->Branch("charge", &vCharge, "vCharge[1024]/b"); @@ -675,7 +677,7 @@ void ITSThresholdCalibrator::extractThresholdRow(const short int& chipID, const this->mDeadPixID[chipID].push_back(col_i * 1000 + row); } } - } else if (this->mScanType == 'P' || this->mScanType == 'p' || mScanType == 'R') { + } else if (this->mScanType == 'P' || this->mScanType == 'p' || mScanType == 'R' || mScanType == 't') { // Loop over all columns (pixels) in the row for (short int var1_i = 0; var1_i < this->N_RANGE; var1_i++) { for (short int chg_i = 0; chg_i < this->N_RANGE2; chg_i++) { @@ -742,7 +744,7 @@ void ITSThresholdCalibrator::extractThresholdRow(const short int& chipID, const mSlopeTree->Fill(); } - } else { // threshold, vcasn, ithr + } else { // threshold, vcasn, ithr, vresetd_2d short int iRU = getRUID(chipID); #ifdef WITH_OPENMP @@ -779,11 +781,11 @@ void ITSThresholdCalibrator::extractThresholdRow(const short int& chipID, const } // Fill the ScTree tree - if (mScanType == 'T') { // TODO: store also for other scans? 
- for (int ichg = mMin; ichg <= mMax; ichg++) { + if (mScanType == 'T' || mScanType == 'V' || mScanType == 'I') { // TODO: store also for other scans? + for (int ichg = mMin; ichg <= mMax; ichg += mStep) { for (short int col_i = 0; col_i < this->N_COL; col_i += mColStep) { vCharge[col_i] = ichg; - vHits[col_i] = mPixelHits[chipID][row][col_i][0][ichg - mMin]; + vHits[col_i] = mPixelHits[chipID][row][col_i][0][(ichg - mMin) / mStep]; } mScTree->Fill(); } @@ -791,7 +793,7 @@ void ITSThresholdCalibrator::extractThresholdRow(const short int& chipID, const } // end of the else // Saves threshold information to internal memory - if (mScanType != 'P' && mScanType != 'p' && mScanType != 'R' && mScanType != 'r') { + if (mScanType != 'P' && mScanType != 'p' && mScanType != 't' && mScanType != 'R' && mScanType != 'r') { this->saveThreshold(); } } @@ -799,12 +801,10 @@ void ITSThresholdCalibrator::extractThresholdRow(const short int& chipID, const ////////////////////////////////////////////////////////////////////////////// void ITSThresholdCalibrator::saveThreshold() { - // In the case of a full threshold scan, write to TTree - if (this->mScanType == 'T' || this->mScanType == 'D' || this->mScanType == 'A' || this->mScanType == 'P' || this->mScanType == 'p' || this->mScanType == 'R' || this->mScanType == 'r') { - this->mThresholdTree->Fill(); - } + // write to TTree + this->mThresholdTree->Fill(); - if (this->mScanType != 'D' && this->mScanType != 'A' && this->mScanType != 'P' && this->mScanType != 'p' && this->mScanType != 'R' && this->mScanType != 'r') { + if (this->mScanType == 'V' || this->mScanType == 'I' || this->mScanType == 'T') { // Save info in a map for later averaging int sumT = 0, sumSqT = 0, sumN = 0, sumSqN = 0; int countSuccess = 0, countUnsuccess = 0; @@ -957,6 +957,7 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) // ATTENTION: with back bias (VCASNBB) put max vcasn to 130 (default is 80) // 4 rows per chip this->mScanType = 'V'; + 
this->initThresholdTree(); this->mMin = inMinVcasn; // 30 is the default this->mMax = inMaxVcasn; // 80 is the default this->N_RANGE = mMax - mMin + 1; @@ -967,6 +968,7 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) // S-curve is backwards from VCASN case, otherwise same // 4 rows per chip this->mScanType = 'I'; + this->initThresholdTree(); this->mMin = inMinIthr; // 25 is the default this->mMax = inMaxIthr; // 100 is the default this->N_RANGE = mMax - mMin + 1; @@ -1003,13 +1005,28 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mStrobeWindow = 5; // it's 4 but it corresponds to 4+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; this->mCheckExactRow = true; - } else if (runtype == TOT_CALIBRATION || runtype == TOT_CALIBRATION_1_ROW) { + } else if (runtype == TOT_CALIBRATION_1_ROW) { // Pulse length scan 2D (charge vs strobe delay) this->mScanType = 'p'; // small p, just to distinguish from capital P this->initThresholdTree(); this->mFitType = NO_FIT; - this->mMin = (runtype == TOT_CALIBRATION) ? 300 : 0; - this->mMax = (runtype == TOT_CALIBRATION) ? 
1100 : 2000; // strobe delay goes from 0 to 2000 or 1100 (included) in steps of 10 + this->mMin = 0; + this->mMax = 2000; // strobe delay goes from 0 to 2000 in steps of 10 + this->mStep = 10; + this->mStrobeWindow = 2; // it's 1 but it corresponds to 1+1 (as from alpide manual) + this->N_RANGE = (mMax - mMin) / mStep + 1; + this->mMin2 = 0; // charge min + this->mMax2 = 170; // charge max + this->mStep2 = 1; // step for the charge + this->N_RANGE2 = (mMax2 - mMin2) / mStep2 + 1; + this->mCheckExactRow = true; + } else if (runtype == TOT_CALIBRATION) { + // TOT calibration (like pulse shape 2D but with a reduced range in both strobe delay and charge) + this->mScanType = 't'; + this->initThresholdTree(); + this->mFitType = NO_FIT; + this->mMin = 300; + this->mMax = 1100; // strobe delay goes from 300 to 1100 (included) in steps of 10 this->mStep = 10; this->mStrobeWindow = 2; // it's 1 but it corresponds to 1+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; @@ -1017,13 +1034,9 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mMax2 = 60; // charge max this->mStep2 = 30; // step for the charge this->mCalculate2DParams = false; // do not calculate time over threshold, pulse length, etc.. - if (runtype == TOT_CALIBRATION_1_ROW) { - this->mMin2 = 0; // charge min - this->mMax2 = 170; // charge max - this->mStep2 = 1; // step for the charge - } this->N_RANGE2 = (mMax2 - mMin2) / mStep2 + 1; this->mCheckExactRow = true; + } else if (runtype == VRESETD_150 || runtype == VRESETD_300 || runtype == VRESETD_2D) { this->mScanType = 'R'; // capital R is for 1D scan if (runtype == VRESETD_150 || runtype == VRESETD_300) { @@ -1060,7 +1073,7 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) if (saveTree) { this->initThresholdTree(); } - this->mFitType = (mScanType == 'D' || mScanType == 'A' || mScanType == 'P' || mScanType == 'p') ? 
NO_FIT : mFitType; + this->mFitType = (mScanType == 'D' || mScanType == 'A' || mScanType == 'P' || mScanType == 'p' || mScanType == 't') ? NO_FIT : mFitType; this->mCheckExactRow = (mScanType == 'D' || mScanType == 'A') ? false : true; if (scaleNinj) { nInjScaled = nInj / 3; @@ -1264,7 +1277,7 @@ std::vector ITSThresholdCalibrator::calculatePulseParams2D(const short in void ITSThresholdCalibrator::extractAndUpdate(const short int& chipID, const short int& row) { // In threshold scan case, reset mThresholdTree before writing to a new file - if ((this->mScanType == 'T' || this->mScanType == 'D' || this->mScanType == 'A' || this->mScanType == 'P' || this->mScanType == 'p' || mScanType == 'R' || mScanType == 'r') && ((this->mRowCounter)++ == N_ROWS_PER_FILE)) { + if ((this->mRowCounter)++ == N_ROWS_PER_FILE) { // Finalize output and create a new TTree and ROOT file this->finalizeOutput(); this->initThresholdTree(); @@ -1353,7 +1366,7 @@ void ITSThresholdCalibrator::run(ProcessingContext& pc) loopval = !mCdwVersion ? (short int)((calib.calibUserField >> 16) & 0xff) : (short int)((calib.calibUserField >> 16) & 0xffff); } - if (this->mScanType == 'p' || this->mScanType == 'r') { + if (this->mScanType == 'p' || this->mScanType == 't' || this->mScanType == 'r') { realcharge = 170 - ((short int)(calib.calibUserField >> 32)) & 0x1fff; // not existing with CDW v0 } @@ -1363,9 +1376,12 @@ void ITSThresholdCalibrator::run(ProcessingContext& pc) cwcnt = (short int)(calib.calibCounter); // count the last N injections short int checkVal = (mScanType == 'I') ? 
mMin : mMax; - if ((mScanType != 'r' && loopval == checkVal) || (mScanType == 'r' && realcharge == mMax2)) { + if ((mScanType != 'r' && mScanType != 'p' && mScanType != 't' && loopval == checkVal) || + (mScanType == 'r' && realcharge == mMax2) || + (mScanType == 'p' && realcharge == mMin2) || + (mScanType == 't' && loopval == checkVal && realcharge == mMax2)) { mCdwCntRU[iRU][row]++; - mLoopVal[iRU][row] = loopval; // keep loop val (relevant for VRESET2D scan only) + mLoopVal[iRU][row] = loopval; // keep loop val (relevant for VRESET2D and TOT_1ROW scan only) } if (this->mVerboseOutput) { LOG(info) << "RU: " << iRU << " CDWcounter: " << cwcnt << " row: " << row << " Loopval: " << loopval << " realcharge: " << realcharge << " confDBv: " << mCdwVersion; @@ -1393,18 +1409,17 @@ void ITSThresholdCalibrator::run(ProcessingContext& pc) cwcnt = 0; } - if (loopval > this->mMax || loopval < this->mMin || ((mScanType == 'p' || mScanType == 'r') && (realcharge > this->mMax2 || realcharge < this->mMin2))) { + if (loopval > this->mMax || loopval < this->mMin || ((mScanType == 'p' || mScanType == 't' || mScanType == 'r') && (realcharge > this->mMax2 || realcharge < this->mMin2))) { if (this->mVerboseOutput) { LOG(warning) << "CW issues - loopval value " << loopval << " out of range for min " << this->mMin << " and max " << this->mMax << " (range: " << N_RANGE << ")"; - if (mScanType == 'p' || mScanType == 'r') { + if (mScanType == 'p' || mScanType == 'r' || mScanType == 't') { LOG(warning) << " and/or realcharge value " << realcharge << " out of range from min " << this->mMin2 << " and max " << this->mMax2 << " (range: " << N_RANGE2 << ")"; } } } else { std::vector mChips; - std::map mChipsForbRows; // loop to retrieve list of chips and start tagging bad dcols if the hits does not come from this row for (unsigned int idig = rofIndex; idig < rofIndex + rofNEntries; idig++) { // gets chipid auto& d = digits[idig]; @@ -1425,17 +1440,6 @@ void 
ITSThresholdCalibrator::run(ProcessingContext& pc) short int ru = getRUID(chipID); mActiveLinks[ru][getLinkID(chipID, ru)] = true; // check rows and allocate memory - if (mScanType != 'r' && mForbiddenRows.count(chipID)) { - for (int iforb = mForbiddenRows[chipID].size() - 1; iforb >= 0; iforb--) { - if (mForbiddenRows[chipID][iforb] == row) { - mChipsForbRows[chipID] = true; - break; - } - } - } - if (mChipsForbRows[chipID]) { - continue; - } if (!this->mPixelHits.count(chipID)) { if (mScanType == 'D' || mScanType == 'A') { // for digital and analog scan initialize the full matrix for each chipID for (int irow = 0; irow < 512; irow++) { @@ -1461,7 +1465,7 @@ void ITSThresholdCalibrator::run(ProcessingContext& pc) continue; } - if (!mChipsForbRows[chipID] && (!mCheckExactRow || d.getRow() == row) && (mMeb < 0 || cwcnt % 3 == mMeb)) { // row has NOT to be forbidden and we ignore hits coming from other rows (potential masking issue on chip) + if ((!mCheckExactRow || d.getRow() == row) && (mMeb < 0 || cwcnt % 3 == mMeb)) { // row has NOT to be forbidden and we ignore hits coming from other rows (potential masking issue on chip) // Increment the number of counts for this pixel this->mPixelHits[chipID][d.getRow()][col][chgPoint][loopPoint]++; } @@ -1492,7 +1496,16 @@ void ITSThresholdCalibrator::run(ProcessingContext& pc) } // Check if scan of a row is finished: only for specific scans! 
bool passCondition = (mCdwCntRU[ruIndex][row] >= nInjScaled * nL); - if (mScanType != 'D' && mScanType != 'A' && mScanType != 'P' && mScanType != 'p' && mScanType != 'R' && passCondition) { + if (mScanType == 'p' || mScanType == 't') { + passCondition = passCondition && (mLoopVal[ruIndex][row] == mMax); + if (mVerboseOutput) { + LOG(info) << "PassCondition: " << passCondition << " - (mCdwCntRU,mLoopVal) of RU" << ruIndex << " row " << row << " = (" << mCdwCntRU[ruIndex][row] << ", " << mLoopVal[ruIndex][row] << ")"; + } + } else if (mVerboseOutput) { + LOG(info) << "PassCondition: " << passCondition << " - mCdwCntRU of RU" << ruIndex << " row " << row << " = " << mCdwCntRU[ruIndex][row]; + } + + if (mScanType != 'D' && mScanType != 'A' && mScanType != 'P' && mScanType != 'R' && passCondition) { // extract data from the row for (short int iChip = 0; iChip < chipEnabled.size(); iChip++) { short int chipID = chipEnabled[iChip]; @@ -1503,10 +1516,9 @@ void ITSThresholdCalibrator::run(ProcessingContext& pc) if (mPixelHits.count(chipID)) { if (mPixelHits[chipID].count(row)) { // make sure the row exists extractAndUpdate(chipID, row); - if (mScanType != 'r' || (mScanType == 'r' && mLoopVal[ruIndex][row] == mMax)) { + if (mScanType != 'p' && (mScanType != 'r' || mLoopVal[ruIndex][row] == mMax)) { // do not erase for scantype = p because in finalize() we have calculate2Dparams mPixelHits[chipID].erase(row); } - mForbiddenRows[chipID].push_back(row); } } } @@ -1889,11 +1901,15 @@ void ITSThresholdCalibrator::finalize() if (mVerboseOutput) { LOG(info) << "Extracting hits from pulse shape scan or vresetd scan, chip " << itchip->first; } - auto itrow = this->mPixelHits[itchip->first].cbegin(); - while (itrow != mPixelHits[itchip->first].cend()) { // in case there are multiple rows, for now it's 1 row - this->extractAndUpdate(itchip->first, itrow->first); // fill the tree - ++itrow; + + if (mScanType != 'p') { // done already in run() + auto itrow = 
this->mPixelHits[itchip->first].cbegin(); + while (itrow != mPixelHits[itchip->first].cend()) { // in case there are multiple rows, for now it's 1 row + this->extractAndUpdate(itchip->first, itrow->first); // fill the tree - for mScanType = p, it is done already in run() + ++itrow; + } } + if (mCalculate2DParams && (mScanType == 'P' || mScanType == 'p')) { this->addDatabaseEntry(itchip->first, name, mScanType == 'P' ? calculatePulseParams(itchip->first) : calculatePulseParams2D(itchip->first), false); } From 7b51768a4bc2788ebb56818db19f382b13756df2 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:30:02 +0100 Subject: [PATCH 0141/1914] DPL: improve message in case of missing TTree Good enough for now. It will not be the correct one once we support RNTuple as well. --- Framework/AnalysisSupport/src/DataInputDirector.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 1daab029b3e8e..d0d3fe0e6e17d 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -413,8 +413,10 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh } } + // FIXME: we should distinguish between an actually missing object and one which has a non compatible + // format. if (!format) { - throw std::runtime_error(fmt::format(R"(Cannot find a viable format for object {}!)", fullpath.path())); + throw std::runtime_error(fmt::format(R"(Couldn't get TTree "{}" from "{}". 
Please check https://aliceo2group.github.io/analysis-framework/docs/troubleshooting/#tree-not-found for more information.)", fullpath.path(), rootFS->GetFile()->GetName())); } auto schemaOpt = format->Inspect(fullpath); From 8d00eb565b48a415eb24b4f1b56fddb3b10e7db7 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 4 Mar 2025 00:21:06 +0100 Subject: [PATCH 0142/1914] DPL: handle case of list as first field of the dataset --- Framework/AnalysisSupport/src/TTreePlugin.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index f36f309404699..f74fcda38d1f0 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -627,13 +627,13 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( throw runtime_error_f("Cannot find physical field associated to %s. Possible fields: %s", dataset_field->name().c_str(), physical_schema->ToString().c_str()); } - if (physicalFieldIdx > 1 && physical_schema->field(physicalFieldIdx - 1)->name().ends_with("_size")) { + if (physicalFieldIdx > 0 && physical_schema->field(physicalFieldIdx - 1)->name().ends_with("_size")) { O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Field %{public}s has sizes in %{public}s.", dataset_field->name().c_str(), physical_schema->field(physicalFieldIdx - 1)->name().c_str()); mappings.push_back({physicalFieldIdx, physicalFieldIdx - 1, fi}); opsCount += 2; } else { - if (physicalFieldIdx > 1) { + if (physicalFieldIdx > 0) { O2_SIGNPOST_EVENT_EMIT(root_arrow_fs, tid, "Generator", "Field %{public}s previous field is %{public}s.", dataset_field->name().c_str(), physical_schema->field(physicalFieldIdx - 1)->name().c_str()); } From 2abc6f41f610333446d6ffaea5f894c03be2b901 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 4 Mar 2025 10:37:30 +0100 Subject: [PATCH 
0143/1914] DPL: use correct size for the buffer --- Framework/AnalysisSupport/src/TTreePlugin.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index f74fcda38d1f0..4481c06a8c6d9 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -702,7 +702,7 @@ arrow::Result TTreeFileFormat::ScanBatchesAsync( if ((datasetField->type() == arrow::boolean())) { valueOp.kind = ReadOpKind::Booleans; valueOp.listSize = 1; - valueOp.targetBuffer = treeFragment->GetPlaceholderForOp((valueOp.rootBranchEntries) / 8 + 1); + valueOp.targetBuffer = treeFragment->GetPlaceholderForOp((valueOp.rootBranchEntries + 7) / 8); } else if (listType && datasetField->type()->field(0)->type() == arrow::boolean()) { valueOp.typeSize = physicalField->type()->field(0)->type()->byte_width(); valueOp.listSize = listType->list_size(); From ea359772136e0c9818b336dd70790ea3ff18a478 Mon Sep 17 00:00:00 2001 From: Matthias Kleiner Date: Fri, 28 Feb 2025 16:05:32 +0100 Subject: [PATCH 0144/1914] TPC: Merging SAC CCDB files into one object - adding option to enable/disable writing of FFT coefficients to the CCDB --- Detectors/TPC/base/include/TPCBase/CDBTypes.h | 2 + .../include/TPCCalibration/IDCContainer.h | 8 ++++ .../calibration/src/TPCCalibrationLinkDef.h | 3 ++ .../include/TPCWorkflow/TPCFactorizeSACSpec.h | 44 ++++++------------- .../TPCFourierTransformAggregatorSpec.h | 19 +++++--- 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/Detectors/TPC/base/include/TPCBase/CDBTypes.h b/Detectors/TPC/base/include/TPCBase/CDBTypes.h index a3c52ecd0928b..8cf27330b8fc5 100644 --- a/Detectors/TPC/base/include/TPCBase/CDBTypes.h +++ b/Detectors/TPC/base/include/TPCBase/CDBTypes.h @@ -66,6 +66,7 @@ enum class CDBType { CalSAC1, ///< I_1(t) = _{r,\phi} CalSACDelta, ///< \Delta I(r,\phi,t) = I(r,\phi,t) / ( I_0(r,\phi) * I_1(t) ) 
CalSACFourier, ///< Fourier coefficients of CalSAC1 + CalSAC, ///< CalSAC0, CalSAC1 and CalSACDelta /// CalITPC0, ///< 2D average TPC clusters for longer time interval CalITPC1, ///< 1D integrated TPC clusters @@ -133,6 +134,7 @@ const std::unordered_map CDBTypeMap{ {CDBType::CalSAC0, "TPC/Calib/SAC_0"}, {CDBType::CalSAC1, "TPC/Calib/SAC_1"}, {CDBType::CalSACDelta, "TPC/Calib/SAC_DELTA"}, + {CDBType::CalSAC, "TPC/Calib/SAC"}, {CDBType::CalSACFourier, "TPC/Calib/SAC_FOURIER"}, // ITPCCs {CDBType::CalITPC0, "TPC/Calib/ITPCC_0"}, diff --git a/Detectors/TPC/calibration/include/TPCCalibration/IDCContainer.h b/Detectors/TPC/calibration/include/TPCCalibration/IDCContainer.h index 786c60753297d..0ac7c81d6f891 100644 --- a/Detectors/TPC/calibration/include/TPCCalibration/IDCContainer.h +++ b/Detectors/TPC/calibration/include/TPCCalibration/IDCContainer.h @@ -402,6 +402,14 @@ struct SACDelta { std::array, SIDES> mSACDelta{}; }; +/// container to be written to CCDB +template +struct SAC { + SACZero mSACZero{}; + SACOne mSACOne{}; + SACDelta mSACDelta{}; +}; + struct FourierCoeffSAC { std::array mCoeff{}; }; diff --git a/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h b/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h index 097ea756d370e..d42627197cd7f 100644 --- a/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h +++ b/Detectors/TPC/calibration/src/TPCCalibrationLinkDef.h @@ -96,6 +96,9 @@ #pragma link C++ struct o2::tpc::SACDelta < float> + ; #pragma link C++ struct o2::tpc::SACDelta < unsigned short> + ; #pragma link C++ struct o2::tpc::SACDelta < unsigned char> + ; +#pragma link C++ struct o2::tpc::SAC < float> + ; +#pragma link C++ struct o2::tpc::SAC < unsigned short> + ; +#pragma link C++ struct o2::tpc::SAC < unsigned char> + ; #pragma link C++ struct o2::tpc::SACZero + ; #pragma link C++ struct o2::tpc::SACOne + ; #pragma link C++ struct o2::tpc::FourierCoeffSAC + ; diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeSACSpec.h 
b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeSACSpec.h index 3d225c36283da..f191f5f44761b 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeSACSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeSACSpec.h @@ -82,9 +82,7 @@ class TPCFactorizeSACSpec : public o2::framework::Task static constexpr header::DataDescription getDataDescriptionLane() { return header::DataDescription{"SACLANE"}; } // for CCDB - static constexpr header::DataDescription getDataDescriptionCCDBSAC0() { return header::DataDescription{"TPC_CalibSAC0"}; } - static constexpr header::DataDescription getDataDescriptionCCDBSAC1() { return header::DataDescription{"TPC_CalibSAC1"}; } - static constexpr header::DataDescription getDataDescriptionCCDBSACDelta() { return header::DataDescription{"TPC_SACDelta"}; } + static constexpr header::DataDescription getDataDescriptionCCDBSAC() { return header::DataDescription{"TPC_CalibSAC"}; } private: SACFactorization mSACFactorization; ///< object for performing the factorization of the SACs @@ -106,42 +104,30 @@ class TPCFactorizeSACSpec : public o2::framework::Task output.snapshot(Output{gDataOriginTPC, getDataDescriptionTimeStamp()}, std::vector{timeStampStart, timeStampEnd}); output.snapshot(Output{gDataOriginTPC, getDataDescriptionLane()}, mLaneId); - o2::ccdb::CcdbObjectInfo ccdbInfoSAC0(CDBTypeMap.at(CDBType::CalSAC0), std::string{}, std::string{}, std::map{}, timeStampStart, timeStampEnd); - auto imageSAC0 = o2::ccdb::CcdbApi::createObjectImage(&mSACFactorization.getSACZero(), &ccdbInfoSAC0); - LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {} ", ccdbInfoSAC0.getPath(), ccdbInfoSAC0.getFileName(), imageSAC0->size(), ccdbInfoSAC0.getStartValidityTimestamp(), ccdbInfoSAC0.getEndValidityTimestamp()); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBSAC0(), 0}, *imageSAC0.get()); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, 
getDataDescriptionCCDBSAC0(), 0}, ccdbInfoSAC0); - - o2::ccdb::CcdbObjectInfo ccdbInfoSAC1(CDBTypeMap.at(CDBType::CalSAC1), std::string{}, std::string{}, std::map{}, timeStampStart, timeStampEnd); - auto imageSAC1 = o2::ccdb::CcdbApi::createObjectImage(&mSACFactorization.getSACOne(), &ccdbInfoSAC1); - LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {} ", ccdbInfoSAC1.getPath(), ccdbInfoSAC1.getFileName(), imageSAC1->size(), ccdbInfoSAC1.getStartValidityTimestamp(), ccdbInfoSAC1.getEndValidityTimestamp()); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBSAC1(), 0}, *imageSAC1.get()); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBSAC1(), 0}, ccdbInfoSAC1); - - o2::ccdb::CcdbObjectInfo ccdbInfoSACDelta(CDBTypeMap.at(CDBType::CalSACDelta), std::string{}, std::string{}, std::map{}, timeStampStart, timeStampEnd); + o2::ccdb::CcdbObjectInfo ccdbInfoSAC(CDBTypeMap.at(CDBType::CalSAC), std::string{}, std::string{}, std::map{}, timeStampStart, timeStampEnd); std::unique_ptr> imageSACDelta{}; switch (mCompressionDeltaSAC) { case SACFactorization::SACDeltaCompression::MEDIUM: default: { - const SACDelta sacDelta = mSACFactorization.getSACDeltaMediumCompressed(); - imageSACDelta = o2::ccdb::CcdbApi::createObjectImage(&sacDelta, &ccdbInfoSACDelta); + SAC sacContainer{mSACFactorization.getSACZero(), mSACFactorization.getSACOne(), mSACFactorization.getSACDeltaMediumCompressed()}; + imageSACDelta = o2::ccdb::CcdbApi::createObjectImage(&sacContainer, &ccdbInfoSAC); break; } case SACFactorization::SACDeltaCompression::HIGH: { - const SACDelta sacDelta = mSACFactorization.getSACDeltaHighCompressed(); - imageSACDelta = o2::ccdb::CcdbApi::createObjectImage(&sacDelta, &ccdbInfoSACDelta); + SAC sacContainer{mSACFactorization.getSACZero(), mSACFactorization.getSACOne(), mSACFactorization.getSACDeltaHighCompressed()}; + imageSACDelta = 
o2::ccdb::CcdbApi::createObjectImage(&sacContainer, &ccdbInfoSAC); break; } case SACFactorization::SACDeltaCompression::NO: - SACDelta sacDelta = std::move(mSACFactorization).getSACDeltaUncompressed(); - imageSACDelta = o2::ccdb::CcdbApi::createObjectImage(&sacDelta, &ccdbInfoSACDelta); + SAC sacContainer{mSACFactorization.getSACZero(), mSACFactorization.getSACOne(), std::move(mSACFactorization).getSACDeltaUncompressed()}; + imageSACDelta = o2::ccdb::CcdbApi::createObjectImage(&sacContainer, &ccdbInfoSAC); break; } - LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {} ", ccdbInfoSACDelta.getPath(), ccdbInfoSACDelta.getFileName(), imageSACDelta->size(), ccdbInfoSACDelta.getStartValidityTimestamp(), ccdbInfoSACDelta.getEndValidityTimestamp()); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBSACDelta(), 0}, *imageSACDelta.get()); - output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBSACDelta(), 0}, ccdbInfoSACDelta); + LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {} ", ccdbInfoSAC.getPath(), ccdbInfoSAC.getFileName(), imageSACDelta->size(), ccdbInfoSAC.getStartValidityTimestamp(), ccdbInfoSAC.getEndValidityTimestamp()); + output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBSAC(), 0}, *imageSACDelta.get()); + output.snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBSAC(), 0}, ccdbInfoSAC); } else { LOGP(warning, "Received empty data for SACs! 
SACs will not be stored for the current aggregation interval!"); } @@ -153,12 +139,8 @@ class TPCFactorizeSACSpec : public o2::framework::Task DataProcessorSpec getTPCFactorizeSACSpec(const int lane, const unsigned int timeframes, const SACFactorization::SACFactorization::SACDeltaCompression compression, const bool debug) { std::vector outputSpecs; - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCFactorizeSACSpec::getDataDescriptionCCDBSAC0()}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCFactorizeSACSpec::getDataDescriptionCCDBSAC0()}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCFactorizeSACSpec::getDataDescriptionCCDBSAC1()}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCFactorizeSACSpec::getDataDescriptionCCDBSAC1()}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCFactorizeSACSpec::getDataDescriptionCCDBSACDelta()}, Lifetime::Sporadic); - outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCFactorizeSACSpec::getDataDescriptionCCDBSACDelta()}, Lifetime::Sporadic); + outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBPayload, TPCFactorizeSACSpec::getDataDescriptionCCDBSAC()}, Lifetime::Sporadic); + outputSpecs.emplace_back(ConcreteDataTypeMatcher{o2::calibration::Utils::gDataOriginCDBWrapper, TPCFactorizeSACSpec::getDataDescriptionCCDBSAC()}, Lifetime::Sporadic); outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, TPCFactorizeSACSpec::getDataDescriptionSAC1(), header::DataHeader::SubSpecificationType{Side::A}}, Lifetime::Sporadic); outputSpecs.emplace_back(ConcreteDataMatcher{gDataOriginTPC, 
TPCFactorizeSACSpec::getDataDescriptionSAC1(), header::DataHeader::SubSpecificationType{Side::C}}, Lifetime::Sporadic); diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h index 956e9c899cebc..5fe0d6a442dd5 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h @@ -63,6 +63,7 @@ class TPCFourierTransformAggregatorSpec : public o2::framework::Task mIntervalsSACs = ic.options().get("intervalsSACs"); mLengthIDCScalerSeconds = ic.options().get("tpcScalerLengthS"); mDisableScaler = ic.options().get("disable-scaler"); + mEnableFFTCCDB = ic.options().get("enable-fft-CCDB"); resizeBuffer(mInputLanes); } @@ -173,11 +174,13 @@ class TPCFourierTransformAggregatorSpec : public o2::framework::Task mIDCFourierTransform[side].calcFourierCoefficients(mIntervalsBuffer[mExpectedInputLane].size()); if (!mProcessSACs) { - o2::ccdb::CcdbObjectInfo ccdbInfo(CDBTypeMap.at(((side == 0) ? CDBType::CalIDCFourierA : CDBType::CalIDCFourierC)), std::string{}, std::string{}, std::map{}, mCCDBBuffer[mExpectedInputLane].front(), mCCDBBuffer[mExpectedInputLane].back()); - auto imageFFT = o2::ccdb::CcdbApi::createObjectImage(&mIDCFourierTransform[side].getFourierCoefficients(), &ccdbInfo); - LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {} ", ccdbInfo.getPath(), ccdbInfo.getFileName(), imageFFT->size(), ccdbInfo.getStartValidityTimestamp(), ccdbInfo.getEndValidityTimestamp()); - pc.outputs().snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBFourier(), 0}, *imageFFT.get()); - pc.outputs().snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBFourier(), 0}, ccdbInfo); + if (mEnableFFTCCDB) { + o2::ccdb::CcdbObjectInfo ccdbInfo(CDBTypeMap.at(((side == 0) ? 
CDBType::CalIDCFourierA : CDBType::CalIDCFourierC)), std::string{}, std::string{}, std::map{}, mCCDBBuffer[mExpectedInputLane].front(), mCCDBBuffer[mExpectedInputLane].back()); + auto imageFFT = o2::ccdb::CcdbApi::createObjectImage(&mIDCFourierTransform[side].getFourierCoefficients(), &ccdbInfo); + LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {} ", ccdbInfo.getPath(), ccdbInfo.getFileName(), imageFFT->size(), ccdbInfo.getStartValidityTimestamp(), ccdbInfo.getEndValidityTimestamp()); + pc.outputs().snapshot(Output{o2::calibration::Utils::gDataOriginCDBPayload, getDataDescriptionCCDBFourier(), 0}, *imageFFT.get()); + pc.outputs().snapshot(Output{o2::calibration::Utils::gDataOriginCDBWrapper, getDataDescriptionCCDBFourier(), 0}, ccdbInfo); + } } else { coeffSAC.mCoeff[side] = mIDCFourierTransform[side].getFourierCoefficients(); } @@ -192,7 +195,7 @@ class TPCFourierTransformAggregatorSpec : public o2::framework::Task } } - if (mProcessSACs) { + if (mProcessSACs && mEnableFFTCCDB) { o2::ccdb::CcdbObjectInfo ccdbInfo(CDBTypeMap.at(CDBType::CalSACFourier), std::string{}, std::string{}, std::map{}, mCCDBBuffer[mExpectedInputLane].front(), mCCDBBuffer[mExpectedInputLane].back()); auto imageFFT = o2::ccdb::CcdbApi::createObjectImage(&coeffSAC, &ccdbInfo); LOGP(info, "Sending object {} / {} of size {} bytes, valid for {} : {} ", ccdbInfo.getPath(), ccdbInfo.getFileName(), imageFFT->size(), ccdbInfo.getStartValidityTimestamp(), ccdbInfo.getEndValidityTimestamp()); @@ -233,6 +236,7 @@ class TPCFourierTransformAggregatorSpec : public o2::framework::Task long mIDCSCalerEndTSLast = 0; ///< end time stamp of last TPC IDC scaler object to ensure no gapps o2::tpc::TPCScaler mScalerLast; ///< buffer last scaler to easily add internal overlap for the beginning bool mDisableScaler{false}; ///< disable the creation of TPC IDC scalers + bool mEnableFFTCCDB{false}; ///< write FFT coefficients to CCDB int mRun{}; const std::array, 2> mFilter = {std::vector{{"idcone", 
ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFactorizeIDCSpec::getDataDescriptionIDC1()}, Lifetime::Sporadic}}, std::vector{{"sacone", ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, TPCFactorizeSACSpec::getDataDescriptionSAC1()}, Lifetime::Sporadic}}}; ///< filter for looping over input data @@ -438,7 +442,8 @@ DataProcessorSpec getTPCFourierTransformAggregatorSpec(const unsigned int rangeI Options{{"intervalsSACs", VariantType::Int, 11, {"Number of integration intervals which will be sampled for the fourier coefficients"}}, {"dump-coefficients-agg", VariantType::Bool, false, {"Dump fourier coefficients to file"}}, {"tpcScalerLengthS", VariantType::Float, 300.f, {"Length of the TPC scalers in seconds"}}, - {"disable-scaler", VariantType::Bool, false, {"Disable creation of IDC scaler"}}}}; + {"disable-scaler", VariantType::Bool, false, {"Disable creation of IDC scaler"}}, + {"enable-fft-CCDB", VariantType::Bool, false, {"Enable writing of FFT coefficients to CCDB"}}}}; } } // namespace o2::tpc From f68c7bfce7b9ec8de4f986d6d4618683028bf55a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 4 Mar 2025 13:37:29 +0100 Subject: [PATCH 0145/1914] DPL: fix support for parent files --- .../AnalysisSupport/src/DataInputDirector.cxx | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index d0d3fe0e6e17d..981ca5254980d 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -416,6 +416,18 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh // FIXME: we should distinguish between an actually missing object and one which has a non compatible // format. if (!format) { + LOGP(debug, "Could not find tree {}. 
Trying in parent file.", fullpath.path()); + auto parentFile = getParentFile(counter, numTF, treename); + if (parentFile != nullptr) { + int parentNumTF = parentFile->findDFNumber(0, folder.path()); + if (parentNumTF == -1) { + auto parentRootFS = std::dynamic_pointer_cast(parentFile->mCurrentFilesystem); + throw std::runtime_error(fmt::format(R"(DF {} listed in parent file map but not found in the corresponding file "{}")", folder.path(), parentRootFS->GetFile()->GetName())); + } + // first argument is 0 as the parent file object contains only 1 file + return parentFile->readTree(outputs, dh, 0, parentNumTF, treename, totalSizeCompressed, totalSizeUncompressed); + } + auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); throw std::runtime_error(fmt::format(R"(Couldn't get TTree "{}" from "{}". Please check https://aliceo2group.github.io/analysis-framework/docs/troubleshooting/#tree-not-found for more information.)", fullpath.path(), rootFS->GetFile()->GetName())); } @@ -432,22 +444,6 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh auto fragment = format->MakeFragment(fullpath, {}, *physicalSchema); - if (!fragment.ok()) { - LOGP(debug, "Could not find tree {}. 
Trying in parent file.", fullpath.path()); - auto parentFile = getParentFile(counter, numTF, treename); - if (parentFile != nullptr) { - int parentNumTF = parentFile->findDFNumber(0, folder.path()); - if (parentNumTF == -1) { - auto parentRootFS = std::dynamic_pointer_cast(parentFile->mCurrentFilesystem); - throw std::runtime_error(fmt::format(R"(DF {} listed in parent file map but not found in the corresponding file "{}")", folder.path(), parentRootFS->GetFile()->GetName())); - } - // first argument is 0 as the parent file object contains only 1 file - return parentFile->readTree(outputs, dh, 0, parentNumTF, treename, totalSizeCompressed, totalSizeUncompressed); - } - auto rootFS = std::dynamic_pointer_cast(mCurrentFilesystem); - throw std::runtime_error(fmt::format(R"(Couldn't get TTree "{}" from "{}". Please check https://aliceo2group.github.io/analysis-framework/docs/troubleshooting/#tree-not-found for more information.)", fullpath.path(), rootFS->GetFile()->GetName())); - } - // create table output auto o = Output(dh); From 54495197abb919936518ef141ab56ad1ad259edf Mon Sep 17 00:00:00 2001 From: swenzel Date: Tue, 4 Mar 2025 17:15:24 +0100 Subject: [PATCH 0146/1914] Semaphore distinction in CCDB Use different semaphores when retrieving headers vs snapshotting the blob. Otherwise there seems to be a funny overlap in actions and snapshotting was not really protected. 
Fixes: https://its.cern.ch/jira/browse/O2-5834 --- CCDB/src/CcdbApi.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index f436172de42ff..df05d393100d6 100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -1461,7 +1461,7 @@ std::map CcdbApi::retrieveHeaders(std::string const& p if (!mSnapshotCachePath.empty()) { // protect this sensitive section by a multi-process named semaphore - auto semaphore_barrier = std::make_unique(mSnapshotCachePath, path); + auto semaphore_barrier = std::make_unique(mSnapshotCachePath + std::string("_headers"), path); std::string logfile = mSnapshotCachePath + "/log"; std::fstream out(logfile, ios_base::out | ios_base::app); From a7da0ce81f81e19fd5dde6091e3b1240d83c16f7 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Tue, 11 Feb 2025 15:49:33 +0100 Subject: [PATCH 0147/1914] Add new FairLogger severity (critical) to DPL and DebugGUI --- .../Core/include/Framework/LogParsingHelpers.h | 2 ++ Framework/Core/src/LogParsingHelpers.cxx | 6 ++++++ Framework/Core/src/runDataProcessing.cxx | 4 ++++ Framework/DataTakingSupport/src/Plugin.cxx | 8 ++++++-- Framework/GUISupport/src/FrameworkGUIDebugger.cxx | 14 ++++++++++++++ .../GUISupport/src/FrameworkGUIDevicesGraph.cxx | 8 ++++++++ 6 files changed, 40 insertions(+), 2 deletions(-) diff --git a/Framework/Core/include/Framework/LogParsingHelpers.h b/Framework/Core/include/Framework/LogParsingHelpers.h index 6af2c6e52886c..7f3909c2eaadd 100644 --- a/Framework/Core/include/Framework/LogParsingHelpers.h +++ b/Framework/Core/include/Framework/LogParsingHelpers.h @@ -23,9 +23,11 @@ struct LogParsingHelpers { enum struct LogLevel { Debug, Info, + Important, Warning, Alarm, Error, + Critical, Fatal, Unknown, Size diff --git a/Framework/Core/src/LogParsingHelpers.cxx b/Framework/Core/src/LogParsingHelpers.cxx index 58da49caee4da..0d309f28928ba 100644 --- a/Framework/Core/src/LogParsingHelpers.cxx +++ 
b/Framework/Core/src/LogParsingHelpers.cxx @@ -17,9 +17,11 @@ namespace o2::framework char const* const LogParsingHelpers::LOG_LEVELS[(int)LogParsingHelpers::LogLevel::Size] = { "DEBUG", "INFO", + "IMPORTANT", "WARNING", "ALARM", "ERROR", + "CRITICAL", "FATAL", "UNKNOWN"}; using LogLevel = o2::framework::LogParsingHelpers::LogLevel; @@ -59,12 +61,16 @@ LogLevel LogParsingHelpers::parseTokenLevel(std::string_view const s) } else if (s.compare(LABELPOS, 7, "[INFO] ") == 0 || s.compare(LABELPOS, 8, "[STATE] ") == 0) { return LogLevel::Info; + } else if (s.compare(LABELPOS, 12, "[IMPORTANT] ") == 0) { + return LogLevel::Important; } else if (s.compare(LABELPOS, 7, "[WARN] ") == 0) { return LogLevel::Warning; } else if (s.compare(LABELPOS, 8, "[ALARM] ") == 0) { return LogLevel::Alarm; } else if (s.compare(LABELPOS, 8, "[ERROR] ") == 0) { return LogLevel::Error; + } else if (s.compare(LABELPOS, 11, "[CRITICAL] ") == 0) { + return LogLevel::Critical; } else if (s.compare(LABELPOS, 8, "[FATAL] ") == 0) { return LogLevel::Fatal; } diff --git a/Framework/Core/src/runDataProcessing.cxx b/Framework/Core/src/runDataProcessing.cxx index 66fc2c7b2c3df..6c38945039d84 100644 --- a/Framework/Core/src/runDataProcessing.cxx +++ b/Framework/Core/src/runDataProcessing.cxx @@ -2141,6 +2141,8 @@ int runStateMachine(DataProcessorSpecs const& workflow, info.logLevel = LogParsingHelpers::LogLevel::Info; } else if ((*logLevelIt).compare("alarm") == 0) { info.logLevel = LogParsingHelpers::LogLevel::Alarm; + } else if ((*logLevelIt).compare("critical") == 0) { + info.logLevel = LogParsingHelpers::LogLevel::Critical; } else if ((*logLevelIt).compare("fatal") == 0) { info.logLevel = LogParsingHelpers::LogLevel::Fatal; } @@ -3159,6 +3161,8 @@ int doMain(int argc, char** argv, o2::framework::WorkflowSpec const& workflow, fair::Logger::SetConsoleSeverity(fair::Severity::important); } else if (logLevel == "alarm") { fair::Logger::SetConsoleSeverity(fair::Severity::alarm); + } else if (logLevel == 
"critical") { + fair::Logger::SetConsoleSeverity(fair::Severity::critical); } else if (logLevel == "fatal") { fair::Logger::SetConsoleSeverity(fair::Severity::fatal); } else { diff --git a/Framework/DataTakingSupport/src/Plugin.cxx b/Framework/DataTakingSupport/src/Plugin.cxx index e80e3f359f1be..f3dc23e26b114 100644 --- a/Framework/DataTakingSupport/src/Plugin.cxx +++ b/Framework/DataTakingSupport/src/Plugin.cxx @@ -72,6 +72,10 @@ auto createInfoLoggerSinkHelper(InfoLogger* logger, InfoLoggerContext* ctx) severity = InfoLogger::Severity::Fatal; level = 1; break; + case fair::Severity::critical: + severity = InfoLogger::Severity::Error; + level = 1; + break; case fair::Severity::error: severity = InfoLogger::Severity::Error; level = 2; @@ -132,8 +136,8 @@ auto createInfoLoggerSinkHelper(InfoLogger* logger, InfoLoggerContext* ctx) severity, level, InfoLogger::undefinedMessageOption.errorCode, - metadata.file.data(), - atoi(metadata.line.data())}; + metadata.file, + atoi(std::string(metadata.line.data(), metadata.line.size()).c_str())}; if (logger) { logger->log(opt, *ctx, "%s", content.c_str()); diff --git a/Framework/GUISupport/src/FrameworkGUIDebugger.cxx b/Framework/GUISupport/src/FrameworkGUIDebugger.cxx index 1e7942b5c22f7..112797d357458 100644 --- a/Framework/GUISupport/src/FrameworkGUIDebugger.cxx +++ b/Framework/GUISupport/src/FrameworkGUIDebugger.cxx @@ -69,6 +69,8 @@ ImVec4 colorForLogLevel(LogParsingHelpers::LogLevel logLevel) switch (logLevel) { case LogParsingHelpers::LogLevel::Info: return PaletteHelpers::GREEN; + case LogParsingHelpers::LogLevel::Important: + return PaletteHelpers::GREEN; case LogParsingHelpers::LogLevel::Debug: return PaletteHelpers::WHITE; case LogParsingHelpers::LogLevel::Alarm: @@ -77,6 +79,8 @@ ImVec4 colorForLogLevel(LogParsingHelpers::LogLevel logLevel) return PaletteHelpers::DARK_YELLOW; case LogParsingHelpers::LogLevel::Error: return PaletteHelpers::RED; + case LogParsingHelpers::LogLevel::Critical: + return 
PaletteHelpers::RED; case LogParsingHelpers::LogLevel::Fatal: return PaletteHelpers::RED; case LogParsingHelpers::LogLevel::Unknown: @@ -977,11 +981,21 @@ void pushWindowColorDueToStatus(const DeviceInfo& info) return; } switch (info.maxLogLevel) { + case LogLevel::Critical: + ImGui::PushStyleColor(ImGuiCol_TitleBg, PaletteHelpers::SHADED_RED); + ImGui::PushStyleColor(ImGuiCol_TitleBgActive, PaletteHelpers::RED); + ImGui::PushStyleColor(ImGuiCol_TitleBgCollapsed, PaletteHelpers::SHADED_RED); + break; case LogLevel::Error: ImGui::PushStyleColor(ImGuiCol_TitleBg, PaletteHelpers::SHADED_RED); ImGui::PushStyleColor(ImGuiCol_TitleBgActive, PaletteHelpers::RED); ImGui::PushStyleColor(ImGuiCol_TitleBgCollapsed, PaletteHelpers::SHADED_RED); break; + case LogLevel::Alarm: + ImGui::PushStyleColor(ImGuiCol_TitleBg, PaletteHelpers::SHADED_YELLOW); + ImGui::PushStyleColor(ImGuiCol_TitleBgActive, PaletteHelpers::YELLOW); + ImGui::PushStyleColor(ImGuiCol_TitleBgCollapsed, PaletteHelpers::SHADED_YELLOW); + break; case LogLevel::Warning: ImGui::PushStyleColor(ImGuiCol_TitleBg, PaletteHelpers::SHADED_YELLOW); ImGui::PushStyleColor(ImGuiCol_TitleBgActive, PaletteHelpers::YELLOW); diff --git a/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx b/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx index a7e781ffba275..89126cf303a66 100644 --- a/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx +++ b/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx @@ -694,10 +694,18 @@ void showTopologyNodeGraph(WorkspaceGUIState& state, ImGui::BeginGroup(); // Lock horizontal position ImGui::TextUnformatted(node->Name); switch (info.maxLogLevel) { + case LogLevel::Critical: + ImGui::SameLine(); + ImGui::TextColored(ERROR_MESSAGE_COLOR, "%s", ICON_FA_EXCLAMATION_CIRCLE); + break; case LogLevel::Error: ImGui::SameLine(); ImGui::TextColored(ERROR_MESSAGE_COLOR, "%s", ICON_FA_EXCLAMATION_CIRCLE); break; + case LogLevel::Alarm: + ImGui::SameLine(); + ImGui::TextColored(WARNING_MESSAGE_COLOR, 
"%s", ICON_FA_EXCLAMATION_TRIANGLE); + break; case LogLevel::Warning: ImGui::SameLine(); ImGui::TextColored(WARNING_MESSAGE_COLOR, "%s", ICON_FA_EXCLAMATION_TRIANGLE); From 05183622bf7240dcdfb4d887feba8109fb13aca7 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 12 Feb 2025 15:44:27 +0100 Subject: [PATCH 0148/1914] Adding O2_SIGNPOST_EVENT_EMIT_CRITICAL macro --- Framework/Foundation/include/Framework/Signpost.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Framework/Foundation/include/Framework/Signpost.h b/Framework/Foundation/include/Framework/Signpost.h index ba807865a1195..4106174059b3c 100644 --- a/Framework/Foundation/include/Framework/Signpost.h +++ b/Framework/Foundation/include/Framework/Signpost.h @@ -531,6 +531,16 @@ void o2_debug_log_set_stacktrace(_o2_log_t* log, int stacktrace) O2_LOG_MACRO_RAW(warn, remove_engineering_type(format).data(), ##__VA_ARGS__); \ }) +// Similar to the above, however it will also print a normal critical message regardless of the signpost being enabled or not. +#define O2_SIGNPOST_EVENT_EMIT_CRITICAL(log, id, name, format, ...) __extension__({ \ + if (O2_BUILTIN_UNLIKELY(O2_SIGNPOST_ENABLED_MAC(log))) { \ + O2_SIGNPOST_EVENT_EMIT_MAC(log, id, name, format, ##__VA_ARGS__); \ + } else if (O2_BUILTIN_UNLIKELY(private_o2_log_##log->stacktrace)) { \ + _o2_signpost_event_emit(private_o2_log_##log, id, name, remove_engineering_type(format).data(), ##__VA_ARGS__); \ + } \ + O2_LOG_MACRO_RAW(critical, remove_engineering_type(format).data(), ##__VA_ARGS__); \ +}) + #define O2_SIGNPOST_START(log, id, name, format, ...) 
\ if (O2_BUILTIN_UNLIKELY(O2_SIGNPOST_ENABLED_MAC(log))) { \ O2_SIGNPOST_START_MAC(log, id, name, format, ##__VA_ARGS__); \ From 2729899462334b4ff34d81555eddfb5b3bb9d32d Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Thu, 13 Feb 2025 09:01:22 +0100 Subject: [PATCH 0149/1914] fixing formatting errors --- Framework/Foundation/include/Framework/Signpost.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Framework/Foundation/include/Framework/Signpost.h b/Framework/Foundation/include/Framework/Signpost.h index 4106174059b3c..7010301d80b44 100644 --- a/Framework/Foundation/include/Framework/Signpost.h +++ b/Framework/Foundation/include/Framework/Signpost.h @@ -532,13 +532,13 @@ void o2_debug_log_set_stacktrace(_o2_log_t* log, int stacktrace) }) // Similar to the above, however it will also print a normal critical message regardless of the signpost being enabled or not. -#define O2_SIGNPOST_EVENT_EMIT_CRITICAL(log, id, name, format, ...) __extension__({ \ +#define O2_SIGNPOST_EVENT_EMIT_CRITICAL(log, id, name, format, ...) __extension__({ \ if (O2_BUILTIN_UNLIKELY(O2_SIGNPOST_ENABLED_MAC(log))) { \ O2_SIGNPOST_EVENT_EMIT_MAC(log, id, name, format, ##__VA_ARGS__); \ } else if (O2_BUILTIN_UNLIKELY(private_o2_log_##log->stacktrace)) { \ _o2_signpost_event_emit(private_o2_log_##log, id, name, remove_engineering_type(format).data(), ##__VA_ARGS__); \ } \ - O2_LOG_MACRO_RAW(critical, remove_engineering_type(format).data(), ##__VA_ARGS__); \ + O2_LOG_MACRO_RAW(critical, remove_engineering_type(format).data(), ##__VA_ARGS__); \ }) #define O2_SIGNPOST_START(log, id, name, format, ...) 
\ From e5f893f5c4640d471c58defec2a84775488e004e Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Thu, 20 Feb 2025 08:50:39 +0100 Subject: [PATCH 0150/1914] adding GPUCritical, putting InfoLoggerMessageOption initializer to log() call --- Framework/DataTakingSupport/src/Plugin.cxx | 14 ++++++-------- GPU/GPUTracking/Definitions/GPULogging.h | 4 ++++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Framework/DataTakingSupport/src/Plugin.cxx b/Framework/DataTakingSupport/src/Plugin.cxx index f3dc23e26b114..42cb4f6773fda 100644 --- a/Framework/DataTakingSupport/src/Plugin.cxx +++ b/Framework/DataTakingSupport/src/Plugin.cxx @@ -132,15 +132,13 @@ auto createInfoLoggerSinkHelper(InfoLogger* logger, InfoLoggerContext* ctx) return; } - InfoLogger::InfoLoggerMessageOption opt = { - severity, - level, - InfoLogger::undefinedMessageOption.errorCode, - metadata.file, - atoi(std::string(metadata.line.data(), metadata.line.size()).c_str())}; - if (logger) { - logger->log(opt, *ctx, "%s", content.c_str()); + logger->log({severity, + level, + InfoLogger::undefinedMessageOption.errorCode, + std::string(metadata.file).c_str(), + atoi(std::string(metadata.line).c_str())}, + *ctx, "%s", content.c_str()); } }; }; diff --git a/GPU/GPUTracking/Definitions/GPULogging.h b/GPU/GPUTracking/Definitions/GPULogging.h index 693512b15c3c2..4ad6b70b2fd8b 100644 --- a/GPU/GPUTracking/Definitions/GPULogging.h +++ b/GPU/GPUTracking/Definitions/GPULogging.h @@ -24,6 +24,7 @@ #define GPUWarning(...) #define GPUAlarm(...) #define GPUError(...) + #define GPUCritical(...) #define GPUFatal(...) #elif defined(GPUCA_STANDALONE) && !defined(GPUCA_GPUCODE_DEVICE) && !defined(GPUCA_NO_FMT) #include @@ -38,6 +39,7 @@ fmt::fprintf(stderr, string "\n", ##__VA_ARGS__); \ } #define GPUError(...) GPUWarning(__VA_ARGS__) + #define GPUCritical(...) GPUWarning(__VA_ARGS__) #define GPUAlarm(...) GPUWarning(__VA_ARGS__) #define GPUFatal(string, ...) \ { \ @@ -64,6 +66,7 @@ } #define GPUAlarm(...) 
GPUWarning(__VA_ARGS__) #define GPUError(...) GPUWarning(__VA_ARGS__) + #define GPUCritical(...) GPUWarning(__VA_ARGS__) #define GPUFatal(string, ...) \ { \ fprintf(stderr, string "\n", __VA_ARGS__); \ @@ -78,6 +81,7 @@ #define GPUWarning(...) LOGF(warning, __VA_ARGS__) #define GPUAlarm(...) LOGF(alarm, __VA_ARGS__) #define GPUError(...) LOGF(error, __VA_ARGS__) + #define GPUCritical(...) LOGF(critical, __VA_ARGS__) #define GPUFatal(...) LOGF(fatal, __VA_ARGS__) #endif From b3ac562b31f502fd628343e993370cd3adaecc90 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Tue, 4 Mar 2025 14:35:45 +0100 Subject: [PATCH 0151/1914] adding error and critical severity to DPLRawParser --- Framework/Utils/include/DPLUtils/DPLRawParser.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Framework/Utils/include/DPLUtils/DPLRawParser.h b/Framework/Utils/include/DPLUtils/DPLRawParser.h index bd56b1bf7be50..e1732ef70550a 100644 --- a/Framework/Utils/include/DPLUtils/DPLRawParser.h +++ b/Framework/Utils/include/DPLUtils/DPLRawParser.h @@ -250,6 +250,10 @@ class DPLRawParser LOG(warn) << msg << (*this->mInputIterator).spec->binding << " : " << e.what(); } else if (this->mSeverity == fair::Severity::fatal) { LOG(fatal) << msg << (*this->mInputIterator).spec->binding << " : " << e.what(); + } else if (this->mSeverity == fair::Severity::critical) { + LOG(critical) << msg << (*this->mInputIterator).spec->binding << " : " << e.what(); + } else if (this->mSeverity == fair::Severity::error) { + LOG(error) << msg << (*this->mInputIterator).spec->binding << " : " << e.what(); } else if (this->mSeverity == fair::Severity::info) { LOG(info) << msg << (*this->mInputIterator).spec->binding << " : " << e.what(); } else { From 9e9afd85be0eb420cb6e008b7b18a3b67b60aec9 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 4 Mar 2025 16:10:51 +0100 Subject: [PATCH 0152/1914] DPL: add ability to have singposts controlled actions --- 
.../Foundation/include/Framework/Signpost.h | 21 +++++++++++++++++++ .../Foundation/test/test_SignpostLogger.cxx | 7 +++++++ 2 files changed, 28 insertions(+) diff --git a/Framework/Foundation/include/Framework/Signpost.h b/Framework/Foundation/include/Framework/Signpost.h index 7010301d80b44..53cc4d914a73b 100644 --- a/Framework/Foundation/include/Framework/Signpost.h +++ b/Framework/Foundation/include/Framework/Signpost.h @@ -202,10 +202,20 @@ bool _o2_lock_free_stack_push(_o2_lock_free_stack& stack, const int& value, bool bool _o2_lock_free_stack_pop(_o2_lock_free_stack& stack, int& value, bool spin = false); void* _o2_log_create(char const* name, int stacktrace); void _o2_signpost_event_emit(_o2_log_t* log, _o2_signpost_id_t id, char const* name, char const* const format, ...); +void _o2_singpost_action(_o2_log_t* log, void (*callback)(void*)); void _o2_signpost_interval_begin(_o2_log_t* log, _o2_signpost_id_t id, char const* name, char const* const format, ...); void _o2_signpost_interval_end(_o2_log_t* log, _o2_signpost_id_t id, char const* name, char const* const format, ...); void _o2_log_set_stacktrace(_o2_log_t* log, int stacktrace); +// Helper to invoke a callback when the signpost is enabled. The callback +// gets passed some previously stored context (nullptr for now). +// TODO: I use a separate function because in the future this might change and I might +// allow to store some context as part of the activity. +inline void _o2_signpost_action(_o2_log_t* log, void (*callback)(void*)) +{ + callback(nullptr); +} + // This generates a unique id for a signpost. Do not use this directly, use O2_SIGNPOST_ID_GENERATE instead. // Notice that this is only valid on a given computer. // This is guaranteed to be unique at 5 GHz for at least 63 years, if my math is correct. 
@@ -488,6 +498,17 @@ void o2_debug_log_set_stacktrace(_o2_log_t* log, int stacktrace) }) #define O2_SIGNPOST_ID_FROM_POINTER(name, log, pointer) _o2_signpost_id_t name = _o2_signpost_id_make_with_pointer(private_o2_log_##log, pointer) #define O2_SIGNPOST_ID_GENERATE(name, log) _o2_signpost_id_t name = _o2_signpost_id_generate_local(private_o2_log_##log) + +// Execute the provided callback if the log is enabled. Useful e.g. to dump IgProf profiles +// only if the signpost is enabled or to add remote telemetry for certain events. +#define O2_SIGNPOST_ACTION(log, callback) __extension__({ \ + if (O2_BUILTIN_UNLIKELY(O2_SIGNPOST_ENABLED_MAC(log))) { \ + _o2_signpost_action(private_o2_log_##log, callback); \ + } else if (O2_BUILTIN_UNLIKELY(private_o2_log_##log->stacktrace)) { \ + _o2_signpost_action(private_o2_log_##log, callback); \ + } \ +}) + // In case Instruments is attached, we switch to the Apple signpost API otherwise, both one // mac and on linux we use our own implementation, using the logger. We can use the same ids because // they are compatible between the two implementations, we also use remove_engineering_type to remove diff --git a/Framework/Foundation/test/test_SignpostLogger.cxx b/Framework/Foundation/test/test_SignpostLogger.cxx index e5e401dd8f854..4032357835b08 100644 --- a/Framework/Foundation/test/test_SignpostLogger.cxx +++ b/Framework/Foundation/test/test_SignpostLogger.cxx @@ -63,4 +63,11 @@ int main(int argc, char** argv) O2_SIGNPOST_ID_GENERATE(idStacktrace, SignpostStacktrace); O2_LOG_ENABLE(SignpostStacktrace); O2_SIGNPOST_EVENT_EMIT_ERROR(SignpostStacktrace, idStacktrace, "Test category", "An error with stacktrace %d \n", 1); + // Test actions associtated to a given debug stream. 
+ static bool testMustCall = false; + static bool testMustNotCall = false; + O2_SIGNPOST_ACTION(SignpostStacktrace, [](void *) { testMustCall = true; }); + O2_LOG_DISABLE(SignpostStacktrace); + O2_SIGNPOST_ACTION(SignpostStacktrace, [](void *) { testMustNotCall = true; }); + return testMustCall && (!testMustNotCall) ? 0 : 1; } From c4ba161dbcc3a1dc1fdee697a378aaf3c5417cb0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 6 Mar 2025 10:05:16 +0100 Subject: [PATCH 0153/1914] rANS: Suppress bogus compiler warnings, and add / improve some comments --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 1 + Utilities/rANS/benchmarks/bench_ransDecode.cxx | 3 +++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 3c118f402dc4f..e85a3c3e9e1f3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -416,7 +416,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mInternals = master->mInternals; GPUFailedMsg(cudaSetDevice(mDeviceId)); - GPUInfo("CUDA Initialized from master"); + GPUInfo("CUDA Initialisation successfull (from master)"); } for (uint32_t i = 0; i < mEvents.size(); i++) { diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h index 7117dd0c718c6..c7a1b4f55e501 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h @@ -21,7 +21,7 @@ #include #include #pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wshadow" // FIXME: Is this still needed? 
#include #include #include diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 2d73279cf1fe7..64d8549312736 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -114,6 +114,7 @@ class GPUTPCGMMergedTrack float mLastY; //* outer Y float mLastZ; //* outer Z uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays + // TODO: Change to 8 bit uint32_t mNClusters; //* number of track clusters uint32_t mNClustersFitted; //* number of clusters used in fit uint8_t mFlags; diff --git a/Utilities/rANS/benchmarks/bench_ransDecode.cxx b/Utilities/rANS/benchmarks/bench_ransDecode.cxx index a642b58a3431d..0a11b042a77ee 100644 --- a/Utilities/rANS/benchmarks/bench_ransDecode.cxx +++ b/Utilities/rANS/benchmarks/bench_ransDecode.cxx @@ -112,8 +112,11 @@ void ransDecodeBenchmark(benchmark::State& st, Args&&... args) using input_data_type = std::remove_cv_t>; using source_type = typename input_data_type::value_type; +#pragma GCC diagnostic push // TODO: Remove me when fixed in GCC +#pragma GCC diagnostic ignored "-Walloc-size-larger-than=" EncodeBuffer encodeBuffer{inputData.size()}; DecodeBuffer decodeBuffer{inputData.size()}; +#pragma GCC diagnostic pop const auto histogram = makeDenseHistogram::fromSamples(gsl::span(inputData)); Metrics metrics{histogram}; From 0bd7fb7f7f882973aaa431a593b285082550971e Mon Sep 17 00:00:00 2001 From: Maximilian Korwieser Date: Fri, 31 Jan 2025 13:04:55 +0100 Subject: [PATCH 0154/1914] [TPC-QC]Add functionality to set the coordinates of PV. 
--- Detectors/TPC/qc/include/TPCQC/Tracks.h | 23 +++++++++++++++-------- Detectors/TPC/qc/src/Tracks.cxx | 3 +-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/Detectors/TPC/qc/include/TPCQC/Tracks.h b/Detectors/TPC/qc/include/TPCQC/Tracks.h index f6cff7c2b8090..ff215b68ce3b1 100644 --- a/Detectors/TPC/qc/include/TPCQC/Tracks.h +++ b/Detectors/TPC/qc/include/TPCQC/Tracks.h @@ -81,19 +81,26 @@ class Tracks mUseCutMaxAbsDCArOnHistos = useCutMaxAbsDCArOnHistos; } + // Set PV position + void setPVposition(const o2::math_utils::Point3D meanVtxPoint3D) + { + mPositionOfPV = meanVtxPoint3D; + } + /// get ratios of 1D histograms std::unordered_map>& getMapHist() { return mMapHist; } const std::unordered_map>& getMapHist() const { return mMapHist; } private: - float mCutAbsEta = 1.f; // Eta cut - int mCutMinnCls = 60; // minimum N clusters - float mCutMindEdxTot = 20.f; // dEdxTot min value - float mCutMinPtDCAr = 1.5f; // minimum pT for DCAr plots DCAr vs. phi, eta, nCluster - float mSamplingFractionDCAr = 0.1f; // sampling rate for calculation of DCAr - bool mTurnOffHistosForAsync = false; // Decide whether to turn off some histograms for async to reduce memory - float mCutMaxAbsDCAr = 1.f; // maximum DCAr - bool mUseCutMaxAbsDCArOnHistos = false; // Decide whether to use the cut on maximum DCAr for the histograms + float mCutAbsEta = 1.f; // Eta cut + int mCutMinnCls = 60; // minimum N clusters + float mCutMindEdxTot = 20.f; // dEdxTot min value + float mCutMinPtDCAr = 1.5f; // minimum pT for DCAr plots DCAr vs. 
phi, eta, nCluster + float mSamplingFractionDCAr = 0.1f; // sampling rate for calculation of DCAr + bool mTurnOffHistosForAsync = false; // Decide whether to turn off some histograms for async to reduce memory + float mCutMaxAbsDCAr = 1.f; // maximum DCAr + bool mUseCutMaxAbsDCArOnHistos = false; // Decide whether to use the cut on maximum DCAr for the histograms + o2::math_utils::Point3D mPositionOfPV{}; // Position of the PV std::unordered_map> mMapHist; std::vector mHist1D{}; ///< Initialize vector of 1D histograms diff --git a/Detectors/TPC/qc/src/Tracks.cxx b/Detectors/TPC/qc/src/Tracks.cxx index 9f1d9aabf9523..8e6f0d702df1b 100644 --- a/Detectors/TPC/qc/src/Tracks.cxx +++ b/Detectors/TPC/qc/src/Tracks.cxx @@ -180,9 +180,8 @@ bool Tracks::processTrack(const o2::tpc::TrackTPC& track) if (propagator->getMatLUT() && propagator->hasMagFieldSet()) { // ---| fill DCA histos |--- o2::gpu::gpustd::array dca; - const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); - if (propagator->propagateToDCABxByBz(refPoint, propTrack, 2.f, o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca)) { + if (propagator->propagateToDCABxByBz(mPositionOfPV, propTrack, 2.f, o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca)) { const auto phi = o2::math_utils::to02PiGen(track.getPhi()); dcaHistPT->Fill(pt, dca[0]); dcaHist->Fill(phi, dca[0]); From b6432870f6504a8005d8b70d24e596341679859c Mon Sep 17 00:00:00 2001 From: wiechula Date: Tue, 4 Mar 2025 18:57:12 +0100 Subject: [PATCH 0155/1914] TPC: Cluster filter for PbPb 2023 --- GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx | 46 ++++++++++++++++++- GPU/GPUTracking/Debug/GPUTPCClusterFilter.h | 26 ++++++++++- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 10 ++-- .../Global/GPUChainTrackingClusterizer.cxx | 4 +- .../Global/GPUChainTrackingCompression.cxx | 6 ++- .../GPUChainTrackingDebugAndProfiling.cxx | 5 +- 7 files changed, 83 insertions(+), 16 
deletions(-) diff --git a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx index cdd0e4879f949..92adcbd9c14d7 100644 --- a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx +++ b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.cxx @@ -12,14 +12,47 @@ /// \file GPUTPCClusterFilter.cxx /// \author David Rohr +#include "GPUCommonLogger.h" #include "GPUTPCClusterFilter.h" #include "DataFormatsTPC/ClusterNative.h" using namespace o2::gpu; -GPUTPCClusterFilter::GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters) +GPUTPCClusterFilter::GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters, uint8_t filterType) + : mFilterType(filterType) { - // Could initialize private variables based on the clusters here + if (filterType == 1) { + // Custom filter settings go here + + } else if (filterType == 2) { + // PbPb23 filter + mClusterStats = std::make_unique[]>(MaxStacks); + static bool called = false; + if (!called) { + LOGP(info, "GPUTPCClusterFilter called for PbPb 2023 settings"); + called = true; + } + + for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { + for (uint32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) { + const uint32_t globalStack = getGlobalStack(iSector, iRow); + mClusterStats[globalStack].resize(MaxTimeBin); + + for (uint32_t k = 0; k < clusters.nClusters[iSector][iRow]; k++) { + const o2::tpc::ClusterNative& cl = clusters.clusters[iSector][iRow][k]; + const int clTime = static_cast(cl.getTime()); + const float clQmax = cl.getQmax(); + + if (clQmax < 12) { + if (clTime >= static_cast(mClusterStats[globalStack].size())) { + mClusterStats[globalStack].resize(mClusterStats[globalStack].size() + 445); + } + ++mClusterStats[globalStack][clTime]; + } + } + } + } + } } bool GPUTPCClusterFilter::filter(uint32_t sector, uint32_t row, o2::tpc::ClusterNative& cl) @@ -27,5 +60,14 @@ bool GPUTPCClusterFilter::filter(uint32_t sector, uint32_t row, o2::tpc::Cluster // Return true to keep the cluster, 
false to drop it. // May change cluster properties by modifying the cl reference. // Note that this function might be called multiple times for the same cluster, in which case the final modified cl reference goes into the output clusters. + if (mFilterType == 2) { + const uint32_t globalStack = getGlobalStack(sector, row); + const int clTime = static_cast(cl.getTime()); + const float clQmax = cl.getQmax(); + if ((mClusterStats[globalStack][clTime] > 40 && clQmax < 12) || (mClusterStats[globalStack][clTime] > 200)) { + return false; + } + } + return true; } diff --git a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h index 908f78fd23b9a..b39237757de53 100644 --- a/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h +++ b/GPU/GPUTracking/Debug/GPUTPCClusterFilter.h @@ -15,7 +15,10 @@ #ifndef GPUTPCCLUSTERFILTER_H #define GPUTPCCLUSTERFILTER_H +#include #include +#include +#include "GPUDefConstantsAndSettings.h" namespace o2::tpc { @@ -28,8 +31,29 @@ namespace o2::gpu class GPUTPCClusterFilter { public: - GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters); + GPUTPCClusterFilter(const o2::tpc::ClusterNativeAccess& clusters, uint8_t filterType); bool filter(uint32_t sector, uint32_t row, o2::tpc::ClusterNative& cl); + + private: + static constexpr uint32_t MaxTimeBin = 14256; + static constexpr uint32_t MaxStacks = GPUCA_NSECTORS * 4; + uint8_t mFilterType = 0; //< 0: off, 1: custom, 2: PbPb23 + + std::unique_ptr[]> mClusterStats; //< Number of clusters per stack and time bin + + uint32_t getGlobalStack(uint32_t sector, uint32_t row) const + { + int stack = 3; + if (row < 63) { + stack = 0; + } else if (row < 97) { + stack = 1; + } else if (row < 127) { + stack = 2; + } + + return sector * 4 + stack; + }; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 25419f3483dd6..3c31a4fbb8409 100644 --- 
a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -291,7 +291,7 @@ AddOption(tpcDownscaledEdx, uint8_t, 0, "", 0, "If != 0, downscale dEdx processi AddOption(tpcMaxAttachedClustersPerSectorRow, uint32_t, 51000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow") AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC decoding") AddOption(tpcApplyCFCutsAtDecoding, bool, false, "", 0, "Apply cluster cuts from clusterization during decoding of compressed clusters") -AddOption(tpcApplyDebugClusterFilter, bool, false, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class") +AddOption(tpcApplyClusterFilterOnCPU, uint8_t, 0, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class, 0: off, 1: debug, 2: PbPb23") AddOption(RTCcacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored") AddOption(RTCprependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(RTCoverrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 9de8b2174a182..a63886b93ccf9 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -294,16 +294,12 @@ bool GPUChainTracking::ValidateSettings() return false; } } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression) && GetProcessingSettings().tpcApplyCFCutsAtDecoding && !GetProcessingSettings().tpcUseOldCPUDecoding) { - GPUError("tpcApplyCFCutsAtDecoding currently requires tpcUseOldCPUDecoding"); - return false; - } if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || 
GetProcessingSettings().tpcCompressionGatherMode == 3)) { GPUError("Invalid tpcCompressionGatherMode for compression on CPU"); return false; } - if (GetProcessingSettings().tpcApplyDebugClusterFilter == 1 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) { - GPUError("tpcApplyDebugClusterFilter cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels"); + if (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) { + GPUError("tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels"); return false; } if (GetRecoSteps() & RecoStep::TRDTracking) { @@ -815,7 +811,7 @@ int32_t GPUChainTracking::RunChainFinalize() PrintDebugOutput(); - //PrintMemoryRelations(); + // PrintMemoryRelations(); if (GetProcessingSettings().eventDisplay) { if (!mDisplayRunning) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 121d60873324f..62a4a524d67df 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -635,7 +635,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (mWaitForFinalInputs) { GPUFatal("Cannot use waitForFinalInput callback without delayed output"); } - if (!GetProcessingSettings().tpcApplyDebugClusterFilter) { + if (!GetProcessingSettings().tpcApplyClusterFilterOnCPU) { AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]); tmpNativeClusters = mInputsHost->mPclusterNativeOutput; } else { @@ -1021,7 +1021,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) 
tmpNativeAccess->clustersMCTruth = mcLabelsConstView; tmpNativeAccess->setOffsetPtrs(); mIOPtrs.clustersNative = tmpNativeAccess; - if (GetProcessingSettings().tpcApplyDebugClusterFilter) { + if (GetProcessingSettings().tpcApplyClusterFilterOnCPU) { auto allocator = [this, &tmpNativeClusters](size_t size) { this->mInputsHost->mNClusterNative = size; this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 94d39249d620c..1b08de21abd0f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -201,6 +201,11 @@ int32_t GPUChainTracking::RunTPCCompression() int32_t GPUChainTracking::RunTPCDecompression() { + const bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); + if (runFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { + GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding"); + } + if (GetProcessingSettings().tpcUseOldCPUDecoding) { const auto& threadContext = GetThreadContext(); TPCClusterDecompressor decomp; @@ -214,7 +219,6 @@ int32_t GPUChainTracking::RunTPCDecompression() return ((tmpBuffer = std::make_unique(size))).get(); }; auto& decompressTimer = getTimer("TPCDecompression", 0); - bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding; auto allocatorUse = runFiltering ? 
std::function{allocatorTmp} : std::function{allocatorFinal}; decompressTimer.Start(); if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index ec6b48a55d50d..38e1cd0036c16 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -295,7 +295,8 @@ void GPUChainTracking::SanityCheck() void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts) { - GPUTPCClusterFilter clusterFilter(*clusters); + const uint8_t filterType = GetProcessingSettings().tpcApplyClusterFilterOnCPU; + GPUTPCClusterFilter clusterFilter(*clusters, filterType); o2::tpc::ClusterNative* outputBuffer = nullptr; for (int32_t iPhase = 0; iPhase < 2; iPhase++) { uint32_t countTotal = 0; @@ -312,7 +313,7 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster if (param().tpcCutTimeBin > 0) { keep = keep && cl.getTime() < param().tpcCutTimeBin; } - keep = keep && (!GetProcessingSettings().tpcApplyDebugClusterFilter || clusterFilter.filter(iSector, iRow, cl)); + keep = keep && (!filterType || clusterFilter.filter(iSector, iRow, cl)); if (iPhase && keep) { outputBuffer[countTotal] = cl; } From 23958247bf8d01cdf23e053c7e4886c87d834041 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Thu, 6 Mar 2025 17:40:08 +0100 Subject: [PATCH 0156/1914] TPC: fix crash in MIPTrackFilterDevice when skipping first TF --- Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx index b8b95090d8534..1329dea236b1f 100644 --- 
a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx +++ b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx @@ -106,6 +106,7 @@ void MIPTrackFilterDevice::init(framework::InitContext& ic) void MIPTrackFilterDevice::run(ProcessingContext& pc) { + o2::base::GRPGeomHelper::instance().checkUpdates(pc); const auto currentTF = processing_helpers::getCurrentTF(pc); if ((mTFCounter++ % mProcessEveryNthTF) && (currentTF >= mProcessNFirstTFs)) { LOGP(info, "Skipping TF {}", currentTF); @@ -115,7 +116,6 @@ void MIPTrackFilterDevice::run(ProcessingContext& pc) } return; } - o2::base::GRPGeomHelper::instance().checkUpdates(pc); const auto tracks = pc.inputs().get>("tracks"); const auto nTracks = tracks.size(); From 21d7d164517eb16042b7b86253c933e5cec191ca Mon Sep 17 00:00:00 2001 From: pillot Date: Fri, 7 Mar 2025 12:54:39 +0100 Subject: [PATCH 0157/1914] new executable to scan HV/LV CCDB objects and look for issues (#14022) --- Detectors/MUON/MCH/Conditions/CMakeLists.txt | 12 + Detectors/MUON/MCH/Conditions/README.md | 53 +- .../MCH/Conditions/src/scan-hvlv-ccdb.cxx | 1121 +++++++++++++++++ 3 files changed, 1184 insertions(+), 2 deletions(-) create mode 100644 Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx diff --git a/Detectors/MUON/MCH/Conditions/CMakeLists.txt b/Detectors/MUON/MCH/Conditions/CMakeLists.txt index f0a84dec6a416..eff44df743df5 100644 --- a/Detectors/MUON/MCH/Conditions/CMakeLists.txt +++ b/Detectors/MUON/MCH/Conditions/CMakeLists.txt @@ -37,6 +37,18 @@ o2_add_executable( O2::MCHGlobalMapping ) +o2_add_executable( + scan-hvlv-ccdb + COMPONENT_NAME mch + SOURCES src/scan-hvlv-ccdb.cxx + PUBLIC_LINK_LIBRARIES + O2::CCDB + O2::CommonUtils + O2::DetectorsDCS + O2::MCHConditions + O2::MCHStatus + ) + if(BUILD_TESTING) o2_add_test( dcs-aliases diff --git a/Detectors/MUON/MCH/Conditions/README.md b/Detectors/MUON/MCH/Conditions/README.md index fda90ba8c9ca9..d35fdcd0a0958 100644 --- a/Detectors/MUON/MCH/Conditions/README.md +++ 
b/Detectors/MUON/MCH/Conditions/README.md @@ -17,7 +17,9 @@ Those objects are stored at the following CCDB paths : - MCH/Calib/BadChannel - MCH/Calib/RejectList -The BadChannel and RejectList objects can be uploaded, e.g. for debug purposes, using the `o2-mch-bad-channels-ccdb` program : +# o2-mch-bad-channels-ccdb + +The BadChannel and RejectList objects can be uploaded, e.g. for debug purposes, using the `o2-mch-bad-channels-ccdb` program: ```shell $ o2-mch-bad-channels-ccdb --help @@ -46,10 +48,57 @@ Usage: -a [ --alias ] arg DCS alias (HV or LV) to reject ``` -For instance, to create in a local CCDB a RejectList object which declares solar number 32 as bad, from Tuesday 1 November 2022 00:00:01 UTC to Saturday 31 December 2022 23:59:59, use : +For instance, to create in a local CCDB a RejectList object which declares solar number 32 as bad, from Tuesday 1 November 2022 00:00:01 UTC to Saturday 31 December 2022 23:59:59, use: ```shell $ o2-mch-bad-channels-ccdb -p -s 32 -t RejectList --starttimestamp 1667260801000 --endtimestamp 1672531199000 ``` The program will search the reference CCDB (defined with `--referenceccdb`) for existing objects valid during this period and propose you to either overwrite them or update them. In the first case, a single object will be created, valid for the whole period, containing only the new bad channels. In the second case, as many objects as necessary will be created with appropriate validity ranges, adding the new bad channels to the existing ones. 
+ +# o2-mch-scan-hvlv-ccdb + +The HV or LV DCS datapoints stored in the CCDB (http://alice-ccdb.cern.ch) can be scanned using the `o2-mch-scan-hvlv-ccdb` program: + +```shell +$ o2-mch-scan-hvlv-ccdb -h +This program scans HV or LV channels looking for issues +Usage: + -h [ --help ] produce help message + -r [ --runs ] arg run(s) to scan (comma separated list of runs + or ASCII file with one run per line) + -c [ --channels ] arg channel(s) to scan ("HV" or "LV" or comma + separated list of (part of) DCS aliases) + --configKeyValues arg Semicolon separated key=value strings to + change HV thresholds + -d [ --duration ] arg (=0) minimum duration (ms) of HV/LV issues to + consider + -w [ --warning ] arg (=1) warning level (0, 1 or 2) + -p [ --print ] arg (=1) print level (0, 1, 2 or 3) + -o [ --output ] arg (=scan.root) output root file name +``` + +It takes as input a list of runs and a list of either HV or LV channels to scan. **Note that it will scan the CCDB from the beginning of the first run to the end of the last one, which can represent quite a lot of files.** More details about the options are given below. + +It produces as output a list of detected issues, with time, duration and affected runs, and a root file with the displays of the data points per channel per chamber for a visual inspection. Issues are triggered when HV/LV values go below a given threshold. For HV channels it also compares the issues found by the internal algorithm with the ones found by [Detectors/MUON/MCH/Status/src/HVStatusCreator.cxx](../Status/src/HVStatusCreator.cxx).
+ +For instance, to scan all HV channels for runs 545222 and 545223 and detect issues of a minimum duration of 10s, use: +```shell +o2-mch-scan-hvlv-ccdb -r 545222,545223 -c HV -d 10000 +``` + +### channel input formats: +* "HV" to scan all HV channels +* "LV" to scan all LV channels +* comma separated list of (part of) DCS aliases, which must all be of the same type, i.e. contain either Quad/Slat (type = HV), or Group/an/di/Sol (type = LV) + +### warning levels: +* 0: no warning +* 1: check data points timestamp w.r.t. HV/LV file validity range with ±5s tolerance +* 2: check data points timestamp w.r.t. HV/LV file validity range without tolerance + +### print levels: +* 0: print detected issues +* 1: same as 0 + print validity range of runs and HV/LV files +* 2: same as 1 + print the first and last data points of each selected channel +* 3: same as 1 + print all the data points of each selected channel diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx new file mode 100644 index 0000000000000..32cd365916c63 --- /dev/null +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -0,0 +1,1121 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction.
+ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "TFile.h" +#include "TCanvas.h" +#include "TGraph.h" +#include "TH1F.h" +#include "TLine.h" +#include "TMultiGraph.h" +#include "TStyle.h" + +#include "CCDB/BasicCCDBManager.h" +#include "CCDB/CcdbApi.h" +#include "CommonUtils/ConfigurableParam.h" +#include "DetectorsDCS/DataPointIdentifier.h" +#include "DetectorsDCS/DataPointValue.h" +#include "MCHConditions/DCSAliases.h" +#include "MCHStatus/HVStatusCreator.h" +#include "MCHStatus/StatusMapCreatorParam.h" + +namespace po = boost::program_options; + +using namespace o2; +using DPID = dcs::DataPointIdentifier; +using DPVAL = dcs::DataPointValue; +using DPMAP = std::unordered_map>; +using DPMAP2 = std::map>; +using RBMAP = std::map>; +using DPBMAP = std::map; +using ISSUE = std::tuple; +using ISSUELIST = std::vector; +using ISSUEMAP = std::map; + +//---------------------------------------------------------------------------- +bool containsAKey(std::string data, const std::set& Keys) +{ + /// check if the data contains one of the keys + + auto itKey = std::find_if(Keys.begin(), Keys.end(), [&data](const auto& key) { + return data.find(key) != data.npos; + }); + + return itKey != Keys.end(); +} + +//---------------------------------------------------------------------------- +bool isValid(std::string alias) +{ + /// check if the alias is a valid (part of a) DCS alias + + static const std::vector aliases = + mch::dcs::aliases({mch::dcs::MeasurementType::HV_V, + mch::dcs::MeasurementType::LV_V_FEE_ANALOG, + mch::dcs::MeasurementType::LV_V_FEE_DIGITAL, + mch::dcs::MeasurementType::LV_V_SOLAR}); + + auto itAlias = std::find_if(aliases.begin(), aliases.end(), [&alias](const auto& a) { + return a.find(alias) != a.npos; + }); + + return itAlias != aliases.end(); +} + +//---------------------------------------------------------------------------- +void 
scanWhat(std::string what, std::string& path, bool& scanHV, bool& scanAll, std::set& aliases) +{ + /// get what to scan and where + + static const std::set hvKeys{"HV", "Quad", "Slat"}; + static const std::set lvKeys{"LV", "Group", "an", "di", "Sol"}; + + // HV or LV ? + path = ""; + scanHV = false; + if (containsAKey(what, hvKeys)) { + path = "MCH/Calib/HV"; + scanHV = true; + } + if (containsAKey(what, lvKeys)) { + if (scanHV) { + printf("error: cannot scan HV and LV channels at the same time\n"); + exit(1); + } + path = "MCH/Calib/LV"; + } + if (path.empty()) { + printf("error: no valid HV or LV channel to scan\n"); + exit(1); + } + + // everything or specific aliases ? + if (what.find(scanHV ? "HV" : "LV") != what.npos) { + scanAll = true; + aliases.clear(); + } else { + scanAll = false; + std::istringstream input(what); + for (std::string alias; std::getline(input, alias, ',');) { + if (isValid(alias)) { + aliases.insert(alias); + } else { + printf("error: \"%s\" invalid (part of) HV or LV alias\n", alias.c_str()); + exit(1); + } + } + } +} + +//---------------------------------------------------------------------------- +uint64_t ms2s(uint64_t ts) +{ + /// convert the time stamp from ms to s + + return (ts + 500) / 1000; +} + +//---------------------------------------------------------------------------- +std::string getTime(uint64_t ts) +{ + /// convert the time stamp (ms) to local time + + time_t t = ms2s(ts); + + std::string time = std::ctime(&t); + time.pop_back(); // remove trailing \n + + return time; +} + +//---------------------------------------------------------------------------- +std::set getRuns(std::string runList) +{ + /// read the runList from an ASCII file, or a comma separated run list, or a single run + + std::set runs{}; + + auto isNumber = [](std::string val) { return !val.empty() && val.find_first_not_of("0123456789") == val.npos; }; + + if (isNumber(runList)) { + + runs.insert(std::stoi(runList)); + + } else if (runList.find(",") != 
runList.npos) { + + std::istringstream input(runList); + for (std::string run; std::getline(input, run, ',');) { + if (isNumber(run)) { + runs.insert(std::stoi(run)); + } + } + + } else { + + std::ifstream input(runList); + if (input.is_open()) { + for (std::string run; std::getline(input, run);) { + if (isNumber(run)) { + runs.insert(std::stoi(run)); + } + } + } + } + + return runs; +} + +//---------------------------------------------------------------------------- +RBMAP getRunBoundaries(ccdb::CcdbApi const& api, std::string runList) +{ + /// return the SOR / EOR time stamps for every runs in the list + + RBMAP runBoundaries{}; + + auto runs = getRuns(runList); + + for (auto run : runs) { + auto boundaries = ccdb::CCDBManagerInstance::getRunDuration(api, run); + runBoundaries.emplace(run, boundaries); + } + + return runBoundaries; +} + +//---------------------------------------------------------------------------- +void checkRunBoundaries(const RBMAP& runBoundaries) +{ + /// check the consistency of the run time boundaries + + if (runBoundaries.empty()) { + printf("error: no run found from the list\n"); + exit(1); + } + + bool error = false; + int previousRun = 0; + uint64_t endOfPreviousRun = 0; + + for (const auto& [run, boundaries] : runBoundaries) { + if (boundaries.second <= boundaries.first) { + printf("error: run %d EOR <= SOR: %llu - %llu (%s - %s)\n", + run, boundaries.first, boundaries.second, + getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); + error = true; + } + if (boundaries.first <= endOfPreviousRun) { + printf("error: SOR run %d <= EOR run %d: %llu (%s) <= %llu (%s)\n", + run, previousRun, boundaries.first, getTime(boundaries.first).c_str(), + endOfPreviousRun, getTime(endOfPreviousRun).c_str()); + error = true; + } + previousRun = run; + endOfPreviousRun = boundaries.second; + } + + if (error) { + exit(1); + } +} + +//---------------------------------------------------------------------------- +void 
printRunBoundaries(const RBMAP& runBoundaries) +{ + /// print the list of runs with their time boundaries + + printf("\nlist of runs with their boundaries:\n"); + printf("------------------------------------\n"); + + for (const auto& [run, boundaries] : runBoundaries) { + printf("%d: %llu - %llu (%s - %s)\n", run, boundaries.first, boundaries.second, + getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); + } + + printf("------------------------------------\n"); +} + +//---------------------------------------------------------------------------- +void drawRunBoudaries(const RBMAP& runBoundaries, TCanvas* c) +{ + /// draw the run time boundaries + + c->cd(); + + for (const auto& [run, boundaries] : runBoundaries) { + + TLine* startRunLine = new TLine(ms2s(boundaries.first), c->GetUymin(), ms2s(boundaries.first), c->GetUymax()); + startRunLine->SetUniqueID(run); + startRunLine->SetLineColor(4); + startRunLine->SetLineWidth(1); + startRunLine->Draw(); + + TLine* endRunLine = new TLine(ms2s(boundaries.second), c->GetUymin(), ms2s(boundaries.second), c->GetUymax()); + endRunLine->SetUniqueID(run); + endRunLine->SetLineColor(2); + endRunLine->SetLineWidth(1); + endRunLine->Draw(); + } +} + +//---------------------------------------------------------------------------- +DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, uint64_t tStart, uint64_t tStop) +{ + /// get the time boundaries of every HV/LV files found in the time range + + // add extra margin (ms) of ± 1 min to the creation time, which occurs every 30 min + static const uint64_t timeMarging[2] = {60000, 1860000}; + + std::istringstream fileInfo(api.list(what.c_str(), false, "text/plain", + tStop + timeMarging[1], tStart - timeMarging[0])); + + DPBMAP dpBoundaries{}; + std::string dummy{}; + uint64_t begin = 0; + uint64_t end = 0; + + for (std::string line; std::getline(fileInfo, line);) { + if (line.find("Validity:") == 0) { + std::istringstream in(line); + in >> dummy >> begin 
>> dummy >> end; + dpBoundaries.emplace(begin, end); + } + } + + if (dpBoundaries.empty()) { + printf("\e[0;31merror: no file found in %s in time range %llu - %llu (%s - %s) --> use the default one\e[0m\n", + what.c_str(), tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + dpBoundaries.emplace(1, 9999999999999); + } + + return dpBoundaries; +} + +//---------------------------------------------------------------------------- +void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, uint64_t tStop) +{ + /// check the consistency of HV/LV file time boundaries + + bool error = false; + + if (dpBoundaries.begin()->first > tStart) { + printf("error: the beginning of the time range is not covered: %llu > %llu (%s > %s)\n", + dpBoundaries.begin()->first, tStart, + getTime(dpBoundaries.begin()->first).c_str(), getTime(tStart).c_str()); + error = true; + } + if (dpBoundaries.rbegin()->second < tStop) { + printf("error: the end of the time range is not covered: %llu < %llu (%s < %s)\n", + dpBoundaries.rbegin()->second, tStop, + getTime(dpBoundaries.rbegin()->second).c_str(), getTime(tStop).c_str()); + error = true; + } + + uint64_t previousTStop = dpBoundaries.begin()->first; + for (auto [tStart, tStop] : dpBoundaries) { + if (tStop <= tStart) { + printf("error: EOF <= SOF: %llu - %llu (%s - %s)\n", + tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + error = true; + } + if (tStart != previousTStop) { + printf("error: end of %s file != start of next %s file: %llu (%s) != %llu (%s))\n", + scanHV ? "HV" : "LV", scanHV ? 
"HV" : "LV", + previousTStop, getTime(previousTStop).c_str(), tStart, getTime(tStart).c_str()); + error = true; + } + previousTStop = tStop; + } + + if (error) { + exit(1); + } +} + +//---------------------------------------------------------------------------- +void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV) +{ + /// print the time boundaries of every HV/LV files found in the full time range + + printf("\nlist of %s file time boundaries:\n", scanHV ? "HV" : "LV"); + printf("------------------------------------\n"); + + for (auto [tStart, tStop] : dpBoundaries) { + printf("%llu - %llu (%s - %s)\n", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + } + + printf("------------------------------------\n"); +} + +//---------------------------------------------------------------------------- +double getLVLimit(std::string alias) +{ + /// return the LV limit for that channel + + static const double lvLimits[3] = {1.5, 1.5, 6.}; // FeeAnalog, FeeDigital, Solar + + if (alias.find("an") != alias.npos) { + return lvLimits[0]; + } else if (alias.find("di") != alias.npos) { + return lvLimits[1]; + } + return lvLimits[2]; +} + +//---------------------------------------------------------------------------- +void drawLimit(double limit, TCanvas* c) +{ + /// draw the HV/LV limit for the displayed chamber + + c->cd(); + + TLine* l = new TLine(c->GetUxmin(), limit, c->GetUxmax(), limit); + l->SetLineColor(1); + l->SetLineWidth(1); + l->SetLineStyle(2); + l->Draw(); +} + +//---------------------------------------------------------------------------- +std::string getDuration(uint64_t tStart, uint64_t tStop) +{ + /// get the duration (dd hh:mm:ss) between the two time stamps (ms) + + auto dt = ms2s(tStop - tStart); + auto s = dt % 60; + auto m = (dt / 60) % 60; + auto h = (dt / 3600) % 24; + auto d = dt / 86400; + + return fmt::format("{:02}d {:02}:{:02}:{:02}", d, h, m, s); +} + 
+//---------------------------------------------------------------------------- +double getValue(DPVAL dp) +{ + /// return the value of this data point + + union Converter { + uint64_t raw_data; + double value; + } converter; + + converter.raw_data = dp.payload_pt1; + + return converter.value; +} + +//---------------------------------------------------------------------------- +std::string getDE(std::string alias) +{ + /// for DCS HV alias: return the corresponding DE (and sector) + /// for DCS LV alias: return an empty string + + auto de = mch::dcs::aliasToDetElemId(alias); + + if (de) { + return (mch::dcs::isQuadrant(mch::dcs::aliasToChamber(alias))) + ? fmt::format("DE{}-{}", *de, mch::dcs::aliasToNumber(alias) % 10) + : fmt::format("DE{}", *de); + } + + return ""; +} + +//---------------------------------------------------------------------------- +void fillDataPoints(const std::vector& dps, std::map& dps2, + uint64_t tMin, uint64_t tMax, int warningLevel) +{ + /// fill the map of data points + + static const uint64_t tolerance = 5000; + + if (dps.empty()) { + printf("error: the file does not contain any data point\n"); + exit(1); + } + + auto itDP = dps.begin(); + auto ts = itDP->get_epoch_time(); + std::string header = "warning:"; + std::string color = (ts + tolerance < tMin || ts > tMin + tolerance) ? "\e[0;31m" : "\e[0;34m"; + bool printWarning = warningLevel > 1 || (warningLevel == 1 && color == "\e[0;31m"); + + // check if the first data point is a copy of the last one from previous file + if (!dps2.empty()) { + auto previousTS = dps2.rbegin()->first; + if (ts != previousTS || getValue(*itDP) != dps2.rbegin()->second) { + if (ts <= previousTS) { + printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + exit(1); + } + if (printWarning) { + printf("%s%s missing the previous data point (dt = %s%llu ms)", color.c_str(), header.c_str(), + (previousTS < tMin) ? "-" : "+", (previousTS < tMin) ? 
tMin - previousTS : previousTS - tMin); + if (ts <= tMin) { + printf(" but get one at dt = -%llu ms\e[0m\n", tMin - ts); + } else { + printf("\e[0m\n"); + } + header = " "; + } + } + } + + // add the first data point (should be before the start of validity of the file) + if (ts >= tMax) { + printf("error: first data point exceeding file validity range (dt = +%llu ms)\n", ts - tMax); + exit(1); + } else if (ts > tMin && printWarning) { + printf("%s%s missing data point prior file start of validity (dt = +%llu ms)\e[0m\n", + color.c_str(), header.c_str(), ts - tMin); + header = " "; + } + dps2.emplace(ts, getValue(*itDP)); + + // add other data points (should be within the validity range of the file) + auto previousTS = ts; + for (++itDP; itDP < dps.end(); ++itDP) { + ts = itDP->get_epoch_time(); + if (ts <= previousTS) { + printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + exit(1); + } + if (ts < tMin && (warningLevel > 1 || (warningLevel == 1 && ts + tolerance < tMin))) { + printf("%s%s data point outside of file validity range (dt = -%llu ms)\e[0m\n", + (ts + tolerance < tMin) ? 
"\e[0;31m" : "\e[0;34m", header.c_str(), tMin - ts); + } else if (ts >= tMax && warningLevel >= 1) { + printf("\e[0;31m%s data point outside of file validity range (dt = +%llu ms)\e[0m\n", + header.c_str(), ts - tMax); + } + dps2.emplace(ts, getValue(*itDP)); + previousTS = ts; + } +} + +//---------------------------------------------------------------------------- +void selectDataPoints(DPMAP2 dpsMapsPerCh[10], uint64_t tStart, uint64_t tStop) +{ + /// remove the data points outside of the given time range and, if needed, + /// add a data point at the boundaries with value equal to the preceding one + + for (int ch = 0; ch < 10; ++ch) { + for (auto& [alias, dps] : dpsMapsPerCh[ch]) { + + // get the first data point in the time range, remove the previous ones + // and add a data point with value equal to the preceding one if it exists + // or to this one otherwise + auto itFirst = dps.lower_bound(tStart); + if (itFirst != dps.begin()) { + double previousVal = std::prev(itFirst)->second; + for (auto it = dps.begin(); it != itFirst;) { + it = dps.erase(it); + } + dps.emplace(tStart, previousVal); + } else if (itFirst->first != tStart) { + if (itFirst->first > tStop) { + printf("error (%s): all data points are posterior to the end of the time range\n", alias.c_str()); + } else { + printf("error (%s): first data point is posterior to the beginning of the time range\n", alias.c_str()); + } + dps.emplace(tStart, itFirst->second); + } + + // get the first data point exceeding the time range, remove it and the next ones + // and add a data point with value equal to the preceding one if needed + auto itLast = dps.upper_bound(tStop); + double previousVal = std::prev(itLast)->second; + for (auto it = itLast; it != dps.end();) { + it = dps.erase(it); + } + dps.emplace(tStop, previousVal); + } + } +} + +//---------------------------------------------------------------------------- +void printDataPoints(const DPMAP2 dpsMapsPerCh[10], std::string hvlvFormat, bool all) +{ + /// 
print all the registered data points + + const auto format1 = fmt::format(" %llu (%s): {} V\n", hvlvFormat.c_str()); + const auto format2 = fmt::format(": %llu (%s): {} V -- %llu (%s): {} V\n", + hvlvFormat.c_str(), hvlvFormat.c_str()); + + for (int ch = 0; ch < 10; ++ch) { + + printf("\n------------ chamber %d ------------\n", ch + 1); + + for (const auto& [alias, dps] : dpsMapsPerCh[ch]) { + + printf("- %s: %lu values", alias.c_str(), dps.size()); + + if (all) { + + printf("\n"); + for (const auto& [ts, val] : dps) { + printf(format1.c_str(), ts, getTime(ts).c_str(), val); + } + + } else if (!dps.empty()) { + + const auto firstdt = dps.begin(); + const auto lastdt = dps.rbegin(); + printf(format2.c_str(), + firstdt->first, getTime(firstdt->first).c_str(), firstdt->second, + lastdt->first, getTime(lastdt->first).c_str(), lastdt->second); + + } else { + printf("\n"); + } + } + } +} + +//---------------------------------------------------------------------------- +TGraph* mapToGraph(std::string alias, const std::map& dps) +{ + /// create a graph for the DCS channel and add the data points + + TGraph* g = new TGraph(dps.size()); + + auto pos = alias.find("."); + auto shortAlias = alias.substr(0, pos); + auto de = getDE(alias); + auto title = de.empty() ? 
fmt::format("{}", shortAlias.c_str()) + : fmt::format("{} ({})", de.c_str(), shortAlias.c_str()); + g->SetNameTitle(alias.c_str(), title.c_str()); + + int i(0); + for (auto [ts, val] : dps) { + g->SetPoint(i, ms2s(ts), val); + ++i; + } + + g->SetMarkerSize(1.5); + g->SetMarkerStyle(2); + g->SetLineStyle(2); + + return g; +} + +//---------------------------------------------------------------------------- +TCanvas* drawDataPoints(TMultiGraph* mg, double min, double max) +{ + /// display the data points of the given chamber + + TCanvas* c = new TCanvas(mg->GetName(), mg->GetHistogram()->GetTitle(), 1500, 900); + + mg->Draw("A plc pmc"); + mg->SetMinimum(min); + mg->SetMaximum(max); + mg->GetXaxis()->SetTimeDisplay(1); + mg->GetXaxis()->SetTimeFormat("%d/%m %H:%M"); + mg->GetXaxis()->SetTimeOffset(0, "local"); + mg->GetXaxis()->SetNdivisions(21010); + + c->BuildLegend(); + c->Update(); + + return c; +} + +//---------------------------------------------------------------------------- +void findIssues(const std::map& dps, double limit, ISSUELIST& issues) +{ + /// return the list of HV/LV issues (time range, min value, mean value) for each DCS channel + + uint64_t tStart(0); + double min(0.); + double mean(0.); + uint64_t prevTS(0); + double prevVal(-1.); + + for (auto [ts, val] : dps) { + + if (val < limit) { + + if (tStart == 0) { + + // start a new issue... + tStart = ts; + min = val; + mean = 0.; + prevTS = ts; + prevVal = val; + + } else { + + // ... 
or complement the current one + min = std::min(min, val); + mean += prevVal * (ts - prevTS); + prevTS = ts; + prevVal = val; + } + + } else if (tStart > 0) { + + // complete the current issue, if any, and register it + mean += prevVal * (ts - prevTS); + mean /= (ts - tStart); + issues.emplace_back(tStart, ts, min, mean, ""); + tStart = 0; + } + } + + // complete the last issue, if any and its duration is != 0, and register it + if (tStart > 0 && prevTS != tStart) { + mean /= (prevTS - tStart); + issues.emplace_back(tStart, prevTS, min, mean, ""); + } +} + +//---------------------------------------------------------------------------- +void fillO2Issues(const std::vector& o2issues, ISSUELIST& issues, + uint64_t tMin, uint64_t tMax) +{ + /// fill the list of issues from O2 (extend the previous one and/or create new ones) + + // the list must not be empty + if (o2issues.empty()) { + printf("error: O2 returns an empty list of issues\n"); + exit(1); + } + + for (auto itIssue = o2issues.begin(); itIssue != o2issues.end(); ++itIssue) { + + // exclude issues fully outside of the DP file boundaries + if (itIssue->end <= tMin || itIssue->begin >= tMax) { + printf("\e[0;35mwarning: skipping O2 issue outside of file boundaries (%llu - %llu)\e[0m\n", + itIssue->begin, itIssue->end); + continue; + } + + // only the first issue could in principle extend before the start of the DP file, to 0 + if (itIssue->begin < tMin - mch::StatusMapCreatorParam::Instance().timeMargin && + (itIssue != o2issues.begin() || itIssue->begin != 0)) { + printf("\e[0;35mwarning: O2 returns an issue with uncommon start time (%llu < %llu)\e[0m\n", + itIssue->begin, tMin - mch::StatusMapCreatorParam::Instance().timeMargin); + } + + // only the last issue could in principle extend beyond the end of the DP file, to infinity + if (itIssue->end >= tMax + mch::StatusMapCreatorParam::Instance().timeMargin && + (itIssue != std::prev(o2issues.end()) || itIssue->end != std::numeric_limits::max())) { + 
printf("\e[0;35mwarning: O2 returns an issue with uncommon end time (%llu >= %llu)\e[0m\n", + itIssue->end, tMax + mch::StatusMapCreatorParam::Instance().timeMargin); + } + + // extend the last issue in case of continuity across the DP files or add a new one, + // restricting their time range within the DP file boundaries + if (itIssue->begin <= tMin && !issues.empty() && std::get<1>(issues.back()) == tMin) { + std::get<1>(issues.back()) = std::min(itIssue->end, tMax); + } else { + issues.emplace_back(std::max(itIssue->begin, tMin), std::min(itIssue->end, tMax), 0., 0., ""); + } + } +} + +//---------------------------------------------------------------------------- +std::string findAffectedRuns(const RBMAP& runBoundaries, uint64_t tStart, uint64_t tStop) +{ + /// return the list of affected runs in this time range + + std::string runs; + + for (const auto& [run, boundaries] : runBoundaries) { + + if (boundaries.second <= tStart) { + continue; + } else if (boundaries.first >= tStop) { + break; + } + + runs += fmt::format("{},", run); + } + + if (!runs.empty()) { + runs.pop_back(); + } + + return runs; +} + +//---------------------------------------------------------------------------- +void selectIssues(ISSUEMAP issuesPerCh[10], const RBMAP& runBoundaries, uint64_t minDuration) +{ + /// select HV/LV issues of a minimum duration (ms) occurring during runs + + for (int ch = 0; ch < 10; ++ch) { + for (auto& issues : issuesPerCh[ch]) { + for (auto itIssue = issues.second.begin(); itIssue != issues.second.end();) { + + auto tStart = std::get<0>(*itIssue); + auto tStop = std::get<1>(*itIssue); + + if (tStop - tStart < minDuration) { + + itIssue = issues.second.erase(itIssue); + + } else { + + auto runs = findAffectedRuns(runBoundaries, tStart, tStop); + + if (runs.empty()) { + + itIssue = issues.second.erase(itIssue); + + } else { + + std::get<4>(*itIssue) = runs; + ++itIssue; + } + } + } + } + } +} + 
+//---------------------------------------------------------------------------- +void selectO2Issues(ISSUEMAP issuesPerCh[10], const RBMAP& runBoundaries) +{ + /// select HV issues from O2 algorithm occurring during runs + /// and restrict the range of issues to the run range + + for (int ch = 0; ch < 10; ++ch) { + for (auto& issues : issuesPerCh[ch]) { + for (auto itIssue = issues.second.begin(); itIssue != issues.second.end();) { + + auto& tStart = std::get<0>(*itIssue); + auto& tStop = std::get<1>(*itIssue); + + auto runs = findAffectedRuns(runBoundaries, tStart, tStop); + + if (runs.empty()) { + + itIssue = issues.second.erase(itIssue); + + } else { + + tStart = std::max(tStart, runBoundaries.begin()->second.first); + tStop = std::min(tStop, runBoundaries.rbegin()->second.second); + std::get<4>(*itIssue) = runs; + ++itIssue; + } + } + } + } +} + +//---------------------------------------------------------------------------- +bool eraseIssue(const ISSUE& issue, ISSUELIST& issues) +{ + /// find an issue with the same time range and associated run list and erase it + /// return true in case of success + + auto itIssue = std::find_if(issues.begin(), issues.end(), [&issue](const auto& i) { + return (std::get<0>(i) == std::get<0>(issue) && + std::get<1>(i) == std::get<1>(issue) && + std::get<4>(i) == std::get<4>(issue)); + }); + + if (itIssue != issues.end()) { + issues.erase(itIssue); + return true; + } + + return false; +} + +//---------------------------------------------------------------------------- +void printIssues(const ISSUEMAP issuesPerCh[10], const ISSUEMAP o2IssuesPerCh[10], + bool scanHV, std::string hvlvFormat) +{ + /// print all HV/LV issues + + // copy the issues so that we can modify them (i.e. 
add empty lists or delete issues after printing) + ISSUEMAP issuesPerChCopy[10]; + ISSUEMAP o2IssuesPerChCopy[10]; + for (int ch = 0; ch < 10; ++ch) { + issuesPerChCopy[ch] = issuesPerCh[ch]; + o2IssuesPerChCopy[ch] = o2IssuesPerCh[ch]; + } + + // make sure that all alias keys in the map o2IssuesPerChCopy are also in issuesPerChCopy in order to + // simplify the loop over all issues from both algorithms and fix the order in which they are printed + for (int ch = 0; ch < 10; ++ch) { + for (const auto& [alias, o2Issues] : o2IssuesPerChCopy[ch]) { + if (!o2Issues.empty()) { + issuesPerChCopy[ch].try_emplace(alias, ISSUELIST{}); + } + } + } + + auto printHeader = [](std::string alias) { + auto de = getDE(alias); + if (de.empty()) { + printf("Problem found for %s:\n", alias.c_str()); + } else { + printf("Problem found for %s (%s):\n", alias.c_str(), de.c_str()); + } + }; + + const auto format = fmt::format("%llu - %llu: %s (duration = %s, min = {} V, mean = {} V) --> run(s) %s\n", + hvlvFormat.c_str(), hvlvFormat.c_str()); + + auto printIssue = [&format](ISSUE issue, std::string color) { + const auto& [tStart, tStop, min, mean, runs] = issue; + printf("%s", color.c_str()); + printf(format.c_str(), tStart, tStop, + getTime(tStart).c_str(), getDuration(tStart, tStop).c_str(), min, mean, runs.c_str()); + printf("\e[0m"); + }; + + if (scanHV) { + printf("\n------ list of issues from \e[0;31mthis macro only\e[0m, \e[0;35mO2 only\e[0m, or \e[0;32mboth\e[0m ------\n"); + } else { + printf("\n------ list of issues ------\n"); + } + + bool foundIssues = false; + + for (int ch = 0; ch < 10; ++ch) { + for (const auto& [alias, issues] : issuesPerChCopy[ch]) { + + auto& o2Issues = o2IssuesPerChCopy[ch][alias]; + + if (!issues.empty() || !o2Issues.empty()) { + + foundIssues = true; + printHeader(alias); + + // print all issues found by this macro + for (const auto& issue : issues) { + // change color if the issue is not found by the O2 algorithm (only for HV) + std::string color = 
(scanHV && !eraseIssue(issue, o2Issues)) ? "\e[0;31m" : "\e[0;32m"; + printIssue(issue, color); + } + + // print other issues found by the O2 algorithm + for (const auto& issue : o2Issues) { + printIssue(issue, "\e[0;35m"); + } + + printf("----------------------------\n"); + } + } + } + + if (!foundIssues) { + printf("----------------------------\n"); + } +} + +//---------------------------------------------------------------------------- +int main(int argc, char** argv) +{ + /// scan HV or LV CCDB objects looking for issues + + std::string runList = ""; + std::string what = ""; + std::string config = ""; + uint64_t minDuration = 0; + int warningLevel = 1; + int printLevel = 1; + std::string outFileName = ""; + + po::options_description usage("Usage"); + // clang-format off + usage.add_options() + ("help,h", "produce help message") + ("runs,r",po::value(&runList)->default_value(""),"run(s) to scan (comma separated list of runs or ASCII file with one run per line)") + ("channels,c",po::value(&what)->default_value(""),R"(channel(s) to scan ("HV" or "LV" or comma separated list of (part of) DCS aliases))") + ("configKeyValues",po::value(&config)->default_value(""),"Semicolon separated key=value strings to change HV thresholds") + ("duration,d",po::value(&minDuration)->default_value(0),"minimum duration (ms) of HV/LV issues to consider") + ("warning,w",po::value(&warningLevel)->default_value(1),"warning level (0, 1 or 2)") + ("print,p",po::value(&printLevel)->default_value(1),"print level (0, 1, 2 or 3)") + ("output,o",po::value(&outFileName)->default_value("scan.root"),"output root file name") + ; + // clang-format on + + po::options_description cmdline; + cmdline.add(usage); + + po::variables_map vm; + po::store(po::command_line_parser(argc, argv).options(cmdline).run(), vm); + + if (vm.count("help")) { + std::cout << "This program scans HV or LV channels looking for issues\n"; + std::cout << usage << "\n"; + return 2; + } + + try { + po::notify(vm); + } catch (const 
po::error& e) { + std::cout << "error: " << e.what() << "\n"; + exit(1); + } + + if (runList.empty()) { + printf("error: you must provide run(s) to scan\n"); + exit(1); + } + + if (what.empty()) { + printf("error: you must provide channel(s) to scan\n"); + exit(1); + } + + // setup printout and display + const double hvRange[2] = {-10., 1700.}; + const double lvRange[3] = {-1., 4., 8.}; // min, max FeeAnalog/FeeDigital, max Solar + const std::string hvFormat = "%7.2f"; + const std::string lvFormat = "%4.2f"; + gStyle->SetPalette(kVisibleSpectrum); + + // setup algorithms searching for HV issues + conf::ConfigurableParam::updateFromString(config); + conf::ConfigurableParam::setValue("MCHStatusMap.hvMinDuration", std::to_string(minDuration)); + conf::ConfigurableParam::setValue("MCHStatusMap.timeMargin", "0"); // must be 0 to compare O2 with this scan + + // determine what is scanned + std::string path{}; + bool scanHV = false; + bool scanAll = false; + std::set aliases{}; + scanWhat(what, path, scanHV, scanAll, aliases); + + ccdb::CcdbApi api; + api.init("http://alice-ccdb.cern.ch"); + + // get the SOR/EOR of every runs from the list, ordered in run number + auto runBoundaries = getRunBoundaries(api, runList); + if (printLevel > 0) { + printRunBoundaries(runBoundaries); + } + checkRunBoundaries(runBoundaries); + + // extract the time boundaries for each HV/LV file in the full time range + auto dpBoundaries = getDPBoundaries(api, path.c_str(), runBoundaries.begin()->second.first, + runBoundaries.rbegin()->second.second); + if (printLevel > 0) { + printDPBoundaries(dpBoundaries, scanHV); + } + checkDPBoundaries(dpBoundaries, scanHV, runBoundaries.begin()->second.first, + runBoundaries.rbegin()->second.second); + + // loop over the HV/LV files, fill the lists of data points per chamber and find issues using O2 algorithm + DPMAP2 dpsMapsPerCh[10]; + mch::HVStatusCreator hvStatusCreator{}; + ISSUEMAP o2issuesPerCh[10]; + std::map metadata; + for (auto boundaries : 
dpBoundaries) { + + auto* dpMap = api.retrieveFromTFileAny(path.c_str(), metadata, boundaries.first); + + // fill the lists of data points per chamber for requested aliases + for (const auto& [dpid, dps] : *dpMap) { + std::string alias(dpid.get_alias()); + if (!mch::dcs::isValid(alias)) { + printf("error: invalid DCS alias: %s\n", alias.c_str()); + exit(1); + } + if ((scanAll || containsAKey(alias, aliases)) && (!scanHV || alias.find(".iMon") == alias.npos)) { + int chamber = mch::dcs::toInt(mch::dcs::aliasToChamber(alias)); + fillDataPoints(dps, dpsMapsPerCh[chamber][alias], boundaries.first, boundaries.second, warningLevel); + } + } + + // find issues for requested aliases using O2 algorithm (only for HV) + if (scanHV) { + hvStatusCreator.findBadHVs(*dpMap); + for (const auto& [alias, issues] : hvStatusCreator.getBadHVs()) { + if (scanAll || containsAKey(alias, aliases)) { + int chamber = mch::dcs::toInt(mch::dcs::aliasToChamber(alias)); + fillO2Issues(issues, o2issuesPerCh[chamber][alias], boundaries.first, boundaries.second); + } + } + } + } + if (printLevel > 1) { + printf("\nall data points:"); + printDataPoints(dpsMapsPerCh, scanHV ? hvFormat : lvFormat, printLevel > 2); + } + + // select the data points in the time range + selectDataPoints(dpsMapsPerCh, runBoundaries.begin()->second.first, runBoundaries.rbegin()->second.second); + if (printLevel > 1) { + printf("\ndata points in the time range covered by runs:"); + printDataPoints(dpsMapsPerCh, scanHV ? hvFormat : lvFormat, printLevel > 2); + } + + // create and fill the graphs, and find HV/LV issues + ISSUEMAP issuesPerCh[10]; + TMultiGraph* mg[10]; + std::set limits; + for (int ch = 0; ch < 10; ++ch) { + mg[ch] = new TMultiGraph; + mg[ch]->SetNameTitle(fmt::format("ch{}", ch + 1).c_str(), + fmt::format("chamber {};time;{} (V)", ch + 1, scanHV ? "HV" : "LV").c_str()); + for (const auto& [alias, dps] : dpsMapsPerCh[ch]) { + mg[ch]->Add(mapToGraph(alias, dps), "lp"); + auto limit = scanHV ? 
mch::StatusMapCreatorParam::Instance().hvLimits[ch] : getLVLimit(alias); + limits.emplace(limit); + findIssues(dps, limit, issuesPerCh[ch][alias]); + } + } + + // select HV/LV issues of a minimum duration (ms) occurring during runs + selectIssues(issuesPerCh, runBoundaries, minDuration); + selectO2Issues(o2issuesPerCh, runBoundaries); + printIssues(issuesPerCh, o2issuesPerCh, scanHV, scanHV ? hvFormat : lvFormat); + + // display + TCanvas* c[10]; + for (int ch = 0; ch < 10; ++ch) { + if (scanHV) { + c[ch] = drawDataPoints(mg[ch], hvRange[0], hvRange[1]); + drawLimit(mch::StatusMapCreatorParam::Instance().hvLimits[ch], c[ch]); + } else { + auto lvMax = (what.find("LV") != what.npos || what.find("Sol") != what.npos) ? lvRange[2] : lvRange[1]; + c[ch] = drawDataPoints(mg[ch], lvRange[0], lvMax); + for (auto limit : limits) { + drawLimit(limit, c[ch]); + } + } + drawRunBoudaries(runBoundaries, c[ch]); + } + + // save display + TFile dataFile(outFileName.c_str(), "recreate"); + for (int ch = 0; ch < 10; ++ch) { + c[ch]->Write(); + } + dataFile.Close(); + + return 0; +} From 8e0c5cf67c58f931d053d5dd214db1eaaa401110 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 21 Feb 2025 13:57:44 +0100 Subject: [PATCH 0158/1914] DPL Analysis: add test for identification concepts --- Framework/Core/CMakeLists.txt | 1 + Framework/Core/include/Framework/ASoA.h | 7 +- Framework/Core/test/test_Concepts.cxx | 164 ++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 Framework/Core/test/test_Concepts.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index c006a4135557b..7202e2299b7cc 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -200,6 +200,7 @@ add_executable(o2-test-framework-core test/test_CompletionPolicy.cxx test/test_ComputingResourceHelpers.cxx test/test_ComputingQuotaEvaluator.cxx + test/test_Concepts.cxx test/test_ControlServiceHelpers.cxx test/test_ConfigParamStore.cxx 
test/test_ConfigParamRegistry.cxx diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index cb2ff11a8e901..f21decd0d5c45 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -175,6 +175,8 @@ consteval auto intersectOriginals() namespace o2::soa { +struct Binding; + template concept not_void = requires { !std::same_as; }; @@ -192,7 +194,10 @@ template concept is_self_index_column = not_void && std::same_as; template -concept is_index_column = !is_self_index_column && (requires { &C::getId; } || requires { &C::getIds; }); +concept is_index_column = !is_self_index_column && requires(C c, o2::soa::Binding b) { + { c.setCurrentRaw(b) } -> std::same_as; + requires std::same_as; +}; template using is_external_index_t = typename std::conditional_t, std::true_type, std::false_type>; diff --git a/Framework/Core/test/test_Concepts.cxx b/Framework/Core/test/test_Concepts.cxx new file mode 100644 index 0000000000000..00ad931828b44 --- /dev/null +++ b/Framework/Core/test/test_Concepts.cxx @@ -0,0 +1,164 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include +#include "Framework/ASoA.h" +#include "Framework/AnalysisDataModel.h" +#include "Framework/Expressions.h" +#include "Framework/AnalysisHelpers.h" +#include "Framework/AnalysisTask.h" +#include "Framework/Condition.h" +#include "SimulationDataFormat/O2DatabasePDG.h" + +#include + +using namespace o2::framework; +using namespace o2::soa; +using namespace o2; + +struct P { + void process1(aod::Collisions const&) + { + } + + PROCESS_SWITCH(P, process1, "", true); +}; + +TEST_CASE("IdentificationConcepts") +{ + // ASoA + int i; + REQUIRE(not_void); + + REQUIRE(is_persistent_column); + + REQUIRE(is_self_index_column); + + REQUIRE(!is_index_column); + REQUIRE(is_index_column); + REQUIRE(is_index_column); + + REQUIRE(o2::aod::is_aod_hash>); + REQUIRE(o2::aod::is_origin_hash>); + + REQUIRE(has_parent_t); + + REQUIRE(is_metadata); + + REQUIRE(is_metadata_trait>>); + + REQUIRE(has_metadata>>); + + REQUIRE(has_extension>::metadata>); + + REQUIRE(is_spawnable_column); + + REQUIRE(is_indexing_column>); + + REQUIRE(is_dynamic_column>); + + REQUIRE(is_marker_column>); + + REQUIRE(is_column); + REQUIRE(is_column>); + REQUIRE(is_column>); + REQUIRE(is_column>); + + REQUIRE(is_table); + + REQUIRE(is_iterator); + + REQUIRE(with_originals); + + REQUIRE(with_sources>::metadata>); + + REQUIRE(with_base_table); + + REQUIRE(is_index_table); + + Preslice ps = o2::aod::track::collisionId; + REQUIRE(is_preslice); + + REQUIRE(has_filtered_policy::iterator>); + + REQUIRE(is_filtered_iterator::iterator>); + + REQUIRE(is_filtered_table>); + + REQUIRE(is_filtered::iterator>); + REQUIRE(is_filtered>); + + REQUIRE(is_not_filtered_table); + + REQUIRE(is_join); + + auto tl = []() -> SmallGroups { return {std::vector>{}, SelectionVector{}, 0}; }; + REQUIRE(is_smallgroups); + + // AnalysisHelpers + REQUIRE(is_producable); + + Produces prod; + REQUIRE(is_produces); + + struct : ProducesGroup { + Produces p; + } prodg; + REQUIRE(is_produces_group); + + REQUIRE(is_spawnable); + + Spawns spw; + 
REQUIRE(is_spawns); + + Builds bld; + REQUIRE(is_builds); + + OutputObj oo{"test"}; + REQUIRE(is_outputobj); + + Service srv; + REQUIRE(is_service); + + Partition part = o2::aod::track::collisionId >= 0; + REQUIRE(is_partition); + + // AnalysisTask + Enumeration<0, 1> en; + REQUIRE(is_enumeration); + + // Condition + Condition c{""}; + REQUIRE(is_condition); + + struct : ConditionGroup { + Condition c{""}; + } cg; + REQUIRE(is_condition_group); + + // Configurable + Configurable cc{"", 1, ""}; + REQUIRE(is_configurable); + + ConfigurableAxis ca{"", {0, 1, 2, 3}, ""}; + REQUIRE(is_configurable_axis); + + REQUIRE(is_process_configurable); + + struct : ConfigurableGroup { + Configurable c{"", 1, ""}; + } ccg; + REQUIRE(is_configurable_group); + + // Expressions + expressions::Filter f = o2::aod::track::pt > 1.0f; + REQUIRE(expressions::is_filter); +} From 47ce5171abaa392163d944cd0423cc3d1fbe2144 Mon Sep 17 00:00:00 2001 From: wiechula Date: Fri, 7 Mar 2025 13:25:15 +0100 Subject: [PATCH 0159/1914] GPU: remove unnecessary check --- GPU/GPUTracking/Global/GPUChainTracking.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index a63886b93ccf9..0e1cde343135e 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -298,8 +298,8 @@ bool GPUChainTracking::ValidateSettings() GPUError("Invalid tpcCompressionGatherMode for compression on CPU"); return false; } - if (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding || GetProcessingSettings().delayedOutput || GetProcessingSettings().runMC)) { - GPUError("tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with delayedOutput for GPU or with MC labels"); + if (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0 && (GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding 
|| GetProcessingSettings().runMC)) { + GPUError("tpcApplyClusterFilterOnCPU cannot be used with GPU clusterization or with MC labels"); return false; } if (GetRecoSteps() & RecoStep::TRDTracking) { From b0476bbaaa58005dbd0f4c136c06dd8f0a397e85 Mon Sep 17 00:00:00 2001 From: Robert Muenzer Date: Wed, 5 Mar 2025 13:33:00 +0100 Subject: [PATCH 0160/1914] Decrease SAC writeout rate by 10 --- prodtests/full-system-test/aggregator-workflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prodtests/full-system-test/aggregator-workflow.sh b/prodtests/full-system-test/aggregator-workflow.sh index 23336cafffab8..90abe9786f89e 100755 --- a/prodtests/full-system-test/aggregator-workflow.sh +++ b/prodtests/full-system-test/aggregator-workflow.sh @@ -298,7 +298,7 @@ crus="0-359" # to be used with $AGGREGATOR_TASKS == TPC_IDCBOTH_SAC or ALL lanesFactorize=${O2_TPC_IDC_FACTORIZE_NLANES:-12} threadFactorize=${O2_TPC_IDC_FACTORIZE_NTHREADS:-16} nTFs=$((1000 * 128 / ${NHBPERTF})) -nTFs_SAC=$((1000 * 128 / ${NHBPERTF})) +nTFs_SAC=$((10000 * 128 / ${NHBPERTF})) nBuffer=$((100 * 128 / ${NHBPERTF})) IDC_DELTA="--disable-IDCDelta true" # off by default # deltas are on by default; you need to request explicitly to switch them off; From adea3ba09468dfddb1f579e2172d3885bc97b92f Mon Sep 17 00:00:00 2001 From: Andreas Molander Date: Tue, 18 Feb 2025 14:16:31 +0200 Subject: [PATCH 0161/1914] Update CODEOWNERS for FIT Sahil is the new FIT software coordinator. Maciej and Arvind are no longer in the team. 
--- CODEOWNERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 92999185d6f31..3f6f4a9e42600 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -29,7 +29,7 @@ /DataFormats/Detectors/CPV @peressounko @kharlov /DataFormats/Detectors/CTP @lietava /DataFormats/Detectors/EMCAL @mfasDa @jokonig -/DataFormats/Detectors/FIT @jotwinow @afurs @andreasmolander @arvindkhuntia @mslupeck +/DataFormats/Detectors/FIT @jotwinow @afurs @andreasmolander @sahilupadhyaya92 /DataFormats/Detectors/FOCAL @maxrauch @mfasDa @iarsene @matthiasrichter /DataFormats/Detectors/GlobalTracking @shahor02 /DataFormats/Detectors/GlobalTrackingWorkflow @shahor02 @@ -59,7 +59,7 @@ /Detectors/Calibration @chiarazampolli @shahor02 /Detectors/CPV @peressounko @kharlov /Detectors/EMCAL @mfasDa @jokonig -/Detectors/FIT @jotwinow @afurs @andreasmolander @arvindkhuntia @mslupeck +/Detectors/FIT @jotwinow @afurs @andreasmolander @sahilupadhyaya92 /Detectors/FOCAL @maxrauch @mfasDa @iarsene @matthiasrichter /Detectors/Geometry @sawenzel @shahor02 /Detectors/GlobalTracking @shahor02 From 699feecb5261ff63bee7358c53f3505fd789c363 Mon Sep 17 00:00:00 2001 From: pillot Date: Fri, 7 Mar 2025 11:48:32 +0100 Subject: [PATCH 0162/1914] add protection against missing object --- .../MCH/Conditions/src/bad-channels-ccdb.cxx | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx index d355a209329ca..04614d8ccd34d 100644 --- a/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/bad-channels-ccdb.cxx @@ -100,7 +100,12 @@ std::set listTSWhenBadChannelsChange(const std::string ccdbUrl, const std::string currentETag{}; for (auto itTS = tsChanges.begin(); itTS != tsChanges.end();) { auto headers = api.retrieveHeaders(source, metadata, *itTS); - if (headers["ETag"] == currentETag) { + if 
(headers["ETag"].empty()) { + std::cout << "- Warning: missing file" << std::endl; + auto validUntil = (std::next(itTS) != tsChanges.end()) ? *std::next(itTS) : endTimestamp; + std::cout << fmt::format(" validity range: {} - {}\n", *itTS, validUntil); + ++itTS; + } else if (headers["ETag"] == currentETag) { itTS = tsChanges.erase(itTS); } else { if (verbose) { @@ -129,6 +134,19 @@ BadChannelsVector queryBadChannels(const std::string ccdbUrl, std::map metadata; auto source = ccdbPath(badChannelType); auto* badChannels = api.retrieveFromTFileAny(source, metadata, timestamp); + if (badChannels == nullptr) { + std::cout << "do you want to proceed anyway? [y/n] "; + std::string response{}; + while (true) { + std::cin >> response; + if (response == "y") { + std::cout << "number of bad channels = 0" << std::endl; + return {}; + } else if (response == "n") { + exit(3); + } + } + } std::cout << "number of bad channels = " << badChannels->size() << std::endl; if (verbose) { for (const auto& badChannel : *badChannels) { From f34afc2da0528888b44d015a0306ef47d62e41b2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 7 Mar 2025 17:21:54 +0100 Subject: [PATCH 0163/1914] GPU: Bump required LLVM version for OpenCL --- dependencies/FindO2GPU.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index c5d53d6359ada..57c820fbe86b1 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -175,7 +175,7 @@ if(ENABLE_OPENCL) if(Clang_FOUND AND LLVM_FOUND AND NOT LLVM_CLANG STREQUAL "LLVM_CLANG-NOTFOUND" - AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0) + AND LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 18.0) set(OPENCL_COMPATIBLE_CLANG_FOUND ON) endif() if(OpenCL_VERSION_STRING VERSION_GREATER_EQUAL 2.2 From 9e2d4c5f14311d41e80325fc373fad916b66f3c4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 7 Mar 2025 17:22:01 +0100 Subject: [PATCH 0164/1914] GPU: Simplify and cleanup code 
--- .../Global/GPUChainTrackingSectorTracker.cxx | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index df7c513fc1120..dd7fe285265ad 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -93,6 +93,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } bool streamInit[GPUCA_MAX_STREAMS] = {false}; + int32_t streamInitAndOccMap = mRec->NStreams() - 1; + if (doGPU) { for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); @@ -113,18 +115,12 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() return 2; } - WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); + WriteToConstantMemory(RecoStep::TPCSectorTracking, (char*)processors()->tpcTrackers - (char*)processors(), processorsShadow()->tpcTrackers, sizeof(GPUTPCTracker) * NSECTORS, streamInitAndOccMap, &mEvents->init); - for (int32_t i = 0; i < mRec->NStreams() - 1; i++) { - streamInit[i] = false; - } - streamInit[mRec->NStreams() - 1] = true; - } - if (GPUDebug("Initialization (1)", 0)) { - return (2); + std::fill(streamInit, streamInit + mRec->NStreams(), false); + streamInit[streamInitAndOccMap] = true; } - int32_t streamOccMap = mRec->NStreams() - 1; if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]); } @@ -134,21 +130,21 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } uint32_t* ptr = doGPU ? 
mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap; auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU); - runKernel(GetGridAutoStep(streamOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); - runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap), ptrTmp); - runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), ptrTmp, ptr + 2); + runKernel(GetGridAutoStep(streamInitAndOccMap, RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param())); + runKernel(GetGridBlk(GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamInitAndOccMap), ptrTmp); + runKernel(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamInitAndOccMap), ptrTmp, ptr + 2); mRec->ReturnVolatileMemory(); mInputsHost->mTPCClusterOccupancyMap[1] = param().rec.tpc.occupancyMapTimeBins * 0x10000 + param().rec.tpc.occupancyMapTimeBinsAverage; if (doGPU) { - GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamOccMap, false, &mEvents->init); + GPUMemCpy(RecoStep::TPCSectorTracking, mInputsHost->mTPCClusterOccupancyMap + 2, mInputsShadow->mTPCClusterOccupancyMap + 2, sizeof(*ptr) * GPUTPCClusterOccupancyMapBin::getNBins(mRec->GetParam()), streamInitAndOccMap, false, &mEvents->init); } else { - TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init); + TransferMemoryResourceLinkToGPU(RecoStep::TPCSectorTracking, mInputsHost->mResourceOccupancyMap, streamInitAndOccMap, &mEvents->init); } } if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; occupancyTotal = 
CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamOccMap); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); } int32_t streamMap[NSECTORS]; @@ -190,19 +186,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } } - // Initialize temporary memory where needed - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Copying Sector Data to GPU and initializing temporary memory"); - } runKernel(GetGridAutoStep(useStream, RecoStep::TPCSectorTracking), trkShadow.Data().HitWeights(), trkShadow.Data().NumberOfHitsPlusAlign() * sizeof(*trkShadow.Data().HitWeights())); - - if (!doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory - } - if (GPUDebug("Initialization (3)", useStream)) { - throw std::runtime_error("memcpy failure"); - } - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); streamInit[useStream] = true; From 5756ea1683c23dddb348ef70ac307812d47be548 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Feb 2025 09:23:58 +0100 Subject: [PATCH 0165/1914] DPL: cleanup state switching - Remove duplicate helper - Add signposts to mark streaming states transitions - Notify driver --- Framework/Core/src/DataProcessingDevice.cxx | 44 ++++++++++----------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index 8a3fbbcf5b2f1..e8676995772e6 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -141,6 +141,17 @@ void on_transition_requested_expired(uv_timer_t* handle) state.transitionHandling = TransitionHandlingState::Expired; } +auto switchState(ServiceRegistryRef& ref, StreamingState newState) -> void +{ + auto& state = ref.get(); + auto& context = ref.get(); + O2_SIGNPOST_ID_FROM_POINTER(dpid, device, &context); + O2_SIGNPOST_END(device, dpid, "state", "End of processing state %d", (int)state.streaming); + O2_SIGNPOST_START(device, dpid, "state", "Starting processing state %d", (int)newState); + state.streaming = newState; + ref.get().notifyStreamingState(state.streaming); +}; + void on_data_processing_expired(uv_timer_t* handle) { auto* ref = (ServiceRegistryRef*)handle->data; @@ -1236,7 +1247,7 @@ void DataProcessingDevice::PreRun() O2_SIGNPOST_ID_FROM_POINTER(cid, device, state.loop); O2_SIGNPOST_START(device, cid, "PreRun", "Entering PreRun callback."); state.quitRequested = false; - state.streaming = StreamingState::Streaming; + switchState(ref, StreamingState::Streaming); state.allowedProcessing = DeviceState::Any; for (auto& info : state.inputChannelInfos) { if (info.state != InputChannelState::Pull) { @@ -1365,10 +1376,10 @@ void DataProcessingDevice::Run() // Check if we only have timers auto& spec = 
ref.get(); if (hasOnlyTimers(spec)) { - state.streaming = StreamingState::EndOfStreaming; + switchState(ref, StreamingState::EndOfStreaming); } - // If this is a source device, dataTransitionTimeout and dataProcessingTimeout are effectively + // If this is a source device, exitTransitionTimeout and dataProcessingTimeout are effectively // the same (because source devices are not allowed to produce any calibration). // should be the same. if (hasOnlyGenerated(spec) && deviceContext.dataProcessingTimeout > 0) { @@ -1385,7 +1396,8 @@ void DataProcessingDevice::Run() state.transitionHandling = TransitionHandlingState::Requested; ref.get().call(ServiceRegistryRef{ref}); uv_update_time(state.loop); - O2_SIGNPOST_EVENT_EMIT(calibration, lid, "timer_setup", "Starting %d s timer for exitTransitionTimeout.", deviceContext.exitTransitionTimeout); + O2_SIGNPOST_EVENT_EMIT(calibration, lid, "timer_setup", "Starting %d s timer for exitTransitionTimeout.", + deviceContext.exitTransitionTimeout); uv_timer_start(deviceContext.gracePeriodTimer, on_transition_requested_expired, deviceContext.exitTransitionTimeout * 1000, 0); if (mProcessingPolicies.termination == TerminationPolicy::QUIT) { O2_SIGNPOST_EVENT_EMIT_INFO(device, lid, "run_loop", "New state requested. 
Waiting for %d seconds before quitting.", (int)deviceContext.exitTransitionTimeout); @@ -1728,15 +1740,6 @@ void DataProcessingDevice::doRun(ServiceRegistryRef ref) { auto& context = ref.get(); O2_SIGNPOST_ID_FROM_POINTER(dpid, device, &context); - auto switchState = [ref](StreamingState newState) { - auto& state = ref.get(); - auto& context = ref.get(); - O2_SIGNPOST_ID_FROM_POINTER(dpid, device, &context); - O2_SIGNPOST_END(device, dpid, "state", "End of processing state %d", (int)state.streaming); - O2_SIGNPOST_START(device, dpid, "state", "Starting processing state %d", (int)newState); - state.streaming = newState; - ref.get().notifyStreamingState(state.streaming); - }; auto& state = ref.get(); auto& spec = ref.get(); @@ -1772,7 +1775,7 @@ void DataProcessingDevice::doRun(ServiceRegistryRef ref) // dependent on the callback, not something which is controlled by the // framework itself. if (context.allDone == true && state.streaming == StreamingState::Streaming) { - switchState(StreamingState::EndOfStreaming); + switchState(ref, StreamingState::EndOfStreaming); state.lastActiveDataProcessor = &context; } @@ -1818,7 +1821,7 @@ void DataProcessingDevice::doRun(ServiceRegistryRef ref) } // This is needed because the transport is deleted before the device. 
relayer.clear(); - switchState(StreamingState::Idle); + switchState(ref, StreamingState::Idle); // In case we should process, note the data processor responsible for it if (shouldProcess) { state.lastActiveDataProcessor = &context; @@ -2328,13 +2331,6 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v } }; - auto switchState = [ref](StreamingState newState) { - auto& control = ref.get(); - auto& state = ref.get(); - state.streaming = newState; - control.notifyStreamingState(state.streaming); - }; - ref.get().getReadyToProcess(completed); if (completed.empty() == true) { LOGP(debug, "No computations available for dispatching."); @@ -2510,7 +2506,7 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v O2_SIGNPOST_EVENT_EMIT(device, pcid, "device", "Skipping processing because we are discarding."); } else { O2_SIGNPOST_EVENT_EMIT(device, pcid, "device", "No processing callback provided. Switching to %{public}s.", "Idle"); - state.streaming = StreamingState::Idle; + switchState(ref, StreamingState::Idle); } if (shouldProcess(action)) { auto& timingInfo = ref.get(); @@ -2598,7 +2594,7 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v for (auto& channel : spec.outputChannels) { DataProcessingHelpers::sendEndOfStream(ref, channel); } - switchState(StreamingState::Idle); + switchState(ref, StreamingState::Idle); } return true; From 80faf100ebd930094d01ea63e0ffdbd3d64b2c67 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Feb 2025 09:28:46 +0100 Subject: [PATCH 0166/1914] DPL: correctly handle data-processing-timeouts in sources --- Framework/Core/src/DataProcessingDevice.cxx | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index e8676995772e6..7f42805cfdb1e 100644 --- 
a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -156,16 +156,19 @@ void on_data_processing_expired(uv_timer_t* handle) { auto* ref = (ServiceRegistryRef*)handle->data; auto& state = ref->get(); + auto& spec = ref->get(); state.loopReason |= DeviceState::TIMER_EXPIRED; // Check if this is a source device O2_SIGNPOST_ID_FROM_POINTER(cid, device, handle); - // Source devices should never end up in this callback, since the exitTransitionTimeout should - // be reset to the dataProcessingTimeout and the timers cohalesced. - assert(hasOnlyGenerated(ref->get()) == false); - O2_SIGNPOST_EVENT_EMIT_INFO(calibration, cid, "callback", "Grace period for data processing expired. Only calibrations from this point onwards."); - state.allowedProcessing = DeviceState::CalibrationOnly; + if (hasOnlyGenerated(spec)) { + O2_SIGNPOST_EVENT_EMIT_INFO(calibration, cid, "callback", "Grace period for data processing expired. Switching to EndOfStreaming."); + switchState(*ref, StreamingState::EndOfStreaming); + } else { + O2_SIGNPOST_EVENT_EMIT_INFO(calibration, cid, "callback", "Grace period for data processing expired. Only calibrations from this point onwards."); + state.allowedProcessing = DeviceState::CalibrationOnly; + } } void on_communication_requested(uv_async_t* s) @@ -1379,13 +1382,6 @@ void DataProcessingDevice::Run() switchState(ref, StreamingState::EndOfStreaming); } - // If this is a source device, exitTransitionTimeout and dataProcessingTimeout are effectively - // the same (because source devices are not allowed to produce any calibration). - // should be the same. 
- if (hasOnlyGenerated(spec) && deviceContext.dataProcessingTimeout > 0) { - deviceContext.exitTransitionTimeout = deviceContext.dataProcessingTimeout; - } - // We do not do anything in particular if the data processing timeout would go past the exitTransitionTimeout if (deviceContext.dataProcessingTimeout > 0 && deviceContext.dataProcessingTimeout < deviceContext.exitTransitionTimeout) { uv_update_time(state.loop); From 3961e74342b312adddee96dcdfa2cf8eb8928018 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Feb 2025 10:36:24 +0100 Subject: [PATCH 0167/1914] DPL: drop messages explicitly Somehow if they remain in the queue they confuse the oldest possible timeframe mechanism. --- Framework/Core/src/DataRelayer.cxx | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/DataRelayer.cxx b/Framework/Core/src/DataRelayer.cxx index c2ae459aace38..385d9a6c50c4a 100644 --- a/Framework/Core/src/DataRelayer.cxx +++ b/Framework/Core/src/DataRelayer.cxx @@ -17,6 +17,7 @@ #include "Framework/DataDescriptorMatcher.h" #include "Framework/DataSpecUtils.h" #include "Framework/DataProcessingHeader.h" +#include "Framework/DataProcessingContext.h" #include "Framework/DataRef.h" #include "Framework/InputRecord.h" #include "Framework/InputSpan.h" @@ -46,7 +47,6 @@ #include #include #include -#include #include using namespace o2::framework::data_matcher; @@ -55,6 +55,8 @@ using DataProcessingHeader = o2::framework::DataProcessingHeader; using Verbosity = o2::monitoring::Verbosity; O2_DECLARE_DYNAMIC_LOG(data_relayer); +// Stream which keeps track of the calibration lifetime logic +O2_DECLARE_DYNAMIC_LOG(calibration); namespace o2::framework { @@ -480,6 +482,13 @@ DataRelayer::RelayChoice // We are in calibration mode and the data does not have the calibration bit set. // We do not store it.
if (services.get().allowedProcessing == DeviceState::ProcessingType::CalibrationOnly && !isCalibrationData(messages[mi])) { + O2_SIGNPOST_ID_FROM_POINTER(cid, calibration, &services.get()); + O2_SIGNPOST_EVENT_EMIT(calibration, cid, "calibration", + "Dropping incoming %zu messages because they are data processing.", nPayloads); + // Actually dropping messages. + for (size_t i = mi; i < mi + nPayloads + 1; i++) { + auto discard = std::move(messages[i]); + } mi += nPayloads; continue; } From 0cdfe91a3e879d32ab246e83556ed852d84911cb Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sun, 9 Mar 2025 16:11:47 +0100 Subject: [PATCH 0168/1914] DPL: add dumping of memory profile in strategic location --- .../AnalysisSupport/src/DataInputDirector.cxx | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 981ca5254980d..2c51360cd9923 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -15,6 +15,7 @@ #include "Framework/RootArrowFilesystem.h" #include "Framework/AnalysisDataModelHelpers.h" #include "Framework/Output.h" +#include "Framework/Signpost.h" #include "Headers/DataHeader.h" #include "Framework/TableTreeHelpers.h" #include "Monitoring/Tags.h" @@ -41,6 +42,9 @@ #include #endif +#include +O2_DECLARE_DYNAMIC_LOG(reader_memory_dump); + namespace o2::framework { using namespace rapidjson; @@ -458,6 +462,17 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh mIOTime += (uv_hrtime() - ioStart); + O2_SIGNPOST_ACTION(reader_memory_dump, [](void*) { + void (*dump_)(const char*); + if (void* sym = dlsym(nullptr, "igprof_dump_now")) { + dump_ = __extension__(void (*)(const char*)) sym; + if (dump_) { + std::string filename = fmt::format("reader-memory-dump-{}.gz", uv_hrtime()); + dump_(filename.c_str()); + } + } + 
}); + return true; } From f6c7f18db2818535564090df92cbd0a84828f892 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 12 Feb 2025 16:31:38 +0100 Subject: [PATCH 0169/1914] Remove CodeCov badge --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 3dc6061a39351..8715613a0eb5c 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,6 @@ -[![codecov](https://codecov.io/gh/AliceO2Group/AliceO2/branch/dev/graph/badge.svg)](https://codecov.io/gh/AliceO2Group/AliceO2/branches/dev) [![JIRA](https://img.shields.io/badge/JIRA-Report%20issue-blue.svg)](https://alice.its.cern.ch/jira/secure/CreateIssue.jspa?pid=11201&issuetype=1) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1493334.svg)](https://doi.org/10.5281/zenodo.1493334) From 88d8400ba8398f7d06d531bbad888e25898675a1 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 12 Feb 2025 16:35:21 +0100 Subject: [PATCH 0170/1914] Fix Format --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8715613a0eb5c..5bdbc4a801e5e 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,8 @@ Rules and instructions are available in the repository ### Enable C++ compiler warnings -Currently O2 is built with minimal compiler warnings enabled. This is going to change in the near future. In the transition period, developers have to manualy enable warnings by building O2 with `ALIBUILD_O2_WARNINGS` environment variable set e.g. using the `-e` option of `alibuild` e.g: -```bash +Currently O2 is built with minimal compiler warnings enabled. This is going to change in the near future. In the transition period, developers have to manualy enable warnings by building O2 with `ALIBUILD_O2_WARNINGS` environment variable set e.g. 
using the `-e` option of `alibuild` e.g: +```bash aliBuild build --debug -e ALIBUILD_O2_WARNINGS=1 --defaults o2 O2 -``` +``` A helper script that extracts warnings from the build log skipping duplicates is available [here](https://github.com/AliceO2Group/AliceO2/blob/dev/scripts/filter-warnings.sh) From 8290f89678f78df47310aabb749f2b5157138a62 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 8 Mar 2025 20:52:27 +0100 Subject: [PATCH 0171/1914] GPU: Fix compile warning from shadowed variable, disentangle filter types --- .../Global/GPUChainTrackingCompression.cxx | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 1b08de21abd0f..57a759a58924e 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -201,8 +201,8 @@ int32_t GPUChainTracking::RunTPCCompression() int32_t GPUChainTracking::RunTPCDecompression() { - const bool runFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); - if (runFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { + const bool runFullFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); + if (runFullFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding"); } @@ -219,13 +219,13 @@ int32_t GPUChainTracking::RunTPCDecompression() return ((tmpBuffer = std::make_unique(size))).get(); }; auto& decompressTimer = getTimer("TPCDecompression", 0); - auto allocatorUse = runFiltering ? std::function{allocatorTmp} : std::function{allocatorFinal}; + auto allocatorUse = runFullFiltering ? 
std::function{allocatorTmp} : std::function{allocatorFinal}; decompressTimer.Start(); if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) { GPUError("Error decompressing clusters"); return 1; } - if (runFiltering) { + if (runFullFiltering) { RunTPCClusterFilter(mClusterNativeAccess.get(), allocatorFinal, GetProcessingSettings().tpcApplyCFCutsAtDecoding); } decompressTimer.Stop(); @@ -245,7 +245,7 @@ int32_t GPUChainTracking::RunTPCDecompression() mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR")); RecoStep myStep = RecoStep::TPCDecompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCDecompression; - bool runFiltering = param().tpcCutTimeBin > 0; + bool runTimeBinCutFiltering = param().tpcCutTimeBin > 0; GPUTPCDecompression& Decompressor = processors()->tpcDecompressor; GPUTPCDecompression& DecompressorShadow = doGPU ? processorsShadow()->tpcDecompressor : Decompressor; const auto& threadContext = GetThreadContext(); @@ -325,7 +325,7 @@ int32_t GPUChainTracking::RunTPCDecompression() if (decodedAttachedClusters != cmprClsHost.nAttachedClusters) { GPUWarning("%u / %u clusters failed track model decoding (%f %%)", cmprClsHost.nAttachedClusters - decodedAttachedClusters, cmprClsHost.nAttachedClusters, 100.f * (float)(cmprClsHost.nAttachedClusters - decodedAttachedClusters) / (float)cmprClsHost.nAttachedClusters); } - if (runFiltering) { // If filtering, allocate a temporary buffer and cluster native access in decompressor context + if (runTimeBinCutFiltering) { // If filtering, allocate a temporary buffer and cluster native access in decompressor context Decompressor.mNClusterNativeBeforeFiltering = DecompressorShadow.mNClusterNativeBeforeFiltering = decodedAttachedClusters + cmprClsHost.nUnattachedClusters; AllocateRegisteredMemory(Decompressor.mResourceTmpBufferBeforeFiltering); AllocateRegisteredMemory(Decompressor.mResourceClusterNativeAccess); @@ -362,13 
+362,13 @@ int32_t GPUChainTracking::RunTPCDecompression() int32_t iStream = (iSector / batchSize) % mRec->NStreams(); runKernel({GetGridAuto(iStream), krnlRunRangeNone, {nullptr, &mEvents->single}}, iSector, batchSize); uint32_t copySize = std::accumulate(mClusterNativeAccess->nClustersSector + iSector, mClusterNativeAccess->nClustersSector + iSector + batchSize, 0u); - if (!runFiltering) { + if (!runTimeBinCutFiltering) { GPUMemCpy(RecoStep::TPCDecompression, mInputsHost->mPclusterNativeOutput + mClusterNativeAccess->clusterOffset[iSector][0], DecompressorShadow.mNativeClustersBuffer + mClusterNativeAccess->clusterOffset[iSector][0], sizeof(Decompressor.mNativeClustersBuffer[0]) * copySize, iStream, false); } } SynchronizeGPU(); - if (runFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final buffers accordingly + if (runTimeBinCutFiltering) { // If filtering is applied, count how many clusters will remain after filtering and allocate final buffers accordingly AllocateRegisteredMemory(Decompressor.mResourceNClusterPerSectorRow); WriteToConstantMemory(myStep, (char*)&processors()->tpcDecompressor - (char*)processors(), &DecompressorShadow, sizeof(DecompressorShadow), unattachedStream); runKernel({GetGridAutoStep(unattachedStream, RecoStep::TPCDecompression), krnlRunRangeNone}, DecompressorShadow.mNClusterPerSectorRow, NSECTORS * GPUCA_ROW_COUNT * sizeof(DecompressorShadow.mNClusterPerSectorRow[0])); From dd2d2aa66ec6b004806d190876392df908473e81 Mon Sep 17 00:00:00 2001 From: TrifleMichael <61475109+TrifleMichael@users.noreply.github.com> Date: Mon, 10 Mar 2025 11:19:57 +0100 Subject: [PATCH 0172/1914] CCDBApi: Fix CCDBDownloader redirect errors (#14029) * Fixing CcdbDownloader redirects This commit addresses: - Not following available redirects after receiving 4xx http code. - Not following all redirects provided via "Location" header. - Not following redirects after failing alien:/ or file:/ retrieval. 
- Improper fail-check in CcdbApi::loadLocalContentToMemory. - The headers holding etags and content-type from multiple locations. * Removing whitespaces --- CCDB/include/CCDB/CCDBDownloader.h | 8 +++-- CCDB/src/CCDBDownloader.cxx | 53 +++++++++++++++++++++--------- CCDB/src/CcdbApi.cxx | 37 ++++++++++++++++++--- 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/CCDB/include/CCDB/CCDBDownloader.h b/CCDB/include/CCDB/CCDBDownloader.h index 0bda186e308c6..6c057a537a096 100644 --- a/CCDB/include/CCDB/CCDBDownloader.h +++ b/CCDB/include/CCDB/CCDBDownloader.h @@ -47,6 +47,7 @@ struct HeaderObjectPair_t { typedef struct DownloaderRequestData { std::vector hosts; + std::vector locations; std::string path; long timestamp; HeaderObjectPair_t hoPair; @@ -231,12 +232,13 @@ class CCDBDownloader std::string prepareRedirectedURL(std::string address, std::string potentialHost) const; /** - * Returns a vector of possible content locations based on the redirect headers. + * Updates the locations vector with the the locations. * - * @param baseUrl Content path. * @param headerMap Map containing response headers. + * @param locations Location list to be updated. + * @param locIndex Index of the next locaiton to be tried. 
*/ - std::vector getLocations(std::multimap* headerMap) const; + void updateLocations(std::multimap* headerMap, std::vector* locations, int* locIndex) const; std::string mUserAgentId = "CCDBDownloader"; /** diff --git a/CCDB/src/CCDBDownloader.cxx b/CCDB/src/CCDBDownloader.cxx index 3fca3c8cc2ae6..2f033a50b36e7 100644 --- a/CCDB/src/CCDBDownloader.cxx +++ b/CCDB/src/CCDBDownloader.cxx @@ -362,7 +362,7 @@ void CCDBDownloader::tryNewHost(PerformData* performData, CURL* easy_handle) { auto requestData = performData->requestData; std::string newUrl = requestData->hosts.at(performData->hostInd) + "/" + requestData->path + "/" + std::to_string(requestData->timestamp); - LOG(debug) << "Connecting to another host " << newUrl; + LOG(debug) << "Connecting to another host " << newUrl << "\n"; requestData->hoPair.header.clear(); curl_easy_setopt(easy_handle, CURLOPT_URL, newUrl.c_str()); mHandlesToBeAdded.push_back(easy_handle); @@ -374,9 +374,11 @@ void CCDBDownloader::getLocalContent(PerformData* performData, std::string& newL LOG(debug) << "Redirecting to local content " << newLocation << "\n"; if (requestData->localContentCallback(newLocation)) { contentRetrieved = true; + LOG(debug) << "Local content retrieved succesfully: " << newLocation << " n"; } else { // Prepare next redirect url newLocation = getNewLocation(performData, locations); + LOG(debug) << "Failed to retrieve local content: " << newLocation << "\n"; } } @@ -396,7 +398,7 @@ std::string CCDBDownloader::getNewLocation(PerformData* performData, std::vector void CCDBDownloader::httpRedirect(PerformData* performData, std::string& newLocation, CURL* easy_handle) { auto requestData = performData->requestData; - LOG(debug) << "Trying content location " << newLocation; + LOG(debug) << "Trying content location " << newLocation << "\n"; curl_easy_setopt(easy_handle, CURLOPT_URL, newLocation.c_str()); mHandlesToBeAdded.push_back(easy_handle); } @@ -404,7 +406,7 @@ void CCDBDownloader::httpRedirect(PerformData* 
performData, std::string& newLoca void CCDBDownloader::followRedirect(PerformData* performData, CURL* easy_handle, std::vector& locations, bool& rescheduled, bool& contentRetrieved) { std::string newLocation = getNewLocation(performData, locations); - if (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos) { + while (!contentRetrieved && (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos)) { getLocalContent(performData, newLocation, contentRetrieved, locations); } if (!contentRetrieved && newLocation != "") { @@ -508,8 +510,8 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) std::string currentHost = requestData->hosts[performData->hostInd]; std::string loggingMessage = prepareLogMessage(currentHost, requestData->userAgent, requestData->path, requestData->timestamp, requestData->headers, httpCode); - // Get alternative locations for the same host - auto locations = getLocations(&(requestData->hoPair.header)); + // Get new locations based on received headers + updateLocations(&(requestData->hoPair.header), &requestData->locations, &performData->locInd); // React to received http code if (200 <= httpCode && httpCode < 400) { @@ -517,8 +519,8 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) if (304 == httpCode) { LOGP(debug, "Object exists but I am not serving it since it's already in your possession"); contentRetrieved = true; - } else if (300 <= httpCode && httpCode < 400 && performData->locInd < locations.size()) { - followRedirect(performData, easy_handle, locations, rescheduled, contentRetrieved); + } else if (300 <= httpCode && httpCode < 400 && performData->locInd < requestData->locations.size()) { + followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved); } else if (200 <= httpCode && httpCode < 300) { contentRetrieved = true; // Can be overruled by 
following error check } @@ -531,8 +533,16 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode) contentRetrieved = false; } - // Check if content was retrieved, or scheduled to be retrieved - if (!rescheduled && !contentRetrieved && performData->locInd == locations.size()) { + // Check if content was retrieved or scheduled to be retrieved + if (!rescheduled && !contentRetrieved) { + // Current location failed without providing 3xx http code, try next redirect for the same host + if (performData->locInd < requestData->locations.size()) { + followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved); + } + } + + // Check again because content might have been retrieved or rescheduled via a redirect + if (!rescheduled && !contentRetrieved) { // Ran out of locations to redirect, try new host if (++performData->hostInd < requestData->hosts.size()) { tryNewHost(performData, easy_handle); @@ -650,24 +660,37 @@ CURLcode CCDBDownloader::perform(CURL* handle) return batchBlockingPerform(handleVector).back(); } -std::vector CCDBDownloader::getLocations(std::multimap* headerMap) const +void CCDBDownloader::updateLocations(std::multimap* headerMap, std::vector* locations, int* locIndex) const { - std::vector locs; + std::vector newLocations; + auto iter = headerMap->find("Location"); if (iter != headerMap->end()) { - locs.push_back(iter->second); + auto range = headerMap->equal_range("Location"); + for (auto it = range.first; it != range.second; ++it) { + if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) { + if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) { + newLocations.push_back(it->second); + } + } + } } + // add alternative locations (not yet included) auto iter2 = headerMap->find("Content-Location"); if (iter2 != headerMap->end()) { auto range = headerMap->equal_range("Content-Location"); for (auto it = range.first; it != range.second; 
++it) { - if (std::find(locs.begin(), locs.end(), it->second) == locs.end()) { - locs.push_back(it->second); + if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) { + if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) { + newLocations.push_back(it->second); + } } } } - return locs; + + // Insert location list at the current location index. This assures that the provided locations will be tried first. + locations->insert(locations->begin() + (*locIndex), newLocations.begin(), newLocations.end()); } std::vector CCDBDownloader::batchBlockingPerform(std::vector const& handleVector) diff --git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index df05d393100d6..2906438211c65 100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -667,6 +667,23 @@ size_t header_map_callback(char* buffer, size_t size, size_t nitems, void* userd } } } + + // Keep only the first ETag encountered + if (key == "ETag") { + auto cl = headers->find("ETag"); + if (cl != headers->end()) { + insert = false; + } + } + + // Keep only the first Content-Type encountered + if (key == "Content-Type") { + auto cl = headers->find("Content-Type"); + if (cl != headers->end()) { + insert = false; + } + } + if (insert) { headers->insert(std::make_pair(key, value)); } @@ -1971,14 +1988,26 @@ void CcdbApi::vectoredLoadFileToMemory(std::vector& requestConte bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& url) const { if (url.find("alien:/", 0) != std::string::npos) { - loadFileToMemory(dest, url, nullptr); // headers loaded from the file in case of the snapshot reading only - return true; + std::map localHeaders; + loadFileToMemory(dest, url, &localHeaders); + auto it = localHeaders.find("Error"); + if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { + return false; + } else { + return true; + } } if ((url.find("file:/", 0) != std::string::npos)) { std::string path = 
url.substr(7); if (std::filesystem::exists(path)) { - loadFileToMemory(dest, path, nullptr); - return true; + std::map localHeaders; + loadFileToMemory(dest, url, &localHeaders); + auto it = localHeaders.find("Error"); + if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { + return false; + } else { + return true; + } } } return false; From 622bcca8fa0fa2461a67462eca8f497900110e15 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 10 Mar 2025 10:12:29 +0100 Subject: [PATCH 0173/1914] DPL: drop obsolete TreeToTable code Now using the arrow::Dataset API. --- Framework/Core/CMakeLists.txt | 2 - .../Core/include/Framework/DataAllocator.h | 14 - .../Core/include/Framework/TableTreeHelpers.h | 54 ---- Framework/Core/src/DataAllocator.cxx | 32 --- Framework/Core/src/TableTreeHelpers.cxx | 272 ------------------ Framework/Core/test/benchmark_TreeToTable.cxx | 96 ------- Framework/Core/test/test_TreeToTable.cxx | 237 --------------- 7 files changed, 707 deletions(-) delete mode 100644 Framework/Core/test/benchmark_TreeToTable.cxx delete mode 100644 Framework/Core/test/test_TreeToTable.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 7202e2299b7cc..f059984b5d85d 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -250,7 +250,6 @@ add_executable(o2-test-framework-core test/test_Variants.cxx test/test_WorkflowHelpers.cxx test/test_WorkflowSerialization.cxx - test/test_TreeToTable.cxx test/test_DataOutputDirector.cxx test/unittest_SimpleOptionsRetriever.cxx test/unittest_DataSpecUtils.cxx @@ -348,7 +347,6 @@ foreach(b EventMixing HistogramRegistry TableToTree - TreeToTable ExternalFairMQDeviceProxies ) o2_add_executable(benchmark-${b} diff --git a/Framework/Core/include/Framework/DataAllocator.h b/Framework/Core/include/Framework/DataAllocator.h index eb63b5469bb29..287513ec85845 100644 --- 
a/Framework/Core/include/Framework/DataAllocator.h +++ b/Framework/Core/include/Framework/DataAllocator.h @@ -233,15 +233,6 @@ class DataAllocator return tb; } - template - requires(requires { static_cast(std::declval>()); }) - decltype(auto) make(const Output& spec, Args... args) - { - auto t2t = std::move(LifetimeHolder(new std::decay_t(args...))); - adopt(spec, t2t); - return t2t; - } - template requires(requires { static_cast(std::declval>()); }) decltype(auto) make(const Output& spec, Args... args) @@ -288,11 +279,6 @@ class DataAllocator void adopt(const Output& spec, LifetimeHolder&); - /// Adopt a Tree2Table in the framework and serialise / send - /// it as an Arrow table to all consumers of @a spec once done - void - adopt(const Output& spec, LifetimeHolder&); - /// Adopt a Source2Batch in the framework and serialise / send /// it as an Arrow Dataset to all consumers of @a spec once done void diff --git a/Framework/Core/include/Framework/TableTreeHelpers.h b/Framework/Core/include/Framework/TableTreeHelpers.h index 92725d186ee33..3f76298a5bbd4 100644 --- a/Framework/Core/include/Framework/TableTreeHelpers.h +++ b/Framework/Core/include/Framework/TableTreeHelpers.h @@ -36,19 +36,6 @@ namespace o2::framework // OR t2t.addBranch(column.get(), field.get()), ...; // . t2t.process(); // -// ............................................................................. -// ----------------------------------------------------------------------------- -// TreeToTable allows to fill the contents of a given TTree to an arrow::Table -// ColumnIterator is used by TreeToTable -// -// To copy the contents of a tree tr to a table ta do: -// . TreeToTable t2t(tr); -// . t2t.addColumn(columnname1); t2t.addColumn(columnname2); ... -// OR -// t2t.addAllColumns(); -// . auto ta = t2t.process(); -// -// ............................................................................. 
struct ROOTTypeInfo { EDataType type; char suffix[3]; @@ -58,29 +45,6 @@ struct ROOTTypeInfo { auto arrowTypeFromROOT(EDataType type, int size); auto basicROOTTypeFromArrow(arrow::Type::type id); -class BranchToColumn -{ - public: - BranchToColumn(TBranch* branch, bool VLA, std::string name, EDataType type, int listSize, arrow::MemoryPool* pool); - // BranchToColumn(TBranch* branch, TBranch* sizeBranch, std::string name, EDataType type, arrow::MemoryPool* pool); - ~BranchToColumn() = default; - TBranch* branch(); - - std::pair, std::shared_ptr> read(TBuffer* buffer); - - private: - TBranch* mBranch = nullptr; - bool mVLA = false; - std::string mColumnName; - EDataType mType; - std::shared_ptr mArrowType; - arrow::ArrayBuilder* mValueBuilder = nullptr; - std::unique_ptr mListBuilder = nullptr; - int mListSize = 1; - std::unique_ptr mBuilder = nullptr; - arrow::MemoryPool* mPool = nullptr; -}; - class ColumnToBranch { public: @@ -127,24 +91,6 @@ class TableToTree std::vector> mColumnReaders; }; -class TreeToTable -{ - public: - TreeToTable(arrow::MemoryPool* pool = arrow::default_memory_pool()); - void setLabel(const char* label); - void addAllColumns(TTree* tree, std::vector&& names = {}); - void fill(TTree*); - std::shared_ptr finalize(); - - private: - arrow::MemoryPool* mArrowMemoryPool; - std::vector> mBranchReaders; - std::string mTableLabel; - std::shared_ptr mTable; - - void addReader(TBranch* branch, std::string const& name, bool VLA); -}; - class FragmentToBatch { public: diff --git a/Framework/Core/src/DataAllocator.cxx b/Framework/Core/src/DataAllocator.cxx index b735eee1f3308..ca35089fdfaab 100644 --- a/Framework/Core/src/DataAllocator.cxx +++ b/Framework/Core/src/DataAllocator.cxx @@ -241,38 +241,6 @@ void DataAllocator::adopt(const Output& spec, LifetimeHolder& tb) context.addBuffer(std::move(header), buffer, std::move(finalizer), routeIndex); } -void DataAllocator::adopt(const Output& spec, LifetimeHolder& t2t) -{ - auto& timingInfo = mRegistry.get(); 
- RouteIndex routeIndex = matchDataHeader(spec, timingInfo.timeslice); - - auto header = headerMessageFromOutput(spec, routeIndex, o2::header::gSerializationMethodArrow, 0); - auto& context = mRegistry.get(); - - auto creator = [transport = context.proxy().getOutputTransport(routeIndex)](size_t s) -> std::unique_ptr { - return transport->CreateMessage(s); - }; - auto buffer = std::make_shared(creator); - - t2t.callback = [buffer = buffer, transport = context.proxy().getOutputTransport(routeIndex)](TreeToTable& tree) { - // Serialization happens in here, so that we can - // get rid of the intermediate tree 2 table object, saving memory. - auto table = tree.finalize(); - doWriteTable(buffer, table.get()); - // deletion happens in the caller - }; - - /// To finalise this we write the table to the buffer. - /// FIXME: most likely not a great idea. We should probably write to the buffer - /// directly in the TableBuilder, incrementally. - auto finalizer = [](std::shared_ptr b) -> void { - // This is empty because we already serialised the object when - // the LifetimeHolder goes out of scope. 
- }; - - context.addBuffer(std::move(header), buffer, std::move(finalizer), routeIndex); -} - void DataAllocator::adopt(const Output& spec, LifetimeHolder& f2b) { auto& timingInfo = mRegistry.get(); diff --git a/Framework/Core/src/TableTreeHelpers.cxx b/Framework/Core/src/TableTreeHelpers.cxx index 84d4ff171bc39..92231cb9ce069 100644 --- a/Framework/Core/src/TableTreeHelpers.cxx +++ b/Framework/Core/src/TableTreeHelpers.cxx @@ -102,166 +102,6 @@ auto basicROOTTypeFromArrow(arrow::Type::type id) } } -TBranch* BranchToColumn::branch() -{ - return mBranch; -} - -BranchToColumn::BranchToColumn(TBranch* branch, bool VLA, std::string name, EDataType type, int listSize, arrow::MemoryPool* pool) - : mBranch{branch}, - mVLA{VLA}, - mColumnName{std::move(name)}, - mType{type}, - mArrowType{arrowTypeFromROOT(type, listSize)}, - mListSize{listSize}, - mPool{pool} - -{ - if (mType == EDataType::kBool_t) { - if (mListSize > 1) { - auto status = arrow::MakeBuilder(mPool, mArrowType->field(0)->type(), &mBuilder); - if (!status.ok()) { - throw runtime_error("Cannot create value builder"); - } - mListBuilder = std::make_unique(mPool, std::move(mBuilder), mListSize); - mValueBuilder = static_cast(mListBuilder.get())->value_builder(); - } else { - auto status = arrow::MakeBuilder(mPool, mArrowType, &mBuilder); - if (!status.ok()) { - throw runtime_error("Cannot create builder"); - } - mValueBuilder = mBuilder.get(); - } - } -} - -std::pair, std::shared_ptr> BranchToColumn::read(TBuffer* buffer) -{ - O2_SIGNPOST_ID_FROM_POINTER(sid, tabletree_helpers, buffer); - auto totalEntries = mBranch->GetEntries(); - arrow::Status status; - int readEntries = 0; - buffer->Reset(); - std::shared_ptr array; - - if (mType == EDataType::kBool_t) { - // boolean array special case: we need to use builder to create the bitmap - status = mValueBuilder->Reserve(totalEntries * mListSize); - if (mListSize > 1) { - status &= mListBuilder->Reserve(totalEntries); - } - if (!status.ok()) { - throw 
runtime_error("Failed to reserve memory for array builder"); - } - while (readEntries < totalEntries) { - auto readLast = mBranch->GetBulkRead().GetBulkEntries(readEntries, *buffer); - readEntries += readLast; - status &= static_cast(mValueBuilder)->AppendValues(reinterpret_cast(buffer->GetCurrent()), readLast * mListSize); - } - if (mListSize > 1) { - status &= static_cast(mListBuilder.get())->AppendValues(readEntries); - } - if (!status.ok()) { - throw runtime_error("Failed to append values to array"); - } - if (mListSize > 1) { - status &= mListBuilder->Finish(&array); - } else { - status &= mValueBuilder->Finish(&array); - } - if (!status.ok()) { - throw runtime_error("Failed to create array"); - } - } else { - // other types: use serialized read to build arrays directly - size_t branchSize = mBranch->GetTotBytes(); - auto&& result = arrow::AllocateResizableBuffer(mBranch->GetTotBytes(), mPool); - O2_SIGNPOST_EVENT_EMIT(tabletree_helpers, sid, "BranchToColumn", "Allocating %ld bytes for %{public}s", branchSize, mBranch->GetName()); - if (!result.ok()) { - throw runtime_error("Cannot allocate values buffer"); - } - std::shared_ptr arrowValuesBuffer = std::move(result).ValueUnsafe(); - auto ptr = arrowValuesBuffer->mutable_data(); - if (ptr == nullptr) { - throw runtime_error("Invalid buffer"); - } - - auto typeSize = TDataType::GetDataType(mType)->Size(); - std::unique_ptr offsetBuffer = nullptr; - - uint32_t offset = 0; - int count = 0; - std::shared_ptr arrowOffsetBuffer; - gsl::span offsets; - int size = 0; - uint32_t totalSize = 0; - TBranch* mSizeBranch = nullptr; - if (mVLA) { - mSizeBranch = mBranch->GetTree()->GetBranch((std::string{mBranch->GetName()} + TableTreeHelpers::sizeBranchSuffix).c_str()); - offsetBuffer = std::make_unique(TBuffer::EMode::kWrite, 4 * 1024 * 1024); - result = arrow::AllocateResizableBuffer((totalEntries + 1) * (int64_t)sizeof(int), mPool); - if (!result.ok()) { - throw runtime_error("Cannot allocate offset buffer"); - } - 
arrowOffsetBuffer = std::move(result).ValueUnsafe(); - unsigned char* ptrOffset = arrowOffsetBuffer->mutable_data(); - auto* tPtrOffset = reinterpret_cast(ptrOffset); - offsets = gsl::span{tPtrOffset, tPtrOffset + totalEntries + 1}; - - // read sizes first - while (readEntries < totalEntries) { - auto readLast = mSizeBranch->GetBulkRead().GetEntriesSerialized(readEntries, *offsetBuffer); - readEntries += readLast; - for (auto i = 0; i < readLast; ++i) { - offsets[count++] = (int)offset; - offset += swap32_(reinterpret_cast(offsetBuffer->GetCurrent())[i]); - } - } - offsets[count] = (int)offset; - totalSize = offset; - readEntries = 0; - } - - while (readEntries < totalEntries) { - auto readLast = mBranch->GetBulkRead().GetEntriesSerialized(readEntries, *buffer); - if (mVLA) { - size = offsets[readEntries + readLast] - offsets[readEntries]; - } else { - size = readLast * mListSize; - } - readEntries += readLast; - swapCopy(ptr, buffer->GetCurrent(), size, typeSize); - ptr += (ptrdiff_t)(size * typeSize); - } - if (!mVLA) { - totalSize = readEntries * mListSize; - } - std::shared_ptr varray; - switch (mListSize) { - case -1: - varray = std::make_shared(mArrowType->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(mArrowType, readEntries, arrowOffsetBuffer, varray); - break; - case 1: - array = std::make_shared(mArrowType, readEntries, arrowValuesBuffer); - break; - default: - varray = std::make_shared(mArrowType->field(0)->type(), totalSize, arrowValuesBuffer); - array = std::make_shared(mArrowType, readEntries, varray); - } - } - - auto fullArray = std::make_shared(array); - auto field = std::make_shared(mBranch->GetName(), mArrowType); - - mBranch->SetStatus(false); - mBranch->DropBaskets("all"); - mBranch->Reset(); - mBranch->GetTransientBuffer(0)->Expand(0); - - return std::make_pair(fullArray, field); -} - ColumnToBranch::ColumnToBranch(TTree* tree, std::shared_ptr const& column, std::shared_ptr const& field) : 
mBranchName{field->name()}, mColumn{column.get()}, @@ -447,11 +287,6 @@ std::shared_ptr TableToTree::process() return mTree; } -TreeToTable::TreeToTable(arrow::MemoryPool* pool) - : mArrowMemoryPool{pool} -{ -} - namespace { struct BranchInfo { @@ -461,113 +296,6 @@ struct BranchInfo { }; } // namespace -void TreeToTable::addAllColumns(TTree* tree, std::vector&& names) -{ - auto branches = tree->GetListOfBranches(); - auto n = branches->GetEntries(); - if (n == 0) { - throw runtime_error("Tree has no branches"); - } - - std::vector branchInfos; - for (auto i = 0; i < n; ++i) { - auto branch = static_cast(branches->At(i)); - auto name = std::string{branch->GetName()}; - auto pos = name.find(TableTreeHelpers::sizeBranchSuffix); - if (pos != std::string::npos) { - name.erase(pos); - branchInfos.emplace_back(BranchInfo{name, (TBranch*)nullptr, true}); - } else { - auto lookup = std::find_if(branchInfos.begin(), branchInfos.end(), [&](BranchInfo const& bi) { - return bi.name == name; - }); - if (lookup == branchInfos.end()) { - branchInfos.emplace_back(BranchInfo{name, branch, false}); - } else { - lookup->ptr = branch; - } - } - } - - if (names.empty()) { - for (auto& bi : branchInfos) { - addReader(bi.ptr, bi.name, bi.mVLA); - } - } else { - for (auto& name : names) { - auto lookup = std::find_if(branchInfos.begin(), branchInfos.end(), [&](BranchInfo const& bi) { - return name == bi.name; - }); - if (lookup != branchInfos.end()) { - addReader(lookup->ptr, lookup->name, lookup->mVLA); - } - } - if (names.size() != mBranchReaders.size()) { - LOGF(warn, "Not all requested columns were found in the tree"); - } - } - if (mBranchReaders.empty()) { - throw runtime_error("No columns will be read"); - } - // Was affected by https://github.com/root-project/root/issues/8962 - // Re-enabling this seems to cut the number of IOPS in half - tree->SetCacheSize(25000000); - // tree->SetClusterPrefetch(true); - for (auto& reader : mBranchReaders) { - 
tree->AddBranchToCache(reader->branch()); - if (strncmp(reader->branch()->GetName(), "fIndexArray", strlen("fIndexArray")) == 0) { - std::string sizeBranchName = reader->branch()->GetName(); - sizeBranchName += "_size"; - auto* sizeBranch = (TBranch*)tree->GetBranch(sizeBranchName.c_str()); - if (sizeBranch) { - tree->AddBranchToCache(sizeBranch); - } - } - } - tree->StopCacheLearningPhase(); -} - -void TreeToTable::setLabel(const char* label) -{ - mTableLabel = label; -} - -void TreeToTable::fill(TTree* tree) -{ - std::vector> columns; - std::vector> fields; - static TBufferFile buffer{TBuffer::EMode::kWrite, 4 * 1024 * 1024}; - O2_SIGNPOST_ID_FROM_POINTER(sid, tabletree_helpers, &buffer); - O2_SIGNPOST_START(tabletree_helpers, sid, "TreeToTable", "Filling %{public}s", tree->GetName()); - for (auto& reader : mBranchReaders) { - buffer.Reset(); - auto arrayAndField = reader->read(&buffer); - columns.push_back(arrayAndField.first); - fields.push_back(arrayAndField.second); - } - O2_SIGNPOST_END(tabletree_helpers, sid, "TreeToTable", "Done filling."); - - auto schema = std::make_shared(fields, std::make_shared(std::vector{std::string{"label"}}, std::vector{mTableLabel})); - mTable = arrow::Table::Make(schema, columns); -} - -void TreeToTable::addReader(TBranch* branch, std::string const& name, bool VLA) -{ - static TClass* cls; - EDataType type; - branch->GetExpectedType(cls, type); - auto listSize = -1; - if (!VLA) { - listSize = static_cast(branch->GetListOfLeaves()->At(0))->GetLenStatic(); - } - mBranchReaders.emplace_back(std::make_unique(branch, VLA, name, type, listSize, mArrowMemoryPool)); -} - -std::shared_ptr TreeToTable::finalize() -{ - return mTable; -} - FragmentToBatch::FragmentToBatch(StreamerCreator creator, std::shared_ptr fragment, arrow::MemoryPool* pool) : mFragment{std::move(fragment)}, mArrowMemoryPool{pool}, diff --git a/Framework/Core/test/benchmark_TreeToTable.cxx b/Framework/Core/test/benchmark_TreeToTable.cxx deleted file mode 100644 index 
6eca853a90ce4..0000000000000 --- a/Framework/Core/test/benchmark_TreeToTable.cxx +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#include "Framework/CommonDataProcessors.h" -#include "Framework/TableTreeHelpers.h" -#include "Framework/Logger.h" -#include -#include -#include - -#include - -using namespace o2::framework; -using namespace arrow; -using namespace o2::soa; - -namespace test -{ -DECLARE_SOA_COLUMN_FULL(X, x, float, "x"); -DECLARE_SOA_COLUMN_FULL(Y, y, float, "y"); -DECLARE_SOA_COLUMN_FULL(Z, z, float, "z"); -DECLARE_SOA_DYNAMIC_COLUMN(Sum, sum, [](float x, float y) { return x + y; }); -} // namespace test - -#ifdef __APPLE__ -constexpr unsigned int maxrange = 15; -#else -constexpr unsigned int maxrange = 16; -#endif - -static void BM_TreeToTable(benchmark::State& state) -{ - - // initialize a random generator - std::default_random_engine e1(1234567891); - std::uniform_real_distribution rd(0, 1); - std::normal_distribution rf(5., 2.); - std::discrete_distribution rl({10, 20, 30, 30, 5, 5}); - std::discrete_distribution ri({10, 20, 30, 30, 5, 5}); - - // create a table and fill the columns with random numbers - TableBuilder builder; - auto rowWriter = - builder.persist({"a", "b", "c", "d"}); - for (auto i = 0; i < state.range(0); ++i) { - rowWriter(0, rd(e1), rf(e1), rl(e1), ri(e1)); - } - auto table = builder.finalize(); - - // now convert the table to a tree - TFile fout("tree2table.root", "RECREATE"); - TableToTree 
ta2tr(table, &fout, "tree2table"); - ta2tr.addAllBranches(); - ta2tr.process(); - fout.Close(); - - // read tree and convert to table again - TFile* f = nullptr; - TreeToTable* tr2ta = nullptr; - for (auto _ : state) { - - // Open file and create tree - f = new TFile("tree2table.root", "READ"); - auto tr = (TTree*)f->Get("tree2table"); - - // benchmark TreeToTable - if (tr) { - tr2ta = new TreeToTable; - tr2ta->addAllColumns(tr); - tr2ta->fill(tr); - auto ta = tr2ta->finalize(); - } else { - LOG(info) << "tree is empty!"; - } - - // clean up - delete tr2ta; - - f->Close(); - delete f; - } - - state.SetBytesProcessed(state.iterations() * state.range(0) * 24); -} - -BENCHMARK(BM_TreeToTable)->Range(8, 8 << maxrange); - -BENCHMARK_MAIN(); diff --git a/Framework/Core/test/test_TreeToTable.cxx b/Framework/Core/test/test_TreeToTable.cxx deleted file mode 100644 index 4f3429a5bba62..0000000000000 --- a/Framework/Core/test/test_TreeToTable.cxx +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -#include - -#include "Framework/CommonDataProcessors.h" -#include "Framework/TableTreeHelpers.h" -#include "Framework/Logger.h" -#include "Framework/TableBuilder.h" - -#include -#include -#include -#include - -using namespace o2::framework; - -TEST_CASE("TreeToTableConversion") -{ - /// Create a simple TTree - Int_t ndp = 17; - - TFile f1("tree2table.root", "RECREATE"); - TTree t1("t1", "a simple Tree with simple variables"); - Bool_t ok, ts[5] = {false}; - Float_t px, py, pz; - Double_t random; - Int_t ev; - uint8_t b; - const Int_t nelem = 9; - Double_t ij[nelem] = {0}; - float xyzw[96]; - memset(xyzw, 1, 96 * 4); - TString leaflist = Form("ij[%i]/D", nelem); - - Int_t ncols = 10; - t1.Branch("ok", &ok, "ok/O"); - t1.Branch("px", &px, "px/F"); - t1.Branch("py", &py, "py/F"); - t1.Branch("pz", &pz, "pz/F"); - t1.Branch("random", &random, "random/D"); - t1.Branch("ev", &ev, "ev/I"); - t1.Branch("ij", ij, leaflist.Data()); - t1.Branch("tests", ts, "tests[5]/O"); - t1.Branch("xyzw", xyzw, "xyzw[96]/F"); - t1.Branch("small", &b, "small/b"); - - // fill the tree - int ntruein[2] = {0}; - for (int i = 0; i < ndp; i++) { - ok = (i % 2) == 0; - if (ok) { - ntruein[0]++; - } - gRandom->Rannor(px, py); - pz = px * px + py * py; - random = gRandom->Rndm(); - ev = i + 1; - b = i % 3; - for (Int_t jj = 0; jj < nelem; jj++) { - ij[jj] = i + 100 * jj; - } - for (Int_t jj = 0; jj < 5; jj++) { - ts[jj] = (((i + jj) % 2) == 0); - if (ts[jj]) { - ntruein[1]++; - } - } - - t1.Fill(); - } - t1.Write(); - - // Create an arrow table from this. 
- TreeToTable tr2ta; - tr2ta.addAllColumns(&t1); - tr2ta.fill(&t1); - auto table = tr2ta.finalize(); - f1.Close(); - - // test result - REQUIRE(table->Validate().ok() == true); - REQUIRE(table->num_rows() == ndp); - REQUIRE(table->num_columns() == ncols); - - REQUIRE(table->column(0)->type()->id() == arrow::Type::BOOL); - REQUIRE(table->column(1)->type()->id() == arrow::Type::FLOAT); - REQUIRE(table->column(2)->type()->id() == arrow::Type::FLOAT); - REQUIRE(table->column(3)->type()->id() == arrow::Type::FLOAT); - REQUIRE(table->column(4)->type()->id() == arrow::Type::DOUBLE); - REQUIRE(table->column(5)->type()->id() == arrow::Type::INT32); - REQUIRE(table->column(6)->type()->id() == arrow::Type::FIXED_SIZE_LIST); - REQUIRE(table->column(7)->type()->id() == arrow::Type::FIXED_SIZE_LIST); - REQUIRE(table->column(8)->type()->id() == arrow::Type::FIXED_SIZE_LIST); - REQUIRE(table->column(9)->type()->id() == arrow::Type::UINT8); - - REQUIRE(table->column(0)->type()->Equals(arrow::boolean())); - REQUIRE(table->column(1)->type()->Equals(arrow::float32())); - REQUIRE(table->column(2)->type()->Equals(arrow::float32())); - REQUIRE(table->column(3)->type()->Equals(arrow::float32())); - REQUIRE(table->column(4)->type()->Equals(arrow::float64())); - REQUIRE(table->column(5)->type()->Equals(arrow::int32())); - REQUIRE(table->column(6)->type()->Equals(arrow::fixed_size_list(arrow::float64(), nelem))); - REQUIRE(table->column(7)->type()->Equals(arrow::fixed_size_list(arrow::boolean(), 5))); - REQUIRE(table->column(8)->type()->Equals(arrow::fixed_size_list(arrow::float32(), 96))); - REQUIRE(table->column(9)->type()->Equals(arrow::uint8())); - - // count number of rows with ok==true - int ntrueout = 0; - auto chunks = table->column(0); - REQUIRE(!(chunks.get() == nullptr)); - - auto oks = std::dynamic_pointer_cast(chunks->chunk(0)); - REQUIRE(!(oks.get() == nullptr)); - - for (int ii = 0; ii < table->num_rows(); ii++) { - ntrueout += oks->Value(ii) ? 
1 : 0; - } - REQUIRE(ntruein[0] == ntrueout); - - // count number of ts with ts==true - chunks = table->column(7); - REQUIRE(!(chunks.get() == nullptr)); - - auto chunkToUse = std::static_pointer_cast(chunks->chunk(0))->values(); - REQUIRE(!(chunkToUse.get() == nullptr)); - - auto tests = std::dynamic_pointer_cast(chunkToUse); - ntrueout = 0; - for (int ii = 0; ii < table->num_rows() * 5; ii++) { - ntrueout += tests->Value(ii) ? 1 : 0; - } - REQUIRE(ntruein[1] == ntrueout); - - // save table as tree - TFile* f2 = TFile::Open("table2tree.root", "RECREATE"); - TableToTree ta2tr(table, f2, "mytree"); - ta2tr.addAllBranches(); - - auto t2 = ta2tr.process(); - auto br = (TBranch*)t2->GetBranch("ok"); - REQUIRE(t2->GetEntries() == ndp); - REQUIRE(br->GetEntries() == ndp); - br = (TBranch*)t2->GetBranch("tests"); - REQUIRE(br->GetEntries() == ndp); - - f2->Close(); -} - -namespace o2::aod -{ -namespace cols -{ -DECLARE_SOA_COLUMN(Ivec, ivec, std::vector); -DECLARE_SOA_COLUMN(Fvec, fvec, std::vector); -DECLARE_SOA_COLUMN(Dvec, dvec, std::vector); -DECLARE_SOA_COLUMN(UIvec, uivec, std::vector); -} // namespace cols - -DECLARE_SOA_TABLE(Vectors, "AOD", "VECS", o2::soa::Index<>, cols::Ivec, cols::Fvec, cols::Dvec, cols::UIvec); -} // namespace o2::aod - -TEST_CASE("VariableLists") -{ - TableBuilder b; - auto writer = b.cursor(); - std::vector iv; - std::vector fv; - std::vector dv; - std::vector ui; - - std::array empty = {3, 7, 10}; - auto count = 0; - for (auto i = 1; i < 1000; ++i) { - iv.clear(); - fv.clear(); - dv.clear(); - ui.clear(); - if (count < empty.size() && i != empty[count]) { - for (auto j = 0; j < i % 10 + 1; ++j) { - iv.push_back(j + 2); - fv.push_back((j + 2) * 0.2134f); - dv.push_back((j + 4) * 0.192873819237); - ui.push_back(j); - } - } else { - count++; - } - writer(0, iv, fv, dv, ui); - } - auto table = b.finalize(); - - auto* f = TFile::Open("variable_lists.root", "RECREATE"); - TableToTree ta2tr(table, f, "lists"); - ta2tr.addAllBranches(); - auto 
tree = ta2tr.process(); - f->Close(); - - auto* f2 = TFile::Open("variable_lists.root", "READ"); - auto* treeptr = static_cast(f2->Get("lists;1")); - TreeToTable tr2ta; - tr2ta.addAllColumns(treeptr); - tr2ta.fill(treeptr); - auto ta = tr2ta.finalize(); - o2::aod::Vectors v{ta}; - int i = 1; - count = 0; - for (auto& row : v) { - auto ivr = row.ivec(); - auto fvr = row.fvec(); - auto dvr = row.dvec(); - auto uvr = row.uivec(); - if (count < empty.size() && i != empty[count]) { - for (auto j = 0; j < i % 10 + 1; ++j) { - REQUIRE(ivr[j] == j + 2); - REQUIRE(fvr[j] == (j + 2) * 0.2134f); - REQUIRE(dvr[j] == (j + 4) * 0.192873819237); - REQUIRE(uvr[j] == j); - } - } else { - REQUIRE(ivr.size() == 0); - REQUIRE(fvr.size() == 0); - REQUIRE(dvr.size() == 0); - REQUIRE(uvr.size() == 0); - count++; - } - ++i; - } -} From ccb26194cc88a5c1f9a352037caa8cd601a5fb0a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 10 Mar 2025 15:14:53 +0100 Subject: [PATCH 0174/1914] GPU TPC: Fix filtering check (#14032) --- .../Global/GPUChainTrackingCompression.cxx | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 57a759a58924e..03d319f42fd6b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -201,12 +201,14 @@ int32_t GPUChainTracking::RunTPCCompression() int32_t GPUChainTracking::RunTPCDecompression() { - const bool runFullFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0) || (param().tpcCutTimeBin > 0); - if (runFullFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { + const bool needFullFiltering = GetProcessingSettings().tpcApplyCFCutsAtDecoding || (GetProcessingSettings().tpcApplyClusterFilterOnCPU > 0); + const bool runTimeBinCutFiltering = param().tpcCutTimeBin > 0; + if 
(needFullFiltering && !GetProcessingSettings().tpcUseOldCPUDecoding) { GPUFatal("tpcApplyCFCutsAtDecoding, tpcApplyClusterFilterOnCPU and tpcCutTimeBin currently require tpcUseOldCPUDecoding"); } if (GetProcessingSettings().tpcUseOldCPUDecoding) { + const bool runFiltering = needFullFiltering || runTimeBinCutFiltering; const auto& threadContext = GetThreadContext(); TPCClusterDecompressor decomp; auto allocatorFinal = [this](size_t size) { @@ -219,13 +221,13 @@ int32_t GPUChainTracking::RunTPCDecompression() return ((tmpBuffer = std::make_unique(size))).get(); }; auto& decompressTimer = getTimer("TPCDecompression", 0); - auto allocatorUse = runFullFiltering ? std::function{allocatorTmp} : std::function{allocatorFinal}; + auto allocatorUse = runFiltering ? std::function{allocatorTmp} : std::function{allocatorFinal}; decompressTimer.Start(); if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) { GPUError("Error decompressing clusters"); return 1; } - if (runFullFiltering) { + if (runFiltering) { RunTPCClusterFilter(mClusterNativeAccess.get(), allocatorFinal, GetProcessingSettings().tpcApplyCFCutsAtDecoding); } decompressTimer.Stop(); @@ -245,7 +247,6 @@ int32_t GPUChainTracking::RunTPCDecompression() mRec->PushNonPersistentMemory(qStr2Tag("TPCDCMPR")); RecoStep myStep = RecoStep::TPCDecompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCDecompression; - bool runTimeBinCutFiltering = param().tpcCutTimeBin > 0; GPUTPCDecompression& Decompressor = processors()->tpcDecompressor; GPUTPCDecompression& DecompressorShadow = doGPU ? 
processorsShadow()->tpcDecompressor : Decompressor; const auto& threadContext = GetThreadContext(); From d2bd60f0d78e9f80807f03f740c7baa488068970 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 10 Mar 2025 11:20:34 +0100 Subject: [PATCH 0175/1914] GPU HIP Cmake: Get rid of hipcc, use Clang --cuda-device-only instead --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt | 6 ++---- GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index e85a3c3e9e1f3..7fb3744551953 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -28,7 +28,7 @@ #ifndef __HIPCC__ // CUDA #define PER_KERNEL_OBJECT_EXT _fatbin #else // HIP -#define PER_KERNEL_OBJECT_EXT _hip_cxx_o +#define PER_KERNEL_OBJECT_EXT _hip_o #endif #define GPUCA_KRNL(x_class, ...) 
QGET_LD_BINARY_SYMBOLS(GPUCA_M_CAT3(cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), PER_KERNEL_OBJECT_EXT)) #include "GPUReconstructionKernelList.h" diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index f6e420d5b9656..30f6683ff93c5 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -122,7 +122,7 @@ add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ add_custom_command( OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${hip_HIPCC_EXECUTABLE} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} --genco" > ${GPU_RTC_BIN}.command + COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" @@ -159,7 +159,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP -# SOURCES test/testGPUsortHIP.hip.cxx +# SOURCES test/testGPUsortHIP.hip # PUBLIC_LINK_LIBRARIES O2::GPUCommon hip::host hip::device hip::hipcub roc::rocthrust # COMPONENT_NAME GPU # LABELS gpu) diff --git a/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt b/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt index 15b72379fdfa7..789333eea9f04 100644 --- a/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/per_kernel/CMakeLists.txt @@ -9,7 +9,5 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
-add_library(GPUTrackingHIPKernels OBJECT $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip.cxx>, >) -set(CMAKE_CXX_COMPILER ${hip_HIPCC_EXECUTABLE}) -set(CMAKE_CXX_FLAGS "${GPU_RTC_FLAGS} ${GPU_RTC_FLAGS_ARCH} --genco") -unset(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}) +add_library(GPUTrackingHIPKernels OBJECT $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_>,APPEND,.hip>, >) +set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} --cuda-device-only") diff --git a/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx b/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx index 822af77bce48c..ed13124ef65df 100644 --- a/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx +++ b/GPU/GPUTracking/Base/hip/test/testGPUsortHIP.hip.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file testGPUsortHIP.hip.cxx +/// \file testGPUsortHIP.hip /// \author Michael Lettrich #define GPUCA_GPUTYPE_VEGA diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index ff003eca78948..b05fa19785dd8 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -96,7 +96,7 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) endif() if(HIP_ENABLED) - set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.hip.cxx") + set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.hip") set(O2_GPU_KERNEL_TEMPLATE_REPLACE "${TMP_KERNEL}") configure_file(${O2_GPU_BASE_DIR}/Base/hip/GPUReconstructionHIPkernel.template.hip ${TMP_FILENAMEA}) endif() From 9607305b9fed276904133fbd2487da73b3b3b41c Mon Sep 17 00:00:00 2001 From: Diego Stocco Date: Tue, 11 Mar 2025 09:07:48 +0100 Subject: [PATCH 0176/1914] Add MID local board hardware name in the mapping info (#14034) --- 
Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx | 2 ++ .../include/MIDGlobalMapping/ExtendedMappingInfo.h | 1 + Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx b/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx index 0086bf6b4caa5..f8efd6fddb79d 100644 --- a/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx +++ b/Detectors/MUON/MID/GlobalMapping/exe/global-mapper.cxx @@ -57,6 +57,8 @@ void stripsInfo2json(const std::vector& infos, con writer.Int(infos[idx].locId); writer.Key("locIdDcs"); writer.String(infos[idx].locIdDcs.c_str()); + writer.Key("locIdHw"); + writer.String(infos[idx].locIdHw.c_str()); writer.EndObject(); } writer.EndArray(); diff --git a/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h b/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h index f05b2d6acba1f..1153f75c774ba 100644 --- a/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h +++ b/Detectors/MUON/MID/GlobalMapping/include/MIDGlobalMapping/ExtendedMappingInfo.h @@ -34,6 +34,7 @@ struct ExtendedMappingInfo { int cathode; ///< Bending (0) or Non-bending (1) planes int locId; ///< Local board ID std::string locIdDcs; ///< Local board ID for DCS + std::string locIdHw; /// Local board ID in the hardware int xpos; ///< Position X int ypos; ///< Position Y int xwidth; ///< Width X (signed) diff --git a/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx b/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx index aebaade01f963..eeb17759197c9 100644 --- a/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx +++ b/Detectors/MUON/MID/GlobalMapping/src/GlobalMapper.cxx @@ -79,7 +79,10 @@ std::array GlobalMapper::getStripGeom(int deId, int columnId, int lineId ExtendedMappingInfo GlobalMapper::buildExtendedInfo(int deId, int columnId, int lineId, int stripId, int cathode) const { 
ExtendedMappingInfo info; + std::array boards{"12", "34", "56", "78"}; info.id = getStripId(deId, columnId, lineId, stripId, cathode); + int irpc = detparams::getRPCLine(deId); + int iline = (irpc == 5 && columnId == 0) ? lineId - 1 : lineId; auto locId = static_cast(mCrateMapper.deLocalBoardToRO(deId, columnId, lineId)); info.locId = locId; std::string side = detparams::isRightSide(deId) ? "R" : "L"; @@ -92,6 +95,7 @@ ExtendedMappingInfo GlobalMapper::buildExtendedInfo(int deId, int columnId, int info.stripId = stripId; info.cathode = cathode; info.locIdDcs = fmt::format("{}{}{}{}", crateId, side, (locInCrate >= 8 ? "1" : "0"), locInCrate); + info.locIdHw = fmt::format("{}{}C{}L{}B{}", detparams::getChamber(deId) + 1, side, columnId + 1, irpc + 1, boards[iline]); auto geom = getStripGeom(deId, columnId, lineId, stripId, cathode); info.xpos = geom[0]; info.ypos = geom[1]; From 41c8f04218623fc065fe6f85eb7d8f96f215cf33 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 11 Mar 2025 10:15:27 +0100 Subject: [PATCH 0177/1914] Drop need for Framework/RootTableBuilderHelpers.h (#14036) --- .../src/AODJAlienReaderHelpers.cxx | 1 - Framework/Core/CMakeLists.txt | 5 - .../Framework/RootTableBuilderHelpers.h | 233 ------------------ Framework/Core/src/AODReaderHelpers.cxx | 1 - Framework/Core/src/verifyAODFile.cxx | 54 ---- Framework/Core/test/test_Root2ArrowTable.cxx | 136 ---------- 6 files changed, 430 deletions(-) delete mode 100644 Framework/Core/include/Framework/RootTableBuilderHelpers.h delete mode 100644 Framework/Core/src/verifyAODFile.cxx diff --git a/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx b/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx index f8a9705e4eb62..85ed9cd573d8a 100644 --- a/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODJAlienReaderHelpers.cxx @@ -14,7 +14,6 @@ #include "Framework/TableTreeHelpers.h" #include 
"Framework/AnalysisHelpers.h" #include "Framework/DataProcessingStats.h" -#include "Framework/RootTableBuilderHelpers.h" #include "Framework/RootArrowFilesystem.h" #include "Framework/AlgorithmSpec.h" #include "Framework/ConfigParamRegistry.h" diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index f059984b5d85d..2691d9d33a0c6 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -328,11 +328,6 @@ o2_add_executable(dpl-run PUBLIC_LINK_LIBRARIES O2::Framework ) -o2_add_executable(verify-aod-file - SOURCES src/verifyAODFile.cxx - PUBLIC_LINK_LIBRARIES O2::Framework ROOT::TreePlayer - COMPONENT_NAME Framework) - # benchmarks foreach(b diff --git a/Framework/Core/include/Framework/RootTableBuilderHelpers.h b/Framework/Core/include/Framework/RootTableBuilderHelpers.h deleted file mode 100644 index 0fa818084a5a4..0000000000000 --- a/Framework/Core/include/Framework/RootTableBuilderHelpers.h +++ /dev/null @@ -1,233 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -#ifndef o2_framework_RootTableBuilderHelpers_H_INCLUDED -#define o2_framework_RootTableBuilderHelpers_H_INCLUDED - -#include "Framework/TableBuilder.h" -#include "Framework/Logger.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -namespace o2::framework -{ - -template -struct TreeReaderValueTraits { -}; - -/// Trait class to go from a set of TTreeReaderValues to -/// arrow types. -template -struct TreeReaderValueTraits> { - using Type = typename TTreeReaderValue::NonConstT_t; - using ArrowType = typename o2::framework::detail::ConversionTraits::ArrowType; - using BuilderType = typename arrow::TypeTraits::BuilderType; -}; - -template -struct TreeReaderValueTraits> { - using Iterator = typename TTreeReaderArray::iterator; - using Type = std::pair; - using ArrowType = arrow::ListType; -}; - -static constexpr int PREBUFFER_SIZE = 32 * 1024; - -// When reading from a ROOT file special care must happen -// because uint64_t is platform specific while ULong64_t is -// always long long unsigned int (same for the signed version). -// By using this traits we make sure that any 64 bit quantity -// read from a root file uses the ROOT datatype, not the platform one. 
-template -struct Remap64Bit { - using type = T; -}; - -template <> -struct Remap64Bit { - using type = Long64_t; -}; - -template <> -struct Remap64Bit { - using type = ULong64_t; -}; - -template -struct Remap64Bit { - using type = Long64_t[N]; -}; - -template -struct Remap64Bit { - using type = ULong64_t[N]; -}; - -template -using Remap64Bit_t = typename Remap64Bit::type; - -template -struct ReaderHolder { - using Reader = TTreeReaderValue; - using Type = T; - - ReaderHolder(TBranch* branch, std::unique_ptr reader_) - : reader{std::move(reader_)} - { - } - - ReaderHolder(ReaderHolder&& other) - : reader{std::move(other.reader)}, - pos{other.pos} - { - } - - ReaderHolder& operator=(ReaderHolder&& other) = delete; - - std::unique_ptr reader; - int pos = 0; - Remap64Bit_t buffer[PREBUFFER_SIZE]; - int itemSize = sizeof(T); -}; - -template -struct ReaderHolder { - using Reader = TTreeReaderArray; - using Type = T (&)[N]; - - ReaderHolder(TBranch* branch, std::unique_ptr reader_) - : reader{std::move(reader_)} - { - } - - ReaderHolder(ReaderHolder&& other) - : reader{std::move(other.reader)}, - pos{other.pos} - { - } - - ReaderHolder& operator=(ReaderHolder&& other) = delete; - - std::unique_ptr reader; - int pos = 0; - Remap64Bit_t buffer[PREBUFFER_SIZE * N]; - int itemSize = sizeof(T) * N; -}; - -struct BulkExtractor { - template - static auto deref(ReaderHolder& holder, size_t maxSize) - { - holder.buffer[holder.pos % PREBUFFER_SIZE] = **holder.reader; - holder.pos++; - if (holder.pos == maxSize) { - return BulkInfo const*>{holder.buffer, maxSize % PREBUFFER_SIZE}; - } - // We flush only after PREBUFFER_SIZE items have been inserted - if ((holder.pos % PREBUFFER_SIZE) != 0) { - return BulkInfo const*>{nullptr, 0}; - } - return BulkInfo const*>{holder.buffer, PREBUFFER_SIZE}; - } - - template - static auto deref(ReaderHolder& holder, size_t maxSize) - { - memcpy(&holder.buffer[(holder.pos % PREBUFFER_SIZE) * N], &((*holder.reader)[0]), N * sizeof(T)); - holder.pos++; 
- if (holder.pos == maxSize) { - return BulkInfo const*>{holder.buffer, maxSize % PREBUFFER_SIZE}; - } - // We flush only after PREBUFFER_SIZE items have been inserted - if ((holder.pos % PREBUFFER_SIZE) != 0) { - return BulkInfo const*>{nullptr, 0}; - } - return BulkInfo const*>{reinterpret_cast(holder.buffer), PREBUFFER_SIZE}; - } -}; - -template -struct HolderMaker { - static auto make(TTreeReader& reader, char const* branchName) - { - using Reader = TTreeReaderValue; - return ReaderHolder{reader.GetTree()->GetBranch(branchName), std::move(std::make_unique(reader, branchName))}; - } -}; - -template -struct HolderMaker { - static auto make(TTreeReader& reader, char const* branchName) - { - using Reader = TTreeReaderArray; - return ReaderHolder{reader.GetTree()->GetBranch(branchName), std::move(std::make_unique(reader, branchName))}; - } -}; - -template -struct ColumnReaderTrait { - static auto createReader(TTreeReader& reader) - { - return HolderMaker>::make(reader, C::base::columnLabel()); - } -}; - -struct RootTableBuilderHelpers { - /// Use bulk insertion when TTreeReaderValue everywhere - template - static void convertTTree(TableBuilder& builder, - TTreeReader& reader, - ReaderHolder... 
holders) - { - std::array branchNames = {holders.reader->GetBranchName()...}; - TTree* tree = reader.GetTree(); - size_t maxExtries = reader.GetEntries(true); - tree->SetCacheSize(maxExtries * (holders.itemSize + ...)); - (tree->AddBranchToCache(tree->GetBranch(holders.reader->GetBranchName()), true), ...); - tree->StopCacheLearningPhase(); - - auto filler = builder.bulkPersistChunked::Type>...>(branchNames, maxExtries); - while (reader.Next()) { - filler(0, BulkExtractor::deref(holders, maxExtries)...); - } - } - - template - static void convertASoAColumns(TableBuilder& builder, TTreeReader& reader, pack) - { - return convertTTree(builder, reader, ColumnReaderTrait::createReader(reader)...); - } - - template - static void convertASoA(TableBuilder& builder, TTreeReader& reader) - { - return convertASoAColumns(builder, reader, typename T::persistent_columns_t{}); - } -}; - -} // namespace o2 -#endif // FRAMEWORK_ROOTTABLEBUILDERHELPERS_H diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index f6513b5facea3..6270d07a022e8 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -15,7 +15,6 @@ #include "Framework/AnalysisDataModelHelpers.h" #include "Framework/DataProcessingHelpers.h" #include "Framework/ExpressionHelpers.h" -#include "Framework/RootTableBuilderHelpers.h" #include "Framework/AlgorithmSpec.h" #include "Framework/ConfigParamRegistry.h" #include "Framework/ControlService.h" diff --git a/Framework/Core/src/verifyAODFile.cxx b/Framework/Core/src/verifyAODFile.cxx deleted file mode 100644 index 2660019031946..0000000000000 --- a/Framework/Core/src/verifyAODFile.cxx +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#include "Framework/AnalysisDataModel.h" -#include "Framework/RootTableBuilderHelpers.h" -#include "Framework/Logger.h" -#include "Framework/ASoA.h" -#include -#include -#include - -using namespace o2::framework; -using namespace o2::soa; - -template -void verifyTable(TFile* infile, const char* branchName) -{ - std::cout << "Table: " << o2::aod::label() << std::endl; - std::unique_ptr reader = std::make_unique(branchName, infile); - TableBuilder builder; - RootTableBuilderHelpers::convertASoA(builder, *reader); - auto table = builder.finalize(); - std::cout << table->schema()->ToString() << std::endl; - std::cout << "---" << std::endl; -} - -int main(int argc, char** argv) -{ - if (argc != 2) { - LOG(error) << "Bad number of arguments"; - return 1; - } - auto infile = std::make_unique(argv[1]); - if (infile.get() == nullptr || infile->IsOpen() == false) { - LOG(error) << "File not found: " << argv[1]; - return 1; - } - - verifyTable(infile.get(), "O2collision"); - verifyTable(infile.get(), "O2track"); - verifyTable(infile.get(), "O2track"); - verifyTable(infile.get(), "O2track"); - verifyTable(infile.get(), "O2calo"); - verifyTable(infile.get(), "O2fwdtrack"); - return 0; -} diff --git a/Framework/Core/test/test_Root2ArrowTable.cxx b/Framework/Core/test/test_Root2ArrowTable.cxx index 663be91a1e6f3..395048ae916d6 100644 --- a/Framework/Core/test/test_Root2ArrowTable.cxx +++ b/Framework/Core/test/test_Root2ArrowTable.cxx @@ -12,7 +12,6 @@ #include #include "Framework/TableBuilder.h" -#include "Framework/RootTableBuilderHelpers.h" #include "Framework/ASoA.h" #include "Framework/PluginManager.h" #include 
"../src/ArrowDebugHelpers.h" @@ -50,87 +49,6 @@ using namespace o2::framework; -TEST_CASE("RootTree2Table") -{ - using namespace o2::framework; - /// Create a simple TTree - TTree t1("t1", "a simple Tree with simple variables"); - Float_t xyz[3]; - Int_t ij[2]; - Float_t px, py, pz; - Double_t random; - Int_t ev; - t1.Branch("px", &px, "px/F"); - t1.Branch("py", &py, "py/F"); - t1.Branch("pz", &pz, "pz/F"); - t1.Branch("random", &random, "random/D"); - t1.Branch("ev", &ev, "ev/I"); - t1.Branch("xyz", xyz, "xyz[3]/F"); - t1.Branch("ij", ij, "ij[2]/I"); - // fill the tree - for (Int_t i = 0; i < 1000; i++) { - xyz[0] = 1; - xyz[1] = 2; - xyz[2] = 3; - gRandom->Rannor(px, py); - pz = px * px + py * py; - xyz[2] = i + 1; - ij[0] = i; - ij[1] = i + 1; - random = gRandom->Rndm(); - ev = i + 1; - t1.Fill(); - } - - // Create an arrow table from this. - TableBuilder builder; - TTreeReader reader(&t1); - auto&& xyzReader = HolderMaker::make(reader, "xyz"); - auto&& ijkReader = HolderMaker::make(reader, "ij"); - auto&& pxReader = HolderMaker::make(reader, "px"); - auto&& pyReader = HolderMaker::make(reader, "py"); - auto&& pzReader = HolderMaker::make(reader, "pz"); - auto&& randomReader = HolderMaker::make(reader, "random"); - auto&& evReader = HolderMaker::make(reader, "ev"); - - RootTableBuilderHelpers::convertTTree(builder, reader, std::move(xyzReader), std::move(ijkReader), std::move(pxReader), std::move(pyReader), std::move(pzReader), std::move(randomReader), std::move(evReader)); - auto table = builder.finalize(); - REQUIRE(table->num_rows() == 1000); - REQUIRE(table->num_columns() == 7); - REQUIRE(table->schema()->field(0)->type()->id() == arrow::fixed_size_list(arrow::float32(), 3)->id()); - REQUIRE(table->schema()->field(1)->type()->id() == arrow::fixed_size_list(arrow::int32(), 2)->id()); - REQUIRE(table->schema()->field(2)->type()->id() == arrow::float32()->id()); - REQUIRE(table->schema()->field(3)->type()->id() == arrow::float32()->id()); - 
REQUIRE(table->schema()->field(4)->type()->id() == arrow::float32()->id()); - REQUIRE(table->schema()->field(5)->type()->id() == arrow::float64()->id()); - REQUIRE(table->schema()->field(6)->type()->id() == arrow::int32()->id()); - - { - auto chunkToUse = table->column(0)->chunk(0); - chunkToUse = std::dynamic_pointer_cast(chunkToUse)->values(); - auto array = std::static_pointer_cast(chunkToUse); - // array of 3 floats, time 1000. - REQUIRE(array->length() == 3000); - const float* c = reinterpret_cast(array->values()->data()); - - CHECK(c[0] == 1); - CHECK(c[1] == 2); - CHECK(c[2] == 1); - } - { - auto chunkToUse = table->column(1)->chunk(0); - chunkToUse = std::dynamic_pointer_cast(chunkToUse)->values(); - auto array = std::static_pointer_cast(chunkToUse); - REQUIRE(array->length() == 2000); - - const int* ptr = reinterpret_cast(array->values()->data()); - for (size_t i = 0; i < 1000; i++) { - CHECK(ptr[2 * i + 0] == i); - CHECK(ptr[2 * i + 1] == i + 1); - } - } -} - namespace o2::aod { namespace test @@ -149,60 +67,6 @@ DECLARE_SOA_TABLE(Test, "AOD", "ETAPHI", test::Random, test::Ev); } // namespace o2::aod -TEST_CASE("RootTree2TableViaASoA") -{ - using namespace o2::framework; - /// Create a simple TTree - TTree t2("t2", "a simple Tree with simple variables"); - Float_t xyz[3]; - Int_t ij[2]; - Float_t px, py, pz; - Double_t random; - Int_t ev; - t2.Branch("px", &px, "px/F"); - t2.Branch("py", &py, "py/F"); - t2.Branch("pz", &pz, "pz/F"); - t2.Branch("random", &random, "random/D"); - t2.Branch("ev", &ev, "ev/I"); - t2.Branch("xyz", xyz, "xyz[3]/F"); - t2.Branch("ij", ij, "ij[2]/I"); - // fill the tree - for (Int_t i = 0; i < 1000; i++) { - gRandom->Rannor(xyz[0], xyz[1]); - gRandom->Rannor(px, py); - pz = px * px + py * py; - xyz[2] = i + 1; - ij[0] = i; - ij[1] = i + 1; - random = gRandom->Rndm(); - ev = i + 1; - t2.Fill(); - } - - // Create an arrow table from this. 
- TableBuilder builder; - TTreeReader reader(&t2); - REQUIRE(t2.GetEntries() == 1000); - - RootTableBuilderHelpers::convertASoA(builder, reader); - auto table = builder.finalize(); - REQUIRE(table->num_rows() == 1000); - REQUIRE(table->num_columns() == 7); - REQUIRE(table->column(0)->type()->id() == arrow::float32()->id()); - REQUIRE(table->column(1)->type()->id() == arrow::float32()->id()); - REQUIRE(table->column(2)->type()->id() == arrow::float32()->id()); - REQUIRE(table->column(3)->type()->id() == arrow::fixed_size_list(arrow::float32(), 3)->id()); - REQUIRE(table->column(4)->type()->id() == arrow::fixed_size_list(arrow::int32(), 2)->id()); - REQUIRE(table->column(5)->type()->id() == arrow::float64()->id()); - REQUIRE(table->column(6)->type()->id() == arrow::int32()->id()); - - o2::aod::Test testTable{table}; - for (auto& row : testTable) { - REQUIRE(row.ij()[0] == row.ij()[1] - 1); - REQUIRE(row.ij()[1] == row.ev()); - } -} - TEST_CASE("RootTree2Fragment") { using namespace o2::framework; From 83bcaaadc6bd8378afb18db7f56d11812b60060c Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 11 Mar 2025 12:55:37 +0100 Subject: [PATCH 0178/1914] DPL: provide defaults for inputs and outputs (#14038) Silence a bunch of warnings when using aggregate initialization. 
--- Framework/Core/include/Framework/DataProcessorSpec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Framework/Core/include/Framework/DataProcessorSpec.h b/Framework/Core/include/Framework/DataProcessorSpec.h index fafb7fda43ce3..9821a2561d08b 100644 --- a/Framework/Core/include/Framework/DataProcessorSpec.h +++ b/Framework/Core/include/Framework/DataProcessorSpec.h @@ -40,8 +40,8 @@ struct DataProcessorMetadata { struct DataProcessorSpec { std::string name; - Inputs inputs; - Outputs outputs; + Inputs inputs = {}; + Outputs outputs = {}; AlgorithmSpec algorithm; Options options = {}; From a8f75744fd7d4078a834ff71891cf306937c8c86 Mon Sep 17 00:00:00 2001 From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Tue, 11 Mar 2025 13:43:03 +0100 Subject: [PATCH 0179/1914] Bump actions version (#14017) > Error: This request has been automatically failed because it uses a deprecated version of `actions/cache: v2`. Please update your workflow to use v3/v4 of actions/cache to avoid interruptions --- .github/workflows/reports.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reports.yml b/.github/workflows/reports.yml index 0762debd04d54..cadb920fa022f 100644 --- a/.github/workflows/reports.yml +++ b/.github/workflows/reports.yml @@ -17,12 +17,12 @@ jobs: if: github.repository == 'AliceO2Group/AliceO2' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.10 uses: actions/setup-python@v5 with: python-version: '3.10' - - uses: actions/cache@v2 + - uses: actions/cache@v4 name: Configure pip caching with: path: ~/.cache/pip From 48c7605979462cfeed6a3944133a755cd0f5ca44 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 11 Mar 2025 17:14:30 +0100 Subject: [PATCH 0180/1914] DPL Analysis: improve error message when messages are malformed (#14040) --- Framework/AnalysisSupport/src/AODWriterHelpers.cxx | 13 +++++++------ 
1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx index fa10d4661f537..2b1b4f880d1ee 100644 --- a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx @@ -274,19 +274,20 @@ AlgorithmSpec AODWriterHelpers::getOutputObjHistWriter(ConfigContext const& ctx) LOG(error) << "Header not found"; return; } - if (!ref.payload) { - LOG(error) << "Payload not found"; - return; - } auto datah = o2::header::get(ref.header); if (!datah) { LOG(error) << "No data header in stack"; return; } + if (!ref.payload) { + LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } + auto objh = o2::header::get(ref.header); if (!objh) { - LOG(error) << "No output object header in stack"; + LOGP(error, "No output object header in stack of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); return; } @@ -297,7 +298,7 @@ AlgorithmSpec AODWriterHelpers::getOutputObjHistWriter(ConfigContext const& ctx) tm.SetBufferOffset(0); tm.ResetMap(); if (obj.kind == nullptr) { - LOG(error) << "Cannot read class info from buffer."; + LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); return; } From cfa7b71004811813df8cf27450047d7e427bc1a9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 15:00:18 +0100 Subject: [PATCH 0181/1914] dpl-workflow: Automatically apply MI100 workaround in sync --- prodtests/full-system-test/dpl-workflow.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 0f5083dbcdebb..f9b0c7accbff9 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh 
@@ -259,7 +259,8 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi - [[ "${EPN_NODE_MI100:-}" != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 + [[ ${EPN_NODE_MI100:-} != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 + [[ $EPNSYNCMODE == 1 && ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From fc1fd7b0174cb3821c8c9616474e696f0cd30dde Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 11 Mar 2025 13:51:52 +0100 Subject: [PATCH 0182/1914] Fix scaling ITS CA tracker params for low Bfield --- Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 4 +--- GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index f625b77a013b0..cbb31ff8bceb7 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -120,17 +120,15 @@ void ITSTrackingInterface::initialise() for (auto& params : trackParams) { params.CorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrLUT; } - // adjust pT settings to actual mag. 
field for (size_t ip = 0; ip < trackParams.size(); ip++) { auto& param = trackParams[ip]; + param.TrackletMinPt *= bFactor; for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { int lslot = trackConf.MaxTrackLenght - ilg; param.MinPt[lslot] *= bFactor; - param.TrackletMinPt *= bFactor; } } - mTracker->setParameters(trackParams); mVertexer->setParameters(vertParams); } diff --git a/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx b/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx index b81e816d6fc1d..1601e11f2c6fa 100644 --- a/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx +++ b/GPU/GPUTracking/ITS/GPUITSFitterKernels.cxx @@ -22,7 +22,7 @@ #include "ITStracking/Cell.h" #include "CommonConstants/MathConstants.h" -#ifdef CA_DEBUG +#if defined(CA_DEBUG) && !defined(GPUCA_GPUCODE_DEVICE) #include #endif From c5209b138c1f44fc5cb374d3538eb732506bbdc7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 11 Mar 2025 16:12:57 +0100 Subject: [PATCH 0183/1914] Do not fetch meta-data object unless reading explicit ccdb snapshot file --- CCDB/include/CCDB/CcdbApi.h | 2 +- CCDB/src/CcdbApi.cxx | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CCDB/include/CCDB/CcdbApi.h b/CCDB/include/CCDB/CcdbApi.h index 5ad56fbd50557..1308742b57fd0 100644 --- a/CCDB/include/CCDB/CcdbApi.h +++ b/CCDB/include/CCDB/CcdbApi.h @@ -388,7 +388,7 @@ class CcdbApi //: public DatabaseInterface static bool removeSemaphore(std::string const& name, bool remove = false); static void removeLeakingSemaphores(std::string const& basedir, bool remove = false); - void loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders = nullptr) const; + void loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders = nullptr, bool fetchLocalMetaData = true) const; void loadFileToMemory(o2::pmr::vector& dest, std::string const& path, std::map const& metadata, long timestamp, std::map* headers, std::string const& etag, diff 
--git a/CCDB/src/CcdbApi.cxx b/CCDB/src/CcdbApi.cxx index 2906438211c65..c9d2fad882aa1 100644 --- a/CCDB/src/CcdbApi.cxx +++ b/CCDB/src/CcdbApi.cxx @@ -1989,7 +1989,7 @@ bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& { if (url.find("alien:/", 0) != std::string::npos) { std::map localHeaders; - loadFileToMemory(dest, url, &localHeaders); + loadFileToMemory(dest, url, &localHeaders, false); auto it = localHeaders.find("Error"); if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { return false; @@ -2001,7 +2001,7 @@ bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& std::string path = url.substr(7); if (std::filesystem::exists(path)) { std::map localHeaders; - loadFileToMemory(dest, url, &localHeaders); + loadFileToMemory(dest, url, &localHeaders, o2::utils::Str::endsWith(path, ".root")); auto it = localHeaders.find("Error"); if (it != localHeaders.end() && it->second == "An error occurred during retrieval") { return false; @@ -2013,7 +2013,7 @@ bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector& dest, std::string& return false; } -void CcdbApi::loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders) const +void CcdbApi::loadFileToMemory(o2::pmr::vector& dest, const std::string& path, std::map* localHeaders, bool fetchLocalMetaData) const { // Read file to memory as vector. 
For special case of the locally cached file retriev metadata stored directly in the file constexpr size_t MaxCopySize = 0x1L << 25; @@ -2061,7 +2061,7 @@ void CcdbApi::loadFileToMemory(o2::pmr::vector& dest, const std::string& p totalread += nread; } while (nread == (long)MaxCopySize); - if (localHeaders) { + if (localHeaders && fetchLocalMetaData) { TMemFile memFile("name", const_cast(dest.data()), dest.size(), "READ"); auto storedmeta = (std::map*)extractFromTFile(memFile, TClass::GetClass("std::map"), CCDBMETA_ENTRY); if (storedmeta) { From ce065f9788b822d83f3154613b5bd9aa41ada987 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 08:58:07 +0100 Subject: [PATCH 0184/1914] GPU: Add IsNaN to CAMath --- GPU/Common/GPUCommonMath.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 9aa260f59842f..ef837658f74d1 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -82,6 +82,7 @@ class GPUCommonMath GPUd() static int32_t Float2IntRn(float x); GPUd() static float Modf(float x, float y); GPUd() static bool Finite(float x); + GPUd() static bool IsNaN(float x); GPUd() static uint32_t Clz(uint32_t val); GPUd() static uint32_t Popcount(uint32_t val); @@ -224,7 +225,8 @@ GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x) #ifdef GPUCA_NO_FAST_MATH GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); } +GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); } // Fixme: fix these 2 for OpenCL +GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), false); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), 
(float)sqrt((double)x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } From 8b6b16c6e9b861066c7607dde33663ff881187c7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 11:36:06 +0100 Subject: [PATCH 0185/1914] GPU Math: Add memcpy and QuietNaN, fix Finite and IsNaN --- GPU/Common/GPUCommonMath.h | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index ef837658f74d1..5a813b74ed7b6 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -25,6 +25,8 @@ #include #include #include +#include +#include #endif #if !defined(GPUCA_GPUCODE_COMPILEKERNELS) && (!defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__)) @@ -83,9 +85,14 @@ class GPUCommonMath GPUd() static float Modf(float x, float y); GPUd() static bool Finite(float x); GPUd() static bool IsNaN(float x); + GPUd() static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() static bool IsNaNRelaxed(float x); // always false if not using NO_FAST_MATH + GPUd() static float QuietNaN(); GPUd() static uint32_t Clz(uint32_t val); GPUd() static uint32_t Popcount(uint32_t val); + GPUd() static void memcpy(void* dst, const void* src, size_t size); + GPUhdni() static float Hypot(float x, float y); GPUhdni() static float Hypot(float x, float y, float z); GPUhdni() static float Hypot(float x, float y, float z, float w); @@ -181,6 +188,23 @@ typedef GPUCommonMath CAMath; #define CHOICE(c1, c2, c3) (c1) // Select first option for Host #endif // clang-format on +GPUdi() void GPUCommonMath::memcpy(void* dst, const void* src, size_t size) +{ +#ifndef GPUCA_GPUCODE_DEVICE + std::memcpy(dst, src, size); +#elif 
defined(__CUDACC__) || defined(__HIPCC__) + ::memcpy(dst, src, size); +#elif defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) + __builtin_memcpy(dst, src, size); +#else + char* d = (char*)dst; + const char* s = (const char*)src; + for (size_t i = 0; i < size; i++) { + d[i] = s[i]; + } +#endif +} + template GPUdi() constexpr T GPUCommonMath::nextMultipleOf(T val) { @@ -222,11 +246,12 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); } +GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } +GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), isnan(x)); } +GPUdi() float GPUCommonMath::QuietNaN() { return CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } #ifdef GPUCA_NO_FAST_MATH GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), true); } // Fixme: fix these 2 for OpenCL -GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), false); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } @@ -238,10 +263,11 @@ GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE((float)asin((double)x GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE((float)acos((double)x), 
(float)acos((double)x), acos(x)); } GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x), (float)log((double)x), log(x)); } GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } +GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } +GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } #else GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), true, true); } GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); } GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } @@ -253,6 +279,8 @@ GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE(asinf(x), asinf(x), a GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE(acosf(x), acosf(x), acos(x)); } GPUdi() float GPUCommonMath::Log(float x) { return CHOICE(logf(x), logf(x), log(x)); } GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE(expf(x), expf(x), exp(x)); } +GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return true; } +GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } #endif GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) From 2dbd7f888467d52b54ebc8d24c4826f495d85440 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Wed, 12 Mar 2025 13:31:58 +0100 Subject: [PATCH 0186/1914] Ctpdev (#14037) * dev: ctp config to BK and first orbit and orbit reset via zmq * clang --- .../include/CTPWorkflowScalers/RunManager.h | 2 +- .../CTPWorkflowScalers/ctpCCDBManager.h | 13 ++-- .../CTP/workflowScalers/src/RunManager.cxx | 
37 ++++++++---- .../workflowScalers/src/ctpCCDBManager.cxx | 59 +++++++++++++++++++ 4 files changed, 94 insertions(+), 17 deletions(-) diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h index 0d624ecd8b892..e0b204e6c4ce5 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h @@ -47,7 +47,7 @@ class CTPRunManager : public ctpCCDBManager CTPRunManager() = default; void init(); int loadRun(const std::string& cfg); - int startRun(const std::string& cfg); + int setRunConfigBK(uint32_t runNumber, const std::string& cfg); int stopRun(uint32_t irun, long timeStamp); int addScalers(uint32_t irun, std::time_t time, bool start = 0); int processMessage(std::string& topic, const std::string& message); diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h index 5fb6d3678f0ba..c968a83183624 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h @@ -27,12 +27,11 @@ class ctpCCDBManager int saveRunScalersToCCDB(CTPRunScalers& scalers, long timeStart, long timeStop); int saveRunScalersToQCDB(CTPRunScalers& scalers, long timeStart, long timeStop); int saveRunConfigToCCDB(CTPConfiguration* cfg, long timeStart); + int saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long timeStart); + int saveOrbitReset(long timeStamp); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run, bool& ok); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run); CTPRunScalers getScalersFromCCDB(long timestamp, std::string, bool& ok); - void setCCDBPathConfig(std::string path) { mCCDBPathCTPConfig = path; }; - void setCCDBPathScalers(std::string 
path) { mCCDBPathCTPScalers = path; }; - void setQCDBPathScalers(std::string path) { mQCDBPathCTPScalers = path; }; static void setCCDBHost(std::string host) { mCCDBHost = host; }; static void setQCDBHost(std::string host) { mQCDBHost = host; }; @@ -42,9 +41,11 @@ class ctpCCDBManager // std::string mQCDBHost = "http://ali-qcdb.cern.ch:8083"; static std::string mCCDBHost; static std::string mQCDBHost; - std::string mCCDBPathCTPScalers = "CTP/Calib/Scalers"; - std::string mCCDBPathCTPConfig = "CTP/Config/Config"; - std::string mQCDBPathCTPScalers = "qc/CTP/Scalers"; + const std::string mCCDBPathCTPScalers = "CTP/Calib/Scalers"; + // std::string mCCDBPathCTPConfig = "CTP/Config/Config"; - in Configuration.h + const std::string mQCDBPathCTPScalers = "qc/CTP/Scalers"; + const std::string mCCDBPathSoxOrbit = "CTP/Calib/FirstRunOrbit"; + const std::string mCCDBPathOrbitReset = "CTP/Calib/OrbitReset"; ClassDefNV(ctpCCDBManager, 1); }; } // namespace ctp diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx b/Detectors/CTP/workflowScalers/src/RunManager.cxx index e6861e6cb4b38..9af5b5e104120 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -57,7 +57,7 @@ int CTPActiveRun::send2BK(std::unique_ptr& BKClient, size_t ts, bool std::string clsname = cfg.getClassNameFromHWIndex(cls.first); // clsname = std::to_string(runOri) + "_" + clsname; try { - BKClient->triggerCounters()->createOrUpdateForRun(runNumber, clsname, ts, cntsbk[0], cntsbk[1], cntsbk[2], cntsbk[3], cntsbk[4], cntsbk[5]); + BKClient->ctpTriggerCounters()->createOrUpdateForRun(runNumber, clsname, ts, cntsbk[0], cntsbk[1], cntsbk[2], cntsbk[3], cntsbk[4], cntsbk[5]); } catch (std::runtime_error& error) { std::cerr << "An error occurred: " << error.what() << std::endl; return 1; @@ -124,8 +124,19 @@ int CTPRunManager::loadRun(const std::string& cfg) return 0; } -int CTPRunManager::startRun(const std::string& cfg) +int 
CTPRunManager::setRunConfigBK(uint32_t runNumber, const std::string& cfg) { + std::cout << "Printing cfg:" << cfg << std::endl; + if (mBKClient) { + try { + uint32_t runNumber = 1; + mBKClient->run()->setRawCtpTriggerConfiguration(runNumber, cfg); + } catch (std::runtime_error& error) { + std::cerr << "An error occurred: " << error.what() << std::endl; + return 1; + } + LOG(info) << "Run BK:" << runNumber << " CFG:" << cfg; + } return 0; } int CTPRunManager::stopRun(uint32_t irun, long timeStamp) @@ -221,6 +232,13 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message loadRun(message); return 0; } + if (topic.find("soxorbit") != std::string::npos) { + return 0; + } + if (topic.find("orbitreset") != std::string::npos) { + return 0; + } + static int nerror = 0; if (topic.find("sox") != std::string::npos) { // get config size_t irun = message.find("run"); @@ -230,17 +248,15 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message } LOG(info) << "SOX received, Run keyword position:" << irun; std::string cfg = message.substr(irun, message.size() - irun); - startRun(cfg); firstcounters = message.substr(0, irun); - } - if (topic.find("eox") != std::string::npos) { + } else if (topic.find("eox") != std::string::npos) { LOG(info) << "EOX received"; mEOX = 1; - } - static int nerror = 0; - if (topic == "rocnts") { - if (nerror < 1) { - LOG(warning) << "Skipping topic rocnts"; + } else if (topic.find("cnts") != std::string::npos) { + // just continue + } else { + if (nerror < 10) { + LOG(warning) << "Skipping topic:" << topic; nerror++; } return 0; @@ -293,6 +309,7 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message mActiveRunNumbers[i] = mCounters[i]; mActiveRuns[i] = run->second; mRunsLoaded.erase(run); + setRunConfigBK(mActiveRuns[i]->cfg.getRunNumber(), mActiveRuns[i]->cfg.getConfigString()); addScalers(i, tt, 1); saveRunScalersToQCDB(mActiveRuns[i]->scalers, tt * 1000, tt * 1000); } 
else { diff --git a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx index 3484cb97279b5..0d81b896b3e91 100644 --- a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx +++ b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx @@ -107,6 +107,65 @@ int ctpCCDBManager::saveRunConfigToCCDB(CTPConfiguration* cfg, long timeStart) } return ret; } +int ctpCCDBManager::saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long timestamp) +{ + // data base + if (mCCDBHost == "none") { + LOG(info) << "SOX Orbit not written to CCDB none"; + return 0; + } + std::vector vect; + if (timestamp == 0) { + auto now = std::chrono::system_clock::now(); + timestamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + } + vect.push_back(timestamp); + vect.push_back((uint64_t)runNumber); + vect.push_back((uint64_t)soxOrbit); + long tmin = timestamp; + long tmax = tmin + 381928219; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + metadata["runNumber"] = std::to_string(runNumber); + api.init(mCCDBHost.c_str()); // or http://localhost:8080 for a local installation + + // store arbitrary user object in strongly typed manner + int ret = api.storeAsTFileAny(&vect, mCCDBPathSoxOrbit, metadata, tmin, tmax); + if (ret == 0) { + LOG(info) << "SOX orbit saved in ccdb:" << mCCDBHost << " run:" << runNumber << " tmin:" << tmin << " tmax:" << tmax; + } else { + LOG(fatal) << "SOX orbit Problem writing to database ret:" << ret; + } + return 0; +} +int ctpCCDBManager::saveOrbitReset(long timeStamp) +{ + // data base + if (mCCDBHost == "none") { + LOG(info) << "Orbit Reset not written to CCDB none"; + return 0; + } + std::vector vect; + if (timeStamp == 0) { + auto now = std::chrono::system_clock::now(); + timeStamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + } + vect.push_back(timeStamp); + long tmin = timeStamp; + long tmax = tmin + 381928219; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + 
api.init(mCCDBHost.c_str()); // or http://localhost:8080 for a local installation + + // store arbitrary user object in strongly typed manner + int ret = api.storeAsTFileAny(&vect, mCCDBPathOrbitReset, metadata, tmin, tmax); + if (ret == 0) { + LOG(info) << "Orbit reset saved in ccdb:" << mCCDBHost << " tmin:" << tmin << " tmax:" << tmax; + } else { + LOG(fatal) << "Orbit reset Problem writing to database ret:" << ret; + } + return 0; +} CTPConfiguration ctpCCDBManager::getConfigFromCCDB(long timestamp, std::string run, bool& ok) { auto& mgr = o2::ccdb::BasicCCDBManager::instance(); From fb4de2054a035146aa88a425cc46b1d54f10a63d Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 12 Mar 2025 13:59:23 +0100 Subject: [PATCH 0187/1914] write pairs correlation tree only if requested --- Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index 36530bfe9238b..db57ad5f8a7eb 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -591,9 +591,8 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) pr.nshTPCRow = shinfo.second; } } + (*mDBGOut) << "pairs" << "pr=" << trcPairsVec << "\n"; } - (*mDBGOut) << "pairs" - << "pr=" << trcPairsVec << "\n"; } int nvtot = mMaxNeighbours < 0 ? 
-1 : (int)pveVec.size(); From cf94b28f0d092a4e1351bb303143923f9b5487f7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 12 Mar 2025 14:56:33 +0100 Subject: [PATCH 0188/1914] Do not scale ITS tracking cuts for B=0 --- Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index cbb31ff8bceb7..b264ac46bc7b3 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -38,6 +38,9 @@ void ITSTrackingInterface::initialise() std::vector trackParams; const auto& trackConf = o2::its::TrackerParamConfig::Instance(); float bFactor = std::abs(o2::base::Propagator::Instance()->getNominalBz()) / 5.0066791; + if (bFactor < 0.01) { + bFactor = 1.; + } if (mMode == TrackingMode::Unset) { mMode = (TrackingMode)(trackConf.trackingMode); LOGP(info, "Tracking mode not set, trying to fetch it from configurable params to: {}", asString(mMode)); From 7486f59b97f80adc24644b949fcd4671c4045339 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 15:08:37 +0100 Subject: [PATCH 0189/1914] dpl-workflow: automatically apply MI100 workaround in async --- prodtests/full-system-test/dpl-workflow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index f9b0c7accbff9..76235d127037a 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -260,7 +260,7 @@ if [[ $GPUTYPE == "HIP" ]]; then GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi [[ ${EPN_NODE_MI100:-} != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 - [[ $EPNSYNCMODE == 1 && ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" + [[ $EPNSYNCMODE == 1 || ! 
-z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From fe6cd7c0a1239cfbbb257da2c1e18a61cc15adae Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 12 Mar 2025 16:52:13 +0100 Subject: [PATCH 0190/1914] DataModel: make V0s IsStandard explicit (#13937) This makes the check more explicit to really have a standard v0. If analysers just ask for this bit, for example they still would get tpc-only v0s. @ddobrigk --- Framework/Core/include/Framework/AnalysisDataModel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index 8c9f323f3dcc6..70db8500e3421 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -1596,7 +1596,7 @@ DECLARE_SOA_INDEX_COLUMN(Collision, collision); //! Coll DECLARE_SOA_COLUMN(V0Type, v0Type, uint8_t); //! custom bitmap for various selections (see below) DECLARE_SOA_DYNAMIC_COLUMN(IsStandardV0, isStandardV0, //! is standard V0 - [](uint8_t V0Type) -> bool { return V0Type & (1 << 0); }); + [](uint8_t V0Type) -> bool { return V0Type == 1; }); DECLARE_SOA_DYNAMIC_COLUMN(IsPhotonV0, isPhotonV0, //! is TPC-only V0 for which the photon-mass-hypothesis was good [](uint8_t V0Type) -> bool { return V0Type & (1 << 1); }); DECLARE_SOA_DYNAMIC_COLUMN(IsCollinearV0, isCollinearV0, //! 
is V0 for which the photon-mass-hypothesis was good and was fitted collinearly From f0d987e4be494d043bfb05500d31978d663ce796 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 23:46:59 +0100 Subject: [PATCH 0191/1914] GPU QA: Fix debug ROOT dump if we wrote a different ROOT file meanwhile --- GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx b/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx index 846c85e8e1cb3..7155b783e725e 100644 --- a/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx +++ b/GPU/GPUTracking/Debug/GPUROOTDumpCore.cxx @@ -30,6 +30,7 @@ GPUROOTDumpCore::GPUROOTDumpCore(GPUROOTDumpCore::GPUROOTDumpCorePrivate) GPUROOTDumpCore::~GPUROOTDumpCore() { if (mFile) { + mFile->cd(); for (uint32_t i = 0; i < mBranches.size(); i++) { mBranches[i]->write(); } From 5c6657a7c1843c1a152eda3c7d2776e4ee14785c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 23:49:58 +0100 Subject: [PATCH 0192/1914] GPU QA: Add histogram with number of rows in which primary TPC track has clusters --- GPU/GPUTracking/qa/GPUQA.cxx | 125 +++++++++++++++++++++-------------- GPU/GPUTracking/qa/GPUQA.h | 8 +-- 2 files changed, 80 insertions(+), 53 deletions(-) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index 552c82f1bd299..ba7aeb3800a5e 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -525,8 +525,10 @@ int32_t GPUQA::InitQACreateHistograms() if (mQATasks & taskTrackStatistics) { // Create Tracks Histograms - snprintf(name, 2048, "nclusters"); - createHist(mNCl, name, name, 160, 0, 159); + for (int32_t i = 0; i < 2; i++) { + snprintf(name, 2048, i ? 
"nrows_with_cluster" : "nclusters"); + createHist(mNCl[i], name, name, 160, 0, 159); + } snprintf(name, 2048, "tracks"); std::unique_ptr binsPt{CreateLogAxis(AXIS_BINS[4], PT_MIN_CLUST, PT_MAX)}; createHist(mTracks, name, name, AXIS_BINS[4], binsPt.get()); @@ -895,7 +897,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx mTrackMCLabelsReverse[iCol][i] = -1; } } - if (mQATasks & taskClusterAttach) { + if (mQATasks & taskClusterAttach && GetNMCLabels()) { mClusterParam.resize(GetNMCLabels()); memset(mClusterParam.data(), 0, mClusterParam.size() * sizeof(mClusterParam[0])); } @@ -1661,7 +1663,25 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx continue; } mTracks->Fill(1.f / fabsf(track.GetParam().GetQPt())); - mNCl->Fill(track.NClustersFitted()); + mNCl[0]->Fill(track.NClustersFitted()); + uint32_t nClCorrected = 0; + int32_t lastSector = -1, lastRow = -1; + const auto& trackClusters = mTracking->mIOPtrs.mergedTrackHits; + for (uint32_t j = 0; j < track.NClusters(); j++) { + if (trackClusters[track.FirstClusterRef() + j].state & GPUTPCGMMergedTrackHit::flagReject) { + continue; + } + if (trackClusters[track.FirstClusterRef() + j].sector == lastSector && trackClusters[track.FirstClusterRef() + j].row == lastRow) { + continue; + } + if (trackClusters[track.FirstClusterRef() + j].leg != trackClusters[track.FirstClusterRef() + track.NClusters() - 1].leg) { + continue; + } + nClCorrected++; + lastSector = trackClusters[track.FirstClusterRef() + j].sector; + lastRow = trackClusters[track.FirstClusterRef() + j].row; + } + mNCl[1]->Fill(nClCorrected); } if (mClNative && mTracking && mTracking->GetTPCTransformHelper()) { for (uint32_t i = 0; i < GPUChainTracking::NSECTORS; i++) { @@ -2055,12 +2075,15 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) mLTracks = createGarbageCollected(0.9 - legendSpacingString * 1.45, 0.93 - (0.93 - 0.86) / 2. 
* (float)ConfigNumInputs, 0.98, 0.949); SetLegend(mLTracks); - mCNCl = createGarbageCollected("cncl", "Number of clusters per track", 0, 0, 700, 700. * 2. / 3.); - mCNCl->cd(); - mPNCl = createGarbageCollected("p0", "", 0.0, 0.0, 1.0, 1.0); - mPNCl->Draw(); - mLNCl = createGarbageCollected(0.9 - legendSpacingString * 1.45, 0.93 - (0.93 - 0.86) / 2. * (float)ConfigNumInputs, 0.98, 0.949); - SetLegend(mLNCl); + for (int32_t i = 0; i < 2; i++) { + snprintf(name, 2048, "cncl%d Pull", i); + mCNCl[i] = createGarbageCollected(name, i ? "Number of clusters (corrected for multiple per row)" : "Number of clusters per track", 0, 0, 700, 700. * 2. / 3.); + mCNCl[i]->cd(); + mPNCl[i] = createGarbageCollected("p0", "", 0.0, 0.0, 1.0, 1.0); + mPNCl[i]->Draw(); + mLNCl[i] = createGarbageCollected(0.9 - legendSpacingString * 1.45, 0.93 - (0.93 - 0.86) / 2. * (float)ConfigNumInputs, 0.98, 0.949); + SetLegend(mLNCl[i]); + } mCClXY = createGarbageCollected("clxy", "Number of clusters per X / Y", 0, 0, 700, 700. * 2. 
/ 3.); mCClXY->cd(); @@ -2696,47 +2719,51 @@ int32_t GPUQA::DrawQAHistograms(TObjArray* qcout) mCTracks->Print("plots/tracks.root"); } - tmpMax = 0.; - for (int32_t k = 0; k < ConfigNumInputs; k++) { - TH1F* e = mNCl; - if (GetHist(e, tin, k, nNewInput) == nullptr) { - continue; - } - e->SetMaximum(-1111); - if (e->GetMaximum() > tmpMax) { - tmpMax = e->GetMaximum(); - } - } - mPNCl->cd(); - for (int32_t k = 0; k < ConfigNumInputs; k++) { - TH1F* e = mNCl; - if (GetHist(e, tin, k, nNewInput) == nullptr) { - continue; - } - if (tout && !mConfig.inputHistogramsOnly && k == 0) { - e->Write(); + for (int32_t i = 0; i < 2; i++) { + tmpMax = 0.; + for (int32_t k = 0; k < ConfigNumInputs; k++) { + TH1F* e = mNCl[i]; + if (GetHist(e, tin, k, nNewInput) == nullptr) { + continue; + } + e->SetMaximum(-1111); + if (e->GetMaximum() > tmpMax) { + tmpMax = e->GetMaximum(); + } } - e->SetMaximum(tmpMax * 1.02); - e->SetMinimum(tmpMax * -0.02); - e->SetStats(kFALSE); - e->SetLineWidth(1); - e->GetYaxis()->SetTitle("a.u."); - e->GetXaxis()->SetTitle("NClusters"); - if (qcout) { - qcout->Add(e); + mPNCl[i]->cd(); + for (int32_t k = 0; k < ConfigNumInputs; k++) { + TH1F* e = mNCl[i]; + if (GetHist(e, tin, k, nNewInput) == nullptr) { + continue; + } + if (tout && !mConfig.inputHistogramsOnly && k == 0) { + e->Write(); + } + e->SetMaximum(tmpMax * 1.02); + e->SetMinimum(tmpMax * -0.02); + e->SetStats(kFALSE); + e->SetLineWidth(1); + e->GetYaxis()->SetTitle("a.u."); + e->GetXaxis()->SetTitle("NClusters"); + if (qcout) { + qcout->Add(e); + } + e->SetMarkerColor(kBlack); + e->SetLineColor(colorNums[k % COLORCOUNT]); + e->Draw(k == 0 ? "" : "same"); + GetName(fname, k); + snprintf(name, 2048, "%sNClusters%d", fname, i); + mLNCl[i]->AddEntry(e, name, "l"); + } + mLNCl[i]->Draw(); + mCNCl[i]->cd(); + snprintf(name, 2048, "plots/nClusters%s.pdf", i ? "_corrected" : ""); + mCNCl[i]->Print(name); + if (mConfig.writeRootFiles) { + snprintf(name, 2048, "plots/nClusters%s.root", i ? 
"_corrected" : ""); + mCNCl[i]->Print(name); } - e->SetMarkerColor(kBlack); - e->SetLineColor(colorNums[k % COLORCOUNT]); - e->Draw(k == 0 ? "" : "same"); - GetName(fname, k); - snprintf(name, 2048, "%sNClusters", fname); - mLNCl->AddEntry(e, name, "l"); - } - mLNCl->Draw(); - mCNCl->cd(); - mCNCl->Print("plots/nClusters.pdf"); - if (mConfig.writeRootFiles) { - mCNCl->Print("plots/nClusters.root"); } mPClXY->cd(); diff --git a/GPU/GPUTracking/qa/GPUQA.h b/GPU/GPUTracking/qa/GPUQA.h index 32b0553700f90..87900b5279ec0 100644 --- a/GPU/GPUTracking/qa/GPUQA.h +++ b/GPU/GPUTracking/qa/GPUQA.h @@ -299,10 +299,10 @@ class GPUQA TPad* mPTracks; TLegend* mLTracks; - TH1F* mNCl; - TCanvas* mCNCl; - TPad* mPNCl; - TLegend* mLNCl; + TH1F* mNCl[2]; + TCanvas* mCNCl[2]; + TPad* mPNCl[2]; + TLegend* mLNCl[2]; TH2F* mClXY; TCanvas* mCClXY; From 641977cccfa17710faaca7c18bbb7e607957b232 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Wed, 5 Mar 2025 18:41:31 +0100 Subject: [PATCH 0193/1914] Fixing handling of edge clusters Adapting edge correction Fixing edge handling Please consider the following formatting changes Fix for right edge check --- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx | 7 ++++++- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h | 2 +- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index d145aaed705d9..a826cdf71f575 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -97,7 +97,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime timeOffset, const GPUTPCGeometry& geo) +GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, 
TPCTime timeOffset, const GPUTPCGeometry& geo, Charge* padBoundaryCharges) { mQtot += q; @@ -116,6 +116,11 @@ GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime if (CfUtils::isAtEdge(pos, geo.NPads(pos.row()))) { bool leftEdge = (pad < 2); bool correct = (leftEdge) ? (pad < mPadMean) : (pad > mPadMean); + if (leftEdge && pad == 1) { // only check charge at boundary if maximum is at least one pad away from boundary + correct = correct && (padBoundaryCharges[0] > 0); // Only correct if cluster is asymmetric with charge > 0 towards sector boundary, otherwise all charge is found + } else if (!leftEdge && pad == (geo.NPads(pos.row()) - 2)) { + correct = correct && (padBoundaryCharges[1] > 0); + } mPadMean = (correct) ? pad : mPadMean; } } diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index 26decbf0a5b14..c409a6cced3a5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -40,7 +40,7 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); - GPUd() void finalize(const ChargePos&, tpccf::Charge, tpccf::TPCTime, const GPUTPCGeometry&); + GPUd() void finalize(const ChargePos&, tpccf::Charge, tpccf::TPCTime, const GPUTPCGeometry&, tpccf::Charge*); GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&) const; private: diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index 1aeae812f5193..f28e80aa08201 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -58,6 +58,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t ChargePos pos = filteredPeakPositions[CAMath::Min(idx, 
clusternum - 1)]; Charge charge = chargeMap[pos].unpack(); + Charge padBoundaryCharges[2] = {chargeMap[pos.delta({-1, 0})].unpack(), chargeMap[pos.delta({1, 0})].unpack()}; + ClusterAccumulator pc; CPU_ONLY(labelAcc->collect(pos, charge)); @@ -80,7 +82,7 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t } return; } - pc.finalize(pos, charge, fragment.start, clusterer.Param().tpcGeometry); + pc.finalize(pos, charge, fragment.start, clusterer.Param().tpcGeometry, padBoundaryCharges); tpc::ClusterNative myCluster; bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param()); From 46445fa21a058dd94017a350be1b8f74cf55d476 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 19:26:48 +0100 Subject: [PATCH 0194/1914] GPU TPC CF: Use corrected check also for edge cluster tagging --- GPU/GPUTracking/TPCClusterFinder/CfUtils.h | 5 -- .../TPCClusterFinder/ClusterAccumulator.cxx | 89 +++++++++---------- .../TPCClusterFinder/ClusterAccumulator.h | 4 +- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 6 +- 4 files changed, 45 insertions(+), 59 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h index 4504b8288aee0..75dcc166abd9b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h @@ -27,11 +27,6 @@ class CfUtils { public: - static GPUdi() bool isAtEdge(const ChargePos& pos, tpccf::GlobalPad padsPerRow) - { - return (pos.pad() < 2 || pos.pad() >= padsPerRow - 2); - } - static GPUdi() bool innerAboveThreshold(uint8_t aboveThreshold, uint16_t outerIdx) { return aboveThreshold & (1 << cfconsts::OuterToInner[outerIdx]); diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index a826cdf71f575..77dc6e119df7d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ 
-21,45 +21,6 @@ using namespace o2::gpu; using namespace o2::gpu::tpccf; -GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param) const -{ - cn.qTot = CAMath::Float2UIntRn(mQtot); - if (cn.qTot <= param.rec.tpc.cfQTotCutoff) { - return false; - } - cn.qMax = q; - if (cn.qMax <= param.rec.tpc.cfQMaxCutoff) { - return false; - } - if (mTimeMean < param.rec.tpc.clustersShiftTimebinsClusterizer) { - return false; - } - if (q <= param.rec.tpc.cfQMaxCutoffSingleTime && mTimeSigma == 0) { - return false; - } - if (q <= param.rec.tpc.cfQMaxCutoffSinglePad && mPadSigma == 0) { - return false; - } - - bool isEdgeCluster = CfUtils::isAtEdge(pos, param.tpcGeometry.NPads(pos.row())); - bool wasSplitInTime = mSplitInTime >= param.rec.tpc.cfMinSplitNum; - bool wasSplitInPad = mSplitInPad >= param.rec.tpc.cfMinSplitNum; - bool isSingleCluster = (mPadSigma == 0) || (mTimeSigma == 0); - - uint8_t flags = 0; - flags |= (isEdgeCluster) ? tpc::ClusterNative::flagEdge : 0; - flags |= (wasSplitInTime) ? tpc::ClusterNative::flagSplitTime : 0; - flags |= (wasSplitInPad) ? tpc::ClusterNative::flagSplitPad : 0; - flags |= (isSingleCluster) ? 
tpc::ClusterNative::flagSingle : 0; - - cn.setTimeFlags(mTimeMean - param.rec.tpc.clustersShiftTimebinsClusterizer, flags); - cn.setPad(mPadMean); - cn.setSigmaTime(mTimeSigma); - cn.setSigmaPad(mPadSigma); - - return true; -} - GPUd() void ClusterAccumulator::update(Charge splitCharge, Delta2 d) { mQtot += splitCharge; @@ -97,7 +58,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime timeOffset, const GPUTPCGeometry& geo, Charge* padBoundaryCharges) +GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param, TPCTime timeOffset, const Array2D& chargeMap) { mQtot += q; @@ -113,14 +74,48 @@ GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, Charge q, TPCTime mPadMean += pad; mTimeMean += timeOffset + pos.time(); - if (CfUtils::isAtEdge(pos, geo.NPads(pos.row()))) { + bool isEdgeCluster = pos.pad() < 2 || pos.pad() >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + if (isEdgeCluster) { bool leftEdge = (pad < 2); - bool correct = (leftEdge) ? (pad < mPadMean) : (pad > mPadMean); - if (leftEdge && pad == 1) { // only check charge at boundary if maximum is at least one pad away from boundary - correct = correct && (padBoundaryCharges[0] > 0); // Only correct if cluster is asymmetric with charge > 0 towards sector boundary, otherwise all charge is found - } else if (!leftEdge && pad == (geo.NPads(pos.row()) - 2)) { - correct = correct && (padBoundaryCharges[1] > 0); + if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { + isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. + } else if (leftEdge ? 
(pad < mPadMean) : (pad > mPadMean)) { + mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak } - mPadMean = (correct) ? pad : mPadMean; } + + cn.qTot = CAMath::Float2UIntRn(mQtot); + if (cn.qTot <= param.rec.tpc.cfQTotCutoff) { + return false; + } + cn.qMax = q; + if (cn.qMax <= param.rec.tpc.cfQMaxCutoff) { + return false; + } + if (mTimeMean < param.rec.tpc.clustersShiftTimebinsClusterizer) { + return false; + } + if (q <= param.rec.tpc.cfQMaxCutoffSingleTime && mTimeSigma == 0) { + return false; + } + if (q <= param.rec.tpc.cfQMaxCutoffSinglePad && mPadSigma == 0) { + return false; + } + + bool wasSplitInTime = mSplitInTime >= param.rec.tpc.cfMinSplitNum; + bool wasSplitInPad = mSplitInPad >= param.rec.tpc.cfMinSplitNum; + bool isSingleCluster = (mPadSigma == 0) || (mTimeSigma == 0); + + uint8_t flags = 0; + flags |= (isEdgeCluster) ? tpc::ClusterNative::flagEdge : 0; + flags |= (wasSplitInTime) ? tpc::ClusterNative::flagSplitTime : 0; + flags |= (wasSplitInPad) ? tpc::ClusterNative::flagSplitPad : 0; + flags |= (isSingleCluster) ? 
tpc::ClusterNative::flagSingle : 0; + + cn.setTimeFlags(mTimeMean - param.rec.tpc.clustersShiftTimebinsClusterizer, flags); + cn.setPad(mPadMean); + cn.setSigmaTime(mTimeSigma); + cn.setSigmaPad(mPadSigma); + + return true; } diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index c409a6cced3a5..73f7cb439775a 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -17,6 +17,7 @@ #include "clusterFinderDefs.h" #include "PackedCharge.h" +#include "Array2D.h" namespace o2 { @@ -40,8 +41,7 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); - GPUd() void finalize(const ChargePos&, tpccf::Charge, tpccf::TPCTime, const GPUTPCGeometry&, tpccf::Charge*); - GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&) const; + GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&, tpccf::TPCTime, const Array2D&); private: float mQtot = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index f28e80aa08201..407deb6a588d0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -58,8 +58,6 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; Charge charge = chargeMap[pos].unpack(); - Charge padBoundaryCharges[2] = {chargeMap[pos.delta({-1, 0})].unpack(), chargeMap[pos.delta({1, 0})].unpack()}; - ClusterAccumulator pc; CPU_ONLY(labelAcc->collect(pos, charge)); @@ -82,10 +80,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t } return; } - pc.finalize(pos, charge, 
fragment.start, clusterer.Param().tpcGeometry, padBoundaryCharges); - tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param()); + bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap); if (rejectCluster) { if (clusterPosInRow) { From dffee581d1402171d2088cb1c5d4d5ad76d7bace Mon Sep 17 00:00:00 2001 From: iravasen Date: Thu, 13 Mar 2025 11:20:27 +0100 Subject: [PATCH 0195/1914] Refine calculations for rise time and ToT (#14039) --- .../workflow/src/ThresholdCalibratorSpec.cxx | 46 +++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx index 075a4869210e1..b651bfdedf3aa 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/ThresholdCalibratorSpec.cxx @@ -1001,8 +1001,8 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mFitType = NO_FIT; this->mMin = 0; this->mMax = 400; // strobe delay goes from 0 to 400 (included) in steps of 4 - this->mStep = 4; - this->mStrobeWindow = 5; // it's 4 but it corresponds to 4+1 (as from alpide manual) + this->mStep = 1; + this->mStrobeWindow = 1; // it's 0 but it corresponds to 0+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; this->mCheckExactRow = true; } else if (runtype == TOT_CALIBRATION_1_ROW) { @@ -1013,7 +1013,7 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mMin = 0; this->mMax = 2000; // strobe delay goes from 0 to 2000 in steps of 10 this->mStep = 10; - this->mStrobeWindow = 2; // it's 1 but it corresponds to 1+1 (as from alpide manual) + this->mStrobeWindow = 10; // it's 9 but it corresponds to 9+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; this->mMin2 = 0; // charge min this->mMax2 = 170; // charge max @@ -1028,7 
+1028,7 @@ void ITSThresholdCalibrator::setRunType(const short int& runtype) this->mMin = 300; this->mMax = 1100; // strobe delay goes from 300 to 1100 (included) in steps of 10 this->mStep = 10; - this->mStrobeWindow = 2; // it's 1 but it corresponds to 1+1 (as from alpide manual) + this->mStrobeWindow = 10; // it's 9 but it corresponds to 9+1 (as from alpide manual) this->N_RANGE = (mMax - mMin) / mStep + 1; this->mMin2 = 30; // charge min this->mMax2 = 60; // charge max @@ -1116,39 +1116,39 @@ std::vector ITSThresholdCalibrator::calculatePulseParams(const short int& for (auto itrow = mPixelHits[chipID].begin(); itrow != mPixelHits[chipID].end(); itrow++) { // loop over the chip rows short int row = itrow->first; - for (short int col_i = 0; col_i < this->N_COL; col_i++) { // loop over the pixels on the row - for (short int sdel_i = 0; sdel_i < this->N_RANGE; sdel_i++) { // loop over the strobe delays - if (mPixelHits[chipID][row][col_i][0][sdel_i] > 0 && mPixelHits[chipID][row][col_i][0][sdel_i] < nInj && rt_mindel < 0) { // from left, the last bin with 0 hits or the first with some hits - rt_mindel = sdel_i > 0 ? 
((sdel_i - 1) * mStep) + 1 : (sdel_i * mStep) + 1; // + 1 because if delay = n, we get n+1 in reality (ALPIDE feature) + for (short int col_i = 0; col_i < this->N_COL; col_i++) { // loop over the pixels on the row + for (short int sdel_i = 0; sdel_i < this->N_RANGE; sdel_i++) { // loop over the strobe delays + if (mPixelHits[chipID][row][col_i][0][sdel_i] > 0.1 * nInj && mPixelHits[chipID][row][col_i][0][sdel_i] < nInj && rt_mindel < 0) { // from left, first bin with 10% hits and 90% hits + rt_mindel = (sdel_i * mStep) + 1; // + 1 because if delay = n, we get n+1 in reality (ALPIDE feature) } - if (mPixelHits[chipID][row][col_i][0][sdel_i] == nInj) { + if (mPixelHits[chipID][row][col_i][0][sdel_i] >= 0.9 * nInj) { // for Rt max take the 90% point rt_maxdel = (sdel_i * mStep) + 1; + break; + } + } + for (short int sdel_i = 0; sdel_i < N_RANGE; sdel_i++) { + if (mPixelHits[chipID][row][col_i][0][sdel_i] >= 0.5 * nInj) { // for ToT take the 50% point tot_mindel = (sdel_i * mStep) + 1; break; } } - for (short int sdel_i = N_RANGE - 1; sdel_i >= 0; sdel_i--) { // from right, the first bin with nInj hits - if (mPixelHits[chipID][row][col_i][0][sdel_i] == nInj) { + for (short int sdel_i = N_RANGE - 1; sdel_i >= 0; sdel_i--) { // from right, the first bin with 50% nInj hits + if (mPixelHits[chipID][row][col_i][0][sdel_i] >= 0.5 * nInj) { tot_maxdel = (sdel_i * mStep) + 1; break; } } if (tot_maxdel > tot_mindel && tot_mindel >= 0 && tot_maxdel >= 0) { - sumTot += tot_maxdel - tot_mindel - (int)(mStrobeWindow / 2); - sumSqTot += (tot_maxdel - tot_mindel - (int)(mStrobeWindow / 2)) * (tot_maxdel - tot_mindel - (int)(mStrobeWindow / 2)); + sumTot += tot_maxdel - tot_mindel - mStrobeWindow; + sumSqTot += (tot_maxdel - tot_mindel - mStrobeWindow) * (tot_maxdel - tot_mindel - mStrobeWindow); countTot++; } - if (rt_maxdel > rt_mindel && rt_maxdel > 0) { - if (rt_mindel < 0) { - sumRt += mStep + (int)(mStrobeWindow / 2); // resolution -> in case the rise is "instantaneous" - 
sumSqRt += (mStep + (int)(mStrobeWindow / 2)) * (mStep + (int)(mStrobeWindow / 2)); - } else { - sumRt += rt_maxdel - rt_mindel + (int)(mStrobeWindow / 2); - sumSqRt += (rt_maxdel - rt_mindel + (int)(mStrobeWindow / 2)) * (rt_maxdel - rt_mindel + (int)(mStrobeWindow / 2)); - } + if (rt_maxdel > rt_mindel && rt_maxdel > 0 && rt_mindel > 0) { + sumRt += rt_maxdel - rt_mindel + mStrobeWindow; + sumSqRt += (rt_maxdel - rt_mindel + mStrobeWindow) * (rt_maxdel - rt_mindel + mStrobeWindow); countRt++; } @@ -1232,8 +1232,8 @@ std::vector ITSThresholdCalibrator::calculatePulseParams2D(const short in } if (maxPl > tot_mindel && tot_mindel < 1e7 && maxPl >= 0) { // ToT - sumTot += maxPl - tot_mindel - (int)(mStrobeWindow / 2); - sumSqTot += (maxPl - tot_mindel - (int)(mStrobeWindow / 2)) * (maxPl - tot_mindel - (int)(mStrobeWindow / 2)); + sumTot += maxPl - tot_mindel - mStrobeWindow; + sumSqTot += (maxPl - tot_mindel - mStrobeWindow) * (maxPl - tot_mindel - mStrobeWindow); countTot++; } From 46380fc7fbd16567c6acae29dc991005a5b9b974 Mon Sep 17 00:00:00 2001 From: Chunzheng Wang <83008337+ChunzhengLab@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:21:59 +0100 Subject: [PATCH 0196/1914] AlpideResponse adjust for APTS (#13929) Signed-off-by: Chunzheng Wang --- .../AlpideResponseData/AlpideResponse.cxx | 66 ++++++++++++++----- .../ITSMFTSimulation/AlpideSimResponse.h | 4 +- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx b/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx index f4c71d85df24e..86ffb24b239ed 100644 --- a/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx +++ b/Detectors/ITSMFT/common/data/AlpideResponseData/AlpideResponse.cxx @@ -14,55 +14,87 @@ #include "ITSMFTSimulation/AlpideSimResponse.h" #include #include +#include #include #include #include #include #include -void alpideResponse(const std::string& inpath = "./", - const std::string& outpath 
= "./", - const std::string& response_file = "AlpideResponseData.root") +void alpideResponse(const std::string& inpath, const std::string& outpath, const std::string& chip_name) { + // Check input path validity + if (gSystem->AccessPathName(inpath.c_str())) { + throw std::invalid_argument("Input path does not exist or is inaccessible: " + inpath); + } + + // Check output path validity + if (gSystem->AccessPathName(outpath.c_str(), kWritePermission)) { + throw std::invalid_argument("Output path is not writable: " + outpath); + } o2::itsmft::AlpideSimResponse resp0, resp1; - resp0.initData(0, inpath.data()); - resp1.initData(1, inpath.data()); + if (chip_name == "Alpide") { + resp0.initData(0, inpath.c_str()); + resp1.initData(1, inpath.c_str()); + } else if (chip_name == "APTS") { + resp1.setColMax(1.5e-4); + resp1.setRowMax(1.5e-4); + resp1.initData(1, inpath.c_str()); + } else { + throw std::invalid_argument("Unknown chip name: " + chip_name); + } - auto file = TFile::Open((outpath + response_file).data(), "recreate"); - file->WriteObjectAny(&resp0, "o2::itsmft::AlpideSimResponse", "response0"); + std::string output_file = outpath + "/" + chip_name + "ResponseData.root"; + auto file = TFile::Open(output_file.c_str(), "recreate"); + + if (!file || file->IsZombie()) { + throw std::runtime_error("Failed to create output file: " + output_file); + } else if (chip_name == "Alpide") { + file->WriteObjectAny(&resp0, "o2::itsmft::AlpideSimResponse", "response0"); + } file->WriteObjectAny(&resp1, "o2::itsmft::AlpideSimResponse", "response1"); file->Close(); + delete file; } int main(int argc, const char* argv[]) { namespace bpo = boost::program_options; bpo::variables_map vm; - bpo::options_description options("Alpide reponse generator options"); - options.add_options()( - "inputdir,i", bpo::value()->default_value("./"), "Path where Vbb-0.0V and Vbb-3.0V are located.")( - "outputdir,o", bpo::value()->default_value("./"), "Path where to store the output.")( - "name,n", 
bpo::value()->default_value("AlpideResponseData.root"), "Output file name."); + bpo::options_description options("Alpide response generator options"); + options.add_options()("inputdir,i", bpo::value()->default_value("./"), "Path where Vbb-0.0V and Vbb-3.0V are located.")("outputdir,o", bpo::value()->default_value("./"), "Path where to store the output.")("chip,c", bpo::value()->default_value("Alpide"), "Chip name (Alpide or APTS)."); try { bpo::store(parse_command_line(argc, argv, options), vm); + if (vm.count("help")) { std::cout << options << std::endl; - return 1; + return 0; } + bpo::notify(vm); } catch (const bpo::error& e) { std::cerr << e.what() << "\n\n"; std::cerr << "Error parsing command line arguments. Available options:\n"; - std::cerr << options << std::endl; return 2; } - std::cout << "Generating " << vm["inputdir"].as() + vm["name"].as() << std::endl; - alpideResponse(vm["inputdir"].as(), vm["outputdir"].as(), vm["name"].as()); + try { + std::cout << "Generating response for chip: " << vm["chip"].as() << std::endl; + std::cout << "Input directory: " << vm["inputdir"].as() << std::endl; + std::cout << "Output directory: " << vm["outputdir"].as() << std::endl; + + alpideResponse(vm["inputdir"].as(), + vm["outputdir"].as(), + vm["chip"].as()); + std::cout << "Response file generated successfully." << std::endl; + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } return 0; -} \ No newline at end of file +} diff --git a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h index 0462115d3bfc6..92656a16257a1 100644 --- a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h +++ b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h @@ -132,6 +132,8 @@ class AlpideSimResponse float getStepCol() const { return mStepInvCol ? 1. 
/ mStepInvCol : 0.f; } float getStepRow() const { return mStepInvRow ? 1. / mStepInvRow : 0.f; } float getStepDepth() const { return mStepInvDpt ? 1. / mStepInvDpt : 0.f; } + void setColMax(float v) noexcept { mColMax = v; } + void setRowMax(float v) noexcept { mRowMax = v; } void setDataPath(const std::string pth) { mDataPath = pth; } void setGridColName(const std::string nm) { mGridColName = nm; } void setGridRowName(const std::string nm) { mGridRowName = nm; } @@ -142,7 +144,7 @@ class AlpideSimResponse const std::string& getColRowDataFmt() const { return mColRowDataFmt; } void print() const; - ClassDefNV(AlpideSimResponse, 1); + ClassDefNV(AlpideSimResponse, 2); }; //----------------------------------------------------- From c0831adaeecc26ceff98d7f1aae8b2e8b3c46741 Mon Sep 17 00:00:00 2001 From: Cas van Veen - They/them <96796377+Cas1997@users.noreply.github.com> Date: Thu, 13 Mar 2025 11:42:23 +0100 Subject: [PATCH 0197/1914] ALICE3: TRK now hosts IRIS tracker which is divided into petal cases and made retractable (#13953) * ALICE3: TRK now hosts IRIS tracker which is divided into petal cases and made retractable * Fixed hits in sensitive volumes + fixed CMakeLists.txt * Applied clang * Uncommented services * Final bug fix * Fixed placement of the beam pipe from earlier PR * Applied clang --------- Co-authored-by: Matteo Concas --- .../ALICE3/FT3/simulation/src/Detector.cxx | 17 +- .../include/Alice3DetectorsPassive/Pipe.h | 12 +- .../Upgrades/ALICE3/Passive/src/Pipe.cxx | 144 ++----------- .../TRK/base/include/TRKBase/TRKBaseParam.h | 2 + .../ALICE3/TRK/simulation/CMakeLists.txt | 8 +- .../include/TRKSimulation/Detector.h | 7 +- .../include/TRKSimulation/TRKPetalCase.h | 93 ++++++++ .../include/TRKSimulation/TRKPetalDisk.h | 64 ++++++ .../include/TRKSimulation/TRKPetalLayer.h | 61 ++++++ .../include/TRKSimulation/TRKServices.h | 12 +- .../ALICE3/TRK/simulation/src/Detector.cxx | 108 +++++++--- .../TRK/simulation/src/TRKPetalCase.cxx | 202 ++++++++++++++++++ 
.../TRK/simulation/src/TRKPetalDisk.cxx | 94 ++++++++ .../TRK/simulation/src/TRKPetalLayer.cxx | 79 +++++++ .../ALICE3/TRK/simulation/src/TRKServices.cxx | 96 ++++----- .../TRK/simulation/src/TRKSimulationLinkDef.h | 3 + macro/build_geometry.C | 2 +- 17 files changed, 764 insertions(+), 240 deletions(-) create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx diff --git a/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx b/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx index 296bec8aa8922..ce132fdb33cd3 100644 --- a/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx +++ b/Detectors/Upgrades/ALICE3/FT3/simulation/src/Detector.cxx @@ -184,8 +184,8 @@ void Detector::buildBasicFT3(const FT3BaseParam& param) //_________________________________________________________________________________________________ void Detector::buildFT3V1() { - //Build FT3 detector according to - //https://indico.cern.ch/event/992488/contributions/4174473/attachments/2168881/3661331/tracker_parameters_werner_jan_11_2021.pdf + // Build FT3 detector according to + // https://indico.cern.ch/event/992488/contributions/4174473/attachments/2168881/3661331/tracker_parameters_werner_jan_11_2021.pdf LOG(info) << "Building FT3 Detector: V1"; @@ -284,17 +284,15 @@ void Detector::buildFT3NewVacuumVessel() // to adhere to the changes that were presented at the ALICE 3 Upgrade days in March 2024 // Inner radius at C-side to 7 cm // Inner radius at A-side stays at 5 cm + // 06.02.2025 update: IRIS layers are now 
in TRK LOG(info) << "Building FT3 Detector: After Upgrade Days March 2024 version"; - mNumberOfLayers = 12; + mNumberOfLayers = 9; float sensorThickness = 30.e-4; float layersx2X0 = 1.e-2; std::vector> layersConfigCSide{ - {26., .5, 2.5, 0.1f * layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} - {30., .5, 2.5, 0.1f * layersx2X0}, - {34., .5, 2.5, 0.1f * layersx2X0}, - {77., 7.0, 35., layersx2X0}, + {77., 7.0, 35., layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} {100., 7.0, 35., layersx2X0}, {122., 7.0, 35., layersx2X0}, {150., 7.0, 68.f, layersx2X0}, @@ -305,10 +303,7 @@ void Detector::buildFT3NewVacuumVessel() {350., 7.0, 68.f, layersx2X0}}; std::vector> layersConfigASide{ - {26., .5, 2.5, 0.1f * layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} - {30., .5, 2.5, 0.1f * layersx2X0}, - {34., .5, 2.5, 0.1f * layersx2X0}, - {77., 5.0, 35., layersx2X0}, + {77., 5.0, 35., layersx2X0}, // {z_layer, r_in, r_out, Layerx2X0} {100., 5.0, 35., layersx2X0}, {122., 5.0, 35., layersx2X0}, {150., 5.0, 68.f, layersx2X0}, diff --git a/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h b/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h index 1d9858e2dfec3..b13d9ab68780a 100644 --- a/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h +++ b/Detectors/Upgrades/ALICE3/Passive/include/Alice3DetectorsPassive/Pipe.h @@ -26,8 +26,6 @@ class Alice3Pipe : public Alice3PassiveBase ~Alice3Pipe() override; Alice3Pipe(const char* name, const char* title = "Alice 3 Pipe", - const bool isTRKActivated = false, - const bool isFT3Activated = false, const float pipeRIn = 0.f, const float pipeThickness = 0.f, const float a3ipLength = 0.f, @@ -50,9 +48,6 @@ class Alice3Pipe : public Alice3PassiveBase float getVacuumVesselWidth() const { return mVacuumVesselThick; } float getVacuumVesselLength() const { return mVacuumVesselASideLength; } - bool IsTRKActivated() const { return mIsTRKActivated; } - bool IsFT3Activated() const { return 
mIsFT3Activated; } - private: void createMaterials(); Alice3Pipe(const Alice3Pipe& orig) = default; @@ -62,13 +57,10 @@ class Alice3Pipe : public Alice3PassiveBase float mPipeThick = 0.; // inner beam pipe section thickness float mA3IPLength = 0.; // Length of A3IP - float mVacuumVesselRIn = 0.; // inner diameter of the vacuum vessel - float mVacuumVesselThick = 0.; // outer beam pipe section thickness + float mVacuumVesselRIn = 0.; // inner diameter of the vacuum vessel + float mVacuumVesselThick = 0.; // outer beam pipe section thickness float mVacuumVesselASideLength = 0.; // Length of the A Side of the vacuum vessel around the IP - bool mIsTRKActivated = true; // If TRK is not active don't create TRK layers allocations in the vacuum volume - bool mIsFT3Activated = true; - ClassDefOverride(Alice3Pipe, 1); }; } // namespace passive diff --git a/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx b/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx index 57f30241bd4ff..7dfd26a79b38d 100644 --- a/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx +++ b/Detectors/Upgrades/ALICE3/Passive/src/Pipe.cxx @@ -12,8 +12,8 @@ #include "Alice3DetectorsPassive/Pipe.h" #include #include -#include -#include +#include "TGeoTube.h" +#include "TVirtualMC.h" #include "TGeoManager.h" // for TGeoManager, gGeoManager #include "TGeoMaterial.h" // for TGeoMaterial #include "TGeoMedium.h" // for TGeoMedium @@ -31,8 +31,6 @@ using namespace o2::passive; Alice3Pipe::Alice3Pipe() : Alice3PassiveBase{"Alice3PIPE", ""} {} Alice3Pipe::Alice3Pipe(const char* name, const char* title, - bool isTRKActivated, - bool isFT3Activated, float pipeRIn, float pipeThickness, float a3ipLength, @@ -40,8 +38,6 @@ Alice3Pipe::Alice3Pipe(const char* name, float vacuumVesselThickness, float vacuumVesselASideLength) : Alice3PassiveBase{name, title}, - mIsTRKActivated{isTRKActivated}, - mIsFT3Activated{isFT3Activated}, mPipeRIn{pipeRIn}, mPipeThick{pipeThickness}, mA3IPLength{a3ipLength}, @@ -85,7 +81,6 @@ void 
Alice3Pipe::ConstructGeometry() auto& matmgr = o2::base::MaterialManager::Instance(); const TGeoMedium* kMedBe = matmgr.getTGeoMedium("ALICE3_PIPE_BE"); - const TGeoMedium* kMedVac = matmgr.getTGeoMedium("ALICE3_PIPE_VACUUM"); // Top volume TGeoVolume* top = gGeoManager->GetVolume("cave"); @@ -96,96 +91,11 @@ void Alice3Pipe::ConstructGeometry() // We split the naming of the parts if the beam pipe for ALICE 3 into parts // - pipe A Side - // - vacuum vessel (which hosts the primary vacuum and covers all C Side as well) - // - iris vacuum vessel (which hosts the secondary vacuum) + // - pipe C Side (which hosts the primary vacuum vessel and covers all C Side as well) // A3IP update - // Vacuum - Double_t pipeASideLength = mA3IPLength / 2. - mVacuumVesselThick - mVacuumVesselASideLength; - Double_t pipeCSideLength = mA3IPLength / 2. + mVacuumVesselASideLength; - TGeoTube* vacuumBasePipe = new TGeoTube("PIPEVACUUM_BASEsh", 0., mPipeRIn, mA3IPLength / 2.); - TGeoTube* vacuumBaseVacuumVessel = new TGeoTube("VACUUM_VESSELVACUUM_BASEsh", mPipeRIn, mVacuumVesselRIn, pipeCSideLength / 2.); - - TGeoTranslation* posPipeCSide = new TGeoTranslation("PIPE_CSIDE_POSITION", 0, 0, mVacuumVesselASideLength - pipeCSideLength / 2.); - posPipeCSide->RegisterYourself(); - // Excavate volumes from the vacuum such that there is place for the TRK barrel layers and FT3 disc layers of the IRIS tracker - // And the other passive shapes: coldplate, iris tracker vacuum vessel - TGeoCompositeShape* vacuumComposite; - TGeoVolume* vacuumVolume; - TString compositeFormula{"PIPEVACUUM_BASEsh+VACUUM_VESSELVACUUM_BASEsh:PIPE_CSIDE_POSITION"}; - TString subtractorsFormula; - - if (!mIsTRKActivated) { - std::vector trkLayerShapes; - - std::vector> layersQuotas = {std::array{0.5f, 50.f, 100.e-4}, // TODO: Set layers dynamically. 
{radius, zLen, thickness} - std::array{1.2f, 50.f, 100.e-4}, - std::array{2.5f, 50.f, 100.e-4}}; - - for (auto iLayer{0}; iLayer < layersQuotas.size(); ++iLayer) { // Create TRK layers shapes - auto& layerData = layersQuotas[iLayer]; - trkLayerShapes.emplace_back(new TGeoTube(Form("TRKLAYER_%dsh", iLayer), layerData[0], layerData[0] + layerData[2], layerData[1] / 2.)); - if (iLayer != 0) { - subtractorsFormula += "+"; - } - subtractorsFormula += Form("TRKLAYER_%dsh", iLayer); - } - - // IRIS vacuum vessel and coldplate dimensions - float coldplateRIn = 2.6f; // cm - float coldplateThick = 150.e-3; // cm - float coldplateLength = 50.f; // cm - float irisVacuumVesselInnerRIn = 0.48f; // cm - float irisVacuumVesselOuterRIn = coldplateRIn + coldplateThick; - float irisVacuumVesselLength = 70.f; // cm - float irisVacuumVesselThick = 150.e-4; // cm - - // Excavate vacuum for hosting cold plate and IRIS tracker - TGeoTube* coldPlate = new TGeoTube("TRK_COLDPLATEsh", coldplateRIn, coldplateRIn + coldplateThick, coldplateLength / 2.); - subtractorsFormula += "+TRK_COLDPLATEsh"; - - TGeoTube* irisVacuumVesselInner = new TGeoTube("TRK_IRISVACUUMVESSELINNERsh", irisVacuumVesselInnerRIn, irisVacuumVesselInnerRIn + irisVacuumVesselThick, irisVacuumVesselLength / 2.); - subtractorsFormula += "+TRK_IRISVACUUMVESSELINNERsh"; - - TGeoTube* irisVacuumVesselOuter = new TGeoTube("TRK_IRISVACUUMVESSELOUTERsh", irisVacuumVesselOuterRIn, irisVacuumVesselOuterRIn + irisVacuumVesselThick, irisVacuumVesselLength / 2.); - subtractorsFormula += "+TRK_IRISVACUUMVESSELOUTERsh"; - - TGeoTube* irisVacuumVesselWall = new TGeoTube("TRK_IRISVACUUMVESSELWALLsh", irisVacuumVesselInnerRIn, irisVacuumVesselOuterRIn + irisVacuumVesselThick, irisVacuumVesselThick / 2.); - TGeoTranslation* posIrisVacVWallNegZSide = new TGeoTranslation("IRISWALLNEGZ", 0., 0., -irisVacuumVesselLength / 2. 
- irisVacuumVesselThick / 2.); - posIrisVacVWallNegZSide->RegisterYourself(); - subtractorsFormula += "+TRK_IRISVACUUMVESSELWALLsh:IRISWALLNEGZ"; - - TGeoTranslation* posIrisVacVWallPosZSide = new TGeoTranslation("IRISWALLPOSZ", 0., 0., irisVacuumVesselLength / 2. + irisVacuumVesselThick / 2.); - posIrisVacVWallPosZSide->RegisterYourself(); - subtractorsFormula += "+TRK_IRISVACUUMVESSELWALLsh:IRISWALLPOSZ"; - } - - if (!mIsFT3Activated) { - std::vector ft3DiscShapes; - std::vector ft3DiscPositions; - - std::vector> discsQuotas = {std::array{0.5f, 2.5f, 100.e-4, 26.}, // TODO: Set discs dynamically. {rIn, rOut, thickness, zpos} - std::array{0.5f, 2.5f, 100.e-4, 30.}, - std::array{0.5f, 2.5f, 100.e-4, 34.}, - std::array{0.5f, 2.5f, 100.e-4, -26.}, - std::array{0.5f, 2.5f, 100.e-4, -30.}, - std::array{0.5f, 2.5f, 100.e-4, -34.}}; - TString tempSubtractorsFormula = ""; - if (!mIsTRKActivated) { - tempSubtractorsFormula = "+"; - } - for (auto iDisc{0}; iDisc < discsQuotas.size(); ++iDisc) { - auto& discData = discsQuotas[iDisc]; - ft3DiscShapes.emplace_back(new TGeoTube(Form("FT3DISC_%dsh", iDisc), discData[0], discData[1], discData[2] / 2.)); - ft3DiscPositions.emplace_back(new TGeoTranslation(Form("t%d", iDisc), 0., 0., discData[3])); - ft3DiscPositions[iDisc]->RegisterYourself(); - if (iDisc != 0) { - tempSubtractorsFormula += "+"; - } - tempSubtractorsFormula += Form("FT3DISC_%dsh:t%d", iDisc, iDisc); - } - subtractorsFormula += tempSubtractorsFormula; - } + Double_t pipeASideLength = mA3IPLength / 2. - mVacuumVesselThick - mVacuumVesselASideLength / 2.; + Double_t pipeCSideLength = mA3IPLength / 2. 
+ mVacuumVesselASideLength / 2.; // Pipe tubes TGeoTube* pipeASide = new TGeoTube("PIPE_Ash", mPipeRIn, mPipeRIn + mPipeThick, pipeASideLength / 2.); @@ -193,37 +103,25 @@ void Alice3Pipe::ConstructGeometry() TGeoTube* vacuumVesselWall = new TGeoTube("VACUUM_VESSEL_WALLsh", mPipeRIn, mVacuumVesselRIn + mVacuumVesselThick, mVacuumVesselThick / 2.); // Pipe and vacuum vessel positions - TGeoTranslation* posVacuumVesselWall = new TGeoTranslation("WALL_POSITION", 0, 0, mVacuumVesselASideLength + mVacuumVesselThick / 2.); - posVacuumVesselWall->RegisterYourself(); - TGeoTranslation* posPipeASide = new TGeoTranslation("PIPE_ASIDE_POSITION", 0, 0, mVacuumVesselASideLength + mVacuumVesselThick + pipeASideLength / 2.); + TGeoTranslation* posPipeASide = new TGeoTranslation("PIPE_ASIDE_POSITION", 0, 0, mVacuumVesselASideLength / 2. + mVacuumVesselThick + pipeASideLength / 2.); posPipeASide->RegisterYourself(); + TGeoTranslation* posPipeCSide = new TGeoTranslation("PIPE_CSIDE_POSITION", 0, 0, mVacuumVesselASideLength / 2. - pipeCSideLength / 2.); + posPipeCSide->RegisterYourself(); + TGeoTranslation* posVacuumVesselWall = new TGeoTranslation("WALL_POSITION", 0, 0, mVacuumVesselASideLength / 2. 
+ mVacuumVesselThick / 2.); + posVacuumVesselWall->RegisterYourself(); // Pipe composite shape and volume TString pipeCompositeFormula = - "VACUUM_VESSEL_WALLsh:WALL_POSITION" - "+PIPE_Ash:PIPE_ASIDE_POSITION" - "+PIPE_Csh:PIPE_CSIDE_POSITION"; - - if (subtractorsFormula.Length()) { - LOG(info) << "Subtractors formula before : " << subtractorsFormula; - subtractorsFormula = Form("-(%s)", subtractorsFormula.Data()); - LOG(info) << "Subtractors formula after: " << subtractorsFormula; - - vacuumComposite = new TGeoCompositeShape("VACUUM_BASEsh", (compositeFormula + subtractorsFormula).Data()); - vacuumVolume = new TGeoVolume("VACUUM_BASE", vacuumComposite, kMedVac); - } else { - vacuumComposite = new TGeoCompositeShape("VACUUM_BASEsh", compositeFormula.Data()); - vacuumVolume = new TGeoVolume("VACUUM_BASE", vacuumComposite, kMedVac); - } + "PIPE_Ash:PIPE_ASIDE_POSITION" + "+PIPE_Csh:PIPE_CSIDE_POSITION" + "+VACUUM_VESSEL_WALLsh:WALL_POSITION"; TGeoCompositeShape* pipeComposite = new TGeoCompositeShape("A3IPsh", pipeCompositeFormula); TGeoVolume* pipeVolume = new TGeoVolume("A3IP", pipeComposite, kMedBe); // Add everything to the barrel - barrel->AddNode(vacuumVolume, 1, new TGeoTranslation(0, 30.f, 0)); barrel->AddNode(pipeVolume, 1, new TGeoTranslation(0, 30.f, 0)); - vacuumVolume->SetLineColor(kGreen + 3); pipeVolume->SetLineColor(kGreen + 3); } @@ -236,15 +134,6 @@ void Alice3Pipe::createMaterials() float sxmgmx = 10.; o2::base::Detector::initFieldTrackingParams(isxfld, sxmgmx); - // - // Air - // - float aAir[4] = {12.0107, 14.0067, 15.9994, 39.948}; - float zAir[4] = {6., 7., 8., 18.}; - float wAir[4] = {0.000124, 0.755267, 0.231781, 0.012827}; - float dAir = 1.20479E-3; - float dAir1 = 1.20479E-11; - // **************** // Defines tracking media parameters. 
// @@ -258,13 +147,8 @@ void Alice3Pipe::createMaterials() auto& matmgr = o2::base::MaterialManager::Instance(); // Beryllium - matmgr.Material("ALICE3_PIPE", 5, "BERILLIUM$", 9.01, 4., 1.848, 35.3, 36.7); + matmgr.Material("ALICE3_PIPE", 5, "BERYLLIUM$", 9.01, 4., 1.848, 35.3, 36.7); matmgr.Medium("ALICE3_PIPE", 5, "BE", 5, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); - - // Vacuum - matmgr.Mixture("ALICE3_PIPE", 16, "VACUUM$ ", aAir, zAir, dAir1, 4, wAir); - - matmgr.Medium("ALICE3_PIPE", 16, "VACUUM", 16, 0, isxfld, sxmgmx, tmaxfd, stemax, deemax, epsil, stmin); } // ---------------------------------------------------------------------------- diff --git a/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h b/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h index 9ea4bd1072d91..63c95b1e6b2f6 100644 --- a/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h +++ b/Detectors/Upgrades/ALICE3/TRK/base/include/TRKBase/TRKBaseParam.h @@ -29,6 +29,8 @@ enum eLayout { struct TRKBaseParam : public o2::conf::ConfigurableParamHelper { std::string configFile = ""; float serviceTubeX0 = 0.02f; // X0 Al2O3 + Bool_t irisOpen = false; + eLayout layoutML = kCylinder; // Type of segmentation for the middle layers eLayout layoutOL = kCylinder; // Type of segmentation for the outer layers diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt index 7706c0e10d778..c21b7b9aebbf6 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt @@ -13,6 +13,9 @@ o2_add_library(TRKSimulation SOURCES src/TRKLayer.cxx src/Detector.cxx src/TRKServices.cxx + src/TRKPetalCase.cxx + src/TRKPetalLayer.cxx + src/TRKPetalDisk.cxx PUBLIC_LINK_LIBRARIES O2::TRKBase O2::FT3Simulation O2::ITSMFTSimulation) @@ -20,4 +23,7 @@ o2_add_library(TRKSimulation o2_target_root_dictionary(TRKSimulation HEADERS 
include/TRKSimulation/Detector.h include/TRKSimulation/TRKLayer.h - include/TRKSimulation/TRKServices.h) \ No newline at end of file + include/TRKSimulation/TRKServices.h + include/TRKSimulation/TRKPetalCase.h + include/TRKSimulation/TRKPetalLayer.h + include/TRKSimulation/TRKPetalDisk.h) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h index 5b777641dbe99..31f3da7a00bb4 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Detector.h @@ -17,6 +17,7 @@ #include "TRKSimulation/TRKLayer.h" #include "TRKSimulation/TRKServices.h" +#include "TRKSimulation/TRKPetalCase.h" #include "TRKBase/GeometryTGeo.h" #include @@ -87,7 +88,11 @@ class Detector : public o2::base::DetImpl GeometryTGeo* mGeometryTGeo; //! std::vector* mHits; // ITSMFT ones for the moment std::vector mLayers; - TRKServices mServices; + TRKServices mServices; // Houses the services of the TRK, but not the Iris tracker + std::vector mPetalCases; // Houses the Iris tracker and its services. Created fully in the beam pipe + + std::vector mFirstOrLastLayers; // Names of the first or last layers + bool InsideFirstOrLastLayer(std::string layerName); void defineSensitiveVolumes(); diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h new file mode 100644 index 0000000000000..cd45cc98fd177 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalCase.h @@ -0,0 +1,93 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_TRK_PETALCASE_H +#define ALICEO2_TRK_PETALCASE_H + +#include + +#include "TRKSimulation/TRKPetalLayer.h" +#include "TRKSimulation/TRKPetalDisk.h" +#include "TGeoCompositeShape.h" + +namespace o2 +{ +namespace trk +{ +class TRKPetalCase +{ + public: + TRKPetalCase() = default; + TRKPetalCase(Int_t number, TGeoVolume* motherVolume, Bool_t irisOpen); + ~TRKPetalCase() = default; + + // Sensitive volume list + std::vector mPetalLayers; + std::vector mPetalDisks; + + auto getPetalCaseName() { return mPetalCaseName; } + TString getFullName(); + + private: + void constructCase(TGeoVolume* motherVolume); + void constructColdPlate(TGeoVolume* motherVolume); + void constructDetectionPetals(TGeoVolume* motherVolume); + void addDetectionPetelsToFullComposite(); + + void addToPetalCaseComposite(TString shape) { mFullCompositeFormula += ("+" + shape); } + + Int_t mPetalCaseNumber; // Used to determine rotation and position. 0-3 + Bool_t mOpenState; // At injection energy, the iris tracker is in the open position. During stable beams, it is closed + + TString mPetalCaseName; + TString mFullCompositeFormula; // Used to excavate the petal and all its components from the vacuum + + // Center position of the petal case. 0,0,0 at stable beams (a.k.a. 
closed state) + Double_t mXPos, mYPos, mZPos; + + Double_t mWallThickness; // cm // Assume all the walls have the same thickness for now + Double_t mRIn; // cm + Double_t mROut; // cm + Double_t mRInOpenState; // cm + Double_t mPetalCaseLength; // cm + + Double_t mAngularCoverageAzimuthalWall; // Rad // Angular coverage of azimuthal part of wall (equivalent to that of the sensitive volumes) + Double_t mAngularCoverageRadialWall; // Rad // Angular coverage of radial part of wall + Double_t mToDeg; + + // Petal case parts -> In one composite shape + TGeoTubeSeg* mInnerAzimuthalWall; + TGeoTubeSeg* mOuterAzimuthalWall; + TGeoTubeSeg* mRadialWall; + TGeoTubeSeg* mForwardWall; + + TGeoRotation* mAzimuthalWallRot; + TGeoRotation* mRadialWall1Rot; + TGeoRotation* mRadialWall2Rot; + + TGeoCombiTrans* mAzimuthalWallCombiTrans; + TGeoCombiTrans* mRadialWall1CombiTrans; + TGeoCombiTrans* mRadialWall2CombiTrans; + TGeoCombiTrans* mForwardWall1CombiTrans; + TGeoCombiTrans* mForwardWall2CombiTrans; + + TGeoVolume* mPetalCaseVolume; + + // Cold plate + TGeoTubeSeg* mColdPlate; + TGeoVolume* mColdPlateVolume; + + ClassDef(TRKPetalCase, 1); +}; + +} // namespace trk +} // namespace o2 +#endif // ALICEO2_TRK_PETALCASE_H \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h new file mode 100644 index 0000000000000..465f52eb8d41b --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalDisk.h @@ -0,0 +1,64 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file TRKPetalDisk.h +/// \brief Definition of the TRKPetalDisk class + +#ifndef ALICEO2_TRK_PETAL_DISK_H_ +#define ALICEO2_TRK_PETAL_DISK_H_ + +#include "TGeoManager.h" // for gGeoManager +#include "Rtypes.h" // for Double_t, Int_t, Bool_t, etc +#include // for LOG + +namespace o2 +{ +namespace trk +{ + +/// This class defines the Geometry for the TRK Disk TGeo. +class TRKPetalDisk +{ + public: + TRKPetalDisk() = default; + TRKPetalDisk(Int_t diskNumber, std::string diskName, Float_t z, Float_t rIn, Float_t rOut, Float_t angularCoverage, Float_t Diskx2X0); + ~TRKPetalDisk() = default; + + auto getInnerRadius() const { return mInnerRadius; } + auto getOuterRadius() const { return mOuterRadius; } + auto getThickness() const { return mChipThickness; } + auto getAngularCoverage() const { return mAngularCoverage; } + auto getZ() const { return mZ; } + auto getx2X0() const { return mx2X0; } + auto getName() const { return mDiskName; } + auto getSensorName() const { return mSensorName; } + + /// Creates the actual Disk and places inside its mother volume + /// \param motherVolume the TGeoVolume owing the volume structure + void createDisk(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans); + + private: + Int_t mDiskNumber = -1; ///< Current disk number + std::string mDiskName; ///< Current disk name + std::string mSensorName; + Double_t mInnerRadius; ///< Inner radius of this disk + Double_t mOuterRadius; ///< Outer radius of this disk + Double_t mAngularCoverage; + Double_t mZ; ///< Z position of the disk + Double_t mChipThickness; ///< Chip thickness + Double_t mx2X0; ///< Disk material budget x/X0 + + ClassDef(TRKPetalDisk, 1); +}; +} // namespace trk +} // namespace o2 + +#endif // ALICEO2_TRK_PETAL_DISK_H diff --git 
a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h new file mode 100644 index 0000000000000..4e7a7735d51f0 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKPetalLayer.h @@ -0,0 +1,61 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_TRK_PETAL_LAYER_H +#define ALICEO2_TRK_PETAL_LAYER_H + +#include "TGeoManager.h" +#include +#include "TGeoTube.h" + +#include "TRKBase/TRKBaseParam.h" + +namespace o2 +{ +namespace trk +{ +class TRKPetalLayer +{ + public: + TRKPetalLayer() = default; + TRKPetalLayer(Int_t layerNumber, std::string layerName, Float_t rIn, Float_t angularCoverage, Float_t zLength, Float_t layerX2X0); + ~TRKPetalLayer() = default; + + auto getInnerRadius() const { return mInnerRadius; } + auto getAngularCoverage() const { return mAngularCoverage; } + auto getZLength() { return mZ; } + auto getx2X0() const { return mX2X0; } + auto getChipThickness() const { return mChipThickness; } + auto getNumber() const { return mLayerNumber; } + auto getName() const { return mLayerName; } + auto getSensorName() const { return mSensorName; } + + void createLayer(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans); + + private: + Int_t mLayerNumber; + std::string mLayerName; + std::string mSensorName; + Float_t mInnerRadius; + Float_t mZ; + Float_t mX2X0; + Float_t mChipThickness; + Float_t mModuleWidth; // 
u.m. = cm + Float_t mAngularCoverage; // rad + + TGeoTubeSeg* mLayer; + + ClassDef(TRKPetalLayer, 1); +}; + +} // namespace trk +} // namespace o2 +#endif // ALICEO2_TRK_PETAL_LAYER_H \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h index 4a12be8572ed5..8dd3968743024 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKServices.h @@ -46,15 +46,18 @@ class TRKServices : public FairModule // =================================================== ---> createOuterServices public: TRKServices() = default; - TRKServices(float rMin, float zLength, float thickness); void createMaterials(); void createServices(TGeoVolume* motherVolume); - void createColdplate(TGeoVolume* motherVolume); void createMiddleServices(TGeoVolume* motherVolume); void createOuterDisksServices(TGeoVolume* motherVolume); void createOuterBarrelServices(TGeoVolume* motherVolume); + void createVacuumCompositeShape(); + void excavateFromVacuum(TString shapeToExcavate); + void registerVacuum(TGeoVolume* motherVolume); protected: + // Vacuum + TString mVacuumCompositeFormula; // Coldplate float mColdPlateRMin; // cm float mColdPlateZLength; // cm @@ -70,11 +73,6 @@ class TRKServices : public FairModule float mMiddleDiskThickness = 1.0; // cm std::vector mCableFanWeights = {0.5, 0.3, 0.2}; // relative weights of the fan layers - // IRIS vacuum vessel - float mRInIRISVacV; // cm - float mROutIRISVacV; // cm - float mZLengthIRISVacV; // cm - float mThicknessIRISVacV; // cm ClassDefOverride(TRKServices, 1); }; } // namespace trk diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx index b9015ce578caf..9b8ffc07b2d0e 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx 
+++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Detector.cxx @@ -80,17 +80,17 @@ void Detector::configDefault() mLayers.clear(); LOGP(warning, "Loading Scoping Document configuration for ALICE3 TRK"); - mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); - mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); - mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); - mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 3.78f, 124.f, 100.e-3); - mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 7.f, 124.f, 100.e-3); - mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 12.f, 124.f, 100.e-3); - mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 20.f, 124.f, 100.e-3); - mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 30.f, 124.f, 100.e-3); - mLayers.emplace_back(8, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(8)}, 45.f, 258.f, 100.e-3); - mLayers.emplace_back(9, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(9)}, 60.f, 258.f, 100.e-3); - mLayers.emplace_back(10, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(10)}, 80.f, 258.f, 100.e-3); + // mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); + // mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); + // mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); + mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 3.78f, 124.f, 100.e-3); + 
mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 7.f, 124.f, 100.e-3); + mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 12.f, 124.f, 100.e-3); + mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 20.f, 124.f, 100.e-3); + mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 30.f, 124.f, 100.e-3); + mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 45.f, 258.f, 100.e-3); + mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 60.f, 258.f, 100.e-3); + mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 80.f, 258.f, 100.e-3); } void Detector::buildTRKNewVacuumVessel() @@ -103,31 +103,32 @@ void Detector::buildTRKNewVacuumVessel() mLayers.clear(); LOGP(warning, "Loading \"After Upgrade Days March 2024\" configuration for ALICE3 TRK"); - mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); - mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); - mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); - mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 7.f, 124.f, 100.e-3); - mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 9.f, 124.f, 100.e-3); - mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 12.f, 124.f, 100.e-3); - mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 20.f, 124.f, 100.e-3); - mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 30.f, 124.f, 100.e-3); - mLayers.emplace_back(8, 
std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(8)}, 45.f, 258.f, 100.e-3); - mLayers.emplace_back(9, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(9)}, 60.f, 258.f, 100.e-3); - mLayers.emplace_back(10, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(10)}, 80.f, 258.f, 100.e-3); + // mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 0.5f, 50.f, 100.e-4); + // mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 1.2f, 50.f, 100.e-4); + // mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 2.5f, 50.f, 100.e-4); + mLayers.emplace_back(0, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}, 7.f, 124.f, 100.e-3); + LOGP(info, "TRKLayer created. Name: {}", std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(0)}); + mLayers.emplace_back(1, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(1)}, 9.f, 124.f, 100.e-3); + mLayers.emplace_back(2, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(2)}, 12.f, 124.f, 100.e-3); + mLayers.emplace_back(3, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(3)}, 20.f, 124.f, 100.e-3); + mLayers.emplace_back(4, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(4)}, 30.f, 124.f, 100.e-3); + mLayers.emplace_back(5, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(5)}, 45.f, 258.f, 100.e-3); + mLayers.emplace_back(6, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(6)}, 60.f, 258.f, 100.e-3); + mLayers.emplace_back(7, std::string{GeometryTGeo::getTRKLayerPattern() + std::to_string(7)}, 80.f, 258.f, 100.e-3); auto& trkPars = TRKBaseParam::Instance(); // Middle layers + mLayers[0].setLayout(trkPars.layoutML); + mLayers[1].setLayout(trkPars.layoutML); + mLayers[2].setLayout(trkPars.layoutML); mLayers[3].setLayout(trkPars.layoutML); - 
mLayers[4].setLayout(trkPars.layoutML); - mLayers[5].setLayout(trkPars.layoutML); - mLayers[6].setLayout(trkPars.layoutML); // Outer tracker + mLayers[4].setLayout(trkPars.layoutOL); + mLayers[5].setLayout(trkPars.layoutOL); + mLayers[6].setLayout(trkPars.layoutOL); mLayers[7].setLayout(trkPars.layoutOL); - mLayers[8].setLayout(trkPars.layoutOL); - mLayers[9].setLayout(trkPars.layoutOL); - mLayers[10].setLayout(trkPars.layoutOL); } void Detector::configFromFile(std::string fileName) @@ -173,7 +174,7 @@ void Detector::configToFile(std::string fileName) void Detector::configServices() { - mServices = TRKServices{2.6f, 50.f, 150.e-3}; + mServices = TRKServices(); } void Detector::createMaterials() @@ -237,6 +238,14 @@ void Detector::createGeometry() // Add service for inner tracker mServices.createServices(vTRK); + mPetalCases.clear(); + // Add petal cases (the sensitive layers inside the petal cases get constructed here too) + auto& trkPars = TRKBaseParam::Instance(); + for (Int_t petalCaseNumber = 0; petalCaseNumber < 4; ++petalCaseNumber) { + mPetalCases.emplace_back(petalCaseNumber, vTRK, trkPars.irisOpen); + mServices.excavateFromVacuum(mPetalCases[petalCaseNumber].getFullName()); + } + mServices.registerVacuum(vTRK); } void Detector::InitializeO2Detector() @@ -254,9 +263,35 @@ void Detector::defineSensitiveVolumes() TString volumeName; LOGP(info, "Adding TRK Sensitive Volumes"); + // Add petal case sensitive volumes + for (int petalCase = 0; petalCase < 4; ++petalCase) { + // Petal layers + for (int petalLayer = 0; petalLayer < mPetalCases[petalCase].mPetalLayers.size(); ++petalLayer) { + volumeName = mPetalCases[petalCase].mPetalLayers[petalLayer].getSensorName(); + if (petalLayer == 0) { + mFirstOrLastLayers.push_back(volumeName.Data()); + } + LOGP(info, "Trying {}", volumeName.Data()); + v = geoManager->GetVolume(volumeName.Data()); + LOGP(info, "Adding TRK Sensitive Volume {}", v->GetName()); + AddSensitiveVolume(v); + } + // Petal disks + for (int petalDisk 
= 0; petalDisk < mPetalCases[petalCase].mPetalDisks.size(); ++petalDisk) { + volumeName = mPetalCases[petalCase].mPetalDisks[petalDisk].getSensorName(); + LOGP(info, "Trying {}", volumeName.Data()); + v = geoManager->GetVolume(volumeName.Data()); + LOGP(info, "Adding TRK Sensitive Volume {}", v->GetName()); + AddSensitiveVolume(v); + } + } + // The names of the TRK sensitive volumes have the format: TRKLayer(0...mLayers.size()-1) for (int j{0}; j < mLayers.size(); j++) { volumeName = GeometryTGeo::getTRKSensorPattern() + TString::Itoa(j, 10); + if (j == mLayers.size() - 1) { + mFirstOrLastLayers.push_back(volumeName.Data()); + } LOGP(info, "Trying {}", volumeName.Data()); v = geoManager->GetVolume(volumeName.Data()); LOGP(info, "Adding TRK Sensitive Volume {}", v->GetName()); @@ -284,6 +319,18 @@ void Detector::Reset() } } +bool Detector::InsideFirstOrLastLayer(std::string layerName) +{ + bool inside = false; + for (auto& firstOrLastLayer : mFirstOrLastLayers) { + if (firstOrLastLayer == layerName) { + inside = true; + break; + } + } + return inside; +} + bool Detector::ProcessHits(FairVolume* vol) { // This method is called from the MC stepping @@ -296,7 +343,8 @@ bool Detector::ProcessHits(FairVolume* vol) // Is it needed to keep a track reference when the outer ITS volume is encountered? 
auto stack = (o2::data::Stack*)fMC->GetStack(); - if (fMC->IsTrackExiting() && (lay == 0 || lay == mLayers.size() - 1)) { + // if (fMC->IsTrackExiting() && (lay == 0 || lay == mLayers.size() - 1)) { + if (fMC->IsTrackExiting() && InsideFirstOrLastLayer(vol->GetName())) { // Keep the track refs for the innermost and outermost layers only o2::TrackReference tr(*fMC, GetDetId()); tr.setTrackID(stack->GetCurrentTrackNumber()); diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx new file mode 100644 index 0000000000000..c729d7d1ec4dd --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalCase.cxx @@ -0,0 +1,202 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "TRKSimulation/TRKPetalCase.h" +#include "TRKBase/GeometryTGeo.h" +#include + +#include "Framework/Logger.h" + +#include "TGeoTube.h" +#include "TGeoMatrix.h" +#include "TGeoCompositeShape.h" +#include "TGeoVolume.h" +#include "TString.h" +#include "TMath.h" + +namespace o2 +{ +namespace trk +{ +TRKPetalCase::TRKPetalCase(Int_t number, TGeoVolume* motherVolume, Bool_t irisOpen) : mPetalCaseNumber(number), mOpenState(irisOpen) +{ + + mWallThickness = .15e-1; // cm // Assume all the walls have the same thickness for now. 
+ mRIn = 0.48; // cm + mROut = 3; // cm + mRInOpenState = 1.5; // cm + mPetalCaseLength = 70.; // cm + + // Calculate angular coverages of azimuthal part of wall (equivalent to that of the sensitive volumes) + mAngularCoverageAzimuthalWall = (0.25 * (2 * mRIn * TMath::Pi()) - 2 * mWallThickness) / mRIn; + mAngularCoverageRadialWall = mWallThickness / mRIn; + mToDeg = 180 / TMath::Pi(); + + // Calculate the center of the petal (x_c, y_c, z_c) based on whether it is open or not + mZPos = 0; + if (mOpenState) { + Double_t rHalfPetal = 0.5 * (mRIn + mROut); + Double_t rOpenStateCenter = TMath::Sqrt(rHalfPetal * rHalfPetal + mRInOpenState * mRInOpenState); + mXPos = rOpenStateCenter * TMath::Cos(0.25 * TMath::Pi() + (mPetalCaseNumber - 1) * 0.5 * TMath::Pi()); + mYPos = rOpenStateCenter * TMath::Sin(0.25 * TMath::Pi() + (mPetalCaseNumber - 1) * 0.5 * TMath::Pi()); + } else { + mXPos = 0.; + mYPos = 0.; + } + + // Make the petal case + constructCase(motherVolume); + // Make coldplate + constructColdPlate(motherVolume); + // Add the detection petals (quarter disks and barrel layers) + constructDetectionPetals(motherVolume); +} + +TString TRKPetalCase::getFullName() +{ + TString fullCompositeName = Form("PETALCASE%d_FULLCOMPOSITE", mPetalCaseNumber); + TGeoCompositeShape* fullCompositeShape = new TGeoCompositeShape(fullCompositeName, mFullCompositeFormula); + return fullCompositeName; +} + +void TRKPetalCase::constructCase(TGeoVolume* motherVolume) +{ + + // Petal case parts in TGeoTubeSeg + mInnerAzimuthalWall = new TGeoTubeSeg(Form("PETAL%d_INNER_AZIMUTHAL_WALL", mPetalCaseNumber), mRIn, mRIn + mWallThickness, mPetalCaseLength / 2., -0.5 * mAngularCoverageAzimuthalWall * mToDeg, 0.5 * mAngularCoverageAzimuthalWall * mToDeg); + mOuterAzimuthalWall = new TGeoTubeSeg(Form("PETAL%d_OUTER_AZIMUTHAL_WALL", mPetalCaseNumber), mROut, mROut + mWallThickness, mPetalCaseLength / 2., -0.5 * mAngularCoverageAzimuthalWall * mToDeg, 0.5 * mAngularCoverageAzimuthalWall * mToDeg); + 
mRadialWall = new TGeoTubeSeg(Form("PETAL%d_RADIAL_WALL", mPetalCaseNumber), mRIn, mROut + mWallThickness, mPetalCaseLength / 2., -0.5 * mAngularCoverageRadialWall * mToDeg, 0.5 * mAngularCoverageRadialWall * mToDeg); + mForwardWall = new TGeoTubeSeg(Form("PETAL%d_FORWARD_WALL", mPetalCaseNumber), mRIn, mROut + mWallThickness, mWallThickness / 2., -0.5 * (mAngularCoverageAzimuthalWall + 2 * mAngularCoverageRadialWall) * mToDeg, 0.5 * (mAngularCoverageAzimuthalWall + 2 * mAngularCoverageRadialWall) * mToDeg); + + // Rotate to correct section : 0-3 + mAzimuthalWallRot = new TGeoRotation((TString)Form("PETAL%d_AZIMUTHAL_WALL_ROT", mPetalCaseNumber), (mPetalCaseNumber * 0.5 * TMath::Pi() + 0.5 * mAngularCoverageAzimuthalWall + mAngularCoverageRadialWall) * mToDeg, 0., 0.); + mAzimuthalWallRot->RegisterYourself(); + mRadialWall1Rot = new TGeoRotation((TString)Form("PETAL%d_RADIAL_WALL1_ROT", mPetalCaseNumber), (mPetalCaseNumber * 0.5 * TMath::Pi() + 0.5 * mAngularCoverageRadialWall) * mToDeg, 0., 0.); + mRadialWall1Rot->RegisterYourself(); + mRadialWall2Rot = new TGeoRotation((TString)Form("PETAL%d_RADIAL_WALL2_ROT", mPetalCaseNumber), (mPetalCaseNumber * 0.5 * TMath::Pi() + mAngularCoverageAzimuthalWall + 1.5 * mAngularCoverageRadialWall) * mToDeg, 0., 0.); + mRadialWall2Rot->RegisterYourself(); + + // Place to correct position (open or closed) + mAzimuthalWallCombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_AZIMUTHAL_WALL_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, mZPos, mAzimuthalWallRot); + mAzimuthalWallCombiTrans->RegisterYourself(); + mRadialWall1CombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_RADIAL_WALL1_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, mZPos, mRadialWall1Rot); + mRadialWall1CombiTrans->RegisterYourself(); + mRadialWall2CombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_RADIAL_WALL2_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, mZPos, mRadialWall2Rot); + mRadialWall2CombiTrans->RegisterYourself(); + mForwardWall1CombiTrans = new 
TGeoCombiTrans((TString)Form("PETAL%d_FORWARD_WALL1_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, (mPetalCaseLength + mWallThickness) / 2., mAzimuthalWallRot); + mForwardWall1CombiTrans->RegisterYourself(); + mForwardWall2CombiTrans = new TGeoCombiTrans((TString)Form("PETAL%d_FORWARD_WALL2_COMBITRANS", mPetalCaseNumber), mXPos, mYPos, -(mPetalCaseLength + mWallThickness) / 2., mAzimuthalWallRot); + mForwardWall2CombiTrans->RegisterYourself(); + + TString petalCaseCompositeFormula = (TString)Form("PETAL%d_INNER_AZIMUTHAL_WALL:PETAL%d_AZIMUTHAL_WALL_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_OUTER_AZIMUTHAL_WALL:PETAL%d_AZIMUTHAL_WALL_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_RADIAL_WALL:PETAL%d_RADIAL_WALL1_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_RADIAL_WALL:PETAL%d_RADIAL_WALL2_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_FORWARD_WALL:PETAL%d_FORWARD_WALL1_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber) + (TString)Form("+PETAL%d_FORWARD_WALL:PETAL%d_FORWARD_WALL2_COMBITRANS", mPetalCaseNumber, mPetalCaseNumber); + + TGeoCompositeShape* petalCaseComposite = new TGeoCompositeShape((TString)Form("PETALCASE%dsh", mPetalCaseNumber), petalCaseCompositeFormula); + mFullCompositeFormula = petalCaseComposite->GetName(); + auto& matmgr = o2::base::MaterialManager::Instance(); + const TGeoMedium* kMedBe = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_BERYLLIUM"); + + mPetalCaseName = Form("PETALCASE%d", mPetalCaseNumber); + mPetalCaseVolume = new TGeoVolume(mPetalCaseName, petalCaseComposite, kMedBe); + mPetalCaseVolume->SetVisibility(1); + mPetalCaseVolume->SetLineColor(kGray); + + LOGP(info, "Creating IRIS Tracker vacuum petal case {}", mPetalCaseNumber); + LOGP(info, "Inserting {} in {} ", mPetalCaseVolume->GetName(), motherVolume->GetName()); + motherVolume->AddNode(mPetalCaseVolume, 1, nullptr); +} + +void 
TRKPetalCase::constructColdPlate(TGeoVolume* motherVolume) +{ + Double_t coldPlateRadius = 2.6; // cm + Double_t coldPlateThickness = 0.15; // cm + Double_t coldPlateLength = 50.; // cm + + mColdPlate = new TGeoTubeSeg((TString)Form("PETAL%d_COLDPLATE", mPetalCaseNumber), coldPlateRadius, coldPlateRadius + coldPlateThickness, coldPlateLength / 2., -0.5 * mAngularCoverageAzimuthalWall * mToDeg, 0.5 * mAngularCoverageAzimuthalWall * mToDeg); + auto& matmgr = o2::base::MaterialManager::Instance(); + const TGeoMedium* medCeramic = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_CERAMIC"); + mColdPlateVolume = new TGeoVolume(Form("COLDPLATE%d", mPetalCaseNumber), mColdPlate, medCeramic); + + TString coldPlateCompositeFormula = mColdPlate->GetName(); + coldPlateCompositeFormula += ":"; + coldPlateCompositeFormula += mAzimuthalWallCombiTrans->GetName(); + addToPetalCaseComposite(coldPlateCompositeFormula); + + mColdPlateVolume->SetVisibility(1); + mColdPlateVolume->SetLineColor(kGray); + + LOGP(info, "Creating cold plate service"); + LOGP(info, "Inserting {} in {} ", mColdPlateVolume->GetName(), motherVolume->GetName()); + motherVolume->AddNode(mColdPlateVolume, 1, mAzimuthalWallCombiTrans); +} + +void TRKPetalCase::constructDetectionPetals(TGeoVolume* motherVolume) +{ + // Add petal layers + // layerNumber, layerName, rIn, angularCoverage, zLength, layerx2X0 + mPetalLayers.emplace_back(0, Form("%s_LAYER%d", mPetalCaseName.Data(), 0), 0.5f, mAngularCoverageAzimuthalWall, 50.f, 1.e-3); + mPetalLayers.emplace_back(1, Form("%s_LAYER%d", mPetalCaseName.Data(), 1), 1.2f, mAngularCoverageAzimuthalWall, 50.f, 1.e-3); + mPetalLayers.emplace_back(2, Form("%s_LAYER%d", mPetalCaseName.Data(), 2), 2.5f, mAngularCoverageAzimuthalWall, 50.f, 1.e-3); + for (Int_t i = 0; i < mPetalLayers.size(); ++i) { + mPetalLayers[i].createLayer(motherVolume, mAzimuthalWallCombiTrans); + } + + // Add petal disks + // diskNumber, diskName, zPos, rIn, rOut, angularCoverage, diskx2X0 + 
mPetalDisks.emplace_back(0, Form("%s_DISK%d", mPetalCaseName.Data(), 0), 26., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(1, Form("%s_DISK%d", mPetalCaseName.Data(), 1), 30., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(2, Form("%s_DISK%d", mPetalCaseName.Data(), 2), 34., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(3, Form("%s_DISK%d", mPetalCaseName.Data(), 3), -26., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(4, Form("%s_DISK%d", mPetalCaseName.Data(), 4), -30., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + mPetalDisks.emplace_back(5, Form("%s_DISK%d", mPetalCaseName.Data(), 5), -34., .5, 2.5, mAngularCoverageAzimuthalWall, 1.e-3); + for (Int_t i = 0; i < mPetalDisks.size(); ++i) { + mPetalDisks[i].createDisk(motherVolume, mAzimuthalWallCombiTrans); + } + + addDetectionPetelsToFullComposite(); +} + +void TRKPetalCase::addDetectionPetelsToFullComposite() +{ + for (Int_t i = 0; i < mPetalLayers.size(); ++i) { + Double_t zLength = mPetalLayers[i].getZLength(); + Double_t rIn = mPetalLayers[i].getInnerRadius(); + Double_t thickness = mPetalLayers[i].getChipThickness(); + Double_t angularCoverage = mPetalLayers[i].getAngularCoverage(); + TGeoTubeSeg* layerForExcavation = new TGeoTubeSeg(Form("PETALCASE%d_EXCAVATIONLAYER%d", mPetalCaseNumber, i), rIn, rIn + thickness, zLength / 2., -0.5 * angularCoverage * mToDeg, 0.5 * angularCoverage * mToDeg); + + TString layerForExcavationCompositeFormula = layerForExcavation->GetName(); + layerForExcavationCompositeFormula += ":"; + layerForExcavationCompositeFormula += mAzimuthalWallCombiTrans->GetName(); + addToPetalCaseComposite(layerForExcavationCompositeFormula); + } + + for (Int_t i = 0; i < mPetalDisks.size(); ++i) { + Double_t zPos = mPetalDisks[i].getZ(); + Double_t rIn = mPetalDisks[i].getInnerRadius(); + Double_t rOut = mPetalDisks[i].getOuterRadius(); + Double_t thickness = 
mPetalDisks[i].getThickness(); + Double_t angularCoverage = mPetalDisks[i].getAngularCoverage(); + TGeoTubeSeg* diskForExcavation = new TGeoTubeSeg(Form("PETALCASE%d_EXCAVATIONDISK%d", mPetalCaseNumber, i), rIn, rOut, thickness / 2., -0.5 * angularCoverage * mToDeg, 0.5 * angularCoverage * mToDeg); + TGeoCombiTrans* diskForExcavationCombiTrans = new TGeoCombiTrans(*(mAzimuthalWallCombiTrans->MakeClone())); // Copy from petal case + diskForExcavationCombiTrans->SetName((TString)Form("PETALCASE%d_EXCAVATIONDISK%d_COMBITRANS", mPetalCaseNumber, i)); + diskForExcavationCombiTrans->SetDz(zPos); // Overwrite z location + diskForExcavationCombiTrans->RegisterYourself(); + + TString diskForExcavationCompositeFormula = diskForExcavation->GetName(); + diskForExcavationCompositeFormula += ":"; + diskForExcavationCompositeFormula += diskForExcavationCombiTrans->GetName(); + addToPetalCaseComposite(diskForExcavationCompositeFormula); + } +} + +// ClassImp(TRKPetalCase); +} // namespace trk +} // namespace o2 \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx new file mode 100644 index 0000000000000..e24b24b48c882 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalDisk.cxx @@ -0,0 +1,94 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file TRKPetalDisk.cxx +/// \brief Implementation of the TRKPetalDisk class + +#include "TRKSimulation/TRKPetalDisk.h" +#include "TRKBase/GeometryTGeo.h" + +#include // for LOG + +#include "TGeoManager.h" // for TGeoManager, gGeoManager +#include "TGeoMatrix.h" // for TGeoCombiTrans, TGeoRotation, etc +#include "TGeoTube.h" // for TGeoTube, TGeoTubeSeg +#include "TGeoVolume.h" // for TGeoVolume, TGeoVolumeAssembly +#include "TGeoCompositeShape.h" // for TGeoCompositeShape +#include "TMathBase.h" // for Abs +#include "TMath.h" // for Sin, RadToDeg, DegToRad, Cos, Tan, etc +#include "TGeoTube.h" + +#include // for snprintf + +namespace o2 +{ +namespace trk +{ + +TRKPetalDisk::TRKPetalDisk(Int_t diskNumber, std::string diskName, Float_t z, Float_t rIn, Float_t rOut, Float_t angularCoverage, Float_t Diskx2X0) +{ + // Creates a simple parametrized petal disk + mDiskNumber = diskNumber; + mDiskName = diskName; + mZ = z; + mAngularCoverage = angularCoverage; + mx2X0 = Diskx2X0; + mInnerRadius = rIn; + mOuterRadius = rOut; + Float_t Si_X0 = 9.5; + mChipThickness = Diskx2X0 * Si_X0; + + LOG(info) << "Creating TRK Disk " << mDiskNumber; + LOG(info) << " Using silicon X0 = " << Si_X0 << " to emulate disk radiation length."; + LOG(info) << " Disk z = " << mZ << " ; R_in = " << mInnerRadius << " ; R_out = " << mOuterRadius << " ; x2X0 = " << mx2X0 << " ; ChipThickness = " << mChipThickness; +} + +void TRKPetalDisk::createDisk(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans) +{ + // Create tube, set sensitive volume, add to mother volume + Double_t toDeg = 180 / TMath::Pi(); + std::string chipName = mDiskName + "_" + o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mDiskNumber), + sensName = mDiskName + "_" + Form("%s%d", GeometryTGeo::getTRKSensorPattern(), mDiskNumber); + + mSensorName = sensName; + + TGeoTubeSeg* sensor = new TGeoTubeSeg(mInnerRadius, mOuterRadius, mChipThickness / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + 
TGeoTubeSeg* chip = new TGeoTubeSeg(mInnerRadius, mOuterRadius, mChipThickness / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoTubeSeg* disk = new TGeoTubeSeg(mInnerRadius, mOuterRadius, mChipThickness / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + + TGeoMedium* medSi = gGeoManager->GetMedium("TRK_SILICON$"); + TGeoMedium* medAir = gGeoManager->GetMedium("TRK_AIR$"); + + TGeoVolume* sensVol = new TGeoVolume(sensName.c_str(), sensor, medSi); + sensVol->SetLineColor(kYellow); + TGeoVolume* chipVol = new TGeoVolume(chipName.c_str(), chip, medSi); + chipVol->SetLineColor(kYellow); + TGeoVolume* diskVol = new TGeoVolume(mDiskName.c_str(), disk, medAir); + diskVol->SetLineColor(kYellow); + + LOG(info) << "Inserting " << sensVol->GetName() << " inside " << chipVol->GetName(); + chipVol->AddNode(sensVol, 1, nullptr); + + LOG(info) << "Inserting " << chipVol->GetName() << " inside " << diskVol->GetName(); + diskVol->AddNode(chipVol, 1, nullptr); + + // Finally put everything in the mother volume + TGeoCombiTrans* fwdPetalCombiTrans = new TGeoCombiTrans(*(combiTrans->MakeClone())); // Copy from petal case + fwdPetalCombiTrans->SetDz(mZ); // Overwrite z location + fwdPetalCombiTrans->RegisterYourself(); + + LOG(info) << "Inserting " << diskVol->GetName() << " inside " << motherVolume->GetName(); + motherVolume->AddNode(diskVol, 1, fwdPetalCombiTrans); +} +// ClassImp(TRKPetalLayer); + +} // namespace trk +} // namespace o2 \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx new file mode 100644 index 0000000000000..c8ff0d957bb19 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKPetalLayer.cxx @@ -0,0 +1,79 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "TRKSimulation/TRKPetalLayer.h" +#include "TRKBase/GeometryTGeo.h" + +#include "Framework/Logger.h" + +#include "TGeoTube.h" +#include "TGeoBBox.h" +#include "TGeoVolume.h" +#include "TGeoTube.h" +#include "TGeoMatrix.h" + +#include "TMath.h" + +namespace o2 +{ +namespace trk +{ +TRKPetalLayer::TRKPetalLayer(Int_t layerNumber, std::string layerName, Float_t rIn, Float_t angularCoverage, Float_t zLength, Float_t layerX2X0) + : mLayerNumber(layerNumber), mLayerName(layerName), mInnerRadius(rIn), mAngularCoverage(angularCoverage), mZ(zLength), mX2X0(layerX2X0), mModuleWidth(4.54) +{ + Float_t Si_X0 = 9.5f; + mChipThickness = mX2X0 * Si_X0; + LOGP(info, "Creating layer: id: {} rInner: {} thickness: {} zLength: {} x2X0: {}", mLayerNumber, mInnerRadius, mChipThickness, mZ, mX2X0); +} + +void TRKPetalLayer::createLayer(TGeoVolume* motherVolume, TGeoCombiTrans* combiTrans) +{ + TGeoMedium* medSi = gGeoManager->GetMedium("TRK_SILICON$"); + TGeoMedium* medAir = gGeoManager->GetMedium("TRK_AIR$"); + + std::string staveName = mLayerName + "_" + o2::trk::GeometryTGeo::getTRKStavePattern() + std::to_string(mLayerNumber), + chipName = mLayerName + "_" + o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mLayerNumber), + sensName = mLayerName + "_" + Form("%s%d", GeometryTGeo::getTRKSensorPattern(), mLayerNumber); + + mSensorName = sensName; + + Double_t toDeg = 180 / TMath::Pi(); + mLayer = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoVolume* layerVol = new 
TGeoVolume(mLayerName.c_str(), mLayer, medAir); + layerVol->SetLineColor(kYellow); + + TGeoTubeSeg* stave = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoTubeSeg* chip = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + TGeoTubeSeg* sensor = new TGeoTubeSeg(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2., -0.5 * mAngularCoverage * toDeg, 0.5 * mAngularCoverage * toDeg); + + TGeoVolume* sensVol = new TGeoVolume(sensName.c_str(), sensor, medSi); + sensVol->SetLineColor(kYellow); + TGeoVolume* chipVol = new TGeoVolume(chipName.c_str(), chip, medSi); + chipVol->SetLineColor(kYellow); + TGeoVolume* staveVol = new TGeoVolume(staveName.c_str(), stave, medSi); + staveVol->SetLineColor(kYellow); + + LOGP(info, "Inserting {} in {} ", sensVol->GetName(), chipVol->GetName()); + chipVol->AddNode(sensVol, 1, nullptr); + + LOGP(info, "Inserting {} in {} ", chipVol->GetName(), staveVol->GetName()); + staveVol->AddNode(chipVol, 1, nullptr); + + LOGP(info, "Inserting {} in {} ", staveVol->GetName(), layerVol->GetName()); + layerVol->AddNode(staveVol, 1, nullptr); + + LOGP(info, "Inserting {} in {} ", layerVol->GetName(), motherVolume->GetName()); + motherVolume->AddNode(layerVol, 1, combiTrans); +} +// ClassImp(TRKPetalLayer); + +} // namespace trk +} // namespace o2 \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx index 7937e3b4de09a..1fb966425f974 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKServices.cxx @@ -27,16 +27,6 @@ namespace o2 { namespace trk { -TRKServices::TRKServices(float rMin, float zLength, float thickness) -{ - mColdPlateRMin = rMin; - mColdPlateZLength = zLength; - 
mColdPlateThickness = thickness; - mZLengthIRISVacV = 70.; - mThicknessIRISVacV = 150.e-4; - mRInIRISVacV = 0.48; - mROutIRISVacV = mColdPlateRMin + mColdPlateThickness; -} void TRKServices::createMaterials() { @@ -63,6 +53,7 @@ void TRKServices::createMaterials() float zAir[4] = {6., 7., 8., 18.}; float wAir[4] = {0.000124, 0.755267, 0.231781, 0.012827}; float dAir = 1.20479E-3; + float dAir1 = 1.20479E-11; // vacuum denisity inside pipe // Water float aWater[2] = {1.00794, 15.9994}; @@ -115,6 +106,7 @@ void TRKServices::createMaterials() matmgr.Mixture("ALICE3_TRKSERVICES", 74, "ALUMINIUM5083", aAl5083, zAl5083, dAl5083, 9, wAl5083); // AL5083 - Candidate for IRIS vacuum vessel matmgr.Mixture("ALICE3_TRKSERVICES", 75, "ALUMINIUMBERYLLIUMMETAL", aAlBeMet, zAlBeMet, dAlBeMet, 2, wAlBeMet); // Aluminium-Beryllium metal - Candidate for IRIS vacuum vessel matmgr.Material("ALICE3_TRKSERVICES", 76, "CARBONFIBERM55J6K", 12.0107, 6, 1.92, 999, 999); // Carbon Fiber M55J + matmgr.Mixture("ALICE3_PIPE", 77, "VACUUM", aAir, zAir, dAir1, 4, wAir); matmgr.Medium("ALICE3_TRKSERVICES", 1, "CERAMIC", 66, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Ceramic for cold plate matmgr.Medium("ALICE3_TRKSERVICES", 2, "COPPER", 67, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Copper for cables @@ -127,56 +119,62 @@ void TRKServices::createMaterials() matmgr.Medium("ALICE3_TRKSERVICES", 9, "ALUMINIUM5083", 74, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Al5083 for IRIS vacuum vessel matmgr.Medium("ALICE3_TRKSERVICES", 10, "ALUMINIUMBERYLLIUMMETAL", 75, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // AlBeMet for IRIS vacuum vessel matmgr.Medium("ALICE3_TRKSERVICES", 11, "CARBONFIBERM55J6K", 76, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Carbon Fiber M55J + matmgr.Medium("ALICE3_PIPE", 12, "VACUUM", 77, 0, ifield, fieldm, tmaxfd, stemax, deemax, epsil, stmin); // Vacuum inside the beam pipe } void 
TRKServices::createServices(TGeoVolume* motherVolume) { createMaterials(); - createColdplate(motherVolume); + createVacuumCompositeShape(); createMiddleServices(motherVolume); createOuterDisksServices(motherVolume); createOuterBarrelServices(motherVolume); } -void TRKServices::createColdplate(TGeoVolume* motherVolume) +void TRKServices::createVacuumCompositeShape() +{ + Double_t pipeRIn = 1.8f; + Double_t A3IPLength = 1000.f; + Double_t vacuumVesselRIn = 5.6f; + Double_t vacuumVesselThickness = 0.08f; + Double_t vacuumVesselLength = 76.f; + + // Vacuum for A and C Side + Double_t vacuumASideLength = A3IPLength / 2. - vacuumVesselThickness - vacuumVesselLength / 2.; + Double_t vacuumCSideLength = A3IPLength / 2. + vacuumVesselLength / 2.; + + // Vacuum tubes + TGeoTube* vacuumASide = new TGeoTube("VACUUM_Ash", 0., pipeRIn, vacuumASideLength / 2.); + TGeoTube* vacuumCSide = new TGeoTube("VACUUM_Csh", 0., vacuumVesselRIn, vacuumCSideLength / 2.); + + // Vacuum positions + TGeoTranslation* posVacuumASide = new TGeoTranslation("VACUUM_ASIDE_POSITION", 0, 0, vacuumVesselLength / 2. + vacuumVesselThickness + vacuumASideLength / 2.); + posVacuumASide->RegisterYourself(); + TGeoTranslation* posVacuumCSide = new TGeoTranslation("VACUUM_CSIDE_POSITION", 0, 0, vacuumVesselLength / 2. 
- vacuumCSideLength / 2.); + posVacuumCSide->RegisterYourself(); + + mVacuumCompositeFormula = + "VACUUM_Ash:VACUUM_ASIDE_POSITION" + "+VACUUM_Csh:VACUUM_CSIDE_POSITION"; +} + +void TRKServices::excavateFromVacuum(TString shapeToExcavate) +{ + mVacuumCompositeFormula += "-"; + mVacuumCompositeFormula += shapeToExcavate; +} + +void TRKServices::registerVacuum(TGeoVolume* motherVolume) { auto& matmgr = o2::base::MaterialManager::Instance(); - const TGeoMedium* medCeramic = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_CERAMIC"); - - TGeoTube* coldPlate = new TGeoTube("TRK_COLDPLATEsh", mColdPlateRMin, mColdPlateRMin + mColdPlateThickness, mColdPlateZLength / 2.); - TGeoVolume* coldPlateVolume = new TGeoVolume("TRK_COLDPLATE", coldPlate, medCeramic); - coldPlateVolume->SetVisibility(1); - coldPlateVolume->SetLineColor(kGray); - - LOGP(info, "Creating cold plate service"); - - LOGP(info, "Inserting {} in {} ", coldPlateVolume->GetName(), motherVolume->GetName()); - motherVolume->AddNode(coldPlateVolume, 1, nullptr); - - // IRIS Tracker Vacuum Vessel - TGeoTube* irisVacuumVesselInnerTube = new TGeoTube("TRK_IRISVACUUMVESSEL_INNERTUBEsh", mRInIRISVacV, mRInIRISVacV + mThicknessIRISVacV, mZLengthIRISVacV / 2.); - TGeoTube* irisVacuumVesselOuterTube = new TGeoTube("TRK_IRISVACUUMVESSEL_OUTERTUBEsh", mROutIRISVacV, mROutIRISVacV + mThicknessIRISVacV, mZLengthIRISVacV / 2.); - TGeoTube* irisVacuumVesselWall = new TGeoTube("TRK_IRISVACUUMVESSEL_WALLsh", mRInIRISVacV, mROutIRISVacV + mThicknessIRISVacV, mThicknessIRISVacV / 2.); - TGeoTranslation* irisVacVWallNegZ = new TGeoTranslation("IRISVACVWALLNEGZ", 0., 0., -mZLengthIRISVacV / 2. - mThicknessIRISVacV / 2.); - irisVacVWallNegZ->RegisterYourself(); - TGeoTranslation* irisVacVWallPosZ = new TGeoTranslation("IRISVACVWALLPOSZ", 0., 0., mZLengthIRISVacV / 2. 
+ mThicknessIRISVacV / 2.); - irisVacVWallPosZ->RegisterYourself(); - TString irisCompositeFormula = - "TRK_IRISVACUUMVESSEL_INNERTUBEsh" - "+TRK_IRISVACUUMVESSEL_OUTERTUBEsh" - "+TRK_IRISVACUUMVESSEL_WALLsh:IRISVACVWALLNEGZ" - "+TRK_IRISVACUUMVESSEL_WALLsh:IRISVACVWALLPOSZ"; - TGeoCompositeShape* irisVacuumVesselComposite = new TGeoCompositeShape("TRK_IRISVACUUMVESSELsh", irisCompositeFormula); - - const TGeoMedium* medBe = matmgr.getTGeoMedium("ALICE3_TRKSERVICES_BERYLLIUM"); - TGeoVolume* irisVacuumVesselVolume = new TGeoVolume("TRK_IRISVACUUMVESSEL", irisVacuumVesselComposite, medBe); - - irisVacuumVesselVolume->SetVisibility(1); - irisVacuumVesselVolume->SetLineColor(kGray); - - LOGP(info, "Creating IRIS Tracker vacuum vessel"); - LOGP(info, "Inserting {} in {} ", irisVacuumVesselVolume->GetName(), motherVolume->GetName()); - motherVolume->AddNode(irisVacuumVesselVolume, 1, nullptr); + const TGeoMedium* kMedVac = matmgr.getTGeoMedium("ALICE3_PIPE_VACUUM"); + + TGeoCompositeShape* vacuumComposite = new TGeoCompositeShape("A3IP_VACUUMsh", mVacuumCompositeFormula); + TGeoVolume* vacuumVolume = new TGeoVolume("A3IP_VACUUM", vacuumComposite, kMedVac); + + // Add the vacuum to the barrel + vacuumVolume->SetLineColor(kGreen - 3); + motherVolume->AddNode(vacuumVolume, 1, new TGeoTranslation(0, 0, 0)); } void TRKServices::createOuterDisksServices(TGeoVolume* motherVolume) @@ -459,7 +457,7 @@ void TRKServices::createOuterBarrelServices(TGeoVolume* motherVolume) // Fiber 0.269 cm const float siO2FiberThick = 0.5 * 0.269; const float peFiberThick = 0.5 * 0.269; - float rMinOuterBarrelServices = ((TGeoTube*)motherVolume->GetNode(Form("%s10_1", GeometryTGeo::getTRKLayerPattern()))->GetVolume()->GetShape())->GetRmax(); + float rMinOuterBarrelServices = ((TGeoTube*)motherVolume->GetNode(Form("%s7_1", GeometryTGeo::getTRKLayerPattern()))->GetVolume()->GetShape())->GetRmax(); const float zLengthOuterBarrelServices = 350.f; // 175cm TGeoTube* outerBarrelFiberSIO2 = new 
TGeoTube("TRK_OUTERBARREL_FIBER_SIO2sh", rMinOuterBarrelServices, rMinOuterBarrelServices + siO2FiberThick, zLengthOuterBarrelServices); diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h index 1b0181144b5d4..b82d8879e7dad 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h @@ -15,7 +15,10 @@ #pragma link off all classes; #pragma link off all functions; +#pragma link C++ class o2::trk::TRKPetalCase + ; #pragma link C++ class o2::trk::TRKLayer + ; +#pragma link C++ class o2::trk::TRKPetalLayer + ; +#pragma link C++ class o2::trk::TRKPetalDisk + ; #pragma link C++ class o2::trk::TRKServices + ; #pragma link C++ class o2::trk::Detector + ; #pragma link C++ class o2::base::DetImpl < o2::trk::Detector> + ; diff --git a/macro/build_geometry.C b/macro/build_geometry.C index 12d3842239874..93b06eb4bfeae 100644 --- a/macro/build_geometry.C +++ b/macro/build_geometry.C @@ -167,7 +167,7 @@ void build_geometry(FairRunSim* run = nullptr) #ifdef ENABLE_UPGRADES // upgraded beampipe at the interaction point (IP) if (isActivated("A3IP")) { - run->AddModule(new o2::passive::Alice3Pipe("A3IP", "Alice 3 beam pipe", !isActivated("TRK"), !isActivated("FT3"), 1.8f, 0.08f, 1000.f, 5.6f, 0.08f, 76.f)); + run->AddModule(new o2::passive::Alice3Pipe("A3IP", "Alice 3 beam pipe", 1.8f, 0.08f, 1000.f, 5.6f, 0.08f, 76.f)); } // the absorber From ced3e8ada32e6a6393745a811f46fe37cd441448 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Thu, 13 Mar 2025 09:15:11 +0100 Subject: [PATCH 0198/1914] GPU: Remove thrust deprecated declarations in ITS code --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu 
index 4fa7913c10e82..ce93523319e99 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -201,7 +201,7 @@ struct equal_tracklets { }; template -struct pair_to_first : public thrust::unary_function, T1> { +struct pair_to_first { GPUhd() int operator()(const gpuPair& a) const { return a.first; @@ -209,7 +209,7 @@ struct pair_to_first : public thrust::unary_function, T1> { }; template -struct pair_to_second : public thrust::unary_function, T2> { +struct pair_to_second { GPUhd() int operator()(const gpuPair& a) const { return a.second; @@ -710,7 +710,7 @@ GPUg() void printPointersKernel(std::tuple args) } template -struct trackletSortEmptyFunctor : public thrust::binary_function { +struct trackletSortEmptyFunctor { GPUhd() bool operator()(const T& lhs, const T& rhs) const { return lhs.firstClusterIndex > rhs.firstClusterIndex; @@ -718,7 +718,7 @@ struct trackletSortEmptyFunctor : public thrust::binary_function { }; template -struct trackletSortIndexFunctor : public thrust::binary_function { +struct trackletSortIndexFunctor { GPUhd() bool operator()(const T& lhs, const T& rhs) const { return lhs.firstClusterIndex < rhs.firstClusterIndex || (lhs.firstClusterIndex == rhs.firstClusterIndex && lhs.secondClusterIndex < rhs.secondClusterIndex); From 5ec817dba37076f67a1fe7cbc28c189641a30e1f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 13:05:34 +0100 Subject: [PATCH 0199/1914] GPU: Remove obsolete option to run TrackletSelector not in pipeline --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 6 -- .../Base/GPUReconstructionIncludes.h | 3 - .../Definitions/GPUDefGPUParameters.h | 8 --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 57 +------------------ .../SectorTracker/GPUTPCTracker.cxx | 2 +- GPU/GPUTracking/kernels.cmake | 2 +- 7 files changed, 3 insertions(+), 76 deletions(-) diff --git 
a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index a7e0c2cb827f1..4b767a6b8a8f7 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -273,9 +273,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.trackletConstructorInPipeline < 0) { mProcessingSettings.trackletConstructorInPipeline = 1; } - if (mProcessingSettings.trackletSelectorInPipeline < 0) { - mProcessingSettings.trackletSelectorInPipeline = 1; - } if (mProcessingSettings.trackletSelectorSectors < 0) { mProcessingSettings.trackletSelectorSectors = 1; } @@ -296,9 +293,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() UpdateAutomaticProcessingSettings(); GPUCA_GPUReconstructionUpdateDefaults(); - if (!mProcessingSettings.trackletConstructorInPipeline) { - mProcessingSettings.trackletSelectorInPipeline = false; - } if (!mProcessingSettings.rtc.enable) { mProcessingSettings.rtc.optConstexpr = false; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index b35613f3bec59..f18ab21dc3972 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -33,9 +33,6 @@ if (mProcessingSettings.trackletConstructorInPipeline < 0) { \ mProcessingSettings.trackletConstructorInPipeline = GPUCA_CONSTRUCTOR_IN_PIPELINE; \ } \ - if (mProcessingSettings.trackletSelectorInPipeline < 0) { \ - mProcessingSettings.trackletSelectorInPipeline = GPUCA_SELECTOR_IN_PIPELINE; \ - } \ if (mProcessingSettings.trackletSelectorSectors < 0) { \ mProcessingSettings.trackletSelectorSectors = GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT; \ } \ diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index c40659ecd2632..772b4684b590e 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ 
b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -85,7 +85,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -151,7 +150,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -217,7 +215,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -275,7 +272,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 @@ -537,9 +533,6 @@ #ifndef GPUCA_CONSTRUCTOR_IN_PIPELINE #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #endif - #ifndef GPUCA_SELECTOR_IN_PIPELINE - #define GPUCA_SELECTOR_IN_PIPELINE 0 - #endif #ifndef GPUCA_ALTERNATE_BORDER_SORT #define GPUCA_ALTERNATE_BORDER_SORT 0 #endif @@ -562,7 +555,6 @@ #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #define GPUCA_SELECTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 0 #define GPUCA_SORT_BEFORE_FIT 0 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 
3c31a4fbb8409..5663aed8033b7 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -259,7 +259,6 @@ AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") AddOption(trackletSelectorSectors, int8_t, -1, "", 0, "Number of sectors to processes in parallel at max") AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline") -AddOption(trackletSelectorInPipeline, int8_t, -1, "", 0, "Run tracklet selector in the pipeline") AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit") AddOption(mergerSortTracks, int8_t, -1, "", 0, "Sort track indizes for GPU track fit") AddOption(alternateBorderSort, int8_t, -1, "", 0, "Alternative implementation for sorting of border tracks") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index dd7fe285265ad..4b9b8c33a0887 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -226,7 +226,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletSelectorInPipeline) { + if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { runKernel({GetGridAuto(useStream), {iSector}}); runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); if (GetProcessingSettings().deterministicGPUReconstruction) { @@ -250,56 +250,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() ReleaseEvent(mEvents->init); } - if (!GetProcessingSettings().trackletSelectorInPipeline) { - if (GetProcessingSettings().trackletConstructorInPipeline) { - 
SynchronizeGPU(); - } else { - for (int32_t i = 0; i < mRec->NStreams(); i++) { - RecordMarker(&mEvents->stream[i], i); - } - runKernel({GetGridAuto(0), krnlRunRangeNone, {&mEvents->single, mEvents->stream, mRec->NStreams()}}); - for (int32_t i = 0; i < mRec->NStreams(); i++) { - ReleaseEvent(mEvents->stream[i]); - } - SynchronizeEventAndRelease(mEvents->single); - } - - if (GetProcessingSettings().debugLevel >= 4) { - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - DoDebugAndDump(RecoStep::TPCSectorTracking, 128, processors()->tpcTrackers[iSector], &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - } - } - - int32_t runSectors = 0; - int32_t useStream = 0; - for (uint32_t iSector = 0; iSector < NSECTORS; iSector += runSectors) { - if (runSectors < GetProcessingSettings().trackletSelectorSectors) { - runSectors++; - } - runSectors = CAMath::Min(runSectors, NSECTORS - iSector); - if (getKernelProperties().minBlocks * BlockCount() < (uint32_t)runSectors) { - runSectors = getKernelProperties().minBlocks * BlockCount(); - } - - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Running TPC Tracklet selector (Stream %d, Sector %d to %d)", useStream, iSector, iSector + runSectors); - } - runKernel({GetGridAuto(useStream), {iSector, runSectors}}); - runKernel({{1, -ThreadCount(), useStream}, {iSector}}, runSectors); - for (uint32_t k = iSector; k < iSector + runSectors; k++) { - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {k}}); - } - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[k].MemoryResCommon(), useStream, &mEvents->sector[k]); - streamMap[k] = useStream; - } - useStream++; - if (useStream >= mRec->NStreams()) { - useStream = 0; - } - } - } - mSectorSelectorReady = 0; std::array transferRunning; @@ -335,11 +285,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() processors()->tpcTrackers[iSector].DumpHitWeights(*mDebugFile); } } - if 
(!GetProcessingSettings().trackletSelectorInPipeline) { - if (GetProcessingSettings().debugMask & 512) { - processors()->tpcTrackers[iSector].DumpTrackHits(*mDebugFile); - } - } } if (transferRunning[iSector]) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 63c64f78cc095..28521b2987a45 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -93,7 +93,7 @@ void* GPUTPCTracker::SetPointersCommon(void* mem) void GPUTPCTracker::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletSelectorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); + bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletConstructorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorLinks", reLinks); mMemoryResSectorScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSectorScratch"); diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index c973264bfde2a..a823fc853e5b1 100644 --- a/GPU/GPUTracking/kernels.cmake +++ 
b/GPU/GPUTracking/kernels.cmake @@ -33,7 +33,7 @@ o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRAC o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB both) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB single) o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO single int32_t n) From 1b8388da5e43bbc506e62f0ad1f1d1b9449e87c6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 14:34:06 +0100 Subject: [PATCH 0200/1914] GPU: Remove obsolete preprocessor magic to create 2 different kernels for single-slice and multi-slice --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 37 ++-- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 6 +- .../Base/GPUReconstructionKernelMacros.h | 82 ++------ .../Base/GPUReconstructionKernels.h | 11 +- .../Base/cuda/GPUReconstructionCUDA.cu | 22 +- .../Base/cuda/GPUReconstructionCUDA.h | 2 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 47 ++--- .../GPUReconstructionCUDAkernel.template.cu | 4 +- .../GPUReconstructionHIPkernel.template.hip | 4 +- .../Base/opencl/GPUReconstructionOCL.cl | 6 +- .../Base/opencl/GPUReconstructionOCL.h | 6 +- .../opencl/GPUReconstructionOCLKernels.cxx | 37 +--- GPU/GPUTracking/Global/GPUChain.h | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 41 ++-- GPU/GPUTracking/kernels.cmake | 194 +++++++++--------- 15 files changed, 201 insertions(+), 300 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx 
b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 969dd06d6297e..f5d350b4064d0 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -66,28 +66,25 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu if (x.nThreads != 1) { throw std::runtime_error("Cannot run device kernel on host with nThreads != 1"); } - uint32_t num = y.num == 0 || y.num == -1 ? 1 : y.num; - for (uint32_t k = 0; k < num; k++) { - int32_t nThreads = getNKernelHostThreads(false); - if (nThreads > 1) { - if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d Threads\n", nThreads); - } - tbb::this_task_arena::isolate([&] { - mThreading->activeThreads->execute([&] { - tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { - typename T::GPUSharedMemory smem; - for (uint32_t iB = r.begin(); iB < r.end(); iB++) { - T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); - } - }); + int32_t nThreads = getNKernelHostThreads(false); + if (nThreads > 1) { + if (mProcessingSettings.debugLevel >= 5) { + printf("Running %d Threads\n", nThreads); + } + tbb::this_task_arena::isolate([&] { + mThreading->activeThreads->execute([&] { + tbb::parallel_for(tbb::blocked_range(0, x.nBlocks, 1), [&](const tbb::blocked_range& r) { + typename T::GPUSharedMemory smem; + for (uint32_t iB = r.begin(); iB < r.end(); iB++) { + T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.index], args...); + } }); }); - } else { - for (uint32_t iB = 0; iB < x.nBlocks; iB++) { - typename T::GPUSharedMemory smem; - T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.start + k], args...); - } + }); + } else { + for (uint32_t iB = 0; iB < x.nBlocks; iB++) { + typename T::GPUSharedMemory smem; + T::template Thread(x.nBlocks, 1, iB, 0, smem, T::Processor(*mHostConstantMem)[y.index], args...); } } } diff 
--git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 7901c34866c66..f90820281c74d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -49,7 +49,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels @@ -77,7 +77,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ { \ if (cpuFallback) { \ @@ -161,7 +161,7 @@ inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) throw std::runtime_error("GPUCA_MAX_THREADS exceeded"); } if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Running kernel %s (Stream %d, Range %d/%d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.start, setup.y.num, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()); + GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()); } if (nThreads == 0 || nBlocks == 0) { return; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 41abc8725c07b..f80b324970dc9 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -51,97 +51,45 @@ #define GPUCA_ATTRRES3(XX) // 3 attributes not supported #define GPUCA_ATTRRES2(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES2_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) #define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) -// GPU Kernel entry point for single sector -#define GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, ...) 
\ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSector_internal GPUCA_M_STRIP(x_arguments)) -#ifdef GPUCA_KRNL_DEFONLY -#define GPUCA_KRNLGPU_SINGLE(...) GPUCA_KRNLGPU_SINGLE_DEF(__VA_ARGS__); -#else -#define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ - { \ - GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSector_internal] GPUCA_M_STRIP(x_forward)); \ - } -#endif -// GPU Kernel entry point for multiple sector -#define GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, ...) \ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSector, int32_t nSectorCount GPUCA_M_STRIP(x_arguments)) +// GPU Kernel entry point +#define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) \ + GPUg() void GPUCA_ATTRRES(,GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) + #ifdef GPUCA_KRNL_DEFONLY -#define GPUCA_KRNLGPU_MULTI(...) GPUCA_KRNLGPU_MULTI_DEF(__VA_ARGS__); +#define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__); #else -#define GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ +#define GPUCA_KRNLGPU(x_class, x_attributes, x_arguments, x_forward, ...) 
\ + GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ { \ - const int32_t iSector_internal = nSectorCount * (get_group_id(0) + (get_num_groups(0) % nSectorCount != 0 && nSectorCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \ - const int32_t nSectorBlockOffset = get_num_groups(0) * iSector_internal / nSectorCount; \ - const int32_t sectorBlockId = get_group_id(0) - nSectorBlockOffset; \ - const int32_t sectorGridDim = get_num_groups(0) * (iSector_internal + 1) / nSectorCount - get_num_groups(0) * (iSector_internal) / nSectorCount; \ GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ - GPUCA_M_STRIP_FIRST(x_class)::template Thread(sectorGridDim, get_local_size(0), sectorBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSector + iSector_internal] GPUCA_M_STRIP(x_forward)); \ + GPUCA_M_STRIP_FIRST(x_class)::template Thread(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[_iSector_internal] GPUCA_M_STRIP(x_forward)); \ } #endif -// GPU Host wrapper pre- and post-parts -#define GPUCA_KRNL_PRE(x_class, ...) \ +// GPU Host wrappers for kernel +#define GPUCA_KRNL_HOST(x_class, ...) \ + GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ { \ auto& x = _xyz.x; \ - auto& y = _xyz.y; - -#define GPUCA_KRNL_POST() \ + auto& y = _xyz.y; \ + GPUCA_KRNL_CALL(x_class, __VA_ARGS__) \ } \ }; -// GPU Host wrappers for single kernel, multi-sector, or auto-detection -#define GPUCA_KRNL_single(...) 
\ - GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \ - GPUCA_KRNL_PRE(__VA_ARGS__) \ - if (y.num > 1) { \ - throw std::runtime_error("Kernel called with invalid number of sectors"); \ - } else { \ - GPUCA_KRNL_CALL_single(__VA_ARGS__) \ - } \ - GPUCA_KRNL_POST() - -#define GPUCA_KRNL_multi(...) \ - GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \ - GPUCA_KRNL_PRE(__VA_ARGS__) \ - GPUCA_KRNL_CALL_multi(__VA_ARGS__) \ - GPUCA_KRNL_POST() - -#define GPUCA_KRNL_(...) GPUCA_KRNL_single(__VA_ARGS__) -#define GPUCA_KRNL_simple(...) GPUCA_KRNL_single(__VA_ARGS__) -#define GPUCA_KRNL_both(...) \ - GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \ - GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \ - GPUCA_KRNL_PRE(__VA_ARGS__) \ - if (y.num <= 1) { \ - GPUCA_KRNL_CALL_single(__VA_ARGS__) \ - } else { \ - GPUCA_KRNL_CALL_multi(__VA_ARGS__) \ - } \ - GPUCA_KRNL_POST() - -#define GPUCA_KRNL_LOAD_(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__) -#define GPUCA_KRNL_LOAD_simple(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__) -#define GPUCA_KRNL_LOAD_both(...) \ - GPUCA_KRNL_LOAD_single(__VA_ARGS__) \ - GPUCA_KRNL_LOAD_multi(__VA_ARGS__) - #define GPUCA_KRNL_PROP(x_class, x_attributes) \ template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ - gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes)))}; \ + gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } -// Generate GPU kernel and host wrapper -#define GPUCA_KRNL_WRAP(x_func, x_class, x_attributes, ...) GPUCA_M_CAT(x_func, GPUCA_M_STRIP_FIRST(x_attributes))(x_class, x_attributes, __VA_ARGS__) #endif // GPUCA_GPUCODE -#define GPUCA_KRNL_LB(x_class, x_attributes, ...) 
GPUCA_KRNL(x_class, (GPUCA_M_STRIP(x_attributes), REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class)))), __VA_ARGS__) +#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) #endif // O2_GPU_GPURECONSTRUCTIONKERNELMACROS_H // clang-format on diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index d541e36a06af9..ba30f38e902ad 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -41,11 +41,8 @@ struct krnlExec { }; struct krnlRunRange { constexpr krnlRunRange() = default; - constexpr krnlRunRange(uint32_t a) : start(a), num(0) {} - constexpr krnlRunRange(uint32_t s, int32_t n) : start(s), num(n) {} - - uint32_t start = 0; - int32_t num = 0; + constexpr krnlRunRange(uint32_t v) : index(v) {} + uint32_t index = 0; }; struct krnlEvent { constexpr krnlEvent(deviceEvent* e = nullptr, deviceEvent* el = nullptr, int32_t n = 1) : ev(e), evList(el), nEvents(n) {} @@ -63,7 +60,7 @@ struct krnlProperties { }; struct krnlSetup { - krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0, -1}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} + krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} krnlExec x; krnlRunRange y; krnlEvent z; @@ -98,7 +95,7 @@ class GPUReconstructionKernels : public T template using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; -#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ virtual void runKernelImpl(const krnlSetupArgs& args) \ { \ T::template runKernelBackend(args); \ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 
7fb3744551953..d2adc3cc1fd19 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -380,7 +380,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUFailedMsg(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL - loadKernelModules(true, false); + loadKernelModules(true); } #endif void* devPtrConstantMem = nullptr; @@ -630,13 +630,10 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() } } -void GPUReconstructionCUDA::loadKernelModules(bool perKernel, bool perSingleMulti) +void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) \ - j += !perSingleMulti; -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ +#define GPUCA_KRNL(x_class, ...) \ getRTCkernelNum(mInternals->kernelFunctions.size()); \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ @@ -644,20 +641,9 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel, bool perSingleMult GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ } \ GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ - j += perSingleMulti; -#define GPUCA_KRNL_LOAD_multi(x_class, ...) 
\ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ - mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi))); \ - if (mProcessingSettings.debugLevel >= 3) { \ - GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)), j); \ - } \ - GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)))); \ - j += perSingleMulti; + j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi if (j != mInternals->kernelModules.size()) { GPUFatal("Did not load all kernels (%u < %u)", j, (uint32_t)mInternals->kernelModules.size()); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index ee2f069028d74..dde70b9076e08 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -98,7 +98,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels 1)], args...); - if (y.num <= 1) { - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); - } else { - pArgs[arg_offset + 1] = &y.num; - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); - } + pArgs[arg_offset] = &y.index; + GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); + GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -88,33 +83,31 @@ void 
GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); -#else +#else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 -#define GPUCA_KRNL_DEFONLY +#define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_PROP(x_class, x_attributes) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_, x_class, x_attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ + GPUCA_KRNL_PROP(x_class, x_attributes) \ + GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types) \ template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); + #ifndef __HIPCC__ // CUDA version -#define GPUCA_KRNL_CALL_single(x_class, ...) \ - GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.start, args...); -#define GPUCA_KRNL_CALL_multi(x_class, ...) \ - GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.start, y.num, args...); +#define GPUCA_KRNL_CALL(x_class, ...) \ + GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); #else // HIP version #undef GPUCA_KRNL_CUSTOM #define GPUCA_KRNL_CUSTOM(args) GPUCA_M_STRIP(args) -#define GPUCA_KRNL_CALL_single(x_class, ...) \ - hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.start, args...); -#define GPUCA_KRNL_CALL_multi(x_class, ...) 
\ - hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.start, y.num, args...); +#define GPUCA_KRNL_CALL(x_class, ...) \ + hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); #endif // __HIPCC__ -#endif + +#endif // ---------- COMPILE_MODE = onefile | rdc ---------- #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -137,13 +130,9 @@ int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& kernels) { -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU_SINGLE(__VA_ARGS__))); -#define GPUCA_KRNL_LOAD_multi(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU_MULTI(__VA_ARGS__))); +#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi } #ifndef GPUCA_NO_CONSTANT_MEMORY diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu index bcf61eb07383f..3140c6b9158ad 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAkernel.template.cu @@ -15,9 +15,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #include "GPUReconstructionCUDAIncludesHost.h" #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__); -#define GPUCA_KRNL_LOAD_multi(...) 
GPUCA_KRNLGPU_MULTI(__VA_ARGS__); +#define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__); #include "GPUReconstructionKernelMacros.h" // clang-format off diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip index ddbc9285763a9..427938a3bd704 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPkernel.template.hip @@ -15,9 +15,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #include "GPUReconstructionHIPIncludesHost.h" #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__); -#define GPUCA_KRNL_LOAD_multi(...) GPUCA_KRNLGPU_MULTI(__VA_ARGS__); +#define GPUCA_KRNL(...) GPUCA_KRNLGPU(__VA_ARGS__); #include "GPUReconstructionKernelMacros.h" // clang-format off diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 4a3cda6c2cddc..10a425e4c76e8 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -75,14 +75,10 @@ typedef signed char int8_t; // if (gpu_mem != pTracker.GPUParametersConst()->gpumem) return; //TODO! -#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) -#define GPUCA_KRNL_LOAD_multi(...) GPUCA_KRNLGPU_MULTI(__VA_ARGS__) +#define GPUCA_KRNL(...) 
GPUCA_KRNLGPU(__VA_ARGS__) #define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() GPUConstantMem* pConstant, #define GPUCA_CONSMEM (*pConstant) #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi // clang-format on diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 15015cdcb43c5..5132baa444cd9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -56,9 +56,9 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase void RecordMarker(deviceEvent* ev, int32_t stream) override; template - int32_t AddKernel(bool multi = false); + int32_t AddKernel(); template - uint32_t FindKernel(int32_t num); + uint32_t FindKernel(); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); template @@ -69,7 +69,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase template void runKernelBackend(const krnlSetupArgs& args); - template + template S& getKernelObject(); int32_t GetOCLPrograms(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 8a1c8a6525c0d..8a6c889773cb0 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -24,15 +24,11 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { - cl_kernel k = _xyz.y.num > 1 ? 
getKernelObject() : getKernelObject(); + cl_kernel k = getKernelObject(); auto& x = _xyz.x; auto& y = _xyz.y; auto& z = _xyz.z; - if (y.num <= 1) { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, args...)); - } else { - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.start, y.num, args...)); - } + GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); cl_event ev; cl_event* evr; @@ -63,12 +59,9 @@ void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs -inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) +inline uint32_t GPUReconstructionOCLBackend::FindKernel() { std::string name(GetKernelName()); - if (num > 1) { - name += "_multi"; - } for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { if (mInternals->kernels[k].second == name) { @@ -80,12 +73,9 @@ inline uint32_t GPUReconstructionOCLBackend::FindKernel(int32_t num) } template -int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) +int32_t GPUReconstructionOCLBackend::AddKernel() { std::string name(GetKernelName()); - if (multi) { - name += "_multi"; - } std::string kname("krnl_" + name); cl_int ocl_error; @@ -98,30 +88,21 @@ int32_t GPUReconstructionOCLBackend::AddKernel(bool multi) return 0; } -template +template S& GPUReconstructionOCLBackend::getKernelObject() { - static uint32_t krnl = FindKernel(MULTI ? 2 : 1); + static uint32_t krnl = FindKernel(); return mInternals->kernels[krnl].first; } int32_t GPUReconstructionOCLBackend::AddKernels() { -#define GPUCA_KRNL(...) \ - GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__) -#define GPUCA_KRNL_LOAD_single(x_class, ...) \ - if (AddKernel(false)) { \ - return 1; \ - } -#define GPUCA_KRNL_LOAD_multi(x_class, ...) \ - if (AddKernel(true)) { \ - return 1; \ +#define GPUCA_KRNL(x_class, ...) 
\ + if (AddKernel()) { \ + return 1; \ } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL -#undef GPUCA_KRNL_LOAD_single -#undef GPUCA_KRNL_LOAD_multi - return 0; } diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index e017d9b60a269..b9da1c9a330d3 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -34,7 +34,7 @@ class GPUChain using krnlExec = gpu_reconstruction_kernels::krnlExec; using krnlEvent = gpu_reconstruction_kernels::krnlEvent; using deviceEvent = gpu_reconstruction_kernels::deviceEvent; - static constexpr krnlRunRange krnlRunRangeNone{0, -1}; + static constexpr krnlRunRange krnlRunRangeNone{0}; static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0}; virtual ~GPUChain() = default; diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index b05fa19785dd8..1f35b6fc468b2 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -20,18 +20,36 @@ define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly") file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") -function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) - math(EXPR TMP_CHK "${ARGC} & 1") - if(${TMP_CHK}) - message(FATAL_ERROR "Invalid number of arguments to kernel ${TMP_CHK}, must be odd to have pairs of argument type, argument name") - endif() +function(o2_gpu_add_kernel kernel_name kernel_files) list(LENGTH ARGV n) + if(${n} GREATER 2) + set(kernel_options "${ARGV2}") + else() + set(kernel_options "") + endif() + if(kernel_options MATCHES "^LB") + set(TMP_BOUNDS "_LB") + elseif(kernel_options MATCHES "^NO" OR kernel_options STREQUAL "") + set(TMP_BOUNDS "") + else() + message(FATAL_ERROR "Invalid kernel options, must contain bounds at first") + 
endif() + string(LENGTH "${kernel_options}" TMP_CHK) + if(${TMP_CHK} GREATER 3) + string(SUBSTRING "${kernel_options}" 3 -1 kernel_extra) + else() + set(kernel_extra "") + endif() set(OPT1 "") set(OPT2 "") set(OPT3 "") - if(${n} GREATER 4) + if(${n} GREATER 3) + math(EXPR TMP_CHK "${ARGC} & 1") + if(NOT ${TMP_CHK}) + message(FATAL_ERROR "Invalid number of arguments to kernel ${ARGC}, must be odd to have pairs of argument type, argument name") + endif() math(EXPR n "${n} - 1") - foreach(i RANGE 4 ${n} 2) + foreach(i RANGE 3 ${n} 2) math(EXPR j "${i} + 1") if(${ARGV${i}} MATCHES "\\*$") string(APPEND OPT1 ",GPUPtr1(${ARGV${i}},${ARGV${j}})") @@ -43,16 +61,9 @@ function(o2_gpu_add_kernel kernel_name kernel_files kernel_bounds kernel_type) string(APPEND OPT3 ",${ARGV${i}}") endforeach() endif() - if(kernel_bounds MATCHES "^LB") - set(TMP_BOUNDS "_LB") - elseif(kernel_bounds MATCHES "^NO") - set(TMP_BOUNDS "") - else() - message(FATAL_ERROR "Invalid bounds") - endif() set(TMP_PRE "") set(TMP_POST "") - set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_type}), (${OPT1}), (${OPT2}), (${OPT3}))\n") + set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_extra}), (${OPT1}), (${OPT2}), (${OPT3}))\n") separate_arguments(kernel_files NATIVE_COMMAND ${kernel_files}) list(GET kernel_files 0 TMP_KERNEL_CLASS_FILE) if (TMP_KERNEL_CLASS_FILE STREQUAL "=") diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index a823fc853e5b1..4b7aab75519fa 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -27,100 +27,100 @@ o2_gpu_kernel_file_list(TPCCLUSTERFINDER ERRORS ClusterAccumulator.cxx) o2_gpu_kernel_file_list(TRDTRACKER GPUTRDTrack.cxx GPUTRDTracker.cxx GPUTRDTrackletWord.cxx GeometryBase.cxx) o2_gpu_kernel_file_list(GLOBALREFIT TPCMERGER O2PROPAGATOR MATLUT GPUTrackingRefit.cxx) -o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= 
TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB single) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO "simple, REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO single int32_t n) -o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB single) -o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER" NO single) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO single int8_t parameter) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) 
-o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB simple int32_t mode) -o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB simple int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t id) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t useOrigTrackParam int8_t mergeAll) -o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB simple int8_t output) -o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t border0 int32_t border1 int8_t useOrigTrackParam) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO simple int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, 
variant" "GPUTPCGMMergerGPU TPCMERGER" NO simple gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) -o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB simple) -o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER" NO simple) -o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB simple GPUTRDTrackerGPU* externalInstance) -o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER 
MATLUT O2PROPAGATOR" LB simple GPUTRDTracker* externalInstance) -o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB simple) -o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB simple int32_t trackStart int32_t trackEnd) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB simple int32_t sectorStart int32_t nSectors) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB simple) -o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB single int8_t setPositions) -o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" 
"= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB single) -o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB single int8_t onlyMC) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER" NO single) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO single GPUTPCLinearLabels* out) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t stage) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB single int32_t iBuf "uint32_t" offset int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB single int32_t iBuf int32_t stage ChargePos* in ChargePos* out) -o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB single int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB single int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB single int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFGather" "=" LB single o2::tpc::ClusterNative* dest) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB simple) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB simple) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB) 
+o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO int32_t n) +o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) +o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) +o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode) +o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" 
"GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t id) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t useOrigTrackParam int8_t mergeAll) +o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t output) +o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t border0 int32_t border1 int8_t useOrigTrackParam) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) +o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER") 
+o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER") +o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB GPUTRDTrackerGPU* externalInstance) +o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER MATLUT O2PROPAGATOR" LB GPUTRDTracker* externalInstance) +o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" 
"GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB int32_t trackStart int32_t trackEnd) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB int32_t sectorStart int32_t nSectors) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB int8_t setPositions) +o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= 
TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf "uint32_t" offset int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage ChargePos* in ChargePos* out) +o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) From 4795ce795401ca4e8c40ca150da0f820e6001150 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 22:12:24 +0100 Subject: [PATCH 0201/1914] GPU: Improve kernel attribute resolution preprocessor logic --- .../Base/GPUReconstructionKernelMacros.h | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index f80b324970dc9..cd1180cbc9991 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -35,26 +35,24 @@ #ifndef GPUCA_KRNL_REG #define GPUCA_KRNL_REG(...) #endif -#define GPUCA_KRNL_REG_INTERNAL_PROP(...) GPUCA_M_STRIP(__VA_ARGS__) #ifndef GPUCA_KRNL_CUSTOM #define GPUCA_KRNL_CUSTOM(...) #endif -#define GPUCA_KRNL_CUSTOM_INTERNAL_PROP(...) -#define GPUCA_ATTRRES_REG(XX, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, XX))(num) GPUCA_ATTRRES2(XX, __VA_ARGS__) -#define GPUCA_ATTRRES2_REG(XX, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, XX))(num) GPUCA_ATTRRES3(XX, __VA_ARGS__) -#define GPUCA_ATTRRES_CUSTOM(XX, custom, args, ...) 
GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, XX))(args) GPUCA_ATTRRES2(XX, __VA_ARGS__) -#define GPUCA_ATTRRES2_CUSTOM(XX, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, XX))(args) GPUCA_ATTRRES3(XX, __VA_ARGS__) -#define GPUCA_ATTRRES_NONE(XX, ...) -#define GPUCA_ATTRRES2_NONE(XX, ...) -#define GPUCA_ATTRRES_(XX, ...) -#define GPUCA_ATTRRES2_(XX, ...) -#define GPUCA_ATTRRES3(XX) // 3 attributes not supported -#define GPUCA_ATTRRES2(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES2_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) -#define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) +#define GPUCA_KRNL_REG_EXTRREG(...) GPUCA_M_STRIP(__VA_ARGS__) +#define GPUCA_KRNL_CUSTOM_EXTRREG(MODE, ...) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) +#define GPUCA_KRNL_NONE_EXTRREG(MODE, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_REG(MODE, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, MODE))(num) GPUCA_ATTRRES_XREG (MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_CUSTOM(MODE, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, MODE))(args) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_NONE(MODE, none, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_(MODE, ...) +#define GPUCA_ATTRRES_XNONE(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_XCUSTOM(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_XREG(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) // GPU Kernel entry point #define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) 
\ - GPUg() void GPUCA_ATTRRES(,GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(, GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__); @@ -83,7 +81,7 @@ #define GPUCA_KRNL_PROP(x_class, x_attributes) \ template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ - gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_STRIP(x_attributes))}; \ + gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } From ccabdf2e66a38222b26dd143c053707341bf2768 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 11 Mar 2025 22:23:47 +0100 Subject: [PATCH 0202/1914] GPU: Remove obsolete tracketConstructorWithoutPipeline option --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 6 --- .../Base/GPUReconstructionIncludes.h | 6 --- .../Definitions/GPUDefGPUParameters.h | 16 -------- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 - .../Global/GPUChainTrackingSectorTracker.cxx | 37 +++++++------------ .../SectorTracker/GPUTPCTracker.cxx | 2 +- 6 files changed, 15 insertions(+), 54 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 4b767a6b8a8f7..656fa37fb6a4c 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -270,12 +270,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } if (mProcessingSettings.deterministicGPUReconstruction && 
mProcessingSettings.debugLevel >= 6) { mProcessingSettings.nTPCClustererLanes = 1; - if (mProcessingSettings.trackletConstructorInPipeline < 0) { - mProcessingSettings.trackletConstructorInPipeline = 1; - } - if (mProcessingSettings.trackletSelectorSectors < 0) { - mProcessingSettings.trackletSelectorSectors = 1; - } } if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) { mProcessingSettings.createO2Output = 1; diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index f18ab21dc3972..6aba7e30a49d7 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -30,12 +30,6 @@ #include #define GPUCA_GPUReconstructionUpdateDefaults() \ - if (mProcessingSettings.trackletConstructorInPipeline < 0) { \ - mProcessingSettings.trackletConstructorInPipeline = GPUCA_CONSTRUCTOR_IN_PIPELINE; \ - } \ - if (mProcessingSettings.trackletSelectorSectors < 0) { \ - mProcessingSettings.trackletSelectorSectors = GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT; \ - } \ if (mProcessingSettings.alternateBorderSort < 0) { \ mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \ } \ diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 772b4684b590e..3ed6c25762405 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -84,11 +84,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -149,11 +147,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -214,11 +210,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_DEDX_STORAGE_TYPE uint16_t #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half @@ -271,11 +265,9 @@ #define GPUCA_LB_COMPRESSION_GATHER 1024 #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 1 #define GPUCA_SORT_BEFORE_FIT 1 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_NO_ATOMIC_PRECHECK 1 #define GPUCA_COMP_GATHER_KERNEL 4 #define GPUCA_COMP_GATHER_MODE 3 @@ -530,9 +522,6 @@ #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 #endif - #ifndef GPUCA_CONSTRUCTOR_IN_PIPELINE - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 - #endif #ifndef GPUCA_ALTERNATE_BORDER_SORT #define GPUCA_ALTERNATE_BORDER_SORT 0 #endif @@ -542,9 +531,6 @@ #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 #endif - #ifndef GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT - #define 
GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 8 // Currently must be smaller than avaiable MultiProcessors on GPU or will result in wrong results - #endif #ifndef GPUCA_COMP_GATHER_KERNEL #define GPUCA_COMP_GATHER_KERNEL 0 #endif @@ -554,11 +540,9 @@ #else #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #define GPUCA_CONSTRUCTOR_IN_PIPELINE 1 #define GPUCA_ALTERNATE_BORDER_SORT 0 #define GPUCA_SORT_BEFORE_FIT 0 #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_TRACKLET_SELECTOR_SECTOR_COUNT 1 #define GPUCA_THREAD_COUNT_FINDER 1 #define GPUCA_COMP_GATHER_KERNEL 0 #define GPUCA_COMP_GATHER_MODE 0 diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 5663aed8033b7..9b6be7743e485 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -257,8 +257,6 @@ AddOption(autoAdjustHostThreads, bool, true, "", 0, "Auto-adjust number of OMP t AddOption(nStreams, int8_t, 8, "", 0, "Number of GPU streams / command queues") AddOption(nTPCClustererLanes, int8_t, -1, "", 0, "Number of TPC clusterers that can run in parallel (-1 = autoset)") AddOption(overrideClusterizerFragmentLen, int32_t, -1, "", 0, "Force the cluster max fragment len to a certain value (-1 = autodetect)") -AddOption(trackletSelectorSectors, int8_t, -1, "", 0, "Number of sectors to processes in parallel at max") -AddOption(trackletConstructorInPipeline, int8_t, -1, "", 0, "Run tracklet constructor in the pipeline") AddOption(delayedOutput, bool, true, "", 0, "Delay output to be parallel to track fit") AddOption(mergerSortTracks, int8_t, -1, "", 0, "Sort track indizes for GPU track fit") AddOption(alternateBorderSort, int8_t, -1, "", 0, "Alternative implementation for sorting of border tracks") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 
4b9b8c33a0887..0eb8af6a6a006 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -218,27 +218,23 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() AllocateRegisteredMemory(trk.MemoryResOutput()); } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - trk.DumpHitWeights(*mDebugFile); - } + runKernel({GetGridAuto(useStream), {iSector}}); + DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + trk.DumpHitWeights(*mDebugFile); } - if (!(doGPU || GetProcessingSettings().debugLevel >= 1) || GetProcessingSettings().trackletConstructorInPipeline) { - runKernel({GetGridAuto(useStream), {iSector}}); - runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); - if (GetProcessingSettings().deterministicGPUReconstruction) { - runKernel({GetGrid(1, 1, useStream), {iSector}}); - } - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]); - streamMap[iSector] = useStream; - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); - } - DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + runKernel({GetGridAuto(useStream), {iSector}}); + runKernel({{1, -ThreadCount(), useStream}, {iSector}}, 1); + if (GetProcessingSettings().deterministicGPUReconstruction) { + runKernel({GetGrid(1, 1, useStream), {iSector}}); } + 
TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, trk.MemoryResCommon(), useStream, &mEvents->sector[iSector]); + streamMap[iSector] = useStream; + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); + } + DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); }); mRec->SetNActiveThreadsOuterLoop(1); if (error) { @@ -280,11 +276,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().keepAllMemory) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true); - if (!GetProcessingSettings().trackletConstructorInPipeline) { - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { - processors()->tpcTrackers[iSector].DumpHitWeights(*mDebugFile); - } - } } if (transferRunning[iSector]) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 28521b2987a45..4e815784f7cad 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -93,7 +93,7 @@ void* GPUTPCTracker::SetPointersCommon(void* mem) void GPUTPCTracker::RegisterMemoryAllocation() { AllocateAndInitializeLate(); - bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && mRec->GetProcessingSettings().trackletConstructorInPipeline && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); + bool reuseCondition = !mRec->GetProcessingSettings().keepDisplayMemory && ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) || mRec->GetProcessingSettings().inKernelParallel == 1 || mRec->GetProcessingSettings().nHostThreads == 1); GPUMemoryReuse reLinks{reuseCondition, GPUMemoryReuse::REUSE_1TO1, 
GPUMemoryReuse::TrackerDataLinks, (uint16_t)(mISector % mRec->GetProcessingSettings().nStreams)}; mMemoryResLinks = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataLinks, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK, "TPCSectorLinks", reLinks); mMemoryResSectorScratch = mRec->RegisterMemoryAllocation(this, &GPUTPCTracker::SetPointersDataScratch, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK | GPUMemoryResource::MEMORY_CUSTOM, "TPCSectorScratch"); From 22a9b80b1c0932cf3c7c11d9a20b197cae11537a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 13:40:43 +0100 Subject: [PATCH 0203/1914] Simplify GPUCAMath a bit --- GPU/Common/GPUCommonMath.h | 191 +++++++----------- .../GPUTPCCompressionKernels.cxx | 2 +- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 4 +- .../display/render/GPUDisplayDraw.cxx | 2 +- 4 files changed, 80 insertions(+), 119 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 5a813b74ed7b6..f4b9cd945799a 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -33,6 +33,15 @@ #include #endif +// GPUCA_CHOICE Syntax: GPUCA_CHOICE(Host, CUDA&HIP, OpenCL) +#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off + #define GPUCA_CHOICE(c1, c2, c3) (c2) // Select second option for CUDA and HIP +#elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__) + #define GPUCA_CHOICE(c1, c2, c3) (c3) // Select third option for OpenCL +#else + #define GPUCA_CHOICE(c1, c2, c3) (c1) // Select first option for Host +#endif // clang-format on + namespace o2 { namespace gpu @@ -44,9 +53,15 @@ class GPUCommonMath GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better appraoch that is constexpr template - GPUhd() static T Min(const T x, const T y); + GPUhd() static T Min(const T x, const T y) + { + return GPUCA_CHOICE(std::min(x, y), min(x, y), min(x, y)); + } template - GPUhd() 
static T Max(const T x, const T y); + GPUhd() static T Max(const T x, const T y) + { + return GPUCA_CHOICE(std::max(x, y), max(x, y), max(x, y)); + } template GPUd() static T MinWithRef(T x, T y, S refX, S refY, R& r); template @@ -74,28 +89,28 @@ class GPUCommonMath GPUd() static float Pow(float x, float y); GPUd() static float Log(float x); GPUd() static float Exp(float x); - GPUhdni() static float Copysign(float x, float y); + GPUhdni() static float Copysign(float x, float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); } GPUd() static constexpr float TwoPi() { return 6.2831853f; } GPUd() static constexpr float Pi() { return 3.1415927f; } GPUd() static float Round(float x); - GPUd() static float Floor(float x); + GPUd() static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } GPUd() static uint32_t Float2UIntReint(const float& x); - GPUd() static uint32_t Float2UIntRn(float x); + GPUd() static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } GPUd() static int32_t Float2IntRn(float x); GPUd() static float Modf(float x, float y); - GPUd() static bool Finite(float x); - GPUd() static bool IsNaN(float x); + GPUd() static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } + GPUd() static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } GPUd() static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH GPUd() static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() static float QuietNaN(); + GPUd() static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } GPUd() static uint32_t Clz(uint32_t val); GPUd() static uint32_t Popcount(uint32_t val); GPUd() static void memcpy(void* dst, const void* src, size_t size); - GPUhdni() static float Hypot(float x, float y); - GPUhdni() static float Hypot(float x, float y, float z); - 
GPUhdni() static float Hypot(float x, float y, float z, float w); + GPUhdi() static float Hypot(float x, float y) { return Sqrt(x * x + y * y); } + GPUhdi() static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); } + GPUhdi() static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); } template GPUhd() static void Swap(T& a, T& b); @@ -154,15 +169,7 @@ class GPUCommonMath GPUd() constexpr static T nextMultipleOf(T val); template - GPUdi() static float Sum2(float w, Args... args) - { - if constexpr (sizeof...(Args) == 0) { - return w * w; - } else { - return w * w + Sum2(args...); - } - return 0; - } + GPUhdni() static float Sum2(float w, Args... args); private: template @@ -179,14 +186,16 @@ class GPUCommonMath typedef GPUCommonMath CAMath; -// CHOICE Syntax: CHOICE(Host, CUDA&HIP, OpenCL) -#if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) // clang-format off - #define CHOICE(c1, c2, c3) (c2) // Select second option for CUDA and HIP -#elif defined(GPUCA_GPUCODE_DEVICE) && defined (__OPENCL__) - #define CHOICE(c1, c2, c3) (c3) // Select third option for OpenCL -#else - #define CHOICE(c1, c2, c3) (c1) // Select first option for Host -#endif // clang-format on +template +GPUhdi() float GPUCommonMath::Sum2(float w, Args... 
args) +{ + if constexpr (sizeof...(Args) == 0) { + return w * w; + } else { + return w * w + Sum2(args...); + } + return 0; +} GPUdi() void GPUCommonMath::memcpy(void* dst, const void* src, size_t size) { @@ -230,7 +239,7 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y) #endif // GPUCA_GPUCODE } -GPUdi() float GPUCommonMath::Modf(float x, float y) { return CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } +GPUdi() float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) { @@ -243,42 +252,36 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) #endif } -GPUdi() uint32_t GPUCommonMath::Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } -GPUdi() float GPUCommonMath::Floor(float x) { return CHOICE(floorf(x), floorf(x), floor(x)); } - -GPUdi() bool GPUCommonMath::Finite(float x) { return CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } -GPUdi() bool GPUCommonMath::IsNaN(float x) { return CHOICE(std::isnan(x), isnan(x), isnan(x)); } -GPUdi() float GPUCommonMath::QuietNaN() { return CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } #ifdef GPUCA_NO_FAST_MATH -GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), roundf(x), round(x)); } +GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return CHOICE((float)sin((double)x), 
(float)sin((double)x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return CHOICE((float)log((double)x), (float)log((double)x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } +GPUhdi() float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } +GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } +GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } +GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); } +GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } +GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); } +GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } +GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } +GPUdi() float GPUCommonMath::ACos(float x) { return 
GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } +GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } +GPUdi() float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } #else -GPUdi() float GPUCommonMath::Round(float x) { return CHOICE(roundf(x), rintf(x), rint(x)); } -GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return CHOICE(atanf(x), atanf(x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return CHOICE(sinf(x), sinf(x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return CHOICE(cosf(x), cosf(x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return CHOICE(tanf(x), tanf(x), tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return CHOICE(powf(x, y), powf(x, y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return CHOICE(asinf(x), asinf(x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return CHOICE(acosf(x), acosf(x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return CHOICE(logf(x), logf(x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return CHOICE(expf(x), expf(x), exp(x)); } +GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } +GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } +GPUhdi() float GPUCommonMath::Sqrt(float x) 
{ return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } +GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); } +GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } +GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); } +GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); } +GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); } +GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); } +GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); } +GPUdi() float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } +GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } +GPUdi() float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return true; } GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } #endif @@ -293,7 +296,7 @@ GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) #elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE)) sincosf(x, &s, &c); #else - CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); + GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); #endif } @@ -304,14 +307,14 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) #elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE)) sincos(x, &s, &c); #else - CHOICE((void)((s = sin(x)) + (c = cos(x))), sincos(x, &s, &c), s = sincos(x, &c)); + GPUCA_CHOICE((void)((s = sin(x)) + (c = cos(x))), sincos(x, &s, &c), 
s = sincos(x, &c)); #endif } GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) - return x == 0 ? 32 : CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available + return x == 0 ? 32 : GPUCA_CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available #else for (int32_t i = 31; i >= 0; i--) { if (x & (1u << i)) { @@ -326,7 +329,7 @@ GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed // use builtin if available - return CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x)); + return GPUCA_CHOICE(__builtin_popcount(x), __popc(x), __builtin_popcount(x)); #else x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); @@ -334,45 +337,16 @@ GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) #endif } -GPUhdi() float GPUCommonMath::Hypot(float x, float y) -{ - return Sqrt(x * x + y * y); -} - -GPUhdi() float GPUCommonMath::Hypot(float x, float y, float z) -{ - return Sqrt(x * x + y * y + z * z); -} - -GPUhdi() float GPUCommonMath::Hypot(float x, float y, float z, float w) -{ - return Sqrt(x * x + y * y + z * z + w * w); -} - template -GPUd() void _swap(T& a, T& b) +GPUhdi() void GPUCommonMath::Swap(T& a, T& b) { +#ifndef GPUCA_GPUCODE_DEVICE + std::swap(a, b); +#else T tmp = a; a = b; b = tmp; -} - -template -GPUhdi() void GPUCommonMath::Swap(T& a, T& b) -{ - CHOICE(std::swap(a, b), _swap(a, b), _swap(a, b)); -} - -template -GPUhdi() T GPUCommonMath::Min(const T x, const T y) -{ - return CHOICE(std::min(x, y), min(x, y), min(x, y)); -} - -template -GPUhdi() T GPUCommonMath::Max(const T x, const T y) -{ - return CHOICE(std::max(x, y), max(x, y), max(x, y)); +#endif } template @@ -441,32 +415,19 @@ GPUdi() float 
GPUCommonMath::InvSqrt(float _x) template <> GPUhdi() float GPUCommonMath::Abs(float x) { - return CHOICE(fabsf(x), fabsf(x), fabs(x)); + return GPUCA_CHOICE(fabsf(x), fabsf(x), fabs(x)); } -#if !defined(__OPENCL__) || defined(cl_khr_fp64) template <> GPUhdi() double GPUCommonMath::Abs(double x) { - return CHOICE(fabs(x), fabs(x), fabs(x)); + return GPUCA_CHOICE(fabs(x), fabs(x), fabs(x)); } -#endif template <> GPUhdi() int32_t GPUCommonMath::Abs(int32_t x) { - return CHOICE(abs(x), abs(x), abs(x)); -} - -GPUhdi() float GPUCommonMath::Copysign(float x, float y) -{ -#if defined(__OPENCL__) - return copysign(x, y); -#elif defined(GPUCA_GPUCODE) && !defined(__OPENCL__) - return copysignf(x, y); -#else - return std::copysignf(x, y); -#endif // GPUCA_GPUCODE + return GPUCA_CHOICE(abs(x), abs(x), abs(x)); } template @@ -579,7 +540,7 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt } #endif -#undef CHOICE +#undef GPUCA_CHOICE } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 966bffa963c7e..4831be9b12bcc 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -199,7 +199,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSector][iRow]); + const uint32_t nn = CAMath::nextMultipleOf(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; int32_t cidx = 0; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index 1bd2eca769913..fa0711887f60f 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -787,8 +787,8 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK if (mHypothesis[iUpdate + 
hypothesisIdxOffset].mTrackletId == trkltIdx) { continue; } - if (GPUCommonMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetZbin() - tracklets[trkltIdx].GetZbin()) == 1 && - GPUCommonMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetY() - tracklets[trkltIdx].GetY()) < 1) { + if (CAMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetZbin() - tracklets[trkltIdx].GetZbin()) == 1 && + CAMath::Abs(tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetY() - tracklets[trkltIdx].GetY()) < 1) { trkWork->setIsCrossingNeighbor(iLayer); trkWork->setHasNeighbor(); break; diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 188df5467e83d..24668c576d795 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -64,7 +64,7 @@ inline void GPUDisplay::drawPointLinestrip(int32_t iSector, int32_t cid, int32_t mVertexBuffer[iSector].emplace_back(mGlobalPos[cid].x, mGlobalPos[cid].y * mYFactor, mCfgH.projectXY ? 
0 : mGlobalPos[cid].z); float curVal; while ((curVal = mGlobalPos[cid].w) < id_limit) { - if (GPUCommonMath::AtomicCAS(&mGlobalPos[cid].w, curVal, (float)id)) { + if (CAMath::AtomicCAS(&mGlobalPos[cid].w, curVal, (float)id)) { break; } curVal = mGlobalPos[cid].w; From c68243887320761f3a8ca2526403808a8fb2b7f0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 15:57:21 +0100 Subject: [PATCH 0204/1914] GPU TPC: Get rid of duplicate ReadEvent code path for initializing tracking data on CPU --- GPU/GPUTracking/Global/GPUChainTracking.h | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 26 +++---------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 4a2778851e517..e7d6f420b9c4d 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -220,7 +220,6 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - int32_t ReadEvent(uint32_t iSector, int32_t threadId); void WriteOutput(int32_t iSector, int32_t threadId); int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 0eb8af6a6a006..efb3deb257a42 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -158,17 +158,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Creating Sector Data (Sector %d)", iSector); } - if (doGPU) { - TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); - runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr, streamInit[useStream] ? 
nullptr : &mEvents->init}}); - streamInit[useStream] = true; - } else { - if (ReadEvent(iSector, 0)) { - GPUError("Error reading event"); - error = 1; - return; - } - } + TransferMemoryResourcesToGPU(RecoStep::TPCSectorTracking, &trk, useStream); + runKernel({doGPU ? GetGridBlk(GPUCA_ROW_COUNT, useStream) : GetGridAuto(0), {iSector}, {nullptr, streamInit[useStream] ? nullptr : &mEvents->init}}); // TODO: Check why GetGridAuto(0) is much fast on CPU + streamInit[useStream] = true; if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGridBlk(GPUCA_ROW_COUNT, useStream), {iSector}}); } @@ -381,18 +373,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() return 0; } -int32_t GPUChainTracking::ReadEvent(uint32_t iSector, int32_t threadId) -{ - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running ReadEvent for sector %d on thread %d\n", iSector, threadId); - } - runKernel({{GetGridAuto(0, GPUReconstruction::krnlDeviceType::CPU)}, {iSector}}); - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished ReadEvent for sector %d on thread %d\n", iSector, threadId); - } - return (0); -} - void GPUChainTracking::WriteOutput(int32_t iSector, int32_t threadId) { if (GetProcessingSettings().debugLevel >= 5) { From 5848069446af06802fc6221868e9d075bba3d257 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 16:24:19 +0100 Subject: [PATCH 0205/1914] GPU TPC: Remove option to write out / start from obsolete TPC sector track data format --- GPU/GPUTracking/Base/GPUReconstruction.h | 1 - GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 1 - .../Base/GPUReconstructionDeviceBase.cxx | 1 - GPU/GPUTracking/CMakeLists.txt | 1 - GPU/GPUTracking/DataTypes/GPUDataTypes.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 9 +- GPU/GPUTracking/Global/GPUChainTracking.h | 3 +- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 36 +---- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 1 
- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 1 - GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 1 - .../SectorTracker/GPUTPCSectorOutput.cxx | 57 -------- .../SectorTracker/GPUTPCSectorOutput.h | 79 ----------- .../SectorTracker/GPUTPCTracker.cxx | 123 +----------------- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 9 -- .../SectorTracker/GPUTPCTrackerDump.cxx | 17 --- .../Standalone/Benchmark/standalone.cxx | 1 - 18 files changed, 11 insertions(+), 333 deletions(-) delete mode 100644 GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx delete mode 100644 GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 529cce2bd087f..a0248180a5e2c 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -31,7 +31,6 @@ #include "GPUOutputControl.h" #include "GPUMemoryResource.h" #include "GPUConstantMem.h" -#include "GPUTPCSectorOutput.h" #include "GPULogging.h" namespace o2::its diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index f5d350b4064d0..f397fc51bd407 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -18,7 +18,6 @@ #include "GPUChain.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index 64d9351b447e2..d1091f59b784a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -16,7 +16,6 @@ #include "GPUReconstructionIncludes.h" #include "GPUTPCTracker.h" -#include "GPUTPCSectorOutput.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 
b65674a68e6aa..c97742ac1d47f 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -37,7 +37,6 @@ set(SRCS SectorTracker/GPUTPCNeighboursCleaner.cxx SectorTracker/GPUTPCTracker.cxx SectorTracker/GPUTPCTrackingData.cxx - SectorTracker/GPUTPCSectorOutput.cxx SectorTracker/GPUTPCTrackletConstructor.cxx SectorTracker/GPUTPCSectorDebugSortKernels.cxx SectorTracker/GPUTPCCreateOccupancyMap.cxx diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 51b5c0b101537..f7bfe38be988d 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -139,7 +139,7 @@ class GPUDataTypes AllRecoSteps = 0x7FFFFFFF, NoRecoStep = 0 }; enum ENUM_CLASS InOutType { TPCClusters = 1, - TPCSectorTracks = 2, + OBSOLETE = 2, TPCMergedTracks = 4, TPCCompressedClusters = 8, TRDTracklets = 16, diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 0e1cde343135e..c186f916891ba 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -20,7 +20,6 @@ #include "GPUChainTracking.h" #include "GPUChainTrackingDefs.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" @@ -185,7 +184,7 @@ bool GPUChainTracking::ValidateSteps() GPUError("Invalid input, TPC Clusterizer needs TPC raw input"); return false; } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && ((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion))) { + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCConversion)) { GPUError("Invalid input / output / step, merger cannot read/store sectors tracks and 
needs TPC conversion"); return false; } @@ -204,7 +203,7 @@ bool GPUChainTracking::ValidateSteps() GPUError("Missing input for TPC Cluster conversion / sector tracking / compression / dEdx: TPC Clusters required"); return false; } - if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !((GetRecoStepsInputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking))) { + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { GPUError("Input for TPC merger missing"); return false; } @@ -220,10 +219,6 @@ bool GPUChainTracking::ValidateSteps() GPUError("TPC Raw / TPC Clusters / TRD Tracklets cannot be output"); return false; } - if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCSectorTracking)) { - GPUError("No TPC Sector Tracker Output available"); - return false; - } if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCMergedTracks) && !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCMerging)) { GPUError("No TPC Merged Track Output available"); return false; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index e7d6f420b9c4d..194573981838e 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -220,7 +220,6 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - void WriteOutput(int32_t iSector, int32_t threadId); int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); int32_t PrepareProfile(); @@ -280,7 +279,7 @@ class GPUChainTracking : public GPUChain // Synchronization and Locks eventStruct* mEvents = nullptr; volatile int32_t mSectorSelectorReady = 0; - std::array mWriteOutputDone; + std::array 
mExtrapolationTrackingDone; std::vector mOutputQueue; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index c4dddd4b8b88f..4f7846b852b98 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -15,7 +15,6 @@ #include "GPUChainTracking.h" #include "GPUReconstructionIO.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" #include "GPUTPCGMMergedTrackHit.h" diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index efb3deb257a42..522ccbad47e59 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -242,9 +242,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() std::array transferRunning; transferRunning.fill(true); - if ((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { + if (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)) { // TODO: This seems pretty obsolete code path, can probably be removed. 
if (param().rec.tpc.extrapolationTracking) { - mWriteOutputDone.fill(0); + mExtrapolationTrackingDone.fill(0); } uint32_t tmpSector = 0; @@ -288,18 +288,15 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() uint32_t sectorLeft, sectorRight; GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight); - if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mWriteOutputDone[tmpSector2] == 0) { + if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mExtrapolationTrackingDone[tmpSector2] == 0) { ExtrapolationTracking(tmpSector2, 0); - WriteOutput(tmpSector2, 0); - mWriteOutputDone[tmpSector2] = 1; + mExtrapolationTrackingDone[tmpSector2] = 1; } } - } else { - WriteOutput(iSector, 0); } } } - if (!(GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) && param().rec.tpc.extrapolationTracking) { + if (param().rec.tpc.extrapolationTracking) { std::vector blocking(NSECTORS * mRec->NStreams()); for (int32_t i = 0; i < NSECTORS; i++) { for (int32_t j = 0; j < mRec->NStreams(); j++) { @@ -308,7 +305,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector); - if (!((GetRecoStepsOutputs() & GPUDataTypes::InOutType::TPCSectorTracks) || (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)))) { + if (!(doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { uint32_t sectorLeft, sectorRight; GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) { @@ -334,9 +331,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (param().rec.tpc.extrapolationTracking) { ExtrapolationTracking(iSector, 0); } - if (GetRecoStepsOutputs() & 
GPUDataTypes::InOutType::TPCSectorTracks) { - WriteOutput(iSector, 0); - } }); mRec->SetNActiveThreadsOuterLoop(1); } @@ -348,12 +342,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } } - if (GetProcessingSettings().debugMask & 1024 && !GetProcessingSettings().deterministicGPUReconstruction) { - for (uint32_t i = 0; i < NSECTORS; i++) { - processors()->tpcTrackers[i].DumpOutput(*mDebugFile); - } - } - if (DoProfile()) { return (1); } @@ -372,15 +360,3 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() mRec->PopNonPersistentMemory(RecoStep::TPCSectorTracking, qStr2Tag("TPCSLTRK")); return 0; } - -void GPUChainTracking::WriteOutput(int32_t iSector, int32_t threadId) -{ - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Running WriteOutput for sector %d on thread %d\n", iSector, threadId); - } - processors()->tpcTrackers[iSector].WriteOutputPrepare(); - processors()->tpcTrackers[iSector].WriteOutput(); - if (GetProcessingSettings().debugLevel >= 5) { - GPUInfo("Finished WriteOutput for sector %d on thread %d\n", iSector, threadId); - } -} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f6a50565bac52..fa85d796baeba 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -42,7 +42,6 @@ #include "GPUCommonConstants.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUTPCTrackLinearisation.h" diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index ae6a2582d833a..506dd88ab7058 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -42,7 +42,6 @@ struct ClusterNative; namespace o2::gpu { class GPUTPCSectorTrack; -class GPUTPCSectorOutput; class GPUTPCGMTrackParam; class GPUTPCTracker; class GPUChainTracking; diff --git 
a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index d6dfcc8424e65..ae413aaa98648 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -25,7 +25,6 @@ #include "GPUO2DataTypes.h" #include "GPUCommonMath.h" #include "GPUTPCTrackParam.h" -#include "GPUTPCSectorOutput.h" #include "GPUTPCGMMergedTrack.h" #include "GPUParam.h" #include "GPUParam.inc" diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx deleted file mode 100644 index 864a5c6b7106e..0000000000000 --- a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.cxx +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCSectorOutput.cxx -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#include "GPUOutputControl.h" -#include "GPUTPCSectorOutput.h" -#include "GPUCommonMath.h" -#include - -using namespace o2::gpu; - -uint32_t GPUTPCSectorOutput::EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters) -{ - // calculate the amount of memory [bytes] needed for the event - return sizeof(GPUTPCSectorOutput) + sizeof(GPUTPCTrack) * nOfTracks + sizeof(GPUTPCSectorOutCluster) * nOfTrackClusters; -} - -#ifndef GPUCA_GPUCODE -void GPUTPCSectorOutput::Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory) -{ - // Allocate All memory needed for sector output - const size_t memsize = EstimateSize(nTracks, nTrackHits); - - if (outputControl && outputControl->useExternal()) { - static std::atomic_flag lock = ATOMIC_FLAG_INIT; - while (lock.test_and_set(std::memory_order_acquire)) { - } - outputControl->checkCurrent(); - if (outputControl->size - ((char*)outputControl->ptrCurrent - (char*)outputControl->ptrBase) < memsize) { - outputControl->size = 1; - ptrOutput = nullptr; - lock.clear(std::memory_order_release); - return; - } - ptrOutput = reinterpret_cast(outputControl->ptrCurrent); - outputControl->ptrCurrent = (char*)outputControl->ptrCurrent + memsize; - lock.clear(std::memory_order_release); - } else { - if (internalMemory) { - free(internalMemory); - } - internalMemory = malloc(memsize); - ptrOutput = reinterpret_cast(internalMemory); - } - ptrOutput->SetMemorySize(memsize); -} -#endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h b/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h deleted file mode 100644 index cc02206dc09a7..0000000000000 --- a/GPU/GPUTracking/SectorTracker/GPUTPCSectorOutput.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCSectorOutput.h -/// \author Sergey Gorbunov, Ivan Kisel, David Rohr - -#ifndef GPUTPCSECTOROUTPUT_H -#define GPUTPCSECTOROUTPUT_H - -#include "GPUTPCDef.h" -#include "GPUTPCTrack.h" - -namespace o2::gpu -{ -struct GPUOutputControl; - -/** - * @class GPUTPCSectorOutput - * - * GPUTPCSectorOutput class is used to store the output of GPUTPCTracker{Component} - * and transport the output to GPUTPCGBMerger{Component} - * - * The class contains all the necessary information about TPC tracks, reconstructed in one sector. - * This includes the reconstructed track parameters and some compressed information - * about the assigned clusters: clusterId, position and amplitude. 
- * - */ -class GPUTPCSectorOutput -{ - public: - GPUhd() uint32_t NTracks() const - { - return mNTracks; - } - GPUhd() uint32_t NLocalTracks() const { return mNLocalTracks; } - GPUhd() uint32_t NTrackClusters() const { return mNTrackClusters; } - GPUhd() const GPUTPCTrack* GetFirstTrack() const - { - return (const GPUTPCTrack*)((const char*)this + sizeof(*this)); - } - GPUhd() GPUTPCTrack* FirstTrack() - { - return (GPUTPCTrack*)((char*)this + sizeof(*this)); - } - GPUhd() size_t Size() const - { - return (mMemorySize); - } - - static uint32_t EstimateSize(uint32_t nOfTracks, uint32_t nOfTrackClusters); - static void Allocate(GPUTPCSectorOutput*& ptrOutput, int32_t nTracks, int32_t nTrackHits, GPUOutputControl* outputControl, void*& internalMemory); - - GPUhd() void SetNTracks(uint32_t v) { mNTracks = v; } - GPUhd() void SetNLocalTracks(uint32_t v) { mNLocalTracks = v; } - GPUhd() void SetNTrackClusters(uint32_t v) { mNTrackClusters = v; } - - private: - GPUTPCSectorOutput() = delete; // NOLINT: Must be private or ROOT tries to use them! 
- ~GPUTPCSectorOutput() = delete; // NOLINT - GPUTPCSectorOutput(const GPUTPCSectorOutput&) = delete; // NOLINT - GPUTPCSectorOutput& operator=(const GPUTPCSectorOutput&) = delete; // NOLINT - - GPUhd() void SetMemorySize(size_t val) { mMemorySize = val; } - - uint32_t mNTracks; // number of reconstructed tracks - uint32_t mNLocalTracks; - uint32_t mNTrackClusters; // total number of track clusters - size_t mMemorySize; // Amount of memory really used -}; -} // namespace o2::gpu -#endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 4e815784f7cad..6c1b4eda0d7f5 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -18,7 +18,6 @@ #include "GPUCommonMath.h" #include "GPUTPCClusterData.h" -#include "GPUTPCSectorOutput.h" #include "GPUO2DataTypes.h" #include "GPUTPCTrackParam.h" #include "GPUParam.inc" @@ -39,12 +38,7 @@ using namespace o2::tpc; #if !defined(GPUCA_GPUCODE) -GPUTPCTracker::~GPUTPCTracker() -{ - if (mOutputMemory) { - free(mOutputMemory); - } -} +GPUTPCTracker::~GPUTPCTracker() = default; // ---------------------------------------------------------------------------------- void GPUTPCTracker::SetSector(int32_t iSector) { mISector = iSector; } @@ -173,124 +167,9 @@ GPUh() int32_t GPUTPCTracker::CheckEmptySector() // Check if the Sector is empty, if so set the output apropriate and tell the reconstuct procesdure to terminate if (NHitsTotal() < 1) { mCommonMem->nTracks = mCommonMem->nTrackHits = 0; - if (mOutput) { - WriteOutputPrepare(); - mOutput->SetNTracks(0); - mOutput->SetNTrackClusters(0); - } return 1; } return 0; } -GPUh() void GPUTPCTracker::WriteOutputPrepare() { GPUTPCSectorOutput::Allocate(mOutput, mCommonMem->nTracks, mCommonMem->nTrackHits, &mRec->OutputControl(), mOutputMemory); } - -template -static inline bool SortComparison(const T& a, const T& b) -{ - return (a.fSortVal < b.fSortVal); -} - -GPUh() void 
GPUTPCTracker::WriteOutput() -{ - mOutput->SetNTracks(0); - mOutput->SetNLocalTracks(0); - mOutput->SetNTrackClusters(0); - - if (mCommonMem->nTracks == 0) { - return; - } - if (mCommonMem->nTracks > GPUCA_MAX_SECTOR_NTRACK) { - GPUError("Maximum number of tracks exceeded, cannot store"); - return; - } - - int32_t nStoredHits = 0; - int32_t nStoredTracks = 0; - int32_t nStoredLocalTracks = 0; - - GPUTPCTrack* out = mOutput->FirstTrack(); - - trackSortData* trackOrder = new trackSortData[mCommonMem->nTracks]; - for (uint32_t i = 0; i < mCommonMem->nTracks; i++) { - trackOrder[i].fTtrack = i; - trackOrder[i].fSortVal = mTracks[trackOrder[i].fTtrack].NHits() / 1000.f + mTracks[trackOrder[i].fTtrack].Param().GetZ() * 100.f + mTracks[trackOrder[i].fTtrack].Param().GetY(); - } - std::sort(trackOrder, trackOrder + mCommonMem->nLocalTracks, SortComparison); // TODO: Check why this sorting affects the merging efficiency! - std::sort(trackOrder + mCommonMem->nLocalTracks, trackOrder + mCommonMem->nTracks, SortComparison); - - for (uint32_t iTrTmp = 0; iTrTmp < mCommonMem->nTracks; iTrTmp++) { - const int32_t iTr = trackOrder[iTrTmp].fTtrack; - GPUTPCTrack& iTrack = mTracks[iTr]; - - *out = iTrack; - int32_t nClu = 0; - int32_t iID = iTrack.FirstHitID(); - - for (int32_t ith = 0; ith < iTrack.NHits(); ith++) { - const GPUTPCHitId& ic = mTrackHits[iID + ith]; - int32_t iRow = ic.RowIndex(); - int32_t ih = ic.HitIndex(); - - const GPUTPCRow& row = mData.Row(iRow); - int32_t clusterIndex = mData.ClusterDataIndex(row, ih); -#ifdef GPUCA_ARRAY_BOUNDS_CHECKS - if (ih >= row.NHits() || ih < 0) { - GPUError("Array out of bounds access (Sector Row) (Hit %d / %d - NumC %d): Sector %d Row %d Index %d", ith, iTrack.NHits(), NHitsTotal(), mISector, iRow, ih); - fflush(stdout); - continue; - } - if (clusterIndex >= NHitsTotal() || clusterIndex < 0) { - GPUError("Array out of bounds access (Cluster Data) (Hit %d / %d - NumC %d): Sector %d Row %d Hit %d, Clusterdata Index %d", ith, 
iTrack.NHits(), NHitsTotal(), mISector, iRow, ih, clusterIndex); - fflush(stdout); - continue; - } -#endif - - float origX, origY, origZ; - uint8_t flags; - uint16_t amp; - int32_t id; - if (Param().par.earlyTpcTransform) { - origX = mData.ClusterData()[clusterIndex].x; - origY = mData.ClusterData()[clusterIndex].y; - origZ = mData.ClusterData()[clusterIndex].z; - flags = mData.ClusterData()[clusterIndex].flags; - amp = mData.ClusterData()[clusterIndex].amp; - id = mData.ClusterData()[clusterIndex].id; - } else { - const ClusterNativeAccess& cls = *mConstantMem->ioPtrs.clustersNative; - id = clusterIndex + cls.clusterOffset[mISector][0]; - GPUTPCConvertImpl::convert(*mConstantMem, mISector, iRow, cls.clustersLinear[id].getPad(), cls.clustersLinear[id].getTime(), origX, origY, origZ); - flags = cls.clustersLinear[id].getFlags(); - amp = cls.clustersLinear[id].qTot; - } - GPUTPCSectorOutCluster c; - c.Set(id, iRow, flags, amp, origX, origY, origZ); -#ifdef GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME - c.mPad = mData.ClusterData()[clusterIndex].pad; - c.mTime = mData.ClusterData()[clusterIndex].time; -#endif - out->SetOutTrackCluster(nClu, c); - nClu++; - } - - nStoredTracks++; - if (iTr < mCommonMem->nLocalTracks) { - nStoredLocalTracks++; - } - nStoredHits += nClu; - out->SetNHits(nClu); - out = out->NextTrack(); - } - delete[] trackOrder; - - mOutput->SetNTracks(nStoredTracks); - mOutput->SetNLocalTracks(nStoredLocalTracks); - mOutput->SetNTrackClusters(nStoredHits); - if (Param().par.debugLevel >= 3) { - GPUInfo("Sector %d, Output: Tracks %d, local tracks %d, hits %d", mISector, nStoredTracks, nStoredLocalTracks, nStoredHits); - } -} - #endif diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index 4a789b5adf6bf..e8aac872198f5 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -29,7 +29,6 @@ namespace o2::gpu { -class GPUTPCSectorOutput; struct 
GPUTPCClusterData; struct GPUParam; class GPUTPCTrack; @@ -50,8 +49,6 @@ class GPUTPCTracker : public GPUProcessor void InitializeRows(const GPUParam* param) { mData.InitializeRows(*param); } int32_t CheckEmptySector(); - void WriteOutputPrepare(); - void WriteOutput(); // Debugging Stuff void DumpTrackingData(std::ostream& out); // Dump Input Sector Data @@ -60,7 +57,6 @@ class GPUTPCTracker : public GPUProcessor void DumpHitWeights(std::ostream& out); //.... void DumpTrackHits(std::ostream& out); // Same for Track Hits void DumpTrackletHits(std::ostream& out); // Same for Track Hits - void DumpOutput(std::ostream& out); // Similar for output #endif struct StructGPUParameters { @@ -88,7 +84,6 @@ class GPUTPCTracker : public GPUProcessor return mData.ClusterData(); } GPUhdi() const GPUTPCRow& Row(const GPUTPCHitId& HitId) const { return mData.Row(HitId.RowIndex()); } - GPUhdi() GPUglobalref() GPUTPCSectorOutput* Output() const { return mOutput; } GPUhdni() GPUglobalref() commonMemoryStruct* CommonMemory() const { return (mCommonMem); @@ -268,10 +263,6 @@ class GPUTPCTracker : public GPUProcessor GPUglobalref() GPUTPCTrack* mTracks = nullptr; // reconstructed tracks GPUglobalref() GPUTPCHitId* mTrackHits = nullptr; // array of track hit numbers - // output - GPUglobalref() GPUTPCSectorOutput* mOutput; // address of pointer pointing to SectorOutput Object - void* mOutputMemory; // Pointer to output memory if stored internally - static int32_t StarthitSortComparison(const void* a, const void* b); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx index ba1727fa602a4..7d83ff9abd91c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx @@ -13,7 +13,6 @@ /// \author David Rohr #include "GPUTPCTracker.h" -#include "GPUTPCSectorOutput.h" #include "GPUReconstruction.h" #include "GPUTPCHitId.h" #include 
"GPUTPCTrack.h" @@ -26,22 +25,6 @@ using namespace o2::gpu; -void GPUTPCTracker::DumpOutput(std::ostream& out) -{ - if (Param().par.earlyTpcTransform) { - out << "\nSector " << mISector << "\n"; - const GPUTPCTrack* track = (Output())->GetFirstTrack(); - for (uint32_t j = 0; j < (Output())->NTracks(); j++) { - out << "Track " << j << " (" << track->NHits() << "): "; - for (int32_t k = 0; k < track->NHits(); k++) { - out << "(" << track->OutTrackCluster(k).GetX() << "," << track->OutTrackCluster(k).GetY() << "," << track->OutTrackCluster(k).GetZ() << ") "; - } - out << " - (" << track->Param().Y() << " " << track->Param().Z() << " " << track->Param().SinPhi() << " " << track->Param().DzDs() << " " << track->Param().QPt() << "\n"; - track = track->GetNextTrack(); - } - } -} - void GPUTPCTracker::DumpTrackingData(std::ostream& out) { // Dump Sector Input Data to File diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 682e6913d58d4..d6279df7c9188 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -416,7 +416,6 @@ int32_t SetupReconstruction() } steps.outputs.clear(); - steps.outputs.setBits(GPUDataTypes::InOutType::TPCSectorTracks, false); steps.outputs.setBits(GPUDataTypes::InOutType::TPCMergedTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TPCMerging)); steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, steps.steps.isSet(GPUDataTypes::RecoStep::TPCCompression)); steps.outputs.setBits(GPUDataTypes::InOutType::TRDTracks, steps.steps.isSet(GPUDataTypes::RecoStep::TRDTracking)); From ee009a4291823f61960932bbfde88597e60a6d6d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 16:42:27 +0100 Subject: [PATCH 0206/1914] GPU Math: Make constexpr what possible --- GPU/Common/GPUCommonMath.h | 154 ++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 77 deletions(-) diff --git 
a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index f4b9cd945799a..e977b3679a4ee 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -53,12 +53,12 @@ class GPUCommonMath GPUd() static float2 MakeFloat2(float x, float y); // TODO: Find better appraoch that is constexpr template - GPUhd() static T Min(const T x, const T y) + GPUhd() constexpr static T Min(const T x, const T y) { return GPUCA_CHOICE(std::min(x, y), min(x, y), min(x, y)); } template - GPUhd() static T Max(const T x, const T y) + GPUhd() constexpr static T Max(const T x, const T y) { return GPUCA_CHOICE(std::max(x, y), max(x, y), max(x, y)); } @@ -69,51 +69,51 @@ class GPUCommonMath template GPUd() static T MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, S refW, R& r); template - GPUdi() static T Clamp(const T v, const T lo, const T hi) + GPUdi() constexpr static T Clamp(const T v, const T lo, const T hi) { return Max(lo, Min(v, hi)); } - GPUhdni() static float Sqrt(float x); + GPUhdni() constexpr static float Sqrt(float x); GPUd() static float InvSqrt(float x); template - GPUhd() static T Abs(T x); - GPUd() static float ASin(float x); - GPUd() static float ACos(float x); - GPUd() static float ATan(float x); - GPUhd() static float ATan2(float y, float x); - GPUd() static float Sin(float x); - GPUd() static float Cos(float x); + GPUhd() constexpr static T Abs(T x); + GPUd() constexpr static float ASin(float x); + GPUd() constexpr static float ACos(float x); + GPUd() constexpr static float ATan(float x); + GPUhd() constexpr static float ATan2(float y, float x); + GPUd() constexpr static float Sin(float x); + GPUd() constexpr static float Cos(float x); GPUhdni() static void SinCos(float x, float& s, float& c); GPUhdni() static void SinCosd(double x, double& s, double& c); - GPUd() static float Tan(float x); - GPUd() static float Pow(float x, float y); - GPUd() static float Log(float x); - GPUd() static float Exp(float x); - GPUhdni() static float Copysign(float x, 
float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); } - GPUd() static constexpr float TwoPi() { return 6.2831853f; } - GPUd() static constexpr float Pi() { return 3.1415927f; } - GPUd() static float Round(float x); - GPUd() static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } - GPUd() static uint32_t Float2UIntReint(const float& x); - GPUd() static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } - GPUd() static int32_t Float2IntRn(float x); - GPUd() static float Modf(float x, float y); - GPUd() static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } - GPUd() static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } - GPUd() static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } - GPUd() static uint32_t Clz(uint32_t val); - GPUd() static uint32_t Popcount(uint32_t val); + GPUd() constexpr static float Tan(float x); + GPUd() constexpr static float Pow(float x, float y); + GPUd() constexpr static float Log(float x); + GPUd() constexpr static float Exp(float x); + GPUhdni() constexpr static float Copysign(float x, float y) { return GPUCA_CHOICE(std::copysignf(x, y), copysignf(x, y), copysign(x, y)); } + GPUd() constexpr static float TwoPi() { return 6.2831853f; } + GPUd() constexpr static float Pi() { return 3.1415927f; } + GPUd() constexpr static float Round(float x); + GPUd() constexpr static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } + GPUd() static uint32_t Float2UIntReint(float x); + GPUd() constexpr static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } + GPUd() constexpr static int32_t Float2IntRn(float x); + GPUd() 
constexpr static float Modf(float x, float y); + GPUd() constexpr static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } + GPUd() constexpr static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } + GPUd() constexpr static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() constexpr static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() constexpr static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } + GPUd() constexpr static uint32_t Clz(uint32_t val); + GPUd() constexpr static uint32_t Popcount(uint32_t val); GPUd() static void memcpy(void* dst, const void* src, size_t size); - GPUhdi() static float Hypot(float x, float y) { return Sqrt(x * x + y * y); } - GPUhdi() static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); } - GPUhdi() static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); } + GPUhdi() constexpr static float Hypot(float x, float y) { return Sqrt(x * x + y * y); } + GPUhdi() constexpr static float Hypot(float x, float y, float z) { return Sqrt(x * x + y * y + z * z); } + GPUhdi() constexpr static float Hypot(float x, float y, float z, float w) { return Sqrt(x * x + y * y + z * z + w * w); } template - GPUhd() static void Swap(T& a, T& b); + GPUhd() constexpr static void Swap(T& a, T& b); template GPUdi() static T AtomicExch(GPUglobalref() GPUgeneric() GPUAtomic(T) * addr, T val) @@ -162,14 +162,14 @@ class GPUCommonMath { GPUCommonMath::AtomicMinInternal(addr, val); } - GPUd() static int32_t Mul24(int32_t a, int32_t b); - GPUd() static float FMulRZ(float a, float b); + GPUd() constexpr static int32_t Mul24(int32_t a, int32_t b); + GPUd() constexpr static float FMulRZ(float a, float b); template GPUd() constexpr static T nextMultipleOf(T val); template - GPUhdni() static float Sum2(float w, 
Args... args); + GPUhdni() constexpr static float Sum2(float w, Args... args); private: template @@ -187,7 +187,7 @@ class GPUCommonMath typedef GPUCommonMath CAMath; template -GPUhdi() float GPUCommonMath::Sum2(float w, Args... args) +GPUhdi() constexpr float GPUCommonMath::Sum2(float w, Args... args) { if constexpr (sizeof...(Args) == 0) { return w * w; @@ -239,9 +239,9 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y) #endif // GPUCA_GPUCODE } -GPUdi() float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } +GPUdi() constexpr float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } -GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) +GPUdi() uint32_t GPUCommonMath::Float2UIntReint(float x) { #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) return __float_as_uint(x); @@ -253,37 +253,37 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) } #ifdef GPUCA_NO_FAST_MATH -GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } -GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), 
tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } -GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } -GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } +GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } +GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } +GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } +GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE((float)atan((double)x), (float)atan((double)x), atan(x)); } +GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE((float)atan2((double)y, (double)x), (float)atan2((double)y, (double)x), atan2(y, x)); } +GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE((float)sin((double)x), (float)sin((double)x), sin(x)); } +GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE((float)cos((double)x), (float)cos((double)x), cos(x)); } +GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE((float)tanf((double)x), (float)tanf((double)x), tan(x)); } +GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE((float)pow((double)x, (double)y), pow((double)x, (double)y), pow(x, y)); } 
+GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((float)asin((double)x), (float)asin((double)x), asin(x)); } +GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } +GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } +GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } +GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } +GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } #else -GPUdi() float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } -GPUdi() int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } -GPUhdi() float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } -GPUdi() float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); } -GPUhdi() float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } -GPUdi() float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); } -GPUdi() float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); } -GPUdi() float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); } -GPUdi() float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); } -GPUdi() float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); } -GPUdi() float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } -GPUdi() float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } -GPUdi() float GPUCommonMath::Exp(float x) { return 
GPUCA_CHOICE(expf(x), expf(x), exp(x)); } -GPUdi() bool GPUCommonMath::FiniteRelaxed(float x) { return true; } -GPUdi() bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } +GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } +GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } +GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } +GPUdi() constexpr float GPUCommonMath::ATan(float x) { return GPUCA_CHOICE(atanf(x), atanf(x), atan(x)); } +GPUhdi() constexpr float GPUCommonMath::ATan2(float y, float x) { return GPUCA_CHOICE(atan2f(y, x), atan2f(y, x), atan2(y, x)); } +GPUdi() constexpr float GPUCommonMath::Sin(float x) { return GPUCA_CHOICE(sinf(x), sinf(x), sin(x)); } +GPUdi() constexpr float GPUCommonMath::Cos(float x) { return GPUCA_CHOICE(cosf(x), cosf(x), cos(x)); } +GPUdi() constexpr float GPUCommonMath::Tan(float x) { return GPUCA_CHOICE(tanf(x), tanf(x), tan(x)); } +GPUdi() constexpr float GPUCommonMath::Pow(float x, float y) { return GPUCA_CHOICE(powf(x, y), powf(x, y), pow(x, y)); } +GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf(x), asinf(x), asin(x)); } +GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } +GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } +GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } +GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return true; } +GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } #endif GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) @@ -311,7 +311,7 @@ GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) #endif } -GPUdi() uint32_t 
GPUCommonMath::Clz(uint32_t x) +GPUdi() constexpr uint32_t GPUCommonMath::Clz(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) return x == 0 ? 32 : GPUCA_CHOICE(__builtin_clz(x), __clz(x), __builtin_clz(x)); // use builtin if available @@ -325,7 +325,7 @@ GPUdi() uint32_t GPUCommonMath::Clz(uint32_t x) #endif } -GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) +GPUdi() constexpr uint32_t GPUCommonMath::Popcount(uint32_t x) { #if (defined(__GNUC__) || defined(__clang__) || defined(__CUDACC__) || defined(__HIPCC__)) && !defined(__OPENCL__) // TODO: remove OPENCL when reported SPIR-V bug is fixed // use builtin if available @@ -338,7 +338,7 @@ GPUdi() uint32_t GPUCommonMath::Popcount(uint32_t x) } template -GPUhdi() void GPUCommonMath::Swap(T& a, T& b) +GPUhdi() constexpr void GPUCommonMath::Swap(T& a, T& b) { #ifndef GPUCA_GPUCODE_DEVICE std::swap(a, b); @@ -413,19 +413,19 @@ GPUdi() float GPUCommonMath::InvSqrt(float _x) } template <> -GPUhdi() float GPUCommonMath::Abs(float x) +GPUhdi() constexpr float GPUCommonMath::Abs(float x) { return GPUCA_CHOICE(fabsf(x), fabsf(x), fabs(x)); } template <> -GPUhdi() double GPUCommonMath::Abs(double x) +GPUhdi() constexpr double GPUCommonMath::Abs(double x) { return GPUCA_CHOICE(fabs(x), fabs(x), fabs(x)); } template <> -GPUhdi() int32_t GPUCommonMath::Abs(int32_t x) +GPUhdi() constexpr int32_t GPUCommonMath::Abs(int32_t x) { return GPUCA_CHOICE(abs(x), abs(x), abs(x)); } From 938108f38d43deaf499bc2fe96cd765b4288825a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 17:09:16 +0100 Subject: [PATCH 0207/1914] GPU TPC: Make GPUTPCGeometry fully constexpr --- GPU/GPUTracking/DataTypes/GPUTPCGeometry.h | 166 +++++++++--------- .../TPCClusterFinder/ClusterAccumulator.h | 2 +- .../TPCClusterFinder/GPUTPCClusterFinder.h | 2 - 3 files changed, 86 insertions(+), 84 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h 
b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h index 461ac9366ca23..9ad83bff363ac 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGeometry.h @@ -28,119 +28,123 @@ namespace o2::gpu { -// Copy of TPC constants from AliRoot:TPCGeometry / O2:TPC/Base/Mapper -// Should be unified, but cannot take the contants from the official headers for now, since we want it to be constexpr -class GPUTPCGeometry // TODO: Make values constexpr +// Copy of TPC constants from AliRoot:TPCGeometry / O2:TPC/Base/Mapper, cannot take the contants from the official headers for now, since we want it to be const / constexpr + +namespace gputpcgeometry_internal { #ifdef GPUCA_TPC_GEOMETRY_O2 - const float mX[GPUCA_ROW_COUNT] = {85.225f, 85.975f, 86.725f, 87.475f, 88.225f, 88.975f, 89.725f, 90.475f, 91.225f, 91.975f, 92.725f, 93.475f, 94.225f, 94.975f, 95.725f, 96.475f, 97.225f, 97.975f, 98.725f, 99.475f, 100.225f, 100.975f, - 101.725f, 102.475f, 103.225f, 103.975f, 104.725f, 105.475f, 106.225f, 106.975f, 107.725f, 108.475f, 109.225f, 109.975f, 110.725f, 111.475f, 112.225f, 112.975f, 113.725f, 114.475f, 115.225f, 115.975f, 116.725f, 117.475f, - 118.225f, 118.975f, 119.725f, 120.475f, 121.225f, 121.975f, 122.725f, 123.475f, 124.225f, 124.975f, 125.725f, 126.475f, 127.225f, 127.975f, 128.725f, 129.475f, 130.225f, 130.975f, 131.725f, 135.2f, 136.2f, 137.2f, - 138.2f, 139.2f, 140.2f, 141.2f, 142.2f, 143.2f, 144.2f, 145.2f, 146.2f, 147.2f, 148.2f, 149.2f, 150.2f, 151.2f, 152.2f, 153.2f, 154.2f, 155.2f, 156.2f, 157.2f, 158.2f, 159.2f, - 160.2f, 161.2f, 162.2f, 163.2f, 164.2f, 165.2f, 166.2f, 167.2f, 168.2f, 171.4f, 172.6f, 173.8f, 175.f, 176.2f, 177.4f, 178.6f, 179.8f, 181.f, 182.2f, 183.4f, 184.6f, 185.8f, - 187.f, 188.2f, 189.4f, 190.6f, 191.8f, 193.f, 194.2f, 195.4f, 196.6f, 197.8f, 199.f, 200.2f, 201.4f, 202.6f, 203.8f, 205.f, 206.2f, 209.65f, 211.15f, 212.65f, 214.15f, 215.65f, - 217.15f, 218.65f, 220.15f, 221.65f, 223.15f, 224.65f, 226.15f, 227.65f, 
229.15f, 230.65f, 232.15f, 233.65f, 235.15f, 236.65f, 238.15f, 239.65f, 241.15f, 242.65f, 244.15f, 245.65f}; - - const uint8_t mNPads[GPUCA_ROW_COUNT] = {66, 66, 66, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, - 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 92, 92, 92, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, - 82, 84, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 102, 102, 102, 104, 104, 104, 106, 110, - 110, 112, 112, 112, 114, 114, 114, 116, 116, 116, 118, 118, 118, 118, 118, 120, 120, 122, 122, 124, 124, 124, 126, 126, 128, 128, 128, 130, 130, 132, 132, 132, 134, 134, 136, 136, 138, 138}; - - const uint8_t mRegion[GPUCA_ROW_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; - const uint8_t mRegionRows[10] = {17, 15, 16, 15, 18, 16, 16, 14, 13, 12}; - const uint8_t mRegionStart[10] = {0, 17, 32, 48, 63, 81, 97, 113, 127, 140}; - - const uint8_t mSampaMapping[10] = {0, 0, 1, 1, 2, 3, 3, 4, 4, 2}; - const uint8_t mChannelOffset[10] = {0, 16, 0, 16, 0, 0, 16, 0, 16, 16}; - const uint8_t mSectorFECOffset[5] = {0, 15, 15 + 18, 15 + 18 + 18, 15 + 18 + 18 + 20}; - - const float mPadHeight[10] = {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 1.5f, 1.5f}; - const float mPadWidth[10] = {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}; - - static constexpr float FACTOR_T2Z = 250.f / 512.f; // Used in compression, must remain 
constant at 250cm, 512 time bins! - - public: - GPUd() int32_t GetRegion(int32_t row) const { return mRegion[row]; } - GPUd() int32_t GetRegionRows(int32_t region) const { return mRegionRows[region]; } - GPUd() int32_t GetRegionStart(int32_t region) const { return mRegionStart[region]; } - GPUd() int32_t GetSampaMapping(int32_t region) const { return mSampaMapping[region]; } - GPUd() int32_t GetChannelOffset(int32_t region) const { return mChannelOffset[region]; } - GPUd() int32_t GetSectorFECOffset(int32_t partition) const { return mSectorFECOffset[partition]; } - GPUd() int32_t GetROC(int32_t row) const { return row < 97 ? (row < 63 ? 0 : 1) : (row < 127 ? 2 : 3); } - GPUd() int32_t EndIROC() const { return 63; } - GPUd() int32_t EndOROC1() const { return 97; } - GPUd() int32_t EndOROC2() const { return 127; } +GPUconstexpr() float mX[GPUCA_ROW_COUNT] = {85.225f, 85.975f, 86.725f, 87.475f, 88.225f, 88.975f, 89.725f, 90.475f, 91.225f, 91.975f, 92.725f, 93.475f, 94.225f, 94.975f, 95.725f, 96.475f, 97.225f, 97.975f, 98.725f, 99.475f, 100.225f, 100.975f, + 101.725f, 102.475f, 103.225f, 103.975f, 104.725f, 105.475f, 106.225f, 106.975f, 107.725f, 108.475f, 109.225f, 109.975f, 110.725f, 111.475f, 112.225f, 112.975f, 113.725f, 114.475f, 115.225f, 115.975f, 116.725f, 117.475f, + 118.225f, 118.975f, 119.725f, 120.475f, 121.225f, 121.975f, 122.725f, 123.475f, 124.225f, 124.975f, 125.725f, 126.475f, 127.225f, 127.975f, 128.725f, 129.475f, 130.225f, 130.975f, 131.725f, 135.2f, 136.2f, 137.2f, + 138.2f, 139.2f, 140.2f, 141.2f, 142.2f, 143.2f, 144.2f, 145.2f, 146.2f, 147.2f, 148.2f, 149.2f, 150.2f, 151.2f, 152.2f, 153.2f, 154.2f, 155.2f, 156.2f, 157.2f, 158.2f, 159.2f, + 160.2f, 161.2f, 162.2f, 163.2f, 164.2f, 165.2f, 166.2f, 167.2f, 168.2f, 171.4f, 172.6f, 173.8f, 175.f, 176.2f, 177.4f, 178.6f, 179.8f, 181.f, 182.2f, 183.4f, 184.6f, 185.8f, + 187.f, 188.2f, 189.4f, 190.6f, 191.8f, 193.f, 194.2f, 195.4f, 196.6f, 197.8f, 199.f, 200.2f, 201.4f, 202.6f, 203.8f, 205.f, 206.2f, 
209.65f, 211.15f, 212.65f, 214.15f, 215.65f, + 217.15f, 218.65f, 220.15f, 221.65f, 223.15f, 224.65f, 226.15f, 227.65f, 229.15f, 230.65f, 232.15f, 233.65f, 235.15f, 236.65f, 238.15f, 239.65f, 241.15f, 242.65f, 244.15f, 245.65f}; + +GPUconstexpr() uint8_t mNPads[GPUCA_ROW_COUNT] = {66, 66, 66, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, + 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 92, 92, 92, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, + 82, 84, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 102, 102, 102, 104, 104, 104, 106, 110, + 110, 112, 112, 112, 114, 114, 114, 116, 116, 116, 118, 118, 118, 118, 118, 120, 120, 122, 122, 124, 124, 124, 126, 126, 128, 128, 128, 130, 130, 132, 132, 132, 134, 134, 136, 136, 138, 138}; + +GPUconstexpr() uint8_t mRegion[GPUCA_ROW_COUNT] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}; +GPUconstexpr() uint8_t mRegionRows[10] = {17, 15, 16, 15, 18, 16, 16, 14, 13, 12}; +GPUconstexpr() uint8_t mRegionStart[10] = {0, 17, 32, 48, 63, 81, 97, 113, 127, 140}; + +GPUconstexpr() uint8_t mSampaMapping[10] = {0, 0, 1, 1, 2, 3, 3, 4, 4, 2}; +GPUconstexpr() uint8_t mChannelOffset[10] = {0, 16, 0, 16, 0, 0, 16, 0, 16, 16}; +GPUconstexpr() uint8_t mSectorFECOffset[5] = {0, 15, 15 + 18, 15 + 18 + 18, 15 + 18 + 18 + 20}; + +GPUconstexpr() float mPadHeight[10] = {.75f, .75f, .75f, .75f, 1.f, 1.f, 1.2f, 1.2f, 1.5f, 1.5f}; 
+GPUconstexpr() float mPadWidth[10] = {.416f, .420f, .420f, .436f, .6f, .6f, .608f, .588f, .604f, .607f}; + +constexpr float FACTOR_T2Z = 250.f / 512.f; // Used in compression, must remain constant at 250cm, 512 time bins! #else - const float mX[GPUCA_ROW_COUNT] = {85.195f, 85.945f, 86.695f, 87.445f, 88.195f, 88.945f, 89.695f, 90.445f, 91.195f, 91.945f, 92.695f, 93.445f, 94.195f, 94.945f, 95.695f, 96.445f, 97.195f, 97.945f, 98.695f, 99.445f, 100.195f, 100.945f, 101.695f, - 102.445f, 103.195f, 103.945f, 104.695f, 105.445f, 106.195f, 106.945f, 107.695f, 108.445f, 109.195f, 109.945f, 110.695f, 111.445f, 112.195f, 112.945f, 113.695f, 114.445f, 115.195f, 115.945f, 116.695f, 117.445f, 118.195f, 118.945f, - 119.695f, 120.445f, 121.195f, 121.945f, 122.695f, 123.445f, 124.195f, 124.945f, 125.695f, 126.445f, 127.195f, 127.945f, 128.695f, 129.445f, 130.195f, 130.945f, 131.695f, 135.180f, 136.180f, 137.180f, 138.180f, 139.180f, 140.180f, - 141.180f, 142.180f, 143.180f, 144.180f, 145.180f, 146.180f, 147.180f, 148.180f, 149.180f, 150.180f, 151.180f, 152.180f, 153.180f, 154.180f, 155.180f, 156.180f, 157.180f, 158.180f, 159.180f, 160.180f, 161.180f, 162.180f, 163.180f, - 164.180f, 165.180f, 166.180f, 167.180f, 168.180f, 169.180f, 170.180f, 171.180f, 172.180f, 173.180f, 174.180f, 175.180f, 176.180f, 177.180f, 178.180f, 179.180f, 180.180f, 181.180f, 182.180f, 183.180f, 184.180f, 185.180f, 186.180f, - 187.180f, 188.180f, 189.180f, 190.180f, 191.180f, 192.180f, 193.180f, 194.180f, 195.180f, 196.180f, 197.180f, 198.180f, 199.430f, 200.930f, 202.430f, 203.930f, 205.430f, 206.930f, 208.430f, 209.930f, 211.430f, 212.930f, 214.430f, - 215.930f, 217.430f, 218.930f, 220.430f, 221.930f, 223.430f, 224.930f, 226.430f, 227.930f, 229.430f, 230.930f, 232.430f, 233.930f, 235.430f, 236.930f, 238.430f, 239.930f, 241.430f, 242.930f, 244.430f, 245.930f}; - - const uint8_t mNPads[GPUCA_ROW_COUNT] = {68, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 
84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 92, 92, 92, - 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 108, 108, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, 82, 84, 84, - 84, 86, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 96, 96, 96, 96, 98, 98, 98, 100, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 106, 108, - 108, 108, 110, 110, 110, 110, 112, 112, 114, 114, 114, 116, 116, 118, 118, 120, 120, 122, 122, 122, 124, 124, 126, 126, 128, 128, 130, 130, 130, 132, 132, 134, 134, 136, 136, 138, 138, 138, 140}; - - const float mPadHeight[3] = {.75f, 1.f, 1.5f}; - const float mPadWidth[3] = {.4f, .6f, .6f}; +GPUconstexpr() float mX[GPUCA_ROW_COUNT] = {85.195f, 85.945f, 86.695f, 87.445f, 88.195f, 88.945f, 89.695f, 90.445f, 91.195f, 91.945f, 92.695f, 93.445f, 94.195f, 94.945f, 95.695f, 96.445f, 97.195f, 97.945f, 98.695f, 99.445f, 100.195f, 100.945f, 101.695f, + 102.445f, 103.195f, 103.945f, 104.695f, 105.445f, 106.195f, 106.945f, 107.695f, 108.445f, 109.195f, 109.945f, 110.695f, 111.445f, 112.195f, 112.945f, 113.695f, 114.445f, 115.195f, 115.945f, 116.695f, 117.445f, 118.195f, 118.945f, + 119.695f, 120.445f, 121.195f, 121.945f, 122.695f, 123.445f, 124.195f, 124.945f, 125.695f, 126.445f, 127.195f, 127.945f, 128.695f, 129.445f, 130.195f, 130.945f, 131.695f, 135.180f, 136.180f, 137.180f, 138.180f, 139.180f, 140.180f, + 141.180f, 142.180f, 143.180f, 144.180f, 145.180f, 146.180f, 147.180f, 148.180f, 149.180f, 150.180f, 151.180f, 152.180f, 153.180f, 154.180f, 155.180f, 156.180f, 157.180f, 158.180f, 159.180f, 160.180f, 161.180f, 162.180f, 163.180f, + 164.180f, 165.180f, 166.180f, 167.180f, 168.180f, 169.180f, 170.180f, 171.180f, 172.180f, 173.180f, 174.180f, 175.180f, 176.180f, 177.180f, 178.180f, 179.180f, 180.180f, 181.180f, 182.180f, 183.180f, 184.180f, 185.180f, 186.180f, + 187.180f, 188.180f, 189.180f, 190.180f, 191.180f, 192.180f, 193.180f, 194.180f, 195.180f, 196.180f, 197.180f, 
198.180f, 199.430f, 200.930f, 202.430f, 203.930f, 205.430f, 206.930f, 208.430f, 209.930f, 211.430f, 212.930f, 214.430f, + 215.930f, 217.430f, 218.930f, 220.430f, 221.930f, 223.430f, 224.930f, 226.430f, 227.930f, 229.430f, 230.930f, 232.430f, 233.930f, 235.430f, 236.930f, 238.430f, 239.930f, 241.430f, 242.930f, 244.430f, 245.930f}; + +GPUconstexpr() uint8_t mNPads[GPUCA_ROW_COUNT] = {68, 68, 68, 68, 70, 70, 70, 72, 72, 72, 74, 74, 74, 76, 76, 76, 78, 78, 78, 80, 80, 80, 82, 82, 82, 84, 84, 84, 86, 86, 86, 88, 88, 88, 90, 90, 90, 92, 92, 92, + 94, 94, 94, 96, 96, 96, 98, 98, 98, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 108, 108, 74, 76, 76, 76, 76, 78, 78, 78, 80, 80, 80, 80, 82, 82, 82, 84, 84, + 84, 86, 86, 86, 86, 88, 88, 88, 90, 90, 90, 90, 92, 92, 92, 94, 94, 94, 96, 96, 96, 96, 98, 98, 98, 100, 100, 100, 100, 102, 102, 102, 104, 104, 104, 106, 106, 106, 106, 108, + 108, 108, 110, 110, 110, 110, 112, 112, 114, 114, 114, 116, 116, 118, 118, 120, 120, 122, 122, 122, 124, 124, 126, 126, 128, 128, 130, 130, 130, 132, 132, 134, 134, 136, 136, 138, 138, 138, 140}; + +GPUconstexpr() float mPadHeight[3] = {.75f, 1.f, 1.5f}; +GPUconstexpr() float mPadWidth[3] = {.4f, .6f, .6f}; + +constexpr float FACTOR_T2Z = 250.f / 1024.f; // Used in compression, must remain constant at 250cm, 1024 time bins! +#endif +} // namespace gputpcgeometry_internal - static constexpr float FACTOR_T2Z = 250.f / 1024.f; // Used in compression, must remain constant at 250cm, 1024 time bins! +class GPUTPCGeometry // TODO: Make values constexpr +{ + static constexpr float FACTOR_Z2T = 1.f / gputpcgeometry_internal::FACTOR_T2Z; public: - GPUd() int32_t GetRegion(int32_t row) const { return (row < 63 ? 0 : row < 63 + 64 ? 
1 : 2); } - GPUd() int32_t GetRegionRows(int32_t region) const { return 0; } // dummy - GPUd() int32_t GetRegionStart(int32_t region) const { return 0; } // dummy - GPUd() int32_t GetROC(int32_t row) const { return GetRegion(row); } - GPUd() int32_t EndIROC() const { return 63; } - GPUd() int32_t EndOROC1() const { return 63 + 64; } - GPUd() int32_t EndOROC2() const { return GPUCA_ROW_COUNT; } +#ifdef GPUCA_TPC_GEOMETRY_O2 + GPUd() static constexpr int32_t GetRegion(int32_t row) { return gputpcgeometry_internal::mRegion[row]; } + GPUd() static constexpr int32_t GetRegionRows(int32_t region) { return gputpcgeometry_internal::mRegionRows[region]; } + GPUd() static constexpr int32_t GetRegionStart(int32_t region) { return gputpcgeometry_internal::mRegionStart[region]; } + GPUd() static constexpr int32_t GetSampaMapping(int32_t region) { return gputpcgeometry_internal::mSampaMapping[region]; } + GPUd() static constexpr int32_t GetChannelOffset(int32_t region) { return gputpcgeometry_internal::mChannelOffset[region]; } + GPUd() static constexpr int32_t GetSectorFECOffset(int32_t partition) { return gputpcgeometry_internal::mSectorFECOffset[partition]; } + GPUd() static constexpr int32_t GetROC(int32_t row) { return row < 97 ? (row < 63 ? 0 : 1) : (row < 127 ? 2 : 3); } + GPUd() static constexpr int32_t EndIROC() { return 63; } + GPUd() static constexpr int32_t EndOROC1() { return 97; } + GPUd() static constexpr int32_t EndOROC2() { return 127; } +#else + GPUd() static constexpr int32_t GetRegion(int32_t row) { return (row < 63 ? 0 : row < 63 + 64 ? 
1 : 2); } + GPUd() static constexpr int32_t GetRegionRows(int32_t region) { return 0; } // dummy + GPUd() static constexpr int32_t GetRegionStart(int32_t region) { return 0; } // dummy + GPUd() static constexpr int32_t GetROC(int32_t row) { return GetRegion(row); } + GPUd() static constexpr int32_t EndIROC() { return 63; } + GPUd() static constexpr int32_t EndOROC1() { return 63 + 64; } + GPUd() static constexpr int32_t EndOROC2() { return GPUCA_ROW_COUNT; } #endif - private: - static constexpr float FACTOR_Z2T = 1.f / FACTOR_T2Z; - public: GPUd() static constexpr float TPCLength() { return 250.f - 0.275f; } - GPUd() float Row2X(int32_t row) const { return (mX[row]); } - GPUd() float PadHeight(int32_t row) const { return (mPadHeight[GetRegion(row)]); } - GPUd() float PadHeightByRegion(int32_t region) const { return (mPadHeight[region]); } - GPUd() float PadWidth(int32_t row) const { return (mPadWidth[GetRegion(row)]); } - GPUd() uint8_t NPads(int32_t row) const { return mNPads[row]; } + GPUd() static constexpr float Row2X(int32_t row) { return (gputpcgeometry_internal::mX[row]); } + GPUd() static constexpr float PadHeight(int32_t row) { return (gputpcgeometry_internal::mPadHeight[GetRegion(row)]); } + GPUd() static constexpr float PadHeightByRegion(int32_t region) { return (gputpcgeometry_internal::mPadHeight[region]); } + GPUd() static constexpr float PadWidth(int32_t row) { return (gputpcgeometry_internal::mPadWidth[GetRegion(row)]); } + GPUd() static constexpr uint8_t NPads(int32_t row) { return gputpcgeometry_internal::mNPads[row]; } - GPUd() float LinearPad2Y(int32_t sector, int32_t row, float pad) const + GPUd() static constexpr float LinearPad2Y(int32_t sector, int32_t row, float pad) { #ifdef GPUCA_TPC_GEOMETRY_O2 - const float u = (pad - 0.5f * (mNPads[row] - 1)) * PadWidth(row); + const float u = (pad - 0.5f * (gputpcgeometry_internal::mNPads[row] - 1)) * PadWidth(row); #else - const float u = (pad - 0.5f * mNPads[row]) * PadWidth(row); + const float u = 
(pad - 0.5f * gputpcgeometry_internal::mNPads[row]) * PadWidth(row); #endif return (sector >= GPUCA_NSECTORS / 2) ? -u : u; } - GPUd() static float LinearTime2Z(int32_t sector, float time) + GPUd() static constexpr float LinearTime2Z(int32_t sector, float time) { - const float v = 250.f - time * FACTOR_T2Z; // Used in compression, must remain constant at 250cm! + const float v = 250.f - time * gputpcgeometry_internal::FACTOR_T2Z; // Used in compression, must remain constant at 250cm! return (sector >= GPUCA_NSECTORS / 2) ? -v : v; } - GPUd() float LinearY2Pad(int32_t sector, int32_t row, float y) const + GPUd() static constexpr float LinearY2Pad(int32_t sector, int32_t row, float y) { const float u = (sector >= GPUCA_NSECTORS / 2) ? -y : y; #ifdef GPUCA_TPC_GEOMETRY_O2 - return u / PadWidth(row) + 0.5f * (mNPads[row] - 1); + return u / PadWidth(row) + 0.5f * (gputpcgeometry_internal::mNPads[row] - 1); #else - return u / PadWidth(row) + 0.5f * mNPads[row]; + return u / PadWidth(row) + 0.5f * gputpcgeometry_internal::mNPads[row]; #endif } - GPUd() static float LinearZ2Time(int32_t sector, float z) + GPUd() static constexpr float LinearZ2Time(int32_t sector, float z) { const float v = (sector >= GPUCA_NSECTORS / 2) ? 
-z : z; return (250.f - v) * FACTOR_Z2T; // Used in compression, must remain constant at 250cm } }; + } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index 73f7cb439775a..b8c2bd81d8b37 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -31,8 +31,8 @@ namespace gpu { struct ChargePos; -class GPUTPCGeometry; struct GPUParam; +class GPUTPCGeometry; class ClusterAccumulator { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index f59102aa6b5c3..3e9ea2c6f608b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -48,8 +48,6 @@ struct TPCPadGainCalib; struct ChargePos; -class GPUTPCGeometry; - class GPUTPCClusterFinder : public GPUProcessor { public: From 5e8b7b5b23362a0036df67ae6b8e52296de5573c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 12 Mar 2025 23:50:11 +0100 Subject: [PATCH 0208/1914] GPU Math: Revert some changes which are UB --- GPU/Common/GPUCommonMath.h | 18 ++++++++---------- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index e977b3679a4ee..c412662fc0c64 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -94,14 +94,12 @@ class GPUCommonMath GPUd() constexpr static float Pi() { return 3.1415927f; } GPUd() constexpr static float Round(float x); GPUd() constexpr static float Floor(float x) { return GPUCA_CHOICE(floorf(x), floorf(x), floor(x)); } - GPUd() static uint32_t Float2UIntReint(float x); + GPUd() static uint32_t Float2UIntReint(const float& x); GPUd() constexpr static uint32_t Float2UIntRn(float x) { return (uint32_t)(int32_t)(x + 0.5f); } GPUd() constexpr static 
int32_t Float2IntRn(float x); GPUd() constexpr static float Modf(float x, float y); - GPUd() constexpr static bool Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } - GPUd() constexpr static bool IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } - GPUd() constexpr static bool FiniteRelaxed(float x); // always true if not using NO_FAST_MATH - GPUd() constexpr static bool IsNaNRelaxed(float x); // always true if not using NO_FAST_MATH + GPUd() constexpr static bool Finite(float x); + GPUd() constexpr static bool IsNaN(float x); GPUd() constexpr static float QuietNaN() { return GPUCA_CHOICE(std::numeric_limits::quiet_NaN(), __builtin_nanf(""), nan(0u)); } GPUd() constexpr static uint32_t Clz(uint32_t val); GPUd() constexpr static uint32_t Popcount(uint32_t val); @@ -241,7 +239,7 @@ GPUdi() float2 GPUCommonMath::MakeFloat2(float x, float y) GPUdi() constexpr float GPUCommonMath::Modf(float x, float y) { return GPUCA_CHOICE(fmodf(x, y), fmodf(x, y), fmod(x, y)); } -GPUdi() uint32_t GPUCommonMath::Float2UIntReint(float x) +GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) { #if defined(GPUCA_GPUCODE_DEVICE) && (defined(__CUDACC__) || defined(__HIPCC__)) return __float_as_uint(x); @@ -266,8 +264,8 @@ GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE((floa GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE((float)acos((double)x), (float)acos((double)x), acos(x)); } GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float)log((double)x), (float)log((double)x), log(x)); } GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } -GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return Finite(x); } -GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return IsNaN(x); } +GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return 
GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } +GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } #else GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } @@ -282,8 +280,8 @@ GPUdi() constexpr float GPUCommonMath::ASin(float x) { return GPUCA_CHOICE(asinf GPUdi() constexpr float GPUCommonMath::ACos(float x) { return GPUCA_CHOICE(acosf(x), acosf(x), acos(x)); } GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE(logf(x), logf(x), log(x)); } GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } -GPUdi() constexpr bool GPUCommonMath::FiniteRelaxed(float x) { return true; } -GPUdi() constexpr bool GPUCommonMath::IsNaNRelaxed(float x) { return false; } +GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; } +GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; } #endif GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 1c61316ed454e..b9620b9385c73 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -51,7 +51,7 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") if (GPUCA_BUILD_DEBUG_SANITIZE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang endif() set(CMAKE_BUILD_TYPE DEBUG) else() From c3d005fe4386ec4b9da41bd63c75d1abb3b5353c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 09:35:43 
+0100 Subject: [PATCH 0209/1914] GPU TPC CF: Split clusterizer CXX functions out into .inc file to be used externally --- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 231 +--------------- .../TPCClusterFinder/GPUTPCCFClusterizer.h | 8 +- .../TPCClusterFinder/GPUTPCCFClusterizer.inc | 247 ++++++++++++++++++ 3 files changed, 253 insertions(+), 233 deletions(-) create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index 407deb6a588d0..2131347decec6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -25,6 +25,8 @@ using namespace o2::gpu; using namespace o2::gpu::tpccf; +#include "GPUTPCCFClusterizer.inc" + template <> GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC) { @@ -35,232 +37,3 @@ GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow); } - -GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, - processorType& clusterer, - const CfFragment& fragment, - GPUSharedMemory& smem, - const Array2D& chargeMap, - const ChargePos* filteredPeakPositions, - const GPUSettingsRec& calib, - MCLabelAccumulator* labelAcc, - uint32_t clusternum, - uint32_t maxClusterPerRow, - uint32_t* clusterInRow, - tpc::ClusterNative* clusterByRow, - uint32_t* 
clusterPosInRow) -{ - uint32_t idx = get_global_id(0); - - // For certain configurations dummy work items are added, so the total - // number of work items is dividable by 64. - // These dummy items also compute the last cluster but discard the result. - ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; - Charge charge = chargeMap[pos].unpack(); - - ClusterAccumulator pc; - CPU_ONLY(labelAcc->collect(pos, charge)); - - buildCluster( - calib, - chargeMap, - pos, - smem.posBcast, - smem.buf, - smem.innerAboveThreshold, - &pc, - labelAcc); - - if (idx >= clusternum) { - return; - } - if (fragment.isOverlap(pos.time())) { - if (clusterPosInRow) { - clusterPosInRow[idx] = maxClusterPerRow; - } - return; - } - tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap); - - if (rejectCluster) { - if (clusterPosInRow) { - clusterPosInRow[idx] = maxClusterPerRow; - } - return; - } - - uint32_t rowIndex = 0; - if (clusterByRow != nullptr) { - rowIndex = sortIntoBuckets( - clusterer, - myCluster, - pos.row(), - maxClusterPerRow, - clusterInRow, - clusterByRow); - if (clusterPosInRow != nullptr) { - clusterPosInRow[idx] = rowIndex; - } - } else if (clusterPosInRow) { - rowIndex = clusterPosInRow[idx]; - } - - CPU_ONLY(labelAcc->commit(pos.row(), rowIndex, maxClusterPerRow)); -} - -GPUdii() void GPUTPCCFClusterizer::updateClusterInner( - const GPUSettingsRec& calib, - uint16_t lid, - uint16_t N, - const PackedCharge* buf, - const ChargePos& pos, - ClusterAccumulator* cluster, - MCLabelAccumulator* labelAcc, - uint8_t* innerAboveThreshold) -{ - uint8_t aboveThreshold = 0; - - GPUCA_UNROLL(U(), U()) - for (uint16_t i = 0; i < N; i++) { - Delta2 d = cfconsts::InnerNeighbors[i]; - - PackedCharge p = buf[N * lid + i]; - - Charge q = cluster->updateInner(p, d); - - CPU_ONLY(labelAcc->collect(pos.delta(d), q)); - - aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i); - } - 
- innerAboveThreshold[lid] = aboveThreshold; - - GPUbarrier(); -} - -GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( - uint16_t lid, - uint16_t N, - uint16_t M, - uint16_t offset, - const PackedCharge* buf, - const ChargePos& pos, - ClusterAccumulator* cluster, - MCLabelAccumulator* labelAcc) -{ - GPUCA_UNROLL(U(), U()) - for (uint16_t i = offset; i < M + offset; i++) { - PackedCharge p = buf[N * lid + i]; - - Delta2 d = cfconsts::OuterNeighbors[i]; - - Charge q = cluster->updateOuter(p, d); - static_cast(q); // Avoid unused varible warning on GPU. - - CPU_ONLY(labelAcc->collect(pos.delta(d), q)); - } -} - -GPUdii() void GPUTPCCFClusterizer::buildCluster( - const GPUSettingsRec& calib, - const Array2D& chargeMap, - ChargePos pos, - ChargePos* posBcast, - PackedCharge* buf, - uint8_t* innerAboveThreshold, - ClusterAccumulator* myCluster, - MCLabelAccumulator* labelAcc) -{ - uint16_t ll = get_local_id(0); - - posBcast[ll] = pos; - GPUbarrier(); - - CfUtils::blockLoad( - chargeMap, - SCRATCH_PAD_WORK_GROUP_SIZE, - SCRATCH_PAD_WORK_GROUP_SIZE, - ll, - 0, - 8, - cfconsts::InnerNeighbors, - posBcast, - buf); - updateClusterInner( - calib, - ll, - 8, - buf, - pos, - myCluster, - labelAcc, - innerAboveThreshold); - - uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2; - - bool inGroup1 = ll < wgSizeHalf; - - uint16_t llhalf = (inGroup1) ? 
ll : (ll - wgSizeHalf); - - CfUtils::condBlockLoad( - chargeMap, - wgSizeHalf, - SCRATCH_PAD_WORK_GROUP_SIZE, - ll, - 0, - 16, - cfconsts::OuterNeighbors, - posBcast, - innerAboveThreshold, - buf); - - if (inGroup1) { - updateClusterOuter( - llhalf, - 16, - 16, - 0, - buf, - pos, - myCluster, - labelAcc); - } - -#if defined(GPUCA_GPUCODE) - CfUtils::condBlockLoad( - chargeMap, - wgSizeHalf, - SCRATCH_PAD_WORK_GROUP_SIZE, - ll, - 0, - 16, - cfconsts::OuterNeighbors, - posBcast + wgSizeHalf, - innerAboveThreshold + wgSizeHalf, - buf); - if (!inGroup1) { - updateClusterOuter( - llhalf, - 16, - 16, - 0, - buf, - pos, - myCluster, - labelAcc); - } -#endif -} - -GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets) -{ - uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u); - if (index < maxElemsPerBucket) { - buckets[maxElemsPerBucket * row + index] = cluster; - } else { - clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket); - CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket); - } - return index; -} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h index 411c38c39459e..79f3325ed9ad2 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h @@ -59,14 +59,14 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const Array2D&, const ChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*); + static GPUd() void buildCluster(const GPUSettingsRec&, const Array2D&, ChargePos, ChargePos*, PackedCharge*, 
uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); + + static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*); + private: static GPUd() void updateClusterInner(const GPUSettingsRec&, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*); static GPUd() void updateClusterOuter(uint16_t, uint16_t, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*); - - static GPUd() void buildCluster(const GPUSettingsRec&, const Array2D&, ChargePos, ChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); - - static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc new file mode 100644 index 0000000000000..c6d7a3b68c0a8 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc @@ -0,0 +1,247 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCCFClusterizer.cxx +/// \author Felix Weiglhofer + +#ifndef O2_GPU_CLUSTERIZER_INC_H +#define O2_GPU_CLUSTERIZER_INC_H + +GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, + processorType& clusterer, + const CfFragment& fragment, + GPUSharedMemory& smem, + const Array2D& chargeMap, + const ChargePos* filteredPeakPositions, + const GPUSettingsRec& calib, + MCLabelAccumulator* labelAcc, + uint32_t clusternum, + uint32_t maxClusterPerRow, + uint32_t* clusterInRow, + tpc::ClusterNative* clusterByRow, + uint32_t* clusterPosInRow) +{ + uint32_t idx = get_global_id(0); + + // For certain configurations dummy work items are added, so the total + // number of work items is dividable by 64. + // These dummy items also compute the last cluster but discard the result. + ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; + Charge charge = chargeMap[pos].unpack(); + + ClusterAccumulator pc; + CPU_ONLY(labelAcc->collect(pos, charge)); + + buildCluster( + calib, + chargeMap, + pos, + smem.posBcast, + smem.buf, + smem.innerAboveThreshold, + &pc, + labelAcc); + + if (idx >= clusternum) { + return; + } + if (fragment.isOverlap(pos.time())) { + if (clusterPosInRow) { + clusterPosInRow[idx] = maxClusterPerRow; + } + return; + } + tpc::ClusterNative myCluster; + bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), fragment.start, chargeMap); + + if (rejectCluster) { + if (clusterPosInRow) { + clusterPosInRow[idx] = maxClusterPerRow; + } + return; + } + + uint32_t rowIndex = 0; + if (clusterByRow != nullptr) { + rowIndex = sortIntoBuckets( + clusterer, + myCluster, + pos.row(), + maxClusterPerRow, + clusterInRow, + clusterByRow); + if (clusterPosInRow != nullptr) { + clusterPosInRow[idx] = rowIndex; + } + } else if (clusterPosInRow) { + rowIndex = clusterPosInRow[idx]; + } + + CPU_ONLY(labelAcc->commit(pos.row(), rowIndex, maxClusterPerRow)); +} + 
+GPUdii() void GPUTPCCFClusterizer::updateClusterInner( + const GPUSettingsRec& calib, + uint16_t lid, + uint16_t N, + const PackedCharge* buf, + const ChargePos& pos, + ClusterAccumulator* cluster, + MCLabelAccumulator* labelAcc, + uint8_t* innerAboveThreshold) +{ + uint8_t aboveThreshold = 0; + + GPUCA_UNROLL(U(), U()) + for (uint16_t i = 0; i < N; i++) { + Delta2 d = cfconsts::InnerNeighbors[i]; + + PackedCharge p = buf[N * lid + i]; + + Charge q = cluster->updateInner(p, d); + + CPU_ONLY(labelAcc->collect(pos.delta(d), q)); + + aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i); + } + + innerAboveThreshold[lid] = aboveThreshold; + + GPUbarrier(); +} + +GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( + uint16_t lid, + uint16_t N, + uint16_t M, + uint16_t offset, + const PackedCharge* buf, + const ChargePos& pos, + ClusterAccumulator* cluster, + MCLabelAccumulator* labelAcc) +{ + GPUCA_UNROLL(U(), U()) + for (uint16_t i = offset; i < M + offset; i++) { + PackedCharge p = buf[N * lid + i]; + + Delta2 d = cfconsts::OuterNeighbors[i]; + + Charge q = cluster->updateOuter(p, d); + static_cast(q); // Avoid unused varible warning on GPU. + + CPU_ONLY(labelAcc->collect(pos.delta(d), q)); + } +} + +GPUdii() void GPUTPCCFClusterizer::buildCluster( + const GPUSettingsRec& calib, + const Array2D& chargeMap, + ChargePos pos, + ChargePos* posBcast, + PackedCharge* buf, + uint8_t* innerAboveThreshold, + ClusterAccumulator* myCluster, + MCLabelAccumulator* labelAcc) +{ + uint16_t ll = get_local_id(0); + + posBcast[ll] = pos; + GPUbarrier(); + + CfUtils::blockLoad( + chargeMap, + SCRATCH_PAD_WORK_GROUP_SIZE, + SCRATCH_PAD_WORK_GROUP_SIZE, + ll, + 0, + 8, + cfconsts::InnerNeighbors, + posBcast, + buf); + updateClusterInner( + calib, + ll, + 8, + buf, + pos, + myCluster, + labelAcc, + innerAboveThreshold); + + uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2; + + bool inGroup1 = ll < wgSizeHalf; + + uint16_t llhalf = (inGroup1) ? 
ll : (ll - wgSizeHalf); + + CfUtils::condBlockLoad( + chargeMap, + wgSizeHalf, + SCRATCH_PAD_WORK_GROUP_SIZE, + ll, + 0, + 16, + cfconsts::OuterNeighbors, + posBcast, + innerAboveThreshold, + buf); + + if (inGroup1) { + updateClusterOuter( + llhalf, + 16, + 16, + 0, + buf, + pos, + myCluster, + labelAcc); + } + +#if defined(GPUCA_GPUCODE) + CfUtils::condBlockLoad( + chargeMap, + wgSizeHalf, + SCRATCH_PAD_WORK_GROUP_SIZE, + ll, + 0, + 16, + cfconsts::OuterNeighbors, + posBcast + wgSizeHalf, + innerAboveThreshold + wgSizeHalf, + buf); + if (!inGroup1) { + updateClusterOuter( + llhalf, + 16, + 16, + 0, + buf, + pos, + myCluster, + labelAcc); + } +#endif +} + +GPUd() uint32_t GPUTPCCFClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets) +{ + uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u); + if (index < maxElemsPerBucket) { + buckets[maxElemsPerBucket * row + index] = cluster; + } else { + clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISector * 1000 + row, index, maxElemsPerBucket); + CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket); + } + return index; +} + +#endif // O2_GPU_CLUSTERIZER_INC_H From db7b2f057d285cd65a434885fe9cbdbcadcfe18b Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Thu, 13 Mar 2025 17:54:22 +0100 Subject: [PATCH 0210/1914] GPU ITS: Fix broken initialisation + cleanup (#14058) --- .../tracking/include/ITStracking/TrackingConfigParam.h | 6 +++--- .../ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 10 +++++----- Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx | 1 - GPU/Workflow/src/GPUWorkflowITS.cxx | 1 - 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index 0cf44d08cac19..ec96321765534 100644 --- 
a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -56,13 +56,13 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper { // Use TGeo for mat. budget static const int MaxIter = 4; - static const int MinTrackLenght = 4; - static const int MaxTrackLenght = 7; + static const int MinTrackLength = 4; + static const int MaxTrackLength = 7; bool useMatCorrTGeo = false; // use full geometry to corect for material budget accounting in the fits. Default is to use the material budget LUT. bool useFastMaterial = false; // use faster material approximation for material budget accounting in the fits. int deltaRof = 0; // configure the width of the window in ROFs to be considered for the tracking. int minTrackLgtIter[MaxIter] = {}; // minimum track length at each iteration, used only if >0, otherwise use code defaults - float minPtIterLgt[MaxIter * (MaxTrackLenght - MinTrackLenght + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults + float minPtIterLgt[MaxIter * (MaxTrackLength - MinTrackLength + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults float sysErrY2[7] = {0}; // systematic error^2 in Y per layer float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer float maxChi2ClusterAttachment = -1.f; diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index b264ac46bc7b3..2c94c9bdb1f46 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -81,8 +81,8 @@ void ITSTrackingInterface::initialise() if (trackConf.minTrackLgtIter[ip] > 0) { param.MinTrackLength = trackConf.minTrackLgtIter[ip]; } - for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { - int lslot0 = 
(trackConf.MaxTrackLenght - ilg), lslot = lslot0 + ip * (trackConf.MaxTrackLenght - trackConf.MinTrackLenght + 1); + for (int ilg = trackConf.MaxTrackLength; ilg >= trackConf.MinTrackLength; ilg--) { + int lslot0 = (trackConf.MaxTrackLength - ilg), lslot = lslot0 + ip * (trackConf.MaxTrackLength - trackConf.MinTrackLength + 1); if (trackConf.minPtIterLgt[lslot] > 0.) { param.MinPt[lslot0] = trackConf.minPtIterLgt[lslot]; } @@ -127,8 +127,8 @@ void ITSTrackingInterface::initialise() for (size_t ip = 0; ip < trackParams.size(); ip++) { auto& param = trackParams[ip]; param.TrackletMinPt *= bFactor; - for (int ilg = trackConf.MaxTrackLenght; ilg >= trackConf.MinTrackLenght; ilg--) { - int lslot = trackConf.MaxTrackLenght - ilg; + for (int ilg = trackConf.MaxTrackLength; ilg >= trackConf.MinTrackLength; ilg--) { + int lslot = trackConf.MaxTrackLength - ilg; param.MinPt[lslot] *= bFactor; } } @@ -384,7 +384,7 @@ void ITSTrackingInterface::updateTimeDependentParams(framework::ProcessingContex geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::T2GRot, o2::math_utils::TransformType::T2G)); initialise(); getConfiguration(pc); - // + if (pc.services().get().inputTimesliceId == 0) { // print settings only for the 1st pipeling o2::its::VertexerParamConfig::Instance().printKeyValues(); o2::its::TrackerParamConfig::Instance().printKeyValues(); diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index dd4c40a2141d9..9e4c98ad6e9a1 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -44,7 +44,6 @@ void TrackerDPL::init(InitContext& ic) mITSTrackingInterface.setTraitsFromProvider(mChainITS->GetITSVertexerTraits(), mChainITS->GetITSTrackerTraits(), mChainITS->GetITSTimeframe()); - // mITSTrackingInterface.initialise() will be called from the ITSTrackingInterface::updateTimeDependentParams at 
1st initialization since it needs some run conditions } void TrackerDPL::stop() diff --git a/GPU/Workflow/src/GPUWorkflowITS.cxx b/GPU/Workflow/src/GPUWorkflowITS.cxx index 552c5fca5b83e..db9303c431ae7 100644 --- a/GPU/Workflow/src/GPUWorkflowITS.cxx +++ b/GPU/Workflow/src/GPUWorkflowITS.cxx @@ -40,7 +40,6 @@ void GPURecoWorkflowSpec::initFunctionITS(o2::framework::InitContext& ic) mSpecConfig.itsOverrBeamEst); mGPUReco->GetITSTraits(trkTraits, vtxTraits, mITSTimeFrame); mITSTrackingInterface->setTraitsFromProvider(vtxTraits, trkTraits, mITSTimeFrame); - mITSTrackingInterface->initialise(); } void GPURecoWorkflowSpec::finaliseCCDBITS(o2::framework::ConcreteDataMatcher& matcher, void* obj) From b27c2a3ff29645f75f52eab793a5fb3558f1f7a3 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Thu, 13 Mar 2025 09:48:32 +0100 Subject: [PATCH 0211/1914] Making float16 variables compatible with GPU types --- Common/ML/include/ML/3rdparty/GPUORTFloat16.h | 126 ++++++++++-------- 1 file changed, 72 insertions(+), 54 deletions(-) diff --git a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h index db65328409d3c..76fd6734cf9db 100644 --- a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h +++ b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h @@ -5,10 +5,18 @@ // - https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_float16.h // - https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_cxx_api.h +#ifndef GPUORTFLOAT16_H +#define GPUORTFLOAT16_H + +#ifndef GPUCA_GPUCODE_DEVICE #include #include #include #include +#endif + +#include "GPUCommonDef.h" +#include "GPUCommonMath.h" namespace o2 { @@ -50,19 +58,19 @@ struct Float16Impl { /// /// /// - constexpr static uint16_t ToUint16Impl(float v) noexcept; + GPUd() constexpr static uint16_t ToUint16Impl(float v) noexcept; /// /// Converts float16 to float /// /// float representation of float16 value - float 
ToFloatImpl() const noexcept; + GPUd() float ToFloatImpl() const noexcept; /// /// Creates an instance that represents absolute value. /// /// Absolute value - uint16_t AbsImpl() const noexcept + GPUd() uint16_t AbsImpl() const noexcept { return static_cast(val & ~kSignMask); } @@ -71,7 +79,7 @@ struct Float16Impl { /// Creates a new instance with the sign flipped. /// /// Flipped sign instance - uint16_t NegateImpl() const noexcept + GPUd() uint16_t NegateImpl() const noexcept { return IsNaN() ? val : static_cast(val ^ kSignMask); } @@ -92,13 +100,13 @@ struct Float16Impl { uint16_t val{0}; - Float16Impl() = default; + GPUdDefault() Float16Impl() = default; /// /// Checks if the value is negative /// /// true if negative - bool IsNegative() const noexcept + GPUd() bool IsNegative() const noexcept { return static_cast(val) < 0; } @@ -107,7 +115,7 @@ struct Float16Impl { /// Tests if the value is NaN /// /// true if NaN - bool IsNaN() const noexcept + GPUd() bool IsNaN() const noexcept { return AbsImpl() > kPositiveInfinityBits; } @@ -116,7 +124,7 @@ struct Float16Impl { /// Tests if the value is finite /// /// true if finite - bool IsFinite() const noexcept + GPUd() bool IsFinite() const noexcept { return AbsImpl() < kPositiveInfinityBits; } @@ -125,7 +133,7 @@ struct Float16Impl { /// Tests if the value represents positive infinity. /// /// true if positive infinity - bool IsPositiveInfinity() const noexcept + GPUd() bool IsPositiveInfinity() const noexcept { return val == kPositiveInfinityBits; } @@ -134,7 +142,7 @@ struct Float16Impl { /// Tests if the value represents negative infinity /// /// true if negative infinity - bool IsNegativeInfinity() const noexcept + GPUd() bool IsNegativeInfinity() const noexcept { return val == kNegativeInfinityBits; } @@ -143,7 +151,7 @@ struct Float16Impl { /// Tests if the value is either positive or negative infinity. 
/// /// True if absolute value is infinity - bool IsInfinity() const noexcept + GPUd() bool IsInfinity() const noexcept { return AbsImpl() == kPositiveInfinityBits; } @@ -152,7 +160,7 @@ struct Float16Impl { /// Tests if the value is NaN or zero. Useful for comparisons. /// /// True if NaN or zero. - bool IsNaNOrZero() const noexcept + GPUd() bool IsNaNOrZero() const noexcept { auto abs = AbsImpl(); return (abs == 0 || abs > kPositiveInfinityBits); @@ -162,7 +170,7 @@ struct Float16Impl { /// Tests if the value is normal (not zero, subnormal, infinite, or NaN). /// /// True if so - bool IsNormal() const noexcept + GPUd() bool IsNormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -174,7 +182,7 @@ struct Float16Impl { /// Tests if the value is subnormal (denormal). /// /// True if so - bool IsSubnormal() const noexcept + GPUd() bool IsSubnormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -186,13 +194,13 @@ struct Float16Impl { /// Creates an instance that represents absolute value. /// /// Absolute value - Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } + GPUd() Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } /// /// Creates a new instance with the sign flipped. 
/// /// Flipped sign instance - Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } + GPUd() Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } /// /// IEEE defines that positive and negative zero are equal, this gives us a quick equality check @@ -202,12 +210,12 @@ struct Float16Impl { /// first value /// second value /// True if both arguments represent zero - static bool AreZero(const Float16Impl& lhs, const Float16Impl& rhs) noexcept + GPUd() static bool AreZero(const Float16Impl& lhs, const Float16Impl& rhs) noexcept { return static_cast((lhs.val | rhs.val) & ~kSignMask) == 0; } - bool operator==(const Float16Impl& rhs) const noexcept + GPUd() bool operator==(const Float16Impl& rhs) const noexcept { if (IsNaN() || rhs.IsNaN()) { // IEEE defines that NaN is not equal to anything, including itself. @@ -216,9 +224,9 @@ struct Float16Impl { return val == rhs.val; } - bool operator!=(const Float16Impl& rhs) const noexcept { return !(*this == rhs); } + GPUd() bool operator!=(const Float16Impl& rhs) const noexcept { return !(*this == rhs); } - bool operator<(const Float16Impl& rhs) const noexcept + GPUd() bool operator<(const Float16Impl& rhs) const noexcept { if (IsNaN() || rhs.IsNaN()) { // IEEE defines that NaN is unordered with respect to everything, including itself. 
@@ -267,7 +275,7 @@ union float32_bits { }; // namespace detail template -inline constexpr uint16_t Float16Impl::ToUint16Impl(float v) noexcept +GPUdi() constexpr uint16_t Float16Impl::ToUint16Impl(float v) noexcept { detail::float32_bits f{}; f.f = v; @@ -316,7 +324,7 @@ inline constexpr uint16_t Float16Impl::ToUint16Impl(float v) noexcept } template -inline float Float16Impl::ToFloatImpl() const noexcept +GPUdi() float Float16Impl::ToFloatImpl() const noexcept { constexpr detail::float32_bits magic = {113 << 23}; constexpr unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift @@ -356,19 +364,19 @@ struct BFloat16Impl { /// /// /// - static uint16_t ToUint16Impl(float v) noexcept; + GPUd() static uint16_t ToUint16Impl(float v) noexcept; /// /// Converts bfloat16 to float /// /// float representation of bfloat16 value - float ToFloatImpl() const noexcept; + GPUd() float ToFloatImpl() const noexcept; /// /// Creates an instance that represents absolute value. /// /// Absolute value - uint16_t AbsImpl() const noexcept + GPUd() uint16_t AbsImpl() const noexcept { return static_cast(val & ~kSignMask); } @@ -377,7 +385,7 @@ struct BFloat16Impl { /// Creates a new instance with the sign flipped. /// /// Flipped sign instance - uint16_t NegateImpl() const noexcept + GPUd() uint16_t NegateImpl() const noexcept { return IsNaN() ? 
val : static_cast(val ^ kSignMask); } @@ -400,13 +408,13 @@ struct BFloat16Impl { uint16_t val{0}; - BFloat16Impl() = default; + GPUdDefault() BFloat16Impl() = default; /// /// Checks if the value is negative /// /// true if negative - bool IsNegative() const noexcept + GPUd() bool IsNegative() const noexcept { return static_cast(val) < 0; } @@ -415,7 +423,7 @@ struct BFloat16Impl { /// Tests if the value is NaN /// /// true if NaN - bool IsNaN() const noexcept + GPUd() bool IsNaN() const noexcept { return AbsImpl() > kPositiveInfinityBits; } @@ -424,7 +432,7 @@ struct BFloat16Impl { /// Tests if the value is finite /// /// true if finite - bool IsFinite() const noexcept + GPUd() bool IsFinite() const noexcept { return AbsImpl() < kPositiveInfinityBits; } @@ -433,7 +441,7 @@ struct BFloat16Impl { /// Tests if the value represents positive infinity. /// /// true if positive infinity - bool IsPositiveInfinity() const noexcept + GPUd() bool IsPositiveInfinity() const noexcept { return val == kPositiveInfinityBits; } @@ -442,7 +450,7 @@ struct BFloat16Impl { /// Tests if the value represents negative infinity /// /// true if negative infinity - bool IsNegativeInfinity() const noexcept + GPUd() bool IsNegativeInfinity() const noexcept { return val == kNegativeInfinityBits; } @@ -451,7 +459,7 @@ struct BFloat16Impl { /// Tests if the value is either positive or negative infinity. /// /// True if absolute value is infinity - bool IsInfinity() const noexcept + GPUd() bool IsInfinity() const noexcept { return AbsImpl() == kPositiveInfinityBits; } @@ -460,7 +468,7 @@ struct BFloat16Impl { /// Tests if the value is NaN or zero. Useful for comparisons. /// /// True if NaN or zero. - bool IsNaNOrZero() const noexcept + GPUd() bool IsNaNOrZero() const noexcept { auto abs = AbsImpl(); return (abs == 0 || abs > kPositiveInfinityBits); @@ -470,7 +478,7 @@ struct BFloat16Impl { /// Tests if the value is normal (not zero, subnormal, infinite, or NaN). 
/// /// True if so - bool IsNormal() const noexcept + GPUd() bool IsNormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -482,7 +490,7 @@ struct BFloat16Impl { /// Tests if the value is subnormal (denormal). /// /// True if so - bool IsSubnormal() const noexcept + GPUd() bool IsSubnormal() const noexcept { auto abs = AbsImpl(); return (abs < kPositiveInfinityBits) // is finite @@ -494,13 +502,13 @@ struct BFloat16Impl { /// Creates an instance that represents absolute value. /// /// Absolute value - Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } + GPUd() Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); } /// /// Creates a new instance with the sign flipped. /// /// Flipped sign instance - Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } + GPUd() Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); } /// /// IEEE defines that positive and negative zero are equal, this gives us a quick equality check @@ -510,7 +518,7 @@ struct BFloat16Impl { /// first value /// second value /// True if both arguments represent zero - static bool AreZero(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept + GPUd() static bool AreZero(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept { // IEEE defines that positive and negative zero are equal, this gives us a quick equality check // for two values by or'ing the private bits together and stripping the sign. 
They are both zero, @@ -520,14 +528,17 @@ struct BFloat16Impl { }; template -inline uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept +GPUdi() uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept { uint16_t result; - if (std::isnan(v)) { + if (o2::gpu::CAMath::IsNaN(v)) { result = kPositiveQNaNBits; } else { auto get_msb_half = [](float fl) { uint16_t result; +#ifdef GPUCA_GPUCODE + o2::gpu::CAMath::memcpy(&result, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); +#else #ifdef __cpp_if_constexpr if constexpr (detail::endian::native == detail::endian::little) #else @@ -538,6 +549,7 @@ inline uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept } else { std::memcpy(&result, &fl, sizeof(uint16_t)); } +#endif return result; }; @@ -554,14 +566,18 @@ inline uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept } template -inline float BFloat16Impl::ToFloatImpl() const noexcept +GPUdi() float BFloat16Impl::ToFloatImpl() const noexcept { if (IsNaN()) { - return std::numeric_limits::quiet_NaN(); + return o2::gpu::CAMath::QuietNaN(); } float result; char* const first = reinterpret_cast(&result); char* const second = first + sizeof(uint16_t); +#ifdef GPUCA_GPUCODE + first[0] = first[1] = 0; + o2::gpu::CAMath::memcpy(second, &val, sizeof(uint16_t)); +#else #ifdef __cpp_if_constexpr if constexpr (detail::endian::native == detail::endian::little) #else @@ -574,6 +590,7 @@ inline float BFloat16Impl::ToFloatImpl() const noexcept std::memcpy(first, &val, sizeof(uint16_t)); std::memset(second, 0, sizeof(uint16_t)); } +#endif return result; } @@ -610,26 +627,26 @@ struct Float16_t : OrtDataType::Float16Impl { /// /// Default constructor /// - Float16_t() = default; + GPUdDefault() Float16_t() = default; /// /// Explicit conversion to uint16_t representation of float16. 
/// /// uint16_t bit representation of float16 /// new instance of Float16_t - constexpr static Float16_t FromBits(uint16_t v) noexcept { return Float16_t(v); } + GPUd() constexpr static Float16_t FromBits(uint16_t v) noexcept { return Float16_t(v); } /// /// __ctor from float. Float is converted into float16 16-bit representation. /// /// float value - explicit Float16_t(float v) noexcept { val = Base::ToUint16Impl(v); } + GPUd() explicit Float16_t(float v) noexcept { val = Base::ToUint16Impl(v); } /// /// Converts float16 to float /// /// float representation of float16 value - float ToFloat() const noexcept { return Base::ToFloatImpl(); } + GPUd() float ToFloat() const noexcept { return Base::ToFloatImpl(); } /// /// Checks if the value is negative @@ -710,7 +727,7 @@ struct Float16_t : OrtDataType::Float16Impl { /// /// User defined conversion operator. Converts Float16_t to float. /// - explicit operator float() const noexcept { return ToFloat(); } + GPUdi() explicit operator float() const noexcept { return ToFloat(); } using Base::operator==; using Base::operator!=; @@ -751,26 +768,26 @@ struct BFloat16_t : OrtDataType::BFloat16Impl { public: using Base = OrtDataType::BFloat16Impl; - BFloat16_t() = default; + GPUdDefault() BFloat16_t() = default; /// /// Explicit conversion to uint16_t representation of bfloat16. /// /// uint16_t bit representation of bfloat16 /// new instance of BFloat16_t - static constexpr BFloat16_t FromBits(uint16_t v) noexcept { return BFloat16_t(v); } + GPUd() static constexpr BFloat16_t FromBits(uint16_t v) noexcept { return BFloat16_t(v); } /// /// __ctor from float. Float is converted into bfloat16 16-bit representation. 
/// /// float value - explicit BFloat16_t(float v) noexcept { val = Base::ToUint16Impl(v); } + GPUd() explicit BFloat16_t(float v) noexcept { val = Base::ToUint16Impl(v); } /// /// Converts bfloat16 to float /// /// float representation of bfloat16 value - float ToFloat() const noexcept { return Base::ToFloatImpl(); } + GPUd() float ToFloat() const noexcept { return Base::ToFloatImpl(); } /// /// Checks if the value is negative @@ -851,7 +868,7 @@ struct BFloat16_t : OrtDataType::BFloat16Impl { /// /// User defined conversion operator. Converts BFloat16_t to float. /// - explicit operator float() const noexcept { return ToFloat(); } + GPUdi() explicit operator float() const noexcept { return ToFloat(); } // We do not have an inherited impl for the below operators // as the internal class implements them a little differently @@ -864,4 +881,5 @@ static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match"); } // namespace OrtDataType -} // namespace o2 \ No newline at end of file +} // namespace o2 +#endif \ No newline at end of file From 650a6ff5d636a634a13bb7b8e20f0ec30d0f1f13 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 11:41:02 +0100 Subject: [PATCH 0212/1914] GPU TPC: Simplify host code for driving the extrapolation tracking --- GPU/GPUTracking/Base/GPUReconstruction.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 2 +- GPU/GPUTracking/Global/GPUChainTracking.h | 4 +- .../Global/GPUChainTrackingSectorTracker.cxx | 116 +++++------------- 4 files changed, 33 insertions(+), 91 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index a0248180a5e2c..93310284d7564 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -200,7 +200,7 @@ class GPUReconstruction void SetOutputControl(void* ptr, size_t size); void SetInputControl(void* ptr, size_t size); GPUOutputControl& OutputControl() { return mOutputControl; } - int32_t 
NStreams() const { return mNStreams; } + uint32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } RecoStepField GetRecoSteps() const { return mRecoSteps.steps; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index c186f916891ba..6dcb6f1d7e514 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -260,7 +260,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("configured max time bin exceeds 256 orbits"); return false; } - if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > mRec->NStreams()) { + if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 
1 : 0) > (int32_t)mRec->NStreams()) { GPUError("NStreams (%d) must be > nTPCClustererLanes (%d)", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 194573981838e..492ee65d1c9c1 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -220,7 +220,7 @@ class GPUChainTracking : public GPUChain GPUChainTracking(GPUReconstruction* rec, uint32_t maxTPCHits = GPUCA_MAX_CLUSTERS, uint32_t maxTRDTracklets = GPUCA_MAX_TRD_TRACKLETS); - int32_t ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput = true); + int32_t ExtrapolationTracking(uint32_t iSector, bool blocking); int32_t PrepareProfile(); int32_t DoProfile(); @@ -278,7 +278,6 @@ class GPUChainTracking : public GPUChain // Synchronization and Locks eventStruct* mEvents = nullptr; - volatile int32_t mSectorSelectorReady = 0; std::array mExtrapolationTrackingDone; std::vector mOutputQueue; @@ -299,6 +298,7 @@ class GPUChainTracking : public GPUChain void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType); void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); + uint32_t StreamForSector(uint32_t sector) const; std::mutex mMutexUpdateCalib; std::unique_ptr mPipelineFinalizationCtx; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 522ccbad47e59..dd71a797f2744 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -24,12 +24,18 @@ using namespace o2::gpu; -int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput) +uint32_t 
GPUChainTracking::StreamForSector(uint32_t sector) const { - runKernel({GetGridBlk(256, iSector % mRec->NStreams()), {iSector}}); - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), iSector % mRec->NStreams()); - if (synchronizeOutput) { - SynchronizeStream(iSector % mRec->NStreams()); + return sector % mRec->NStreams(); +} + +int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, bool blocking) +{ + const uint32_t stream = StreamForSector(iSector); + runKernel({GetGridBlk(256, stream), {iSector}}); + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), stream); + if (blocking) { + SynchronizeStream(stream); } return (0); } @@ -153,7 +159,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { GPUTPCTracker& trk = processors()->tpcTrackers[iSector]; GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk; - int32_t useStream = (iSector % mRec->NStreams()); + int32_t useStream = StreamForSector(iSector); if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Creating Sector Data (Sector %d)", iSector); @@ -234,102 +240,38 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } if (doGPU || GetProcessingSettings().debugLevel >= 1) { - if (doGPU) { - ReleaseEvent(mEvents->init); - } - - mSectorSelectorReady = 0; - - std::array transferRunning; - transferRunning.fill(true); - if (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)) { // TODO: This seems pretty obsolete code path, can probably be removed. 
- if (param().rec.tpc.extrapolationTracking) { - mExtrapolationTrackingDone.fill(0); - } - - uint32_t tmpSector = 0; - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Transfering Tracks from GPU to Host"); - } - - if (tmpSector == iSector) { - SynchronizeEvents(&mEvents->sector[iSector]); - } - while (tmpSector < NSECTORS && (tmpSector == iSector || IsEventDone(&mEvents->sector[tmpSector]))) { - ReleaseEvent(mEvents->sector[tmpSector]); - if (*processors()->tpcTrackers[tmpSector].NTracks() > 0) { - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[tmpSector].MemoryResOutput(), streamMap[tmpSector], &mEvents->sector[tmpSector]); - } else { - transferRunning[tmpSector] = false; - } - tmpSector++; - } - - if (GetProcessingSettings().keepAllMemory) { - TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true); - } - - if (transferRunning[iSector]) { - SynchronizeEvents(&mEvents->sector[iSector]); - } - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSector].NTracks(), *processors()->tpcTrackers[iSector].NTrackHits()); - } - - if (GetProcessingSettings().debugLevel >= 3) { - GPUInfo("Data ready for sector %d", iSector); - } - mSectorSelectorReady = iSector; - - if (param().rec.tpc.extrapolationTracking) { - for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) { - uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a); - uint32_t sectorLeft, sectorRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight); - - if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mExtrapolationTrackingDone[tmpSector2] == 0) { - ExtrapolationTracking(tmpSector2, 0); - mExtrapolationTrackingDone[tmpSector2] = 1; - } - } - } - } - } 
if (param().rec.tpc.extrapolationTracking) { std::vector blocking(NSECTORS * mRec->NStreams()); - for (int32_t i = 0; i < NSECTORS; i++) { - for (int32_t j = 0; j < mRec->NStreams(); j++) { - blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + for (uint32_t iStream = 0; iStream < mRec->NStreams(); iStream++) { + blocking[iSector * mRec->NStreams() + iStream] = StreamForSector(iSector) == iStream; } } for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector); - if (!(doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { - uint32_t sectorLeft, sectorRight; - GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); - if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) { - StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorLeft]); - blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()] = true; - } - if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()]) { - StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorRight]); - blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()] = true; - } + uint32_t sectorLeft, sectorRight; + GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); + if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)]) { + StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorLeft]); + blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)] = true; + } + if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)]) { + StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorRight]); + blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)] = 
true; } - ExtrapolationTracking(tmpSector, 0, false); + ExtrapolationTracking(tmpSector, false); } } - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - if (doGPU && transferRunning[iSector]) { + if (doGPU) { + ReleaseEvent(mEvents->init); + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { ReleaseEvent(mEvents->sector[iSector]); } } } else { - mSectorSelectorReady = NSECTORS; mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { if (param().rec.tpc.extrapolationTracking) { - ExtrapolationTracking(iSector, 0); + ExtrapolationTracking(iSector, true); } }); mRec->SetNActiveThreadsOuterLoop(1); From 705ebfb083c41183183c554c0cb17a6a9423e4c5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 09:05:24 +0100 Subject: [PATCH 0213/1914] GPU TPC CF: Split toNative back to finalize and toNative, to be used by NNClusterer --- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx | 9 +++++++-- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h | 3 ++- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index 77dc6e119df7d..b6792ce3a9ef5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -58,7 +58,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::ClusterNative& cn, const GPUParam& param, TPCTime timeOffset, const Array2D& chargeMap) +GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, const Charge q, TPCTime timeOffset) { mQtot += q; @@ -73,8 +73,13 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::Cl Pad pad = pos.pad(); mPadMean += pad; mTimeMean += timeOffset + pos.time(); +} + +GPUd() bool 
ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, tpc::ClusterNative& cn, const GPUParam& param, const Array2D& chargeMap) +{ + Pad pad = pos.pad(); - bool isEdgeCluster = pos.pad() < 2 || pos.pad() >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + bool isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge if (isEdgeCluster) { bool leftEdge = (pad < 2); if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index b8c2bd81d8b37..f0c6ac47f3c8a 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -41,7 +41,8 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); - GPUd() bool toNative(const ChargePos&, tpccf::Charge, tpc::ClusterNative&, const GPUParam&, tpccf::TPCTime, const Array2D&); + GPUd() void finalize(const ChargePos&, const tpccf::Charge, tpccf::TPCTime); + GPUd() bool toNative(const ChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D&); private: float mQtot = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc index c6d7a3b68c0a8..8a6b73be8bd8d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc @@ -60,7 +60,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t return; } tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), 
fragment.start, chargeMap); + pc.finalize(pos, charge, fragment.start); + bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterPosInRow) { From b183c5586b9b9cf2aa9a5b1c91a06589e5577690 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 14:52:24 +0100 Subject: [PATCH 0214/1914] GPU CMake: Auto-detect ROCm even if CMake prefix path not set --- dependencies/FindO2GPU.cmake | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 57c820fbe86b1..f8d41c032078f 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -198,7 +198,10 @@ endif() # Detect and enable HIP if(ENABLE_HIP) - if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm") + if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") + list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") + endif() + if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" OR CMAKE_PREFIX_PATH MATCHES "rocm") set(CMAKE_HIP_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_HIP_STANDARD_REQUIRED TRUE) if(HIP_AMDGPUTARGET) @@ -206,7 +209,7 @@ if(ENABLE_HIP) set(GPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) endif() - set(TMP_ROCM_DIR_LIST $ENV{CMAKE_PREFIX_PATH}) + set(TMP_ROCM_DIR_LIST "${CMAKE_PREFIX_PATH}:$ENV{CMAKE_PREFIX_PATH}") string(REPLACE ":" ";" TMP_ROCM_DIR_LIST "${TMP_ROCM_DIR_LIST}") list(FILTER TMP_ROCM_DIR_LIST INCLUDE REGEX rocm) list(POP_FRONT TMP_ROCM_DIR_LIST TMP_ROCM_DIR) From 2626074691611ab3b40693eea263fc27808fa556 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 15:00:35 +0100 Subject: [PATCH 0215/1914] GPU Standalone: fix prepare script using O2 env modules --- GPU/GPUTracking/Standalone/cmake/prepare.sh | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Standalone/cmake/prepare.sh b/GPU/GPUTracking/Standalone/cmake/prepare.sh index 826cdb5efb56c..17474b5fc6956 100755 --- a/GPU/GPUTracking/Standalone/cmake/prepare.sh +++ b/GPU/GPUTracking/Standalone/cmake/prepare.sh @@ -10,7 +10,7 @@ else WORK_DIR="$ALIBUILD_WORK_DIR" fi eval "`alienv shell-helper`" -alienv load O2/latest -for i in Vc boost fmt CMake ms_gsl Clang ninja; do +# alienv load O2/latest +for i in Vc boost fmt CMake ms_gsl Clang ninja TBB ROOT; do source sw/$ALIARCH/$i/latest/etc/profile.d/init.sh done From b5ab60d021e934b92f335b6267f0891f098e4a65 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Fri, 14 Mar 2025 14:33:16 +0100 Subject: [PATCH 0216/1914] GPU clusterizer with neural networks (#13981) * Copying kernels to implement NN clusterizer * First version of clusterizer in GPU code * Adding a compiling and running version with single-threaded ONNX model executions. Clusters are not getting published yet (FIXME) * Clusters now working by a hack * Working implementation of settings via GPUSettings.h and --configKeyValues "GPU_proc.[setting]=...;..." 
* Modifying the onnx_interface to include the right headers * Adjusting initialization for new ONNXRuntime version * Adjusting global settings and CF code for several settings * Adding return statement if cluster is rejected * Adding some statements back * Update to latest status of gpu clusterization * Fixing uchar -> uint8_t * Adding utils header * Updating kernels.cmake to uint8_t * Please consider the following formatting changes * Adding an ONNX CPU library in the O2 framework * Please consider the following formatting changes * Fixing macOS build issues with calling O*.data() * Fixing compiler issues and char -> uint8_t * Fixing curly braces * Fixing std::make_shared * Changing order for * Bug-fixing file name * Making NN clusterizer more efficient * Changing constexpr * Fixing build issues * Major changes to make clusterizer parallelizable. Problem remains: different sizes of nnClusterizerBatchedMode lead to different number of clusters if nnClusterizerBatchedMode < clusterer.mPmemory->counters.nClusters * Adjusting for default CF regression * Bug-fix for application of CF regression and logging message * Adding is_boundary check earlier to avoid out-of-bounds access * Bug-fixes for boundary reading * Updating to use explicit calls to kernels instead of if-statements * Bug-fix for class label application * Explicit casting solves regression issues. 
To be done: Correct publishing for class2 regression * Bug-fixes * Adding some documentation * Please consider the following formatting changes * Modifying for David's comments * Modifications from comments on PR * Please consider the following formatting changes * iSlice -> iSector * mISlice -> mISector * Minor bug-fixes * Adjusting for comments * Bug-fix for fullCI build * Adding GPUd() for on-device functions * Fixing compile issues, only thing missing: conversion of float to float16 * Let's see if this does the trick * Making functions (constructors) GPUd() (GPUdDefault()) * GPU kernels should now be findable * Adding ifdefs for standalone build and header exclusions in GPUORTFloat16 * Modifying the approach to not use std:: types. Still needs to be tested and need to do proper memory allocation * New version of clusterizer. Compiles locally, but segfaults in fillInput kernel. Testing with the CI now. * Please consider the following formatting changes * Adjust for comments * Please consider the following formatting changes * Merging dev and adjusting build issues * Adjusting for comments * Fixing incorrect #endif * Please consider the following formatting changes * Fix indentation, remove duplicate define * Fixing one memory issue. Segfault / memory leak persists * Adjusting for new toNative function * Fixing .finalize * Adjusting CMakeLists and other bugs * Adding GPUCA_HAS_ONNX only to tracking * Changing to fixed size for number of clusters * Fixed segfault. Not producing the right number of clusters yet. * Network now accepts clusters over all sectors * Whitespaces... 
* Some weird formatting * Please consider the following formatting changes * Removing white-spaces * Adding necessary if-statement to avoid automatic model loading * Removing GPUConstantMem, adding interOpNumThreads option * Found the bug where I loose clusters * Editor configured for whitespaces at EOF --------- Co-authored-by: ALICE Action Bot Co-authored-by: David Rohr --- Common/ML/include/ML/OrtInterface.h | 9 +- Common/ML/src/OrtInterface.cxx | 164 +++----- GPU/GPUTracking/Base/GPUConstantMem.h | 10 +- GPU/GPUTracking/Base/GPUMemoryResource.h | 1 + GPU/GPUTracking/Base/GPUReconstruction.cxx | 3 + GPU/GPUTracking/CMakeLists.txt | 12 +- .../Definitions/GPUDefGPUParameters.h | 13 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 30 ++ .../GPUTrackingLinkDef_O2_DataTypes.h | 1 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 6 + .../Global/GPUChainTrackingClusterizer.cxx | 112 ++++- GPU/GPUTracking/TPCClusterFinder/ChargePos.h | 1 + .../TPCClusterFinder/ClusterAccumulator.h | 11 + .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 55 +++ .../TPCClusterFinder/GPUTPCNNClusterizer.h | 77 ++++ .../GPUTPCNNClusterizerHost.cxx | 65 +++ .../GPUTPCNNClusterizerHost.h | 68 +++ .../GPUTPCNNClusterizerKernels.cxx | 386 ++++++++++++++++++ .../GPUTPCNNClusterizerKernels.h | 77 ++++ GPU/GPUTracking/kernels.cmake | 13 +- 20 files changed, 1002 insertions(+), 112 deletions(-) create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx create mode 100644 GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index 89631d59a3846..93549178848ca 100644 --- 
a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -41,6 +41,7 @@ class OrtModel OrtModel(std::unordered_map optionsMap) { reset(optionsMap); } void init(std::unordered_map optionsMap) { reset(optionsMap); } void reset(std::unordered_map); + bool isInitialized() { return mInitialized; } virtual ~OrtModel() = default; @@ -55,6 +56,9 @@ class OrtModel template // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h std::vector inference(std::vector>&); + template // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h + void inference(I*, size_t, O*); + // template // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type // std::vector inference(std::vector&); @@ -79,8 +83,9 @@ class OrtModel std::vector> mInputShapes, mOutputShapes; // Environment settings - std::string modelPath, device = "cpu", dtype = "float"; // device options should be cpu, rocm, migraphx, cuda - int intraOpNumThreads = 0, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; + bool mInitialized = false; + std::string modelPath, device = "cpu", dtype = "float", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda + int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; std::string printShape(const std::vector&); }; diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index eb124ff6f12c9..fc784dd14d2dc 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -44,17 +44,20 @@ void OrtModel::reset(std::unordered_map optionsMap) if (!optionsMap.contains("model-path")) { LOG(fatal) << "(ORT) 
Model path cannot be empty!"; } - modelPath = optionsMap["model-path"]; - device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU"); - dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float"); - deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0); - allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); - intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); - loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 2); - enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); - enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); - - std::string dev_mem_str = "Hip"; + + if (!optionsMap["model-path"].empty()) { + modelPath = optionsMap["model-path"]; + device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU"); + dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float"); + deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0); + allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); + intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); + interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); + loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0); + enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); + enableOptimizations = (optionsMap.contains("enable-optimizations") ? 
std::stoi(optionsMap["enable-optimizations"]) : 0); + + std::string dev_mem_str = "Hip"; #if defined(ORT_ROCM_BUILD) #if ORT_ROCM_BUILD == 1 if (device == "ROCM") { @@ -88,12 +91,15 @@ void OrtModel::reset(std::unordered_map optionsMap) if (device == "CPU") { (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); - if (intraOpNumThreads > 1) { + (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); + if (intraOpNumThreads > 1 || interOpNumThreads > 1) { (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); } else if (intraOpNumThreads == 1) { (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); } - LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " threads"; + if (loggingLevel < 2) { + LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; + } } (pImplOrt->sessionOptions).DisableMemPattern(); @@ -109,6 +115,9 @@ void OrtModel::reset(std::unordered_map optionsMap) } else { (pImplOrt->sessionOptions).DisableProfiling(); } + + mInitialized = true; + (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); @@ -154,16 +163,9 @@ void OrtModel::reset(std::unordered_map optionsMap) outputNamesChar.resize(mOutputNames.size(), nullptr); std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), [&](const std::string& str) { return str.c_str(); }); - - // Print names - LOG(info) << "\tInput Nodes:"; - for (size_t i = 0; i < mInputNames.size(); i++) { - LOG(info) << "\t\t" << mInputNames[i] << " : " << printShape(mInputShapes[i]); } - - LOG(info) << "\tOutput Nodes:"; - for (size_t i = 0; i < mOutputNames.size(); i++) { - LOG(info) << "\t\t" << mOutputNames[i] << " : " << printShape(mOutputShapes[i]); + if (loggingLevel < 
2) { + LOG(info) << "(ORT) Model loaded successfully! (input: " << printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")"; } } @@ -187,36 +189,6 @@ std::vector OrtModel::v2v(std::vector& input, bool clearInput) } } -template // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h -std::vector OrtModel::inference(std::vector& input) -{ - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} - -template // class I is the input data type, e.g. float, class O is the output data type, e.g. 
O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h -std::vector OrtModel::inference(std::vector>& input) -{ - std::vector inputTensor; - for (auto i : input) { - std::vector inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); - } - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} - std::string OrtModel::printShape(const std::vector& v) { std::stringstream ss(""); @@ -227,74 +199,68 @@ std::string OrtModel::printShape(const std::vector& v) return ss.str(); } -template <> -std::vector OrtModel::inference(std::vector& input) +template +std::vector OrtModel::inference(std::vector& input) { std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); + if constexpr (std::is_same_v) { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); + } else { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); + } // input.clear(); auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), 
outputNamesChar.data(), outputNamesChar.size()); - float* outputValues = outputTensors[0].template GetTensorMutableData(); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; + O* outputValues = outputTensors[0].template GetTensorMutableData(); + std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; outputTensors.clear(); return outputValuesVec; } -template <> -std::vector OrtModel::inference(std::vector& input) -{ - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - float* outputValues = outputTensors[0].template GetTensorMutableData(); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} +template std::vector OrtModel::inference(std::vector&); -template <> -std::vector OrtModel::inference(std::vector& input) -{ - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - OrtDataType::Float16_t* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, 
outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; -} +template std::vector OrtModel::inference(std::vector&); -template <> -std::vector OrtModel::inference(std::vector& input) +template std::vector OrtModel::inference(std::vector&); + +template +void OrtModel::inference(I* input, size_t input_size, O* output) { - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - std::vector inputTensor; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - OrtDataType::Float16_t* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; + std::vector inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + Ort::Value inputTensor = Ort::Value(nullptr); + if constexpr (std::is_same_v) { + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size, inputShape.data(), inputShape.size()); + } else { + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size, inputShape.data(), inputShape.size()); + } + + std::vector outputShape{inputShape[0], mOutputShapes[0][1]}; + size_t outputSize = (int64_t)(input_size * mOutputShapes[0][1] / mInputShapes[0][1]); + Ort::Value outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size()); + + (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, 
outputNamesChar.size()); // TODO: Not sure if 1 is correct here } -template <> -std::vector OrtModel::inference(std::vector>& input) +template void OrtModel::inference(OrtDataType::Float16_t*, size_t, float*); + +template void OrtModel::inference(float*, size_t, float*); + +template +std::vector OrtModel::inference(std::vector>& input) { std::vector inputTensor; for (auto i : input) { std::vector inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); + if constexpr (std::is_same_v) { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); + } else { + inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, i.data(), i.size(), inputShape.data(), inputShape.size())); + } } // input.clear(); auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - OrtDataType::Float16_t* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; + O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); + std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; outputTensors.clear(); return outputValuesVec; } diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 4f83fa48a64e0..8f1cc90f5ae93 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -34,12 +34,15 @@ #include "GPUKernelDebugOutput.h" #endif +#ifdef GPUCA_HAS_ONNX +#include 
"GPUTPCNNClusterizer.h" +#endif + namespace o2::gpu { struct GPUConstantMem { GPUParam param; - GPUTPCTracker - tpcTrackers[GPUCA_NSECTORS]; + GPUTPCTracker tpcTrackers[GPUCA_NSECTORS]; GPUTPCConvert tpcConverter; GPUTPCCompression tpcCompressor; GPUTPCDecompression tpcDecompressor; @@ -55,6 +58,9 @@ struct GPUConstantMem { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT GPUKernelDebugOutput debugOutput; #endif +#ifdef GPUCA_HAS_ONNX + GPUTPCNNClusterizer tpcNNClusterer[GPUCA_NSECTORS]; +#endif template GPUd() auto& getTRDTracker(); diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 3bb2c363db2a9..06e350db0bfc7 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -28,6 +28,7 @@ struct GPUMemoryReuse { }; enum Group : uint16_t { ClustererScratch, + NNClusterer, ClustererZS, TrackerScratch, TrackerDataLinks, diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 656fa37fb6a4c..df9a7380834ce 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -93,6 +93,9 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos for (uint32_t i = 0; i < NSECTORS; i++) { processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place processors()->tpcClusterer[i].mISector = i; +#ifdef GPUCA_HAS_ONNX + processors()->tpcNNClusterer[i].mISector = i; +#endif } #ifndef GPUCA_NO_ROOT mROOTDump = GPUROOTDumpCore::getAndCreate(); diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index c97742ac1d47f..d5a90dbd65ea3 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -159,8 +159,8 @@ set(HDRS_INSTALL ) set(SRCS_NO_CINT ${SRCS_NO_CINT} display/GPUDisplayInterface.cxx) -set(SRCS_NO_CINT - ${SRCS_NO_CINT} + +set(SRCS_NO_CINT ${SRCS_NO_CINT} Global/GPUChainITS.cxx ITS/GPUITSFitter.cxx ITS/GPUITSFitterKernels.cxx 
@@ -191,6 +191,10 @@ set(SRCS_NO_CINT Refit/GPUTrackingRefitKernel.cxx Merger/GPUTPCGMO2Output.cxx) +if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") + list(APPEND SRCS_NO_CINT TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx TPCClusterFinder/GPUTPCNNClusterizer.cxx TPCClusterFinder/GPUTPCNNClusterizerHost.cxx) +endif() + set(SRCS_DATATYPES ${SRCS_DATATYPES} DataTypes/TPCPadGainCalib.cxx @@ -273,6 +277,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PRIVATE_LINK_LIBRARIES O2::DataFormatsTPC SOURCES ${SRCS_DATATYPES}) target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) + o2_target_root_dictionary(GPUDataTypes HEADERS ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL} LINKDEF GPUTrackingLinkDef_O2_DataTypes.h) @@ -292,6 +297,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::TPCFastTransformation O2::DetectorsRaw O2::Steer + O2::ML PUBLIC_INCLUDE_DIRECTORIES . Definitions DataTypes @@ -317,7 +323,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${targetName} PRIVATE $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX=1) o2_target_root_dictionary(${MODULE} HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL} diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 3ed6c25762405..55f2e76344bd5 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -477,6 +477,9 @@ #ifndef GPUCA_LB_GPUTPCCFClusterizer #define GPUCA_LB_GPUTPCCFClusterizer 512 #endif + #ifndef GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels 512 + #endif #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 256 #endif @@ -495,6 +498,16 @@ #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression #define 
GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression + +#ifdef GPUCA_HAS_ONNX +#define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels +#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels +#endif + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9b6be7743e485..63fcf51004eae 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -222,6 +222,35 @@ AddOption(tpcTriggerHandling, bool, true, "", 0, "Enable TPC trigger handling") AddHelp("help", 'h') EndConfig() +BeginSubConfig(GPUSettingsProcessingNNclusterizer, nn, configStandalone.proc, "NN", 0, "Processing settings for neural network clusterizer", proc_nn) +AddOption(applyNNclusterizer, int, 0, "", 0, "(bool, default = 0), if the neural network clusterizer should be used.") +AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify inference device (cpu (default), rocm, cuda)") +AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id") +AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0), if the device memory should be allocated for inference") +AddOption(nnInferenceDtype, 
std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16 +AddOption(nnInferenceIntraOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetIntraOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") +AddOption(nnInferenceInterOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetInterOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") +AddOption(nnInferenceEnableOrtOptimization, unsigned int, 99, "", 0, "Enables graph optimizations in ONNX Runtime. Can be [0, 1, 2, 99] -> see https://github.com/microsoft/onnxruntime/blob/3f71d637a83dc3540753a8bb06740f67e926dc13/include/onnxruntime/core/session/onnxruntime_c_api.h#L347") +AddOption(nnInferenceOrtProfiling, int, 0, "", 0, "Enables profiling of model execution in ONNX Runtime") +AddOption(nnInferenceOrtProfilingPath, std::string, ".", "", 0, "If nnInferenceOrtProfiling is set, the path to store the profiling data") +AddOption(nnInferenceVerbosity, int, 1, "", 0, "0: No messages; 1: Warnings; 2: Warnings + major debugs; >3: All debugs") +AddOption(nnClusterizerAddIndexData, int, 1, "", 0, "If normalized index data (sector, row, pad), should be appended to the input") +AddOption(nnClusterizerSizeInputRow, int, 3, "", 0, "Size of the input to the NN (currently calcualted as (length-1)/2") +AddOption(nnClusterizerSizeInputPad, int, 3, "", 0, "Size of the input to the NN (currently calcualted as (length-1)/2") +AddOption(nnClusterizerSizeInputTime, int, 3, "", 0, "Size of the input to the NN (currently calcualted as (length-1)/2") +AddOption(nnClusterizerUseCfRegression, int, 0, "", 0, "(bool, default = false) If true, use the regression from the native clusterizer and not the NN") +AddOption(nnClusterizerApplyCfDeconvolution, int, 0, "", 0, "Applies the CFDeconvolution kernel before the digits to the network are filled") 
+AddOption(nnClusterizerBatchedMode, unsigned int, 1, "", 0, "(int, default = 1) If >1, the NN is evaluated on batched input of size specified in this variable") +AddOption(nnClusterizerVerbosity, int, -1, "", 0, "(int, default = -1) If >0, logging messages of the clusterizer will be displayed") +AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the boundary of the input to the NN") +AddOption(nnClusterizerApplyNoiseSuppression, int, 1, "", 0, "Applies the NoiseSuppression kernel before the digits to the network are filled") +AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path") +AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.") +AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path") +AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).") +AddHelp("help", 'h') +EndConfig() + BeginSubConfig(GPUSettingsProcessing, proc, configStandalone, "PROC", 0, "Processing settings", proc) AddOption(deviceNum, int32_t, -1, "gpuDevice", 0, "Set GPU device to use (-1: automatic, -2: for round-robin usage in timeslice-pipeline)") AddOption(gpuDeviceOnly, bool, false, "", 0, "Use only GPU as device (i.e. 
no CPU for OpenCL)") @@ -299,6 +328,7 @@ AddOption(printSettings, bool, false, "", 0, "Print all settings when initializi AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingParam, param) +AddSubConfig(GPUSettingsProcessingNNclusterizer, nn) AddHelp("help", 'h') EndConfig() #endif // __OPENCL__ diff --git a/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h b/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h index ab60827655a43..35ebbabe41672 100644 --- a/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h +++ b/GPU/GPUTracking/GPUTrackingLinkDef_O2_DataTypes.h @@ -30,6 +30,7 @@ #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessing + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessingParam + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessingRTC + ; +#pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsProcessingNNclusterizer + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsDisplay + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsDisplayLight + ; #pragma link C++ class o2::gpu::GPUConfigurableParamGPUSettingsDisplayHeavy + ; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 6dcb6f1d7e514..37ad164d20a60 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -103,6 +103,9 @@ void GPUChainTracking::RegisterPermanentMemoryAndProcessors() if (GetRecoSteps() & RecoStep::TPCClusterFinding) { for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUProcessor(&processors()->tpcClusterer[i], GetRecoStepsGPU() & RecoStep::TPCClusterFinding); +#ifdef GPUCA_HAS_ONNX + mRec->RegisterGPUProcessor(&processors()->tpcNNClusterer[i], GetRecoStepsGPU() & RecoStep::TPCClusterFinding); +#endif } } if (GetRecoSteps() & RecoStep::Refit) { @@ -148,6 +151,9 @@ void 
GPUChainTracking::RegisterGPUProcessors() if (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) { for (uint32_t i = 0; i < NSECTORS; i++) { mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcClusterer[i], &processors()->tpcClusterer[i]); +#ifdef GPUCA_HAS_ONNX + mRec->RegisterGPUDeviceProcessor(&processorsShadow()->tpcNNClusterer[i], &processors()->tpcNNClusterer[i]); +#endif } } if (GetRecoStepsGPU() & RecoStep::Refit) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 62a4a524d67df..63d56da37595b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -39,6 +39,11 @@ #include #endif +#ifdef GPUCA_HAS_ONNX +#include "GPUTPCNNClusterizerKernels.h" +#include "GPUTPCNNClusterizerHost.h" +#endif + using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; @@ -149,7 +154,8 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) + : 0; for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC @@ -606,6 +612,41 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) } +#ifdef GPUCA_HAS_ONNX + if (GetProcessingSettings().nn.applyNNclusterizer) { + uint32_t maxClusters = -1; + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters); + } + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; + const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; + clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression; + clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow; + clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad; + clustererNN.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime; + clustererNN.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData; + clustererNN.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 
3 : 0); + clustererNN.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode; + clustererNN.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue; + clustererNN.nnClusterizerTotalClusters = maxClusters; + clustererNN.nnClassThreshold = nn_settings.nnClassThreshold; + clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold; + if (clustererNN.nnSigmoidTrafoClassThreshold) { + clustererNN.nnClassThreshold = (float)std::log(clustererNN.nnClassThreshold / (1.f - clustererNN.nnClassThreshold)); + } + if (nn_settings.nnClusterizerVerbosity < 0) { + clustererNN.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity; + } else { + clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity; + } + clustererNN.nnClusterizerDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos; + GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); + AllocateRegisteredMemory(clustererNN.mMemoryId); + } + } +#endif + if (doGPU && mIOPtrs.tpcZS) { processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta; WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); @@ -854,6 +895,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) uint32_t iSector = iSectorBase + lane; GPUTPCClusterFinder& clusterer = processors()->tpcClusterer[iSector]; GPUTPCClusterFinder& clustererShadow = doGPU ? 
processorsShadow()->tpcClusterer[iSector] : clusterer; + if (doGPU) { SynchronizeStream(lane); } @@ -871,17 +913,77 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) return; } - runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + if (GetProcessingSettings().nn.applyNNclusterizer) { +#ifdef GPUCA_HAS_ONNX + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; + const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; + GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); + + if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + } + + float time_clusterizer = 0, time_fill = 0; + for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNN.nnClusterizerBatchedMode); batch++) { + uint batchStart = batch * clustererNN.nnClusterizerBatchedMode; + size_t iSize = CAMath::Min((uint)clustererNN.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); + + auto start0 = std::chrono::high_resolution_clock::now(); + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Filling the data + + auto stop0 = std::chrono::high_resolution_clock::now(); + auto start1 = std::chrono::high_resolution_clock::now(); + nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNN.modelProbabilities, clustererNN.nnClusterizerDtype); + if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) { + runKernel({GetGrid(iSize, lane), 
krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + } else { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + } + + if (!clustererNN.nnClusterizerUseCfRegression) { + nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnClusterizerDtype); + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 1 + if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) { + nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnClusterizerDtype); + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 2 + } + } + auto stop1 = std::chrono::high_resolution_clock::now(); + + time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; + time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; + } + auto start1 = std::chrono::high_resolution_clock::now(); + if (clustererNN.nnClusterizerUseCfRegression) { + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 + } + auto stop1 = std::chrono::high_resolution_clock::now(); + time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; + if (clustererNN.nnClusterizerVerbosity < 3) { + int acceptedClusters = 0; + for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { + acceptedClusters += clustererNN.outputDataClass[i]; + } + LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << 
iSector << "): filling data " << time_fill << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; + } +#else + GPUFatal("Project not compiled with neural network clusterization. Aborting."); +#endif + } else { + runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); + DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); + } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); if (doGPU && propagateMCLabels) { TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mScratchId, lane); if (doGPU) { SynchronizeStream(lane); } - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 1); // Computes MC labels } + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Sector %02d Fragment %02d Lane %d: Found clusters: digits %u peaks %u clusters %u", iSector, fragment.index, lane, (int32_t)clusterer.mPmemory->counters.nPositions, (int32_t)clusterer.mPmemory->counters.nPeaks, (int32_t)clusterer.mPmemory->counters.nClusters); } diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h index b4a4752b0f932..cdd489e0ef938 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/ChargePos.h @@ -45,6 +45,7 @@ struct ChargePos { GPUdi() tpccf::Row row() const { return gpad / TPC_PADS_PER_ROW_PADDED; } GPUdi() tpccf::Pad pad() const { return gpad % TPC_PADS_PER_ROW_PADDED - 
GPUCF_PADDING_PAD; } GPUdi() tpccf::TPCFragmentTime time() const { return timePadded - GPUCF_PADDING_TIME; } + GPUdi() tpccf::TPCFragmentTime globalTime() const { return timePadded; } private: // Maps the position of a pad given as row and index in that row to a unique diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index f0c6ac47f3c8a..90d977372b201 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -41,6 +41,17 @@ class ClusterAccumulator GPUd() tpccf::Charge updateInner(PackedCharge, tpccf::Delta2); GPUd() tpccf::Charge updateOuter(PackedCharge, tpccf::Delta2); + GPUd() void setFull(float qtot, float padMean, float padSigma, float timeMean, float timeSigma, uint8_t splitInPad, uint8_t splitInTime) + { + mQtot = qtot; + mPadMean = padMean; + mPadSigma = padSigma; + mTimeMean = timeMean; + mTimeSigma = timeSigma; + mSplitInPad = splitInPad; + mSplitInTime = splitInTime; + } + GPUd() void finalize(const ChargePos&, const tpccf::Charge, tpccf::TPCTime); GPUd() bool toNative(const ChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D&); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx new file mode 100644 index 0000000000000..6a9b6f546ae07 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -0,0 +1,55 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUTPCNNClusterizer.cxx +/// \author Christian Sonnabend + +#include "GPUReconstruction.h" +#include "ML/3rdparty/GPUORTFloat16.h" +#include "GPUTPCNNClusterizer.h" + +using namespace o2::gpu; + +void GPUTPCNNClusterizer::InitializeProcessor() {} + +void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {} + +void* GPUTPCNNClusterizer::setIOPointers(void* mem) +{ + if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData16, nnClusterizerBatchedMode * nnClusterizerElementSize); + } else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData32, nnClusterizerBatchedMode * nnClusterizerElementSize); + } + computePointerWithAlignment(mem, peakPositions, nnClusterizerBatchedMode); + computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); + computePointerWithAlignment(mem, centralCharges, nnClusterizerBatchedMode); + computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); + if (nnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, modelProbabilities, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } + if (!nnClusterizerUseCfRegression) { + if (nnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg1, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + } + if (nnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg2, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + } + } + return mem; +} + +void GPUTPCNNClusterizer::RegisterMemoryAllocation() +{ + AllocateAndInitializeLate(); + int32_t memType = GPUMemoryResource::MEMORY_SCRATCH | 
GPUMemoryResource::MEMORY_STACK; + mMemoryId = mRec->RegisterMemoryAllocation(this, &GPUTPCNNClusterizer::setIOPointers, memType, "TPCNNClusterer", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::NNClusterer, (uint16_t)(mISector % mRec->GetProcessingSettings().nTPCClustererLanes)}); +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h new file mode 100644 index 0000000000000..ea6340dfd48bc --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h @@ -0,0 +1,77 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCNNClusterizer.h +/// \author Christian Sonnabend + +#ifndef O2_GPUTPCNNCLUSTERIZER_H +#define O2_GPUTPCNNCLUSTERIZER_H + +#include "ChargePos.h" +#include "GPUProcessor.h" + +namespace o2::OrtDataType +{ +struct Float16_t; +} + +namespace o2::gpu +{ + +class GPUTPCNNClusterizer : public GPUProcessor +{ + public: + GPUTPCNNClusterizer() = default; + void* setIOPointers(void*); + void RegisterMemoryAllocation(); + void InitializeProcessor(); + void SetMaxData(const GPUTrackingInOutPointers&); + + // Neural network clusterization + + int nnClusterizerSizeInputRow = 3; + int nnClusterizerSizeInputPad = 3; + int nnClusterizerSizeInputTime = 3; + int nnClusterizerElementSize = -1; + bool nnClusterizerAddIndexData = true; + float nnClassThreshold = 0.16; + bool nnSigmoidTrafoClassThreshold = 1; + int nnClusterizerUseCfRegression = 0; + int nnClusterizerBatchedMode = 1; + int nnClusterizerTotalClusters = 1; + int nnClusterizerVerbosity = 0; + int nnClusterizerBoundaryFillValue = -1; + int nnClusterizerDumpDigits = 0; + int nnClusterizerApplyCfDeconvolution = 0; + int nnClusterizerModelClassNumOutputNodes = -1; + int nnClusterizerModelReg1NumOutputNodes = -1; + int nnClusterizerModelReg2NumOutputNodes = -1; + int nnClusterizerDtype = 0; // 0: float16, 1: float32 + int mISector = -1; + + // Memory allocation for neural network + uint class2_elements = 0; + float* inputData32 = nullptr; + OrtDataType::Float16_t* inputData16 = nullptr; + float* outputDataClass = nullptr; + float* modelProbabilities = nullptr; + float* outputDataReg1 = nullptr; + float* outputDataReg2 = nullptr; + + ChargePos* peakPositions = nullptr; + bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. 
Technically both flags are set in the same way -> ClusterAccumulator.cxx + float* centralCharges = nullptr; + int16_t mMemoryId = -1; +}; // class GPUTPCNNClusterizer + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx new file mode 100644 index 0000000000000..5002c63524020 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -0,0 +1,65 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction.
+ +/// \file GPUTPCNNClusterizerHost.cxx +/// \author Christian Sonnabend + +#include "GPUTPCNNClusterizerHost.h" +#include "GPUTPCNNClusterizer.h" +#include "GPUSettings.h" +#include "ML/3rdparty/GPUORTFloat16.h" + +using namespace o2::gpu; + +GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer) +{ + OrtOptions = { + {"model-path", settings.nnClassificationPath}, + {"device", settings.nnInferenceDevice}, + {"device-id", std::to_string(settings.nnInferenceDeviceId)}, + {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)}, + {"dtype", settings.nnInferenceDtype}, + {"intra-op-num-threads", std::to_string(settings.nnInferenceIntraOpNumThreads)}, + {"inter-op-num-threads", std::to_string(settings.nnInferenceInterOpNumThreads)}, + {"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)}, + {"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)}, + {"profiling-output-path", settings.nnInferenceOrtProfilingPath}, + {"logging-level", std::to_string(settings.nnInferenceVerbosity)}}; + + model_class.init(OrtOptions); + clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + + reg_model_paths = splitString(settings.nnRegressionPath, ":"); + + if (!settings.nnClusterizerUseCfRegression) { + if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) { + OrtOptions["model-path"] = reg_model_paths[0]; + model_reg_1.init(OrtOptions); + clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + } else { + OrtOptions["model-path"] = reg_model_paths[0]; + model_reg_1.init(OrtOptions); + clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + OrtOptions["model-path"] = reg_model_paths[1]; + model_reg_2.init(OrtOptions); + clusterer.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + } + } +} + 
+void GPUTPCNNClusterizerHost::networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype) +{ + if (dtype == 0) { + model.inference(clusterer.inputData16, size * clusterer.nnClusterizerElementSize, output); + } else { + model.inference(clusterer.inputData32, size * clusterer.nnClusterizerElementSize, output); + } +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h new file mode 100644 index 0000000000000..7efa0edecb893 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -0,0 +1,68 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCNNClusterizerHost.h +/// \author Christian Sonnabend + +#ifndef O2_GPUTPCNNCLUSTERIZERHOST_H +#define O2_GPUTPCNNCLUSTERIZERHOST_H + +#include +#include +#include +#include "ML/OrtInterface.h" + +using namespace o2::ml; + +namespace o2::OrtDataType +{ +struct Float16_t; +} + +namespace o2::gpu +{ + +class GPUTPCNNClusterizer; +struct GPUSettingsProcessingNNclusterizer; + +class GPUTPCNNClusterizerHost +{ + public: + GPUTPCNNClusterizerHost() = default; + GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); + + void networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype); + + std::unordered_map OrtOptions; + o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters + std::vector reg_model_paths; + + private: + // Avoid including CommonUtils/StringUtils.h + std::vector splitString(const std::string& input, const std::string& delimiter) + { + std::vector tokens; + std::size_t pos = 0; + std::size_t found; + + while ((found = input.find(delimiter, pos)) != std::string::npos) { + tokens.push_back(input.substr(pos, found - pos)); + pos = found + delimiter.length(); + } + tokens.push_back(input.substr(pos)); + + return tokens; + } +}; // class GPUTPCNNClusterizerHost + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx new file mode 100644 index 0000000000000..25cd2497fbf62 --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -0,0 +1,386 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUTPCNNClusterizerKernels.cxx +/// \author Christian Sonnabend + +#include "GPUTPCNNClusterizerKernels.h" +#include "GPUTPCCFClusterizer.h" + +using namespace o2::gpu; +using namespace o2::gpu::tpccf; + +#include "CfConsts.h" +#include "CfUtils.h" +#include "ClusterAccumulator.h" +#include "ML/3rdparty/GPUORTFloat16.h" + +#if !defined(GPUCA_GPUCODE) +#include "GPUHostDataTypes.h" +#include "MCLabelAccumulator.h" +#endif + +#ifdef GPUCA_GPUCODE +#include "GPUTPCCFClusterizer.inc" +#endif + +// Defining individual thread functions for data filling, determining the class label and running the CF clusterizer +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + if (clustererNN.outputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices + return; + } + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); + tpc::ClusterNative* clusterOut = (onlyMC) ? 
nullptr : clusterer.mPclusterByRow; + o2::gpu::GPUTPCCFClusterizer::GPUSharedMemory smem_new; + GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem_new, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + GPUTPCNNClusterizerKernels::fillInputData(nBlocks, nThreads, iBlock, iThread, processors, sector, dtype, batchStart); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + if (glo_idx >= (uint)processors.tpcNNClusterer[sector].nnClusterizerBatchedMode || glo_idx + batchStart >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { // Surplus threads must neither read the per-batch buffers (sized by nnClusterizerBatchedMode) nor write entries beyond the last cluster + return; + } + processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + auto& clusterer = processors.tpcNNClusterer[sector]; + uint glo_idx = get_global_id(0); + if (glo_idx >= (uint)clusterer.nnClusterizerBatchedMode || glo_idx + batchStart >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { // Surplus threads must neither read the per-batch buffers (sized by nnClusterizerBatchedMode) nor write entries beyond the last cluster + return; + } + uint elem_iterator = glo_idx * clusterer.nnClusterizerModelClassNumOutputNodes; + float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty] + uint class_label = 0; + for (int pIdx = elem_iterator; pIdx < elem_iterator +
clusterer.nnClusterizerModelClassNumOutputNodes; pIdx++) { + const float prob = clusterer.modelProbabilities[pIdx]; + if (pIdx == elem_iterator || prob > current_max_prob) { // The first output seeds the running maximum; afterwards only a strictly larger output takes over + current_max_prob = prob; + class_label = pIdx - elem_iterator; // The label is the offset within this element's outputs, not the index into the flat batch buffer + } + } + // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clusterer.nnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins" + clusterer.outputDataClass[glo_idx + batchStart] = class_label; +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + if (glo_idx >= (uint)processors.tpcNNClusterer[sector].nnClusterizerBatchedMode || glo_idx + batchStart >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { // Batch-aware guard: glo_idx indexes the per-batch buffers, glo_idx + batchStart the per-cluster ones + return; + } + GPUTPCNNClusterizerKernels::publishClustersReg1(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + if (glo_idx >= (uint)processors.tpcNNClusterer[sector].nnClusterizerBatchedMode || glo_idx + batchStart >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { // Batch-aware guard: glo_idx indexes the per-batch buffers, glo_idx + batchStart the per-cluster ones + return; + } + GPUTPCNNClusterizerKernels::publishClustersReg2(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); +} + +// The following arithmetic is done because the network is trained with a split between IROC and OROC boundary +GPUd() int GPUTPCNNClusterizerKernels::padOffset(int row_ref, int row_current, const GPUTPCGeometry& geo) +{ + return (int)((geo.NPads(row_current) - geo.NPads(row_ref)) / 2); +} + +GPUd() int GPUTPCNNClusterizerKernels::rowOffset(int row, int global_shift) +{ + return (row > 62 ?
global_shift : 0); +} + +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift, const GPUTPCGeometry& geo) +{ + if (pad < 0 || row < 0) { // Faster short-circuit + return true; + } else if (row < 63) { + return (pad >= static_cast(geo.NPads(row))); + } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network + return true; + } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { + return (pad >= static_cast(geo.NPads(row - global_shift))); + } else { + return true; + } +} + +// Filling the input data for the neural network where there is no boundary +GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& processors, uint8_t sector, int8_t dtype, uint batchStart) +{ + uint glo_idx = get_global_id(0); + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + if (glo_idx >= (uint)clustererNN.nnClusterizerBatchedMode || glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters) { // Surplus threads must not touch the per-batch buffers (sized by nnClusterizerBatchedMode) nor peaks beyond the last cluster + return; + } + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + Array2D isPeakMap(clusterer.mPpeakMap); + + uint write_idx = glo_idx * clustererNN.nnClusterizerElementSize; // Potential optimization: Either choose nnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId + + ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors + float central_charge = static_cast(chargeMap[peak].unpack()); + + clustererNN.peakPositions[glo_idx] = peak; + clustererNN.centralCharges[glo_idx] = central_charge; + clustererNN.outputDataClass[glo_idx + batchStart] = -1; + clustererNN.clusterFlags[2 * glo_idx] = false; // The flags are read before they are written in the loop below and the slot is reused by every batch, so start from a clean state + clustererNN.clusterFlags[2 * glo_idx + 1] = false; + + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); +#ifndef GPUCA_GPUCODE + GPUCA_UNROLL(U(), U()); +#endif + for (int r =
-clustererNN.nnClusterizerSizeInputRow; r <= clustererNN.nnClusterizerSizeInputRow; r++) { + bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); + int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r, clusterer.Param().tpcGeometry); + for (int p = -clustererNN.nnClusterizerSizeInputPad + pad_offset; p <= clustererNN.nnClusterizerSizeInputPad + pad_offset; p++) { + bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow, clusterer.Param().tpcGeometry); + for (int t = -clustererNN.nnClusterizerSizeInputTime; t <= clustererNN.nnClusterizerSizeInputTime; t++) { + if (!is_boundary) { + ChargePos tmp_pos(row + r, pad + p, time + t); + if (r == 0 && !clustererNN.clusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization + clustererNN.clusterFlags[2 * glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; + } + if (dtype == 0) { + clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + } else { + clustererNN.inputData32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + } + } else { + // Filling boundary just to make sure that no values are left unintentionally + if (dtype == 0) { + clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + } else { + clustererNN.inputData32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + } + } + write_idx++; + } + } + } + if (clustererNN.nnClusterizerAddIndexData) { + if (dtype == 0) { + clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISector / 36.f); + clustererNN.inputData16[write_idx + 1] = 
(OrtDataType::Float16_t)(row / 152.f); + clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row)); + } else { + clustererNN.inputData32[write_idx] = clusterer.mISector / 36.f; + clustererNN.inputData32[write_idx + 1] = row / 152.f; + clustererNN.inputData32[write_idx + 2] = static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row); + } + } +} + +GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); + MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); + tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + uint full_glo_idx = glo_idx + batchStart; + int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg1NumOutputNodes; + + // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size(); + + if (clustererNN.outputDataClass[full_glo_idx] == 1) { + + ClusterAccumulator pc; + + // Publishing logic is taken from default clusterizer + if (onlyMC) { + ClusterAccumulator dummy_pc; + CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + GPUTPCCFClusterizer::buildCluster( + clusterer.Param().rec, + chargeMap, + clustererNN.peakPositions[glo_idx], + smem.posBcast, + smem.buf, + smem.innerAboveThreshold, + &dummy_pc, + labelAcc); + } + + if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if 
(clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg1[model_output_index + 4], + static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg1[model_output_index], + clustererNN.outputDataReg1[model_output_index + 2], + (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg1[model_output_index + 1], + clustererNN.outputDataReg1[model_output_index + 3], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + + tpc::ClusterNative myCluster; + bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (rejectCluster) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + uint rowIndex = 0; + if (clusterer.mPclusterByRow != nullptr) { + rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( + clusterer, + myCluster, + clustererNN.peakPositions[glo_idx].row(), + clusterer.mNMaxClusterPerRow, + clusterer.mPclusterInRow, + clusterOut); + if (clusterer.mPclusterPosInRow != nullptr) { + clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex; + } + } else if (clusterer.mPclusterPosInRow) { + rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; + } + CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + } else { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } +} + +GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + auto& clusterer = processors.tpcClusterer[sector]; 
+ auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); + MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); + tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + uint full_glo_idx = glo_idx + batchStart; + int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg2NumOutputNodes; + + // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg2NumOutputNodes << " -- " << clustererNN.peakPositions.size() << " -- " << clustererNN.centralCharges.size(); + + if (clustererNN.outputDataClass[full_glo_idx] > 0) { + + ClusterAccumulator pc; + + if (onlyMC) { + ClusterAccumulator dummy_pc; + CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + GPUTPCCFClusterizer::buildCluster( + clusterer.Param().rec, + chargeMap, + clustererNN.peakPositions[glo_idx], + smem.posBcast, + smem.buf, + smem.innerAboveThreshold, + &dummy_pc, + labelAcc); + } + + if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + // Cluster 1 + pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 8], + static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index], + clustererNN.outputDataReg2[model_output_index + 4], + (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 2], + clustererNN.outputDataReg2[model_output_index + 6], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + + tpc::ClusterNative myCluster; + bool 
rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (rejectCluster) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + uint rowIndex = 0; + if (clusterer.mPclusterByRow != nullptr) { + rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( + clusterer, + myCluster, + clustererNN.peakPositions[glo_idx].row(), + clusterer.mNMaxClusterPerRow, + clusterer.mPclusterInRow, + clusterOut); + if (clusterer.mPclusterPosInRow != nullptr) { + clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex; + } + } else if (clusterer.mPclusterPosInRow) { + rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; + } + CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + + // Cluster 2 + pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 9], + static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index + 1], + clustererNN.outputDataReg2[model_output_index + 5], + (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 3], + clustererNN.outputDataReg2[model_output_index + 7], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + + rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (rejectCluster) { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } + + if (clusterer.mPclusterByRow != nullptr) { + rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( + clusterer, + myCluster, + clustererNN.peakPositions[glo_idx].row(), + clusterer.mNMaxClusterPerRow, + clusterer.mPclusterInRow, + 
clusterOut); + if (clusterer.mPclusterPosInRow != nullptr) { + clusterer.mPclusterPosInRow[full_glo_idx] = rowIndex; + } + } else if (clusterer.mPclusterPosInRow) { + rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; + } + // CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters? + } else { + if (clusterer.mPclusterPosInRow) { + clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; + } + return; + } +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h new file mode 100644 index 0000000000000..c7bd18115d61f --- /dev/null +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -0,0 +1,77 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUTPCNNClusterizerKernels.h +/// \author Christian Sonnabend + +#ifndef O2_GPU_NN_CLUSTERIZER_H +#define O2_GPU_NN_CLUSTERIZER_H + +#include "clusterFinderDefs.h" +#include "GPUGeneralKernels.h" +#include "GPUConstantMem.h" +#include "GPUTPCClusterFinder.h" +#include "Array2D.h" +#include "PackedCharge.h" +#include "GPUTPCNNClusterizer.h" + +namespace o2::tpc +{ +struct ClusterNative; +} // namespace o2::tpc + +namespace o2::gpu +{ + +class ClusterAccumulator; +class MCLabelAccumulator; + +class GPUTPCNNClusterizerKernels : public GPUKernelTemplate +{ + public: + static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels); + struct GPUSharedMemory { + // Regular cluster finder + ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_BUILD_N]; + uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE]; + }; + + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() + { + return GPUDataTypes::RecoStep::TPCClusterFinding; + } + + enum K : int32_t { + runCfClusterizer = 0, + fillInputNN = 1, + determineClass1Labels = 2, + determineClass2Labels = 3, + publishClass1Regression = 4, + publishClass2Regression = 5, + }; + + template + GPUd() static void Thread(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, processorType&, uint8_t = 0, int8_t = 0, int8_t = 0, uint = 0, Args...); + + private: + static GPUd() void fillInputData(int32_t, int32_t, int32_t, int32_t, processorType&, uint8_t, int8_t, uint); + static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); + static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); + + static GPUd() int padOffset(int, int, const GPUTPCGeometry&); + static GPUd() int rowOffset(int, int); + static GPUd() bool isBoundary(int, int, int, const GPUTPCGeometry&); +}; + +} // namespace o2::gpu + +#endif diff 
--git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 4b7aab75519fa..ad348a84264f0 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -24,6 +24,9 @@ o2_gpu_kernel_file_list(O2PROPAGATOR TrackParametrization.cxx TrackParametrizati o2_gpu_kernel_file_list(TPCCOMPRESSION GPUTPCCompressionTrackModel.cxx) o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) o2_gpu_kernel_file_list(TPCCLUSTERFINDER ERRORS ClusterAccumulator.cxx) +if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") +o2_gpu_kernel_file_list(TPCNNCLUSTERFINDER ERRORS ClusterAccumulator.cxx GPUTPCNNClusterizerKernels.cxx) +endif() o2_gpu_kernel_file_list(TRDTRACKER GPUTRDTrack.cxx GPUTRDTracker.cxx GPUTRDTrackletWord.cxx GeometryBase.cxx) o2_gpu_kernel_file_list(GLOBALREFIT TPCMERGER O2PROPAGATOR MATLUT GPUTrackingRefit.cxx) @@ -111,7 +114,15 @@ o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUS o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") +if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, 
publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +endif() +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) From 911f7dce4f1a36bf7031959822253ccb39f4b02a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sat, 15 Mar 2025 19:35:20 +0100 Subject: [PATCH 0217/1914] DPL: account for IO time correctly (#14064) --- .../AnalysisSupport/src/DataInputDirector.cxx | 56 +++++++++++++------ 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/Framework/AnalysisSupport/src/DataInputDirector.cxx b/Framework/AnalysisSupport/src/DataInputDirector.cxx index 2c51360cd9923..cfb5ca34b062b 100644 --- a/Framework/AnalysisSupport/src/DataInputDirector.cxx +++ b/Framework/AnalysisSupport/src/DataInputDirector.cxx @@ -387,18 +387,53 @@ int DataInputDescriptor::findDFNumber(int file, std::string dfName) return it - dfList.begin(); } +struct CalculateDelta { + CalculateDelta(uint64_t& target) + : mTarget(target) + { + start = uv_hrtime(); + } + ~CalculateDelta() + { + if (!active) { + return; + } + O2_SIGNPOST_ACTION(reader_memory_dump, [](void*) { + void (*dump_)(const char*); + if (void* sym = dlsym(nullptr, "igprof_dump_now")) { + dump_ = __extension__(void (*)(const char*)) sym; + if (dump_) { + std::string filename = fmt::format("reader-memory-dump-{}.gz", uv_hrtime()); + dump_(filename.c_str()); + } + } + }); + mTarget += (uv_hrtime() - start); + } + + void deactivate() { + 
active = false; + } + + bool active = true; + uint64_t& mTarget; + uint64_t start; + uint64_t stop; +}; + bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh, int counter, int numTF, std::string treename, size_t& totalSizeCompressed, size_t& totalSizeUncompressed) { - auto ioStart = uv_hrtime(); - + CalculateDelta t(mIOTime); auto folder = getFileFolder(counter, numTF); if (!folder.filesystem()) { + t.deactivate(); return false; } auto rootFS = std::dynamic_pointer_cast(folder.filesystem()); if (!rootFS) { + t.deactivate(); throw std::runtime_error(fmt::format(R"(Not a TFile filesystem!)")); } // FIXME: Ugly. We should detect the format from the treename, good enough for now. @@ -420,6 +455,7 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh // FIXME: we should distinguish between an actually missing object and one which has a non compatible // format. if (!format) { + t.deactivate(); LOGP(debug, "Could not find tree {}. Trying in parent file.", fullpath.path()); auto parentFile = getParentFile(counter, numTF, treename); if (parentFile != nullptr) { @@ -460,19 +496,6 @@ bool DataInputDescriptor::readTree(DataAllocator& outputs, header::DataHeader dh f2b->setLabel(treename.c_str()); f2b->fill(datasetSchema, format); - mIOTime += (uv_hrtime() - ioStart); - - O2_SIGNPOST_ACTION(reader_memory_dump, [](void*) { - void (*dump_)(const char*); - if (void* sym = dlsym(nullptr, "igprof_dump_now")) { - dump_ = __extension__(void (*)(const char*)) sym; - if (dump_) { - std::string filename = fmt::format("reader-memory-dump-{}.gz", uv_hrtime()); - dump_(filename.c_str()); - } - } - }); - return true; } @@ -820,7 +843,8 @@ bool DataInputDirector::readTree(DataAllocator& outputs, header::DataHeader dh, treename = aod::datamodel::getTreeName(dh); } - return didesc->readTree(outputs, dh, counter, numTF, treename, totalSizeCompressed, totalSizeUncompressed); + auto result = didesc->readTree(outputs, dh, counter, numTF, 
treename, totalSizeCompressed, totalSizeUncompressed); + return result; } void DataInputDirector::closeInputFiles() From 360be36ab9c948f133b745fbde7e6a822b47e8df Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 15 Mar 2025 20:03:50 +0100 Subject: [PATCH 0218/1914] GPU TPC CF: Add option to select 1pad or 2pad edge flag definition (#14067) --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + .../TPCClusterFinder/ClusterAccumulator.cxx | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 63fcf51004eae..b7f761c73ffc0 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -128,6 +128,7 @@ AddOptionRTC(cfInnerThreshold, uint8_t, 0, "", 0, "Cluster Finder extends cluste AddOptionRTC(cfMinSplitNum, uint8_t, 1, "", 0, "Minimum number of split charges in a cluster for the cluster to be marked as split") AddOptionRTC(cfNoiseSuppressionEpsilon, uint8_t, 10, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression") AddOptionRTC(cfNoiseSuppressionEpsilonRelative, uint8_t, 76, "", 0, "Cluster Finder: Difference between peak and charge for the charge to count as a minima during noise suppression, relative as fraction of 255") +AddOptionRTC(cfEdgeTwoPads, uint8_t, 1, "", 0, "Flag clusters with peak on the 2 pads closes to the sector edge as edge cluster") AddOptionRTC(nWays, uint8_t, 3, "", 0, "Do N fit passes in final fit of merger") AddOptionRTC(nWaysOuter, int8_t, 0, "", 0, "Store outer param") AddOptionRTC(trackFitRejectMode, int8_t, 5, "", 0, "0: no limit on rejection or missed hits, >0: break after n rejected hits, <0: reject at max -n hits") diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index b6792ce3a9ef5..b3b3c64095017 100644 --- 
a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -79,14 +79,19 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, t { Pad pad = pos.pad(); - bool isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge - if (isEdgeCluster) { - bool leftEdge = (pad < 2); - if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { - isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. - } else if (leftEdge ? (pad < mPadMean) : (pad > mPadMean)) { - mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak + bool isEdgeCluster; + if (param.rec.tpc.cfEdgeTwoPads) { + isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + if (isEdgeCluster) { + bool leftEdge = (pad < 2); + if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { + isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. + } else if (leftEdge ? 
(pad < mPadMean) : (pad > mPadMean)) { + mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak + } } + } else { + isEdgeCluster = pad == 0 || pad == param.tpcGeometry.NPads(pos.row()) - 1; } cn.qTot = CAMath::Float2UIntRn(mQtot); From b0776cad0fb5c1598bbe80628ff3b2ae492dc2aa Mon Sep 17 00:00:00 2001 From: pillot Date: Sat, 15 Mar 2025 20:11:06 +0100 Subject: [PATCH 0219/1914] [MCH] optional setting of CCDB file time window (#14059) --- Detectors/MUON/MCH/Conditions/README.md | 2 + .../MCH/Conditions/src/scan-hvlv-ccdb.cxx | 54 +++++++++++-------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/Detectors/MUON/MCH/Conditions/README.md b/Detectors/MUON/MCH/Conditions/README.md index d35fdcd0a0958..21892a7478d86 100644 --- a/Detectors/MUON/MCH/Conditions/README.md +++ b/Detectors/MUON/MCH/Conditions/README.md @@ -73,6 +73,8 @@ Usage: change HV thresholds -d [ --duration ] arg (=0) minimum duration (ms) of HV/LV issues to consider + -i [ --interval ] arg (=30) creation time interval (minutes) between + CCDB files -w [ --warning ] arg (=1) warning level (0, 1 or 2) -p [ --print ] arg (=1) print level (0, 1, 2 or 3) -o [ --output ] arg (=scan.root) output root file name diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx index 32cd365916c63..307759c97a0c3 100644 --- a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -153,6 +153,20 @@ std::string getTime(uint64_t ts) return time; } +//---------------------------------------------------------------------------- +std::string getDuration(uint64_t tStart, uint64_t tStop) +{ + /// get the duration (dd hh:mm:ss) between the two time stamps (ms) + + auto dt = ms2s(tStop - tStart); + auto s = dt % 60; + auto m = (dt / 60) % 60; + auto h = (dt / 3600) % 24; + auto d = dt / 86400; + + return fmt::format("{:02}d {:02}:{:02}:{:02}", d, h, m, s); +} + 
//---------------------------------------------------------------------------- std::set getRuns(std::string runList) { @@ -283,15 +297,17 @@ void drawRunBoudaries(const RBMAP& runBoundaries, TCanvas* c) } //---------------------------------------------------------------------------- -DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, uint64_t tStart, uint64_t tStop) +DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, + uint64_t tStart, uint64_t tStop, uint64_t timeInterval) { /// get the time boundaries of every HV/LV files found in the time range - // add extra margin (ms) of ± 1 min to the creation time, which occurs every 30 min - static const uint64_t timeMarging[2] = {60000, 1860000}; + // add an extra margin (ms) of ± 1 min to the creation time, + // which corresponds to the end of the time interval covered by the file + static const uint64_t timeMarging = 60000; std::istringstream fileInfo(api.list(what.c_str(), false, "text/plain", - tStop + timeMarging[1], tStart - timeMarging[0])); + tStop + timeInterval + timeMarging, tStart - timeMarging)); DPBMAP dpBoundaries{}; std::string dummy{}; @@ -357,7 +373,7 @@ void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, } //---------------------------------------------------------------------------- -void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV) +void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t timeInterval) { /// print the time boundaries of every HV/LV files found in the full time range @@ -365,7 +381,13 @@ void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV) printf("------------------------------------\n"); for (auto [tStart, tStop] : dpBoundaries) { - printf("%llu - %llu (%s - %s)\n", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + printf("%llu - %llu (%s - %s)", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + if (tStop - tStart < 60000 * (timeInterval - 1) || tStop 
- tStart > 60000 * (timeInterval + 1)) { + printf("\e[0;31m ! warning: validity range %s != %llu±1 min\e[0m\n", + getDuration(tStart, tStop).c_str(), timeInterval); + } else { + printf("\n"); + } } printf("------------------------------------\n"); @@ -400,20 +422,6 @@ void drawLimit(double limit, TCanvas* c) l->Draw(); } -//---------------------------------------------------------------------------- -std::string getDuration(uint64_t tStart, uint64_t tStop) -{ - /// get the duration (dd hh:mm:ss) between the two time stamps (ms) - - auto dt = ms2s(tStop - tStart); - auto s = dt % 60; - auto m = (dt / 60) % 60; - auto h = (dt / 3600) % 24; - auto d = dt / 86400; - - return fmt::format("{:02}d {:02}:{:02}:{:02}", d, h, m, s); -} - //---------------------------------------------------------------------------- double getValue(DPVAL dp) { @@ -943,6 +951,7 @@ int main(int argc, char** argv) std::string what = ""; std::string config = ""; uint64_t minDuration = 0; + uint64_t timeInterval = 30; int warningLevel = 1; int printLevel = 1; std::string outFileName = ""; @@ -955,6 +964,7 @@ int main(int argc, char** argv) ("channels,c",po::value(&what)->default_value(""),R"(channel(s) to scan ("HV" or "LV" or comma separated list of (part of) DCS aliases))") ("configKeyValues",po::value(&config)->default_value(""),"Semicolon separated key=value strings to change HV thresholds") ("duration,d",po::value(&minDuration)->default_value(0),"minimum duration (ms) of HV/LV issues to consider") + ("interval,i",po::value(&timeInterval)->default_value(30),"creation time interval (minutes) between CCDB files") ("warning,w",po::value(&warningLevel)->default_value(1),"warning level (0, 1 or 2)") ("print,p",po::value(&printLevel)->default_value(1),"print level (0, 1, 2 or 3)") ("output,o",po::value(&outFileName)->default_value("scan.root"),"output root file name") @@ -1021,9 +1031,9 @@ int main(int argc, char** argv) // extract the time boundaries for each HV/LV file in the full time range auto 
dpBoundaries = getDPBoundaries(api, path.c_str(), runBoundaries.begin()->second.first, - runBoundaries.rbegin()->second.second); + runBoundaries.rbegin()->second.second, timeInterval * 60000); if (printLevel > 0) { - printDPBoundaries(dpBoundaries, scanHV); + printDPBoundaries(dpBoundaries, scanHV, timeInterval); } checkDPBoundaries(dpBoundaries, scanHV, runBoundaries.begin()->second.first, runBoundaries.rbegin()->second.second); From 4de0c6c5b3c7a259c182c62666cf8f211277d009 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:12:30 +0100 Subject: [PATCH 0220/1914] GPU: Provide general GPUFailedMsg functionality also externally --- GPU/Common/CMakeLists.txt | 1 + GPU/Common/GPUCommonChkErr.h | 21 +++++++++++++++++++ GPU/Common/GPUCommonDefAPI.h | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 15 +++++++++++++ GPU/GPUTracking/Base/GPUReconstruction.h | 3 +++ .../Base/GPUReconstructionDeviceBase.cxx | 6 +++++- .../Base/GPUReconstructionDeviceBase.h | 1 + .../Base/cuda/GPUReconstructionCUDA.cu | 16 ++------------ .../Base/cuda/GPUReconstructionCUDA.h | 4 ++-- .../GPUReconstructionCUDAExternalProvider.cu | 2 +- .../cuda/GPUReconstructionCUDAInternals.h | 4 +--- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 4 +++- .../Base/opencl/GPUReconstructionOCL.cxx | 17 +++------------ .../Base/opencl/GPUReconstructionOCL.h | 3 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 4 +--- 15 files changed, 61 insertions(+), 42 deletions(-) create mode 100644 GPU/Common/GPUCommonChkErr.h diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8466035d74ef7..8b0a75679479f 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,6 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h + GPUCommonChkErr.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h new file mode 100644 index 0000000000000..df007b31dab64 --- /dev/null 
+++ b/GPU/Common/GPUCommonChkErr.h @@ -0,0 +1,21 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonChkErr.h +/// \author David Rohr + +#ifndef GPUCOMMONCHKERR_H +#define GPUCOMMONCHKERR_H + +#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) +#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) + +#endif diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index 0cd3c4ebddb7f..f7efbf7e976d4 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -36,7 +36,7 @@ #define GPUdni() // Device function, not-to-be-inlined #define GPUdnii() inline // Device function, not-to-be-inlined on device, inlined on host #define GPUh() // Host-only function - // NOTE: All GPUd*() functions are also compiled on the host during GCC compilation. + // NOTE: All GPUd*() functions are also compiled on the host during host compilation. // The GPUh*() macros are for the rare cases of functions that you want to compile for the host during GPU compilation. // Usually, you do not need the GPUh*() versions. If in doubt, use GPUd*()! 
#define GPUhi() inline // to-be-inlined host-only function diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index df9a7380834ce..28241cb7aeec5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,6 +1078,21 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } +int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + return 0; + } + if (failOnError) { + if (mInitialized && mInErrorHandling == false) { + mInErrorHandling = true; + CheckErrorCodes(false, true); + } + throw std::runtime_error("GPU Backend Failure"); + } + return 1; +} + void GPUReconstruction::DumpSettings(const char* dir) { std::string f; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 93310284d7564..1fe08d08a8058 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,6 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} + int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -246,6 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; @@ -327,6 +329,7 
@@ class GPUReconstruction // Others bool mInitialized = false; + bool mInErrorHandling = false; uint32_t mStatNEvents = 0; uint32_t mNEventsProcessed = 0; double mStatKernelTime = 0.; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index d1091f59b784a..b389e99a0b2bb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -175,7 +175,11 @@ void GPUReconstructionDeviceBase::runConstantRegistrators() { auto& list = getDeviceConstantMemRegistratorsVector(); for (uint32_t i = 0; i < list.size(); i++) { - mDeviceConstantMemList.emplace_back(list[i]()); + auto* ptr = list[i](); + if (ptr == nullptr) { + GPUFatal("Error registering constant memory"); + } + mDeviceConstantMemList.emplace_back(ptr); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index 6cd3813ff1431..c4595bed4c3fb 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,6 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d2adc3cc1fd19..40e3fa9b90eae 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -61,9 +61,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } } -int32_t 
GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); +int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line) { - // Check for CUDA Error and in the case of an error display the corresponding error string if (error == cudaSuccess) { return (0); } @@ -71,18 +71,6 @@ int32_t GPUReconstructionCUDABackend::GPUFailedMsgAI(const int64_t error, const return 1; } -void GPUReconstructionCUDABackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("CUDA Failure"); - } -} - GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) { mDeviceBackendSettings.deviceType = DeviceType::CUDA; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index dde70b9076e08..e04e14bd383d3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + static int32_t GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); 
} template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 6bcafe565e930..521ca2182c9bb 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer) != cudaSuccess) { + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 49c466103c593..a6d55c2d729fd 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,13 +22,11 @@ #include #include #include +#include "GPUCommonChkErr.h" namespace o2::gpu { -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - struct GPUReconstructionCUDAInternals { std::vector> kernelModules; // module for RTC compilation std::vector> kernelFunctions; // vector of ptrs to RTC kernels diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 0f8d9bf219ba6..a5ab353f3d43f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,9 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY 
static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - GPUReconstructionCUDA::GPUFailedMsgI(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer)); + if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + throw std::runtime_error("Could not obtain GPU constant memory symbol"); + } return retVal; }); #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index c7a8be62a12ea..6639c78b113e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -48,28 +48,17 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } } -int32_t GPUReconstructionOCLBackend::GPUFailedMsgAI(const int64_t error, const char* file, int32_t line) +static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); +int32_t GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { return (0); } - GPUError("OCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); return 1; } -void GPUReconstructionOCLBackend::GPUFailedMsgA(const int64_t error, const char* file, int32_t line) -{ - if (GPUFailedMsgAI(error, file, line)) { - static bool runningCallbacks = false; - if (IsInitialized() && runningCallbacks == false) { - runningCallbacks = true; - CheckErrorCodes(false, true); - } - throw std::runtime_error("OpenCL Failure"); - } -} - void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() { GPUCA_GPUReconstructionUpdateDefaults(); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h 
b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 5132baa444cd9..79f54274cd32c 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,8 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - int32_t GPUFailedMsgAI(const int64_t error, const char* file, int32_t line); - void GPUFailedMsgA(const int64_t error, const char* file, int32_t line); + virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index aec5708a80f3c..9c8cdbe87c7c1 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,6 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" +#include "GPUCommonChkErr.h" using namespace o2::gpu; @@ -36,9 +37,6 @@ using namespace o2::gpu; #include #include -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__) -#define GPUFailedMsgI(x) GPUFailedMsgAI(x, __FILE__, __LINE__) - namespace o2::gpu { struct GPUReconstructionOCLInternals { From 78933b830a4d46b86b4a1360f2f9a8340962a0c7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 13 Mar 2025 18:13:23 +0100 Subject: [PATCH 0221/1914] GPU: Rename GPUFailedMsg to GPUChkErr --- GPU/Common/GPUCommonChkErr.h | 13 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 4 +- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionDeviceBase.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 154 +++++++++--------- .../Base/cuda/GPUReconstructionCUDA.h | 4 +- .../GPUReconstructionCUDAExternalProvider.cu | 
2 +- .../cuda/GPUReconstructionCUDAInternals.h | 10 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 12 +- .../Base/opencl/GPUReconstructionOCL.cxx | 72 ++++---- .../Base/opencl/GPUReconstructionOCL.h | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 16 +- 12 files changed, 152 insertions(+), 143 deletions(-) diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h index df007b31dab64..00cb9e50d302f 100644 --- a/GPU/Common/GPUCommonChkErr.h +++ b/GPU/Common/GPUCommonChkErr.h @@ -12,10 +12,19 @@ /// \file GPUCommonChkErr.h /// \author David Rohr +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// The Macros must be called ona GPUReconstruction instance, e.g.: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); + #ifndef GPUCOMMONCHKERR_H #define GPUCOMMONCHKERR_H -#define GPUFailedMsg(x) GPUFailedMsgA(x, __FILE__, __LINE__, true) -#define GPUFailedMsgI(x) GPUFailedMsgA(x, __FILE__, __LINE__, false) +// Please #include "GPUReconstruction.h" in your code, if you use these 2! 
+#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 28241cb7aeec5..2bd4c0e937c20 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -1078,9 +1078,9 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s return retVal; } -int32_t GPUReconstruction::GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError) +int32_t GPUReconstruction::GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError) { - if (error == 0 || !GPUFailedMsgInternal(error, file, line)) { + if (error == 0 || !GPUChkErrInternal(error, file, line)) { return 0; } if (failOnError) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 1fe08d08a8058..f363f3f58aa6f 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -143,7 +143,7 @@ class GPUReconstruction virtual void* getGPUPointer(void* ptr) { return ptr; } virtual void startGPUProfiling() {} virtual void endGPUProfiling() {} - int32_t GPUFailedMsgA(const int64_t error, const char* file, int32_t line, bool failOnError); + int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError); int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr); void RunPipelineWorker(); void TerminatePipelineWorker(); @@ -247,7 +247,7 @@ class GPUReconstruction void UpdateMaxMemoryUsed(); int32_t EnqueuePipeline(bool terminate = false); GPUChain* GetNextChainInQueue(); - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const { return 0; } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { 
return 0; } virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0; virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index c4595bed4c3fb..f0e19f588e0f1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -46,7 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU virtual int32_t InitDevice_Runtime() = 0; int32_t ExitDevice() override; virtual int32_t ExitDevice_Runtime() = 0; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override = 0; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override = 0; int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override; int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 40e3fa9b90eae..d30eb51bd4938 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -62,7 +62,7 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUFailedMsgStatic(const int64_t error, const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) { if (error == cudaSuccess) { return (0); @@ -123,7 +123,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaDeviceProp deviceProp; int32_t count, bestDevice = -1; double bestDeviceSpeed = -1, deviceSpeed; - if (GPUFailedMsgI(cudaGetDeviceCount(&count))) { + if 
(GPUChkErrI(cudaGetDeviceCount(&count))) { GPUError("Error getting CUDA Device Count"); return (1); } @@ -139,9 +139,9 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } size_t free, total; #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaInitDevice(i, 0, 0))) { + if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { #else // HIP - if (GPUFailedMsgI(hipSetDevice(i))) { + if (GPUChkErrI(hipSetDevice(i))) { #endif if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); @@ -149,21 +149,21 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() continue; } contextCreated = true; - if (GPUFailedMsgI(cudaMemGetInfo(&free, &total))) { + if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (mProcessingSettings.debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); continue; } if (count > 1) { - GPUFailedMsg(cudaDeviceReset()); + GPUChkErr(cudaDeviceReset()); contextCreated = false; } if (mProcessingSettings.debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } - if (GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, i))) { + if (GPUChkErrI(cudaGetDeviceProperties(&deviceProp, i))) { continue; } if (mProcessingSettings.debugLevel >= 4) { @@ -221,13 +221,13 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } if (noDevice) { if (contextCreated) { - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); } return (1); } mDeviceId = bestDevice; - GPUFailedMsgI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); + GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); if (mProcessingSettings.debugLevel >= 2) { GPUInfo("Using CUDA Device %s with Properties:", deviceProp.name); @@ -280,27 +280,27 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() #endif #ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUFailedMsgI(cudaInitDevice(mDeviceId, 0, 0))) { + if 
(contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { #else // HIP - if (contextCreated == 0 && GPUFailedMsgI(hipSetDevice(mDeviceId))) { + if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { #endif GPUError("Could not set CUDA Device!"); return (1); } #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } - if (GPUFailedMsgI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { GPUError("Error setting CUDA stack size"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } #else // HIP - if (GPUFailedMsgI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { + if (GPUChkErrI(hipSetDeviceFlags(hipDeviceScheduleBlockingSync))) { GPUError("Could not set HIP Device flags!"); return (1); } @@ -319,35 +319,35 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on GPU"); } - if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUFailedMsgI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { + if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUChkErrI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { size_t free, total; - GPUFailedMsg(cudaMemGetInfo(&free, &total)); + GPUChkErr(cudaMemGetInfo(&free, &total)); GPUError("CUDA Memory Allocation Error (trying %ld bytes, %ld available on GPU, %ld free)", (int64_t)mDeviceMemorySize, (int64_t)deviceProp.totalGlobalMem, 
(int64_t)free); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Allocating memory on Host"); } - if (GPUFailedMsgI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { + if (GPUChkErrI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { GPUError("Error allocating Page Locked Host Memory (trying %ld bytes)", (int64_t)mHostMemorySize); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } if (mProcessingSettings.debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); - if (GPUFailedMsgI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { + if (GPUChkErrI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { GPUError("Error during CUDA memset"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { + if (GPUChkErrI(cudaStreamCreateWithFlags(&mInternals->Streams[i], cudaStreamNonBlocking))) { GPUError("Error creating CUDA Stream"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return (1); } } @@ -365,7 +365,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() else { #define GPUCA_KRNL(x_class, ...) 
\ mInternals->kernelModules.emplace_back(std::make_unique()); \ - GPUFailedMsg(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); + GPUChkErr(cuModuleLoadData(mInternals->kernelModules.back().get(), GPUCA_M_CAT3(_binary_cuda_kernel_module_fatbin_krnl_, GPUCA_M_KRNL_NAME(x_class), GPUCA_M_CAT(PER_KERNEL_OBJECT_EXT, _start)))); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL loadKernelModules(true); @@ -382,11 +382,11 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() CUdeviceptr tmp = nullptr; // HIP just uses void* #endif size_t tmpSize = 0; - GPUFailedMsg(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); + GPUChkErr(cuModuleGetGlobal(&tmp, &tmpSize, *mInternals->kernelModules[i], "gGPUConstantMemBuffer")); mDeviceConstantMemList.emplace_back((void*)tmp); } #else - GPUFailedMsg(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); + GPUChkErr(cudaMalloc(&devPtrConstantMem, gGPUConstantMemBufferSize)); #endif mDeviceConstantMem = (GPUConstantMem*)devPtrConstantMem; @@ -402,7 +402,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mDeviceConstantMemList.resize(master->mDeviceConstantMemList.size()); std::copy(master->mDeviceConstantMemList.begin(), master->mDeviceConstantMemList.end(), mDeviceConstantMemList.begin()); mInternals = master->mInternals; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); GPUInfo("CUDA Initialisation successfull (from master)"); } @@ -411,12 +411,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { #ifndef __HIPCC__ // CUDA - if (GPUFailedMsgI(cudaEventCreate(&events[j]))) { + if (GPUChkErrI(cudaEventCreate(&events[j]))) { #else - if (GPUFailedMsgI(hipEventCreateWithFlags(&events[j], 
hipEventBlockingSync))) { + if (GPUChkErrI(hipEventCreateWithFlags(&events[j], hipEventBlockingSync))) { #endif GPUError("Error creating event"); - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); return 1; } } @@ -435,7 +435,7 @@ void GPUReconstructionCUDA::genAndLoadRTC() for (uint32_t i = 0; i < nCompile; i++) { if (mProcessingSettings.rtc.runTest != 2) { mInternals->kernelModules.emplace_back(std::make_unique()); - GPUFailedMsg(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); + GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); @@ -449,33 +449,33 @@ void GPUReconstructionCUDA::genAndLoadRTC() int32_t GPUReconstructionCUDA::ExitDevice_Runtime() { // Uninitialize CUDA - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); SynchronizeGPU(); unregisterRemainingRegisteredMemory(); for (uint32_t i = 0; i < mEvents.size(); i++) { cudaEvent_t* events = (cudaEvent_t*)mEvents[i].data(); for (uint32_t j = 0; j < mEvents[i].size(); j++) { - GPUFailedMsgI(cudaEventDestroy(events[j])); + GPUChkErrI(cudaEventDestroy(events[j])); } } if (mMaster == nullptr) { - GPUFailedMsgI(cudaFree(mDeviceMemoryBase)); + GPUChkErrI(cudaFree(mDeviceMemoryBase)); #ifdef GPUCA_NO_CONSTANT_MEMORY - GPUFailedMsgI(cudaFree(mDeviceConstantMem)); + GPUChkErrI(cudaFree(mDeviceConstantMem)); #endif for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsgI(cudaStreamDestroy(mInternals->Streams[i])); + GPUChkErrI(cudaStreamDestroy(mInternals->Streams[i])); } - GPUFailedMsgI(cudaFreeHost(mHostMemoryBase)); + GPUChkErrI(cudaFreeHost(mHostMemoryBase)); for (uint32_t i = 0; i < mInternals->kernelModules.size(); i++) { - 
GPUFailedMsg(cuModuleUnload(*mInternals->kernelModules[i])); + GPUChkErr(cuModuleUnload(*mInternals->kernelModules[i])); } - GPUFailedMsgI(cudaDeviceReset()); + GPUChkErrI(cudaDeviceReset()); GPUInfo("CUDA Uninitialized"); } mDeviceMemoryBase = nullptr; @@ -491,18 +491,18 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, } if (stream == -1) { SynchronizeGPU(); - GPUFailedMsg(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); + GPUChkErr(cudaMemcpy(dst, src, size, toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost)); } else { if (evList == nullptr) { nEvents = 0; } for (int32_t k = 0; k < nEvents; k++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUFailedMsg(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); } if (ev) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 0 : stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 
0 : stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -518,13 +518,13 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s continue; } if (stream == -1) { - GPUFailedMsg(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); + GPUChkErr(cudaMemcpy(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice)); } else { - GPUFailedMsg(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(((char*)basePtr) + offset, src, size, cudaMemcpyHostToDevice, mInternals->Streams[stream])); } } if (ev && stream != -1) { - GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); + GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); @@ -533,28 +533,28 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s } void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} -void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } +void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); return GPUReconstructionProcessing::GetThreadContext(); } -void GPUReconstructionCUDA::SynchronizeGPU() { GPUFailedMsg(cudaDeviceSynchronize()); } -void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) { GPUFailedMsg(cudaStreamSynchronize(mInternals->Streams[stream])); } +void GPUReconstructionCUDA::SynchronizeGPU() { GPUChkErr(cudaDeviceSynchronize()); } +void GPUReconstructionCUDA::SynchronizeStream(int32_t stream) 
{ GPUChkErr(cudaStreamSynchronize(mInternals->Streams[stream])); } void GPUReconstructionCUDA::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaEventSynchronize(evList[i].get())); + GPUChkErr(cudaEventSynchronize(evList[i].get())); } } void GPUReconstructionCUDA::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[i].get(), 0)); } } @@ -565,7 +565,7 @@ bool GPUReconstructionCUDA::IsEventDone(deviceEvent* evList, int32_t nEvents) if (retVal == cudaErrorNotReady) { return false; } - GPUFailedMsg(retVal); + GPUChkErr(retVal); } return (true); } @@ -582,7 +582,7 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool if (!force && mProcessingSettings.debugLevel <= 0) { return (0); } - if (GPUFailedMsgI(stream == -1 ? cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { + if (GPUChkErrI(stream == -1 ? 
cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { GPUError("CUDA Error while synchronizing (%s) (Stream %d)", state, stream); return (1); } @@ -597,23 +597,23 @@ int32_t GPUReconstructionCUDA::registerMemoryForGPU_internal(const void* ptr, si if (mProcessingSettings.debugLevel >= 3) { GPUInfo("Registering %zu bytes of memory for GPU", size); } - return GPUFailedMsgI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); + return GPUChkErrI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); } int32_t GPUReconstructionCUDA::unregisterMemoryForGPU_internal(const void* ptr) { - return GPUFailedMsgI(cudaHostUnregister((void*)ptr)); + return GPUChkErrI(cudaHostUnregister((void*)ptr)); } void GPUReconstructionCUDABackend::PrintKernelOccupancies() { int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; - GPUFailedMsg(cudaSetDevice(mDeviceId)); + GPUChkErr(cudaSetDevice(mDeviceId)); for (uint32_t i = 0; i < mInternals->kernelFunctions.size(); i++) { - GPUFailedMsg(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy - GPUFailedMsg(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); - GPUFailedMsg(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); - GPUFailedMsg(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); + GPUChkErr(cuOccupancyMaxPotentialBlockSize(&suggestedBlocks, &threads, *mInternals->kernelFunctions[i], 0, 0, 0)); // NOLINT: failure in clang-tidy + GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); + GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); + GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); 
GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i].c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); } } @@ -621,14 +621,14 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; -#define GPUCA_KRNL(x_class, ...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ - mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ - if (mProcessingSettings.debugLevel >= 3) { \ - GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ - } \ - GPUFailedMsg(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ +#define GPUCA_KRNL(x_class, ...) \ + getRTCkernelNum(mInternals->kernelFunctions.size()); \ + mInternals->kernelFunctions.emplace_back(new CUfunction); \ + mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ + if (mProcessingSettings.debugLevel >= 3) { \ + GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ + } \ + GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? 
j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -644,27 +644,27 @@ int32_t GPUReconstructionCUDA::PrepareTextures() #ifdef GPUCA_USE_TEXTURES cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); size_t offset; - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUFailedMsg(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); + GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); #endif return (0); } void GPUReconstructionCUDA::startGPUProfiling() { - GPUFailedMsg(cudaProfilerStart()); + GPUChkErr(cudaProfilerStart()); } void GPUReconstructionCUDA::endGPUProfiling() { - GPUFailedMsg(cudaProfilerStop()); + GPUChkErr(cudaProfilerStop()); } #else // HIP void* GPUReconstructionHIP::getGPUPointer(void* ptr) { void* retVal = nullptr; - GPUFailedMsg(hipHostGetDevicePointer(&retVal, ptr, 0)); + GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); return retVal; } #endif // __HIPCC__ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index e04e14bd383d3..02e8f92bb2328 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,13 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUFailedMsgStatic(const int64_t error, const char* 
file, int32_t line); + static int32_t GPUChkErrStatic(const int64_t error, const char* file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override { return GPUFailedMsgStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index 521ca2182c9bb..bc1d573385598 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -36,7 +36,7 @@ using namespace o2::gpu; #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index a6d55c2d729fd..027a9d5445b2c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -49,7 +49,7 @@ class GPUDebugTiming { if (mDo) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[0].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[0].get(), 
mStreams[mXYZ.x.stream])); } else { mTimer.ResetStart(); } @@ -59,13 +59,13 @@ class GPUDebugTiming { if (mDo && mXYZ.t == 0.) { if (mDeviceTimers) { - mRec->GPUFailedMsg(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); - mRec->GPUFailedMsg(cudaEventSynchronize(mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventRecord(mDeviceTimers[1].get(), mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaEventSynchronize(mDeviceTimers[1].get())); float v; - mRec->GPUFailedMsg(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); + mRec->GPUChkErr(cudaEventElapsedTime(&v, mDeviceTimers[0].get(), mDeviceTimers[1].get())); mXYZ.t = v * 1.e-3f; } else { - mRec->GPUFailedMsg(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); + mRec->GPUChkErr(cudaStreamSynchronize(mStreams[mXYZ.x.stream])); mXYZ.t = mTimer.GetCurrentElapsedTime(); } } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index a5ab353f3d43f..f60f00c13710d 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -34,7 +34,7 @@ __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUC template <> inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - GPUFailedMsg(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); + GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } template @@ -56,7 +56,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUFailedMsg(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); + 
GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -67,16 +67,16 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgsStreams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); + GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); } } { GPUDebugTiming timer(mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); std::apply([this, &args](auto&... vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); } - GPUFailedMsg(cudaGetLastError()); + GPUChkErr(cudaGetLastError()); if (z.ev) { - GPUFailedMsg(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); + GPUChkErr(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream])); } } @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUFailedMsgStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 6639c78b113e5..e52494937f8bf 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -49,7 +49,7 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); -int32_t 
GPUReconstructionOCLBackend::GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error == CL_SUCCESS) { @@ -69,7 +69,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() if (mMaster == nullptr) { cl_int ocl_error; cl_uint num_platforms; - if (GPUFailedMsgI(clGetPlatformIDs(0, nullptr, &num_platforms))) { + if (GPUChkErrI(clGetPlatformIDs(0, nullptr, &num_platforms))) { GPUErrorReturn("Error getting OpenCL Platform Count"); } if (num_platforms == 0) { @@ -82,7 +82,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() // Query platforms and devices std::unique_ptr platforms; platforms.reset(new cl_platform_id[num_platforms]); - if (GPUFailedMsgI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { + if (GPUChkErrI(clGetPlatformIDs(num_platforms, platforms.get(), nullptr))) { GPUErrorReturn("Error getting OpenCL Platforms"); } @@ -227,7 +227,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUErrorReturn("Did not find compatible OpenCL Platform / Device, aborting OPENCL Initialisation"); } mInternals->platform = platforms[bestPlatform]; - GPUFailedMsg(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); + GPUChkErr(clGetDeviceIDs(mInternals->platform, CL_DEVICE_TYPE_ALL, devices.size(), devices.data(), nullptr)); mInternals->device = devices[bestDevice]; queryDevice(mInternals->device); @@ -267,7 +267,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mMaxBackendThreads = std::max(mMaxBackendThreads, deviceMaxWorkGroup * mBlockCount); mInternals->context = clCreateContext(nullptr, 1, &mInternals->device, nullptr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Could not create 
OPENCL Device Context!"); } @@ -280,13 +280,13 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } mInternals->mem_gpu = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE, mDeviceMemorySize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Memory Allocation Error"); } mInternals->mem_constant = clCreateBuffer(mInternals->context, CL_MEM_READ_ONLY, gGPUConstantMemBufferSize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { clReleaseMemObject(mInternals->mem_gpu); clReleaseContext(mInternals->context); GPUErrorReturn("OPENCL Constant Memory Allocation Error"); @@ -314,44 +314,44 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() #else mInternals->command_queue[i] = clCreateCommandQueue(mInternals->context, mInternals->device, 0, &ocl_error); #endif - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating OpenCL command queue"); } } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_gpu, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } - if (GPUFailedMsgI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { + if (GPUChkErrI(clEnqueueMigrateMemObjects(mInternals->command_queue[0], 1, &mInternals->mem_constant, 0, 0, nullptr, nullptr))) { GPUErrorReturn("Error migrating buffer"); } mInternals->mem_host = clCreateBuffer(mInternals->context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mHostMemorySize, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating pinned host memory"); } const char* krnlGetPtr = "__kernel void krnlGetPtr(__global char* gpu_mem, 
__global char* constant_mem, __global size_t* host_mem) {if (get_global_id(0) == 0) {host_mem[0] = (size_t) gpu_mem; host_mem[1] = (size_t) constant_mem;}}"; cl_program program = clCreateProgramWithSource(mInternals->context, 1, (const char**)&krnlGetPtr, nullptr, &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating program object"); } ocl_error = clBuildProgram(program, 1, &mInternals->device, "", nullptr, nullptr); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { char build_log[16384]; clGetProgramBuildInfo(program, mInternals->device, CL_PROGRAM_BUILD_LOG, 16384, build_log, nullptr); GPUImportant("Build Log:\n\n%s\n\n", build_log); GPUErrorReturn("Error compiling program"); } cl_kernel kernel = clCreateKernel(program, "krnlGetPtr", &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error creating kernel"); } - if (GPUFailedMsgI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || - GPUFailedMsgI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || - GPUFailedMsgI(clFinish(mInternals->command_queue[0])) || - GPUFailedMsgI(clReleaseKernel(kernel)) || - GPUFailedMsgI(clReleaseProgram(program))) { + if (GPUChkErrI(OCLsetKernelParameters(kernel, mInternals->mem_gpu, mInternals->mem_constant, mInternals->mem_host)) || + GPUChkErrI(clExecuteKernelA(mInternals->command_queue[0], kernel, 16, 16, nullptr)) || + GPUChkErrI(clFinish(mInternals->command_queue[0])) || + GPUChkErrI(clReleaseKernel(kernel)) || + GPUChkErrI(clReleaseProgram(program))) { GPUErrorReturn("Error obtaining device memory ptr"); } @@ -359,7 +359,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUInfo("Mapping hostmemory"); } mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error); 
- if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUErrorReturn("Error allocating Page Locked Host Memory"); } @@ -435,14 +435,14 @@ size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t } if (size == 0) { if (ev || nEvents) { // Workaround for OCL runtimes, which can throw an error in case size = 0 - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream == -1 ? 0 : stream], nEvents, evList->getEventList(), ev->getEventList())); } } else if (toGPU == -2) { - GPUFailedMsg(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueCopyBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, mInternals->mem_gpu, (char*)src - (char*)mDeviceMemoryBase, (char*)dst - (char*)mDeviceMemoryBase, size, nEvents, evList->getEventList(), ev->getEventList())); } else if (toGPU) { - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)dst - (char*)mDeviceMemoryBase, size, src, nEvents, evList->getEventList(), ev->getEventList())); } else { - GPUFailedMsg(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); + GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 
0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); } if (mProcessingSettings.serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); @@ -455,16 +455,16 @@ size_t GPUReconstructionOCLBackend::WriteToConstantMemory(size_t offset, const v if (stream == -1) { SynchronizeGPU(); } - GPUFailedMsg(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); + GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); if (mProcessingSettings.serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; } -void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUFailedMsg(clReleaseEvent(ev.get())); } +void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } -void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } +void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEvent event) { @@ -490,18 +490,18 @@ int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEve void GPUReconstructionOCLBackend::SynchronizeGPU() { for (int32_t i = 0; i < mNStreams; i++) { - GPUFailedMsg(clFinish(mInternals->command_queue[i])); + GPUChkErr(clFinish(mInternals->command_queue[i])); } } -void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { 
GPUFailedMsg(clFinish(mInternals->command_queue[stream])); } +void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } -void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUFailedMsg(clWaitForEvents(nEvents, evList->getEventList())); } +void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } void GPUReconstructionOCLBackend::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { if (nEvents) { - GPUFailedMsg(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); + GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); } } @@ -509,7 +509,7 @@ bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEven { cl_int eventdone; for (int32_t i = 0; i < nEvents; i++) { - GPUFailedMsg(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); + GPUChkErr(clGetEventInfo(evList[i].get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(eventdone), &eventdone, nullptr)); if (eventdone != CL_COMPLETE) { return false; } @@ -524,7 +524,7 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, return (0); } for (int32_t i = 0; i < mNStreams; i++) { - if (GPUFailedMsgI(clFinish(mInternals->command_queue[i]))) { + if (GPUChkErrI(clFinish(mInternals->command_queue[i]))) { GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i); } } @@ -554,14 +554,14 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() mInternals->program = clCreateProgramWithSource(mInternals->context, (cl_uint)1, (const char**)&programs_sources, program_sizes, &ocl_error); } - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error 
creating OpenCL program from binary"); return 1; } - if (GPUFailedMsgI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) { + if (GPUChkErrI(clBuildProgram(mInternals->program, 1, &mInternals->device, oclBuildFlags, nullptr, nullptr))) { cl_build_status status; - if (GPUFailedMsgI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { + if (GPUChkErrI(clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_STATUS, sizeof(status), &status, nullptr)) == 0 && status == CL_BUILD_ERROR) { size_t log_size; clGetProgramBuildInfo(mInternals->program, mInternals->device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size); std::unique_ptr build_log(new char[log_size + 1]); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 79f54274cd32c..2abae229c74bb 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -39,7 +39,7 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t ExitDevice_Runtime() override; void UpdateAutomaticProcessingSettings() override; - virtual int32_t GPUFailedMsgInternal(const int64_t error, const char* file, int32_t line) const override; + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; void SynchronizeGPU() override; int32_t DoStuckProtection(int32_t stream, deviceEvent event) override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 8a6c889773cb0..4f6a8725b4be5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -18,7 +18,7 @@ template <> inline void 
GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; - GPUFailedMsg(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); + GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); } template @@ -28,7 +28,7 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu auto& x = _xyz.x; auto& y = _xyz.y; auto& z = _xyz.z; - GPUFailedMsg(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); + GPUChkErr(OCLsetKernelParameters(k, mInternals->mem_gpu, mInternals->mem_constant, y.index, args...)); cl_event ev; cl_event* evr; @@ -39,15 +39,15 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu } else { evr = (cl_event*)z.ev; } - GPUFailedMsg(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); + GPUChkErr(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { cl_ulong time_start, time_end; - GPUFailedMsg(clWaitForEvents(1, evr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); - GPUFailedMsg(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); + 
GPUChkErr(clWaitForEvents(1, evr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); + GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, nullptr)); _xyz.t = (time_end - time_start) * 1.e-9f; if (tmpEvent) { - GPUFailedMsg(clReleaseEvent(ev)); + GPUChkErr(clReleaseEvent(ev)); } } } @@ -80,7 +80,7 @@ int32_t GPUReconstructionOCLBackend::AddKernel() cl_int ocl_error; cl_kernel krnl = clCreateKernel(mInternals->program, kname.c_str(), &ocl_error); - if (GPUFailedMsgI(ocl_error)) { + if (GPUChkErrI(ocl_error)) { GPUError("Error creating OPENCL Kernel: %s", name.c_str()); return 1; } From 2fef8797a1b38b4fa1fbb7316481b8bc3062db10 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 14:52:53 +0100 Subject: [PATCH 0222/1914] Fix compiler warning --- Generators/src/GeneratorFromFile.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Generators/src/GeneratorFromFile.cxx b/Generators/src/GeneratorFromFile.cxx index 6d4e85afa6721..e37a3886c24e1 100644 --- a/Generators/src/GeneratorFromFile.cxx +++ b/Generators/src/GeneratorFromFile.cxx @@ -361,7 +361,7 @@ namespace std::vector executeCommand(const std::string& command) { std::vector result; - std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); if (!pipe) { throw std::runtime_error("Failed to open pipe"); } From 20f1352fa03206c836a48dcb3089a9db1a526f68 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:29 +0100 Subject: [PATCH 0223/1914] GPU: Clean up more of C++ < 11 compatibility code --- GPU/GPUTracking/Base/GPUConstantMem.h | 2 +- GPU/GPUTracking/Base/GPUGeneralKernels.h | 6 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 4 +- .../Base/GPUReconstructionKernels.h | 8 +-- GPU/GPUTracking/DataTypes/GPUDataTypes.h | 60 ++++++++----------- GPU/GPUTracking/Global/GPUChain.cxx | 12 ++-- 
GPU/GPUTracking/Global/GPUChain.h | 12 ++-- .../SectorTracker/GPUTPCCreateTrackingData.h | 2 +- .../GPUTPCExtrapolationTracking.h | 4 +- .../SectorTracker/GPUTPCNeighboursCleaner.h | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.h | 2 +- .../SectorTracker/GPUTPCStartHitsFinder.h | 2 +- .../SectorTracker/GPUTPCStartHitsSorter.h | 2 +- .../SectorTracker/GPUTPCTrackletConstructor.h | 2 +- .../SectorTracker/GPUTPCTrackletSelector.h | 2 +- .../TRDTracking/GPUTRDTrackerKernels.h | 2 +- 16 files changed, 58 insertions(+), 66 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index 8f1cc90f5ae93..e0b06f0a3ea55 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -96,7 +96,7 @@ static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + si #endif } // namespace o2::gpu #if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) -GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; +GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index 71980d38fdc9e..ce93e2e5eead8 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -79,7 +79,7 @@ class GPUKernelTemplate }; typedef GPUconstantref() GPUConstantMem processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return &processors; @@ -94,7 +94,7 @@ class GPUKernelTemplate class GPUMemClean16 : public 
GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size); }; @@ -103,7 +103,7 @@ class GPUMemClean16 : public GPUKernelTemplate class GPUitoa : public GPUKernelTemplate { public: - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index f90820281c74d..fd999ec2304e1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -134,8 +134,8 @@ template inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) { HighResTimer* t = nullptr; - GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep(); - if (myStep == GPUCA_RECO_STEP::NoRecoStep) { + GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); + if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { throw std::runtime_error("Failure running general kernel without defining RecoStep"); } int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 
2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index ba30f38e902ad..b8f3e3746c743 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -30,14 +30,14 @@ struct classArgument { }; struct krnlExec { - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUCA_RECO_STEP::NoRecoStep) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} uint32_t nBlocks; uint32_t nThreads; int32_t stream; GPUReconstruction::krnlDeviceType device; - GPUCA_RECO_STEP step; + GPUDataTypes::RecoStep step; }; struct krnlRunRange { constexpr krnlRunRange() = default; diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index f7bfe38be988d..6cc1e7266e722 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -96,9 +96,6 
@@ struct TPCPadGainCalib; struct TPCZSLinkMapping; #include "utils/bitfield.h" -#define ENUM_CLASS class -#define ENUM_UINT : uint32_t -#define GPUCA_RECO_STEP GPUDataTypes::RecoStep class GPUTPCTrack; class GPUTPCHitId; @@ -117,36 +114,33 @@ struct GPUSettingsTF; class GPUDataTypes { public: - enum ENUM_CLASS GeometryType ENUM_UINT{RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2}; - enum DeviceType ENUM_UINT { INVALID_DEVICE = 0, - CPU = 1, - CUDA = 2, - HIP = 3, - OCL = 4 }; - enum ENUM_CLASS GeneralStep { Prepare = 1, - QA = 2 }; + // clang-format off + enum class GeometryType : uint32_t { RESERVED_GEOMETRY = 0, ALIROOT = 1, O2 = 2 }; + enum DeviceType : uint32_t { INVALID_DEVICE = 0, CPU = 1, CUDA = 2, HIP = 3, OCL = 4 }; + enum class GeneralStep { Prepare = 1, QA = 2 }; + // clang-format on - enum ENUM_CLASS RecoStep { TPCConversion = 1, - TPCSectorTracking = 2, - TPCMerging = 4, - TPCCompression = 8, - TRDTracking = 16, - ITSTracking = 32, - TPCdEdx = 64, - TPCClusterFinding = 128, - TPCDecompression = 256, - Refit = 512, - AllRecoSteps = 0x7FFFFFFF, - NoRecoStep = 0 }; - enum ENUM_CLASS InOutType { TPCClusters = 1, - OBSOLETE = 2, - TPCMergedTracks = 4, - TPCCompressedClusters = 8, - TRDTracklets = 16, - TRDTracks = 32, - TPCRaw = 64, - ITSClusters = 128, - ITSTracks = 256 }; + enum class RecoStep { TPCConversion = 1, + TPCSectorTracking = 2, + TPCMerging = 4, + TPCCompression = 8, + TRDTracking = 16, + ITSTracking = 32, + TPCdEdx = 64, + TPCClusterFinding = 128, + TPCDecompression = 256, + Refit = 512, + AllRecoSteps = 0x7FFFFFFF, + NoRecoStep = 0 }; + enum class InOutType { TPCClusters = 1, + OBSOLETE = 2, + TPCMergedTracks = 4, + TPCCompressedClusters = 8, + TRDTracklets = 16, + TRDTracks = 32, + TPCRaw = 64, + ITSClusters = 128, + ITSTracks = 256 }; #ifndef __OPENCL__ static constexpr const char* const DEVICE_TYPE_NAMES[] = {"INVALID", "CPU", "CUDA", "HIP", "OCL"}; static constexpr const char* const RECO_STEP_NAMES[] = {"TPC Transformation", "TPC Sector 
Tracking", "TPC Track Merging and Fit", "TPC Compression", "TRD Tracking", "ITS Tracking", "TPC dEdx Computation", "TPC Cluster Finding", "TPC Decompression", "Global Refit"}; @@ -312,8 +306,6 @@ struct GPUTrackingInOutPointers { const GPUSettingsTF* settingsTF = nullptr; }; -#undef ENUM_CLASS -#undef ENUM_UINT } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Global/GPUChain.cxx b/GPU/GPUTracking/Global/GPUChain.cxx index 6990d5e08b638..300de31a509ba 100644 --- a/GPU/GPUTracking/Global/GPUChain.cxx +++ b/GPU/GPUTracking/Global/GPUChain.cxx @@ -18,33 +18,33 @@ using namespace o2::gpu; constexpr GPUChain::krnlRunRange GPUChain::krnlRunRangeNone; constexpr GPUChain::krnlEvent GPUChain::krnlEventNone; -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { const uint32_t nBlocks = (totalItems + nThreads - 1) / nThreads; return {nBlocks, nThreads, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-1, totalItems, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-2, nBlocks, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st) { return 
{(uint32_t)-2, nBlocks, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } -GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, d, st}; } -GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st) +GPUChain::krnlExec GPUChain::GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st) { return {(uint32_t)-3, 0, stream, GPUReconstruction::krnlDeviceType::Auto, st}; } diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index b9da1c9a330d3..fff5d2efe0270 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -192,15 +192,15 @@ class GPUChain return mRec->getTimer(name, num); } // Get GRID with NBLOCKS minimal such that nThreads * NBLOCS >= totalItems - krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, uint32_t nThreads, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with NBLOCKS minimal such that ideal number of threads * NBLOCKS >= totalItems - krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGrid(uint32_t totalItems, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with specified number of blocks, each block with ideal number of threads - krnlExec GetGridBlk(uint32_t nBlocks, 
int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridBlk(uint32_t nBlocks, int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridBlkStep(uint32_t nBlocks, int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); // Get GRID with ideal number of threads / blocks for GPU - krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); - krnlExec GetGridAutoStep(int32_t stream, GPUCA_RECO_STEP st = GPUCA_RECO_STEP::NoRecoStep); + krnlExec GetGridAuto(int32_t stream, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); + krnlExec GetGridAutoStep(int32_t stream, GPUDataTypes::RecoStep st = GPUDataTypes::RecoStep::NoRecoStep); inline uint32_t BlockCount() const { return mRec->mBlockCount; } inline uint32_t WarpSize() const { return mRec->mWarpSize; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h index 9327699c9404b..dc1beacf79d02 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCCreateTrackingData.h @@ -32,7 +32,7 @@ class GPUTPCCreateTrackingData : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static 
processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h index 2d2b275d06399..91a33d132f136 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCExtrapolationTracking.h @@ -30,7 +30,7 @@ class GPUTPCExtrapolationTracking : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; @@ -50,7 +50,7 @@ class GPUTPCExtrapolationTrackingCopyNumbers : public GPUKernelTemplate { public: typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h index 7af6e8eb1a582..de79b268aea78 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursCleaner.h @@ -38,7 +38,7 @@ class GPUTPCNeighboursCleaner : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 54dc0876f8a55..41b5eb8a4ffb8 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -49,7 +49,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h index 5e620180570c8..c834b17369f0f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsFinder : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h index b0349d660dbc1..0e2fd96dd2690 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsSorter.h @@ -38,7 +38,7 @@ class GPUTPCStartHitsSorter : public 
GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 8757ed87072da..0f8314ee0fad4 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -97,7 +97,7 @@ class GPUTPCTrackletConstructor GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index bb969d866ef29..5009c672b030e 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -42,7 +42,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate }; typedef GPUconstantref() GPUTPCTracker processorType; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TPCSectorTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return 
GPUDataTypes::RecoStep::TPCSectorTracking; } GPUhdi() static processorType* Processor(GPUConstantMem& processors) { return processors.tpcTrackers; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h index 70b525420f294..21135ddc48dfa 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.h @@ -26,7 +26,7 @@ class GPUTRDTrackerKernels : public GPUKernelTemplate enum K { defaultKernel = 0, gpuVersion = 0, o2Version = 1 }; - GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::TRDTracking; } + GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TRDTracking; } template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, T* externalInstance = nullptr); }; From 315cfa4216eeda737371a8d4eac108b81e23d881 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 15:06:57 +0100 Subject: [PATCH 0224/1914] GPU: Provide static versions of GPUChkErr() macros test2 GPU: Provide static versions of GPUChkErr() macros --- GPU/Common/CMakeLists.txt | 2 +- GPU/Common/GPUCommonChkErr.h | 30 --------- GPU/Common/GPUCommonDef.h | 24 +++++-- GPU/Common/GPUCommonHelpers.h | 62 +++++++++++++++++++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 9 +-- .../Base/cuda/GPUReconstructionCUDA.h | 3 +- .../GPUReconstructionCUDAExternalProvider.cu | 3 +- .../cuda/GPUReconstructionCUDAHelpers.inc | 31 ++++++++++ .../cuda/GPUReconstructionCUDAInternals.h | 2 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +- .../Base/opencl/GPUReconstructionOCL.cxx | 7 +-- .../opencl/GPUReconstructionOCLIncludesHost.h | 2 +- 14 files changed, 127 insertions(+), 56 deletions(-) delete mode 100644 
GPU/Common/GPUCommonChkErr.h create mode 100644 GPU/Common/GPUCommonHelpers.h create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 8b0a75679479f..bacf4454c39fd 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -15,7 +15,7 @@ set(HDRS_INSTALL GPUCommonAlgorithm.h GPUCommonDef.h GPUCommonDefAPI.h - GPUCommonChkErr.h + GPUCommonHelpers.h GPUCommonDefSettings.h GPUCommonConstants.h GPUCommonLogger.h diff --git a/GPU/Common/GPUCommonChkErr.h b/GPU/Common/GPUCommonChkErr.h deleted file mode 100644 index 00cb9e50d302f..0000000000000 --- a/GPU/Common/GPUCommonChkErr.h +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUCommonChkErr.h -/// \author David Rohr - -// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). -// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code -// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured. -// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. 
-// The Macros must be called ona GPUReconstruction instance, e.g.: -// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } -// gpuRecObj.GPUChkErr(cudaMalloc(...)); - -#ifndef GPUCOMMONCHKERR_H -#define GPUCOMMONCHKERR_H - -// Please #include "GPUReconstruction.h" in your code, if you use these 2! -#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) -#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) - -#endif diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index ae8c48b500b69..b4a788e66a81c 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -35,13 +35,25 @@ #define GPUCA_GPUCODE // Compiled by GPU compiler #endif - #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) - #define GPUCA_GPUCODE_DEVICE // Executed on device + #if defined(GPUCA_GPUCODE) + #if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__) + #define GPUCA_GPUCODE_DEVICE // Executed on device + #endif + #if defined(__CUDACC__) + #define GPUCA_GPUTYPE CUDA + #elif defined(__HIPCC__) + #define GPUCA_GPUTYPE HIP + #elif defined(__OPENCL__) || defined(__OPENCL_HOST__) + #define GPUCA_GPUTYPE OCL + #endif #endif #endif +#ifndef GPUCA_GPUTYPE + #define GPUCA_GPUTYPE CPU +#endif #if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined (GPUCA_GPUCODE) - #define GPUCA_ALIGPUCODE + #define GPUCA_ALIGPUCODE // Part of GPUTracking library but not of interface #endif #if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) @@ -51,13 +63,13 @@ #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER) -#define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ + #define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__ #else -#define GPUCA_DEBUG_STREAMER_CHECK(...) 
+ #define GPUCA_DEBUG_STREAMER_CHECK(...) #endif #ifndef GPUCA_RTC_SPECIAL_CODE -#define GPUCA_RTC_SPECIAL_CODE(...) + #define GPUCA_RTC_SPECIAL_CODE(...) #endif // API Definitions for GPU Compilation diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h new file mode 100644 index 0000000000000..ad876db0d6c3a --- /dev/null +++ b/GPU/Common/GPUCommonHelpers.h @@ -0,0 +1,62 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUCommonHelpers.h +/// \author David Rohr + +// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user). +// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code +// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occurred. +// GPUChkErrI will return 0 or 1, depending on whether an error has occurred. +// These Macros must be called on a GPUReconstruction instance. +// The GPUChkErrS and GPUChkErrSI are similar but static, without required GPUReconstruction instance. +// Examples: +// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); } +// gpuRecObj.GPUChkErr(cudaMalloc(...)); +// if (GPUChkErrSI(cudaMalloc(..))) { exit(1); } + +#ifndef GPUCOMMONHELPERS_H +#define GPUCOMMONHELPERS_H + +// Please #include "GPUReconstruction.h" in your code, if you use these 2!
+#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true) +#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false) +#define GPUChkErrS(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, true) +#define GPUChkErrSI(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, false) + +#include "GPUCommonDef.h" +#include + +namespace o2::gpu::internal +{ +#define GPUCOMMON_INTERNAL_CAT_A(a, b, c) a##b##c +#define GPUCOMMON_INTERNAL_CAT(...) GPUCOMMON_INTERNAL_CAT_A(__VA_ARGS__) +extern int32_t GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(const int64_t error, const char* file, int32_t line); +inline int32_t GPUReconstructionCPUChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error) { + GPUError("GPUCommon Error Code %d (%s:%d)", error, file, line); + } + return error != 0; +} +static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* file, int32_t line, bool failOnError) +{ + int32_t retVal = error && GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(error, file, line); + if (retVal && failOnError) { + throw std::runtime_error("GPU API Call Failure"); + } + return error; +} +#undef GPUCOMMON_INTERNAL_CAT_A +#undef GPUCOMMON_INTERNAL_CAT +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index a33234db49a27..5b2e53179e50c 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -18,7 +18,7 @@ endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) -set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) +set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc 
GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d30eb51bd4938..d0d5ef4680fac 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -22,6 +22,7 @@ #include "CUDAThrustHelpers.h" #include "GPUReconstructionIncludes.h" #include "GPUParamRTC.h" +#include "GPUReconstructionCUDAHelpers.inc" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" @@ -62,13 +63,9 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUChkErrStatic(const int64_t error, const char* file, int32_t line) +int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { - if (error == cudaSuccess) { - return (0); - } - GPUError("CUDA Error: %ld / %s (%s:%d)", error, cudaGetErrorString((cudaError_t)error), file, line); - return 1; + return internal::GPUReconstructionCUDAChkErr(error, file, line); } GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 02e8f92bb2328..f78270d40146c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -33,13 +33,12 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase { public: ~GPUReconstructionCUDABackend() override; - static int32_t GPUChkErrStatic(const int64_t error, const char* 
file, int32_t line); protected: GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); void PrintKernelOccupancies() override; - virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override { return GPUChkErrStatic(error, file, line); } + virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template void runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index bc1d573385598..f341a778076b8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -32,11 +32,12 @@ using namespace o2::gpu; #include "TrackParametrizationWithError.cxx" #include "Propagator.cxx" #include "TrackLTIntegral.cxx" +#include "GPUReconstructionCUDAHelpers.inc" #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, __LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc new file mode 100644 index 0000000000000..a34f940a1337a --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc @@ -0,0 +1,31 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCUDAHelpers.inc +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONCUDAHELPERS_INC_H +#define GPURECONSTRUCTIONCUDAHELPERS_INC_H + +#include "GPUCommonHelpers.h" + +namespace o2::gpu::internal +{ +int32_t __attribute__((weak)) GPUReconstructionCUDAChkErr(const int64_t error, const char* file, int32_t line) +{ + if (error != cudaSuccess) { + GPUError("CUDA Error: %ld / %s (%s:%d)", error, cudaGetErrorString((cudaError_t)error), file, line); + } + return error != cudaSuccess; +} +} // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 027a9d5445b2c..c85d98d85420e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -22,7 +22,7 @@ #include #include #include -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f60f00c13710d..0c83223ba238a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -138,7 +138,7 @@ void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& k #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { void* retVal = nullptr; - if (GPUReconstructionCUDA::GPUChkErrStatic(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer), __FILE__, 
__LINE__)) { + if (GPUChkErrS(cudaGetSymbolAddress(&retVal, gGPUConstantMemBuffer))) { throw std::runtime_error("Could not obtain GPU constant memory symbol"); } return retVal; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 30f6683ff93c5..21a641c0cc7c0 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesHost.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -63,7 +63,7 @@ endif() set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) -set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) +set(HDRS 
${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e52494937f8bf..e92205b9864e6 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -52,11 +52,10 @@ static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string - if (error == CL_SUCCESS) { - return (0); + if (error != CL_SUCCESS) { + GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); } - GPUError("OpenCL Error: %ld / %s (%s:%d)", error, convertErrorToString(error), file, line); - return 1; + return error != CL_SUCCESS; } void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 9c8cdbe87c7c1..97316cf9aa32e 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -28,7 +28,7 @@ #include "GPUReconstructionOCL.h" #include "GPUReconstructionIncludes.h" -#include "GPUCommonChkErr.h" +#include "GPUCommonHelpers.h" using namespace o2::gpu; From b8feb4d10f27015e45df85ec17cba5758ad523a5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 
16:15:23 +0100 Subject: [PATCH 0225/1914] GPU: Automatically derive GPUReconstruction backend class from preprocessor constant --- GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h | 4 ++-- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h | 1 - .../Base/opencl/GPUReconstructionOCLKernels.cxx | 1 - GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 ++++---- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 5 files changed, 7 insertions(+), 9 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index cd1180cbc9991..0b1a501ebc094 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -68,7 +68,7 @@ // GPU Host wrappers for kernel #define GPUCA_KRNL_HOST(x_class, ...) \ GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal { \ + template <> class GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ @@ -80,7 +80,7 @@ }; #define GPUCA_KRNL_PROP(x_class, x_attributes) \ - template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend() { \ + template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend() { \ gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ return ret.nThreads > 0 ? 
ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h index 7f77925ca3aaa..4ed352279fb90 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h @@ -34,6 +34,5 @@ #define GPUCA_CONSMEM_CALL me->mDeviceConstantMem, #define GPUCA_CONSMEM ((GPUConstantMem&)(*gGPUConstantMemBuffer)) #endif -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionCUDABackend #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 4f6a8725b4be5..ce6b6553ae1f7 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -109,6 +109,5 @@ int32_t GPUReconstructionOCLBackend::AddKernels() #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ GPUCA_KRNL_PROP(x_class, x_attributes) \ template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); -#define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index fa85d796baeba..e6312d767a496 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -769,7 +769,7 @@ struct MergeBorderTracks_compMin { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr 
p(range); ThrustVolatileAsyncAllocator alloc(this); @@ -1873,7 +1873,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); ThrustVolatileAsyncAllocator alloc(this); @@ -1881,7 +1881,7 @@ inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); ThrustVolatileAsyncAllocator alloc(this); @@ -2106,7 +2106,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); ThrustVolatileAsyncAllocator alloc(this); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 45293bae9820b..13f204d0f940a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -102,7 +102,7 @@ struct GPUTPCGMO2OutputSort_comp { }; template <> -inline void GPUCA_KRNL_BACKEND_CLASS::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); 
ThrustVolatileAsyncAllocator alloc(this); From 154ffd467127ab9eb92649e78cbd2fdfa90f6f68 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 16:33:57 +0100 Subject: [PATCH 0226/1914] GPU: Plenty of clang-format fixes --- GPU/GPUTracking/Base/GPUParam.h | 8 +-- GPU/GPUTracking/Base/GPUReconstruction.h | 4 +- .../Base/GPUReconstructionConvert.cxx | 2 +- .../Base/GPUReconstructionProcessing.h | 2 +- .../Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/DataTypes/GPUOutputControl.h | 2 +- GPU/GPUTracking/DataTypes/GPUSettings.h | 6 +- GPU/GPUTracking/DataTypes/GPUTRDTrack.h | 6 +- GPU/GPUTracking/Global/GPUChain.h | 2 +- GPU/GPUTracking/Global/GPUChainTracking.h | 2 +- .../Global/GPUChainTrackingClusterizer.cxx | 3 +- .../Global/GPUChainTrackingRefit.cxx | 4 +- GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- .../GPUO2InterfaceConfigurableParam.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 16 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 6 +- .../Merger/GPUTPCGMPolynomialFieldManager.h | 4 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- .../SectorTracker/GPUTPCTracklet.h | 2 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 12 ++-- .../GPUTPCCFNoiseSuppression.cxx | 4 +- .../TRDTracking/GPUTRDInterfaces.h | 4 +- GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h | 18 +++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 24 ++++--- GPU/GPUTracking/TRDTracking/GPUTRDTracker.h | 62 +++++++++---------- .../TRDTracking/GPUTRDTrackletWord.h | 2 +- .../display/frontend/GPUDisplayFrontend.h | 8 +-- .../frontend/GPUDisplayFrontendWindows.cxx | 12 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 6 +- GPU/GPUTracking/qa/genEvents.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 +-- GPU/GPUTracking/utils/threadserver.h | 4 +- GPU/GPUbenchmark/cuda/Kernels.cu | 6 +- GPU/TPCFastTransformation/BandMatrixSolver.h | 2 +- GPU/TPCFastTransformation/ChebyshevFit1D.cxx | 4 +- .../CorrectionMapsHelper.h | 28 
++++----- .../NDPiecewisePolynomials.h | 14 ++--- GPU/TPCFastTransformation/Spline1DHelper.cxx | 8 +-- GPU/TPCFastTransformation/Spline1DHelperOld.h | 20 +++--- GPU/TPCFastTransformation/Spline1DSpec.cxx | 2 +- GPU/TPCFastTransformation/Spline1DSpec.h | 12 ++-- GPU/TPCFastTransformation/Spline2DSpec.cxx | 2 +- GPU/TPCFastTransformation/SplineHelper.cxx | 44 ++++++------- GPU/TPCFastTransformation/SplineHelper.h | 6 +- GPU/TPCFastTransformation/SplineSpec.h | 14 ++--- .../TPCFastSpaceChargeCorrection.h | 2 +- .../IrregularSpline2D3DCalibrator.cxx | 2 +- .../devtools/RegularSpline1D.h | 12 ++-- .../devtools/SemiregularSpline2D3D.cxx | 8 +-- .../devtools/SemiregularSpline2D3D.h | 32 +++++----- .../test/testMultivarPolynomials.cxx | 4 +- GPU/Utils/FlatObject.h | 12 ++-- GPU/Workflow/helper/src/GPUWorkflowHelper.cxx | 26 ++++---- 54 files changed, 250 insertions(+), 255 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 9bdf705dfeb59..fbce6246de112 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -59,10 +59,10 @@ struct GPUParam_t { int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; - GPUTPCGeometry tpcGeometry; // TPC Geometry - GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM - const uint32_t* occupancyMap; // Ptr to TPC occupancy map - uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) + GPUTPCGeometry tpcGeometry; // TPC Geometry + GPUTPCGMPolynomialField polynomialField; // Polynomial approx. 
of magnetic field for TPC GM + const uint32_t* occupancyMap; // Ptr to TPC occupancy map + uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) GPUParamSector SectorParam[GPUCA_NSECTORS]; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index f363f3f58aa6f..5e03c77f08230 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -52,7 +52,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent; class threadContext; -} +} // namespace gpu_reconstruction_kernels class GPUReconstruction { @@ -193,7 +193,7 @@ class GPUReconstruction bool IsInitialized() const { return mInitialized; } void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr); void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr); - void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() + void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings() void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr); void UpdateDynamicSettings(const GPUSettingsRecDynamic* d); diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index 8f5cab6807050..bc760f6188caa 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -923,7 +923,7 @@ void zsEncoderDenseLinkBased::decodePage(std::vector& outputBuff if (decLinkX & 0b00100000) { bitmaskL2.set(); } else { - bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) 
| (uint16_t) * ((const uint8_t*)decPagePtr)); + bitmaskL2 = std::bitset<10>(((((uint16_t)decLinkX) & 0b11000000) << 2) | (uint16_t)*((const uint8_t*)decPagePtr)); decPagePtr += sizeof(uint8_t); } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4ccfb9ff10311..43560616782db 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -28,7 +28,7 @@ namespace gpu_reconstruction_kernels { struct deviceEvent { constexpr deviceEvent() = default; - constexpr deviceEvent(std::nullptr_t p) : v(nullptr){}; + constexpr deviceEvent(std::nullptr_t p) : v(nullptr) {}; template void set(T val) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d0d5ef4680fac..202edd49bc44c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -496,7 +496,7 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, for (int32_t k = 0; k < nEvents; k++) { GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[stream], evList[k].get(), 0)); } - GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost, mInternals->Streams[stream])); + GPUChkErr(cudaMemcpyAsync(dst, src, size, toGPU == -2 ? cudaMemcpyDeviceToDevice : (toGPU ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToHost), mInternals->Streams[stream])); } if (ev) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream == -1 ? 
0 : stream])); diff --git a/GPU/GPUTracking/DataTypes/GPUOutputControl.h b/GPU/GPUTracking/DataTypes/GPUOutputControl.h index 799fd25330ab4..0495f7ed1d0ff 100644 --- a/GPU/GPUTracking/DataTypes/GPUOutputControl.h +++ b/GPU/GPUTracking/DataTypes/GPUOutputControl.h @@ -78,7 +78,7 @@ struct GPUTrackingOutputs { static constexpr size_t count() { return sizeof(GPUTrackingOutputs) / sizeof(GPUOutputControl); } GPUOutputControl* asArray() { return (GPUOutputControl*)this; } size_t getIndex(const GPUOutputControl& v) { return &v - (const GPUOutputControl*)this; } - static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::*v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } + static int32_t getIndex(GPUOutputControl GPUTrackingOutputs::* v) { return &(((GPUTrackingOutputs*)(0x10000))->*v) - (GPUOutputControl*)(0x10000); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUSettings.h b/GPU/GPUTracking/DataTypes/GPUSettings.h index 05888770ef9e5..c81a8e20e9926 100644 --- a/GPU/GPUTracking/DataTypes/GPUSettings.h +++ b/GPU/GPUTracking/DataTypes/GPUSettings.h @@ -73,9 +73,9 @@ struct GPUSettingsTF { // Settings defining the setup of the GPUReconstruction processing (basically selecting the device / class instance) struct GPUSettingsDeviceBackend { - uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. CPU / CUDA - uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU - GPUReconstruction* master = nullptr; // GPUReconstruction master object + uint32_t deviceType = GPUDataTypes::DeviceType::CPU; // Device type, shall use GPUDataTypes::DEVICE_TYPE constants, e.g. 
CPU / CUDA + uint8_t forceDeviceType = 1; // Fail if device initialization fails, otherwise falls back to CPU + GPUReconstruction* master = nullptr; // GPUReconstruction master object }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h index 18f7c61e01fc3..b358e8b82d480 100644 --- a/GPU/GPUTracking/DataTypes/GPUTRDTrack.h +++ b/GPU/GPUTracking/DataTypes/GPUTRDTrack.h @@ -107,9 +107,9 @@ class GPUTRDTrack_t : public T GPUd() void setHasPadrowCrossing() { mIsCrossingNeighbor |= (1U << 7); } protected: - float mChi2; // total chi2. - float mSignal{-1.f}; // electron Likelihood for track - uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) + float mChi2; // total chi2. + float mSignal{-1.f}; // electron Likelihood for track + uint32_t mRefGlobalTrackId; // raw GlobalTrackID of the seeding track (either ITS-TPC or TPC) int32_t mAttachedTracklets[kNLayers]; // indices of the tracklets attached to this track; -1 means no tracklet in that layer int16_t mCollisionId; // the collision ID of the tracklets attached to this track; is used to retrieve the BC information for this track after the tracking is done uint8_t mFlags; // bits 0 to 5 indicate whether track is findable in layer 0 to 5, bit 6 indicates an ambiguous track and bit 7 flags if the track is stopped in the TRD diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index fff5d2efe0270..290ae32cafca8 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -46,7 +46,7 @@ class GPUChain virtual int32_t Finalize() = 0; virtual int32_t RunChain() = 0; virtual void MemorySize(size_t& gpuMem, size_t& pageLockedHostMem) = 0; - virtual void PrintMemoryStatistics(){}; + virtual void PrintMemoryStatistics() {}; virtual int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector>* fillErrors = nullptr) { return 0; } virtual 
bool SupportsDoublePipeline() { return false; } virtual int32_t FinalizePipelinedProcessing() { return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 492ee65d1c9c1..5779cec31130c 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -43,7 +43,7 @@ class MatLayerCylSet; namespace o2::gpu { -//class GPUTRDTrackerGPU; +// class GPUTRDTrackerGPU; class GPUTPCGPUTracker; class GPUDisplayInterface; class GPUQA; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 63d56da37595b..a48050a6cacbc 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -154,8 +154,7 @@ std::pair GPUChainTracking::TPCClusterizerDecodeZSCount(uint uint32_t endpointAdcSamples[GPUTrackingInOutZS::NENDPOINTS]; memset(endpointAdcSamples, 0, sizeof(endpointAdcSamples)); bool doGPU = mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding; - int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : (mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) - : 0; + int32_t firstHBF = (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasTfStartOrbit) ? mIOPtrs.settingsTF->tfStartOrbit : ((mIOPtrs.tpcZS->sector[iSector].count[0] && mIOPtrs.tpcZS->sector[iSector].nZSPtr[0][0]) ? 
o2::raw::RDHUtils::getHeartBeatOrbit(*(const o2::header::RAWDataHeader*)mIOPtrs.tpcZS->sector[iSector].zsPtr[0][0]) : 0); for (uint16_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { #ifndef GPUCA_NO_VC diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 9e7085b31849e..8d1efd7011227 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -31,13 +31,13 @@ int32_t GPUChainTracking::RunRefit() RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); - //TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } else { runKernel(GetGrid(mIOPtrs.nMergedTracks, 0)); } - //TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); + // TransferMemoryResourcesToHost(RecoStep::Refit, &Refit, 0); SynchronizeStream(0); return 0; } diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 34cd5b7280dc3..4dac56afed671 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -46,7 +46,7 @@ struct GPUO2Interface_Internals { }; } // namespace o2::gpu -GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals){}; +GPUO2Interface::GPUO2Interface() : mInternals(new GPUO2Interface_Internals) {}; GPUO2Interface::~GPUO2Interface() { Deinitialize(); } diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h index 
425c8b880b4e3..ebb426b7a8cfe 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceConfigurableParam.h @@ -50,7 +50,7 @@ #define AddSubConfig(name, instance) #define BeginSubConfig(name, instance, parent, preoptname, preoptnameshort, descr, o2prefix) \ struct GPUCA_M_CAT(GPUConfigurableParam, name) : public o2::conf::ConfigurableParamHelper { \ - O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: + O2ParamDef(GPUCA_M_CAT(GPUConfigurableParam, name), GPUCA_M_STR(GPUCA_M_CAT(GPU_, o2prefix))) public: #define BeginHiddenConfig(name, instance) struct GPUCA_M_CAT(GPUConfigurableParam, name) { #define EndConfig() \ } \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 64d8549312736..578fe1eeb4ca7 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -106,17 +106,17 @@ class GPUTPCGMMergedTrack GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } private: - GPUTPCGMTrackParam mParam; //* fitted track parameters + GPUTPCGMTrackParam mParam; //* fitted track parameters gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param - float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z - uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays + float mAlpha; //* alpha angle + float mLastX; //* outer X + float mLastY; //* outer Y + float mLastZ; //* outer Z + uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit - uint32_t mNClusters; //* number of track clusters - uint32_t mNClustersFitted; //* number of clusters used in fit + uint32_t mNClusters; //* number of track clusters + uint32_t mNClustersFitted; //* number of clusters used in fit uint8_t 
mFlags; uint8_t mLegs; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index e6312d767a496..1c2a8e2b29a9c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -2140,7 +2140,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } const float d2xy = CAMath::Sum2(params[i].x - params[j].x, params[i].y - params[j].y); if (d2xy > 15.f) { - //bs |= 1; + // bs |= 1; continue; } const auto& trk1 = mOutputTracks[params[i].id]; @@ -2148,7 +2148,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { - //bs |= 2; + // bs |= 2; continue; } @@ -2170,7 +2170,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, } } if (!dzcorrok) { - //bs |= 4; + // bs |= 4; continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h index 88f0882a79f03..4a608fcc97068 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.h @@ -39,11 +39,11 @@ class GPUTPCGMPolynomialFieldManager GPUTPCGMPolynomialFieldManager() = default; /* Get appropriate pre-calculated polynomial field for the given field value nominalFieldkG - */ + */ static int32_t GetPolynomialField(float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); /* Get pre-calculated polynomial field of type "type", scaled with respect to nominalFieldkG - */ + */ static int32_t GetPolynomialField(StoredField_t type, float nominalFieldkG, o2::gpu::GPUTPCGMPolynomialField& field); }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx 
b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index e15d6fe8b17bd..0c171a74d4e42 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -716,7 +716,7 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict const float ImP1 = mP[1] + Ik11 * Iz1; const float ImC0 = mC[0] - Ik00 * mC[0]; const float ImC2 = mC[2] - Ik11 * mC[2]; - //printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); + // printf("\t%21sInterpo ----- abde artaf%16s Y %8.3f, Z %8.3f (Errors %f <-- (%f, %f) %f <-- (%f, %f))\n", "", "", ImP0, ImP1, sqrtf(ImC0), sqrtf(mC[0]), sqrtf(inter->errorY), sqrtf(ImC2), sqrtf(mC[2]), sqrtf(inter->errorZ)); const float Jz0 = posY - ImP0; const float Jz1 = posZ - ImP1; const float Jw0 = 1.f / (ImC0 + err2Y); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 9d10d40107b8f..502a70cb57762 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -256,7 +256,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov int32_t nAvgCharge = 0; for (int32_t i = start; i != stop; i += cl ? 0 : direction) { - float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise + float x = 0, y = 0, z = 0, charge = 0; // FIXME: initialization unneeded, but GCC incorrectly produces uninitialized warnings otherwise float time = 0.f, invCharge = 0.f, invSqrtCharge = 0.f; // Same here... 
int32_t clusters = 0; while (true) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h index 10ff0a32aeaf3..5bb63d6a10254 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracklet.h @@ -30,7 +30,7 @@ class GPUTPCTracklet { public: #if !defined(GPUCA_GPUCODE) - GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0){}; + GPUTPCTracklet() : mFirstRow(0), mLastRow(0), mParam(), mHitWeight(0), mFirstHit(0) {}; #endif //! GPUCA_GPUCODE GPUhd() int32_t FirstRow() const { return mFirstRow; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index e7634fa397bae..f1fd95d696f5d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -101,7 +101,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared const int32_t nRows = (endpoint & 1) ? (s.nRowsRegion - s.nRowsRegion / 2) : (s.nRowsRegion / 2); for (int32_t l = 0; l < hdr->nTimeBinSpan; l++) { // TODO: Parallelize over time bins - pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment + pagePtr += (pagePtr - page) & 1; // Ensure 16 bit alignment const TPCZSTBHDR* tbHdr = reinterpret_cast(pagePtr); if ((tbHdr->rowMask & 0x7FFF) == 0) { pagePtr += 2; @@ -324,8 +324,8 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) } else { // ! 
TPCZSHDRV2::TIGHTLY_PACKED_V3 uint32_t rawFECChannel = 0; @@ -705,7 +705,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread( #define PEEK_OVERFLOW(pagePtr, offset) \ (*(PayloadExtendsToNextPage && (pagePtr) < nextPage && (pagePtr) + (offset) >= payloadEnd \ - ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset)-payloadEnd) \ + ? nextPage + sizeof(header::RAWDataHeader) + ((pagePtr) + (offset) - payloadEnd) \ : (pagePtr) + (offset))) #define TEST_BIT(x, bit) static_cast((x) & (1 << (bit))) @@ -931,8 +931,8 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread( bits -= DECODE_BITS; nSamplesWritten++; rawFECChannel++; // Ensure we don't decode same channel twice - } // while (bits >= DECODE_BITS) - } // while (nSamplesWritten < nAdc) + } // while (bits >= DECODE_BITS) + } // while (nSamplesWritten < nAdc) assert(PayloadExtendsToNextPage || adcData <= page); assert(PayloadExtendsToNextPage || page <= payloadEnd); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx index 05fddda5bec68..f3a914cbfcaee 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx @@ -316,8 +316,8 @@ GPUd() void GPUTPCCFNoiseSuppression::findMinimaAndPeaks( uint8_t* bufp = (uint8_t*)buf; /************************************** - * Look for peaks - **************************************/ + * Look for peaks + **************************************/ CfUtils::blockLoad( peakMap, diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index 24624e60ceba7..f6b8bea29822a 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -45,7 +45,7 @@ class propagatorInterface { public: typedef o2::base::Propagator propagatorParam; - GPUd() propagatorInterface(const propagatorParam* prop) 
: mProp(prop){}; + GPUd() propagatorInterface(const propagatorParam* prop) : mProp(prop) {}; GPUd() propagatorInterface(const propagatorInterface&) = delete; GPUd() propagatorInterface& operator=(const propagatorInterface&) = delete; @@ -200,7 +200,7 @@ class propagatorInterface : public GPUTPCGMPropagator } GPUd() bool propagateToX(float x, float maxSnp, float maxStep) { - //bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; + // bool ok = PropagateToXAlpha(x, GetAlpha(), true) == 0 ? true : false; int32_t retVal = PropagateToXAlpha(x, GetAlpha(), true); bool ok = (retVal == 0) ? true : false; ok = mTrack->CheckNumericalQuality(); diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h index 6a6e13fe84e36..ee7d7a30b1c55 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackData.h @@ -18,15 +18,15 @@ #define GPUTRDTRACKDATA_H struct GPUTRDTrackDataRecord { - float mAlpha; // azimuthal angle of reference frame - float fX; // x: radial distance - float fY; // local Y-coordinate of a track (cm) - float fZ; // local Z-coordinate of a track (cm) - float mSinPhi; // local sine of the track momentum azimuthal angle - float fTgl; // tangent of the track momentum dip angle - float fq1Pt; // 1/pt (1/(GeV/c)) - float fC[15]; // covariance matrix - int32_t fTPCTrackID; // id of corresponding TPC track + float mAlpha; // azimuthal angle of reference frame + float fX; // x: radial distance + float fY; // local Y-coordinate of a track (cm) + float fZ; // local Z-coordinate of a track (cm) + float mSinPhi; // local sine of the track momentum azimuthal angle + float fTgl; // tangent of the track momentum dip angle + float fq1Pt; // 1/pt (1/(GeV/c)) + float fC[15]; // covariance matrix + int32_t fTPCTrackID; // id of corresponding TPC track int32_t fAttachedTracklets[6]; // IDs for attached tracklets sorted by layer uint8_t mIsPadrowCrossing; // bits 0 to 5 
indicate whether a padrow was crossed diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index fa0711887f60f..c633f10adae38 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -12,7 +12,7 @@ /// \file GPUTRDTracker.cxx /// \author Ole Schmidt -//#define ENABLE_GPUTRDDEBUG +// #define ENABLE_GPUTRDDEBUG #define ENABLE_WARNING 0 #define ENABLE_INFO 0 @@ -326,7 +326,6 @@ GPUd() int32_t GPUTRDTracker_t::LoadTrack(const TRDTRK& trk, uint3 return (0); } - template GPUd() void GPUTRDTracker_t::DumpTracks() { @@ -439,19 +438,19 @@ GPUd() bool GPUTRDTracker_t::CalculateSpacePoints(int32_t iCollisi int32_t trkltIdxStart = trkltIdxOffset + iFirstTrackletInDet; for (int32_t trkltIdx = trkltIdxStart; trkltIdx < trkltIdxStart + nTrackletsInDet; ++trkltIdx) { int32_t trkltZbin = tracklets[trkltIdx].GetZbin(); - float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates - float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates + float xTrkltDet[3] = {0.f}; // trklt position in chamber coordinates + float xTrkltSec[3] = {0.f}; // trklt position in sector coordinates xTrkltDet[0] = mGeo->AnodePos() + sRadialOffset; xTrkltDet[1] = tracklets[trkltIdx].GetY(); xTrkltDet[2] = pp->GetRowPos(trkltZbin) - pp->GetRowSize(trkltZbin) / 2.f - pp->GetRowPos(pp->GetNrows() / 2); - //GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); + // GPUInfo("Space point local %i: x=%f, y=%f, z=%f", trkltIdx, xTrkltDet[0], xTrkltDet[1], xTrkltDet[2]); matrix->LocalToMaster(xTrkltDet, xTrkltSec); mSpacePoints[trkltIdx].setX(xTrkltSec[0]); mSpacePoints[trkltIdx].setY(xTrkltSec[1]); mSpacePoints[trkltIdx].setZ(xTrkltSec[2]); mSpacePoints[trkltIdx].setDy(tracklets[trkltIdx].GetdY()); - //GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), 
mSpacePoints[trkltIdx].getZ()); + // GPUInfo("Space point global %i: x=%f, y=%f, z=%f", trkltIdx, mSpacePoints[trkltIdx].getX(), mSpacePoints[trkltIdx].getY(), mSpacePoints[trkltIdx].getZ()); } } return result; @@ -475,10 +474,10 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK float zShiftTrk = 0.f; if (mProcessPerTimeFrame) { zShiftTrk = (mTrackAttribs[iTrk].mTime - GetConstantMem()->ioPtrs.trdTriggerTimes[collisionId]) * mTPCVdrift * mTrackAttribs[iTrk].mSide; - //float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; - // increase Z error based on time window - // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) - //t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned + // float addZerr = (mTrackAttribs[iTrk].mTimeAddMax + mTrackAttribs[iTrk].mTimeSubMax) * .5f * mTPCVdrift; + // increase Z error based on time window + // -> this is here since it was done before, but the efficiency seems to be better if the covariance is not updated (more tracklets are attached) + // t->updateCovZ2(addZerr * addZerr); // TODO check again once detailed performance study tools are available, maybe this can be tuned } const GPUTRDpadPlane* pad = nullptr; const GPUTRDTrackletWord* tracklets = GetConstantMem()->ioPtrs.trdTracklets; @@ -637,7 +636,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK } Hypothesis hypo(trkWork->getNlayersFindable(), iCandidate, trkltIdx, trkWork->getChi2() + chi2); InsertHypothesis(hypo, nCurrHypothesis, hypothesisIdxOffset); - } // end tracklet in window + } // end tracklet in window } // tracklet loop } // chamber loop @@ -723,7 +722,7 @@ GPUd() bool GPUTRDTracker_t::FollowProlongation(PROP* prop, TRDTRK #ifdef ENABLE_GPUTRDDEBUG prop->setTrack(&trackNoUp); 
prop->rotate(GetAlphaOfSector(trkltSec)); - //prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); + // prop->propagateToX(spacePoints[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].getX(), .8f, 2.f); prop->propagateToX(mR[tracklets[mHypothesis[iUpdate + hypothesisIdxOffset].mTrackletId].GetDetector()], .8f, 2.f); prop->setTrack(trkWork); #endif @@ -1103,7 +1102,6 @@ GPUd() bool GPUTRDTracker_t::IsGeoFindable(const TRDTRK* t, const return true; } - #ifndef GPUCA_GPUCODE namespace o2::gpu { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h index 29a9b529b0558..431fa357e8b89 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.h @@ -85,7 +85,7 @@ class GPUTRDTracker_t : public GPUProcessor int32_t mLayers; // number of layers with TRD space point int32_t mCandidateId; // to which track candidate the hypothesis belongs int32_t mTrackletId; // tracklet index to be used for update (global index within tracklet array) - float mChi2; // predicted chi2 for given space point + float mChi2; // predicted chi2 for given space point GPUd() float GetReducedChi2() { return mLayers > 0 ? 
mChi2 / mLayers : mChi2; } GPUd() Hypothesis() : mLayers(0), mCandidateId(-1), mTrackletId(-1), mChi2(9999.f) {} @@ -148,32 +148,32 @@ class GPUTRDTracker_t : public GPUProcessor GPUd() const typename PROP::propagatorParam* getPropagatorParam(); protected: - float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm - bool mIsInitialized; // flag is set upon initialization - bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker - bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis - int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms - float mAngleHistogramRange; // range of impact angles covered by each histogram - int16_t mMemoryPermanent; // memory id of permanent memory for the tracker - int16_t mMemoryTracklets; // memory id of memory for TRD tracklets - int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) - int32_t mNMaxCollisions; // max number of collisions to process (per time frame) - int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) - int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) - TRDTRK* mTracks; // array of trd-updated tracks - HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes - int32_t mNCandidates; // max. 
track hypothesis per layer - int32_t mNTracks; // number of TPC tracks to be matched - int32_t mNEvents; // number of processed events - int32_t mMaxBackendThreads; // maximum number of supported threads + float* mR; // radial position of each TRD chamber, alignment taken into account, radial spread within chambers < 7mm + bool mIsInitialized; // flag is set upon initialization + bool mGenerateSpacePoints; // if true, only tracklets are provided as input and they will be converted into space points by the tracker + bool mProcessPerTimeFrame; // if true, tracking is done per time frame instead of on a single events basis + int16_t mNAngleHistogramBins; // number of bins per chamber for the angular difference histograms + float mAngleHistogramRange; // range of impact angles covered by each histogram + int16_t mMemoryPermanent; // memory id of permanent memory for the tracker + int16_t mMemoryTracklets; // memory id of memory for TRD tracklets + int16_t mMemoryTracks; // memory id of memory for tracks (used for i/o) + int32_t mNMaxCollisions; // max number of collisions to process (per time frame) + int32_t mNMaxTracks; // max number of tracks the tracker can handle (per event) + int32_t mNMaxSpacePoints; // max number of space points hold by the tracker (per event) + TRDTRK* mTracks; // array of trd-updated tracks + HelperTrackAttributes* mTrackAttribs; // array with additional (transient) track attributes + int32_t mNCandidates; // max. 
track hypothesis per layer + int32_t mNTracks; // number of TPC tracks to be matched + int32_t mNEvents; // number of processed events + int32_t mMaxBackendThreads; // maximum number of supported threads // index of first tracklet for each chamber within tracklets array, last entry is total number of tracklets for given collision // the array has (kNChambers + 1) * numberOfCollisions entries // note, that for collision iColl one has to add an offset corresponding to the index of the first tracklet of iColl to the index stored in mTrackletIndexArray int32_t* mTrackletIndexArray; - Hypothesis* mHypothesis; // array with multiple track hypothesis - TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking - GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame - const GPUTRDGeometry* mGeo; // TRD geometry + Hypothesis* mHypothesis; // array with multiple track hypothesis + TRDTRK* mCandidates; // array of tracks for multiple hypothesis tracking + GPUTRDSpacePoint* mSpacePoints; // array with tracklet coordinates in global tracking frame + const GPUTRDGeometry* mGeo; // TRD geometry /// ---- error parametrization depending on magnetic field ---- float mRPhiA2; // parameterization for tracklet position resolution float mRPhiB; // parameterization for tracklet position resolution @@ -185,14 +185,14 @@ class GPUTRDTracker_t : public GPUProcessor float mAngleToDyB; // parameterization for conversion track angle -> tracklet deflection float mAngleToDyC; // parameterization for conversion track angle -> tracklet deflection /// ---- end error parametrization ---- - bool mDebugOutput; // store debug output - static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker - float mMaxEta; // TPC tracks with higher eta are ignored - float mRoadZ; // in z, a constant search road is used - float mZCorrCoefNRC; // 
tracklet z-position depends linearly on track dip angle - float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z - float mTPCTDriftOffset; // TPC drift time additive offset - GPUTRDTrackerDebug* mDebug; // debug output + bool mDebugOutput; // store debug output + static constexpr const float sRadialOffset = -0.1f; // due to (possible) mis-calibration of t0 -> will become obsolete when tracklet conversion is done outside of the tracker + float mMaxEta; // TPC tracks with higher eta are ignored + float mRoadZ; // in z, a constant search road is used + float mZCorrCoefNRC; // tracklet z-position depends linearly on track dip angle + float mTPCVdrift; // TPC drift velocity used for shifting TPC tracks along Z + float mTPCTDriftOffset; // TPC drift time additive offset + GPUTRDTrackerDebug* mDebug; // debug output }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h index fc874070ec9b8..cd7dfb9432b93 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackletWord.h @@ -82,7 +82,7 @@ namespace o2::gpu class GPUTRDTrackletWord : private o2::trd::Tracklet64 { public: - GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord){}; + GPUd() GPUTRDTrackletWord(uint64_t trackletWord = 0) : o2::trd::Tracklet64(trackletWord) {}; GPUdDefault() GPUTRDTrackletWord(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() GPUTRDTrackletWord& operator=(const GPUTRDTrackletWord& rhs) = default; GPUdDefault() ~GPUTRDTrackletWord() = default; diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h index ceb63e788564a..9087ec9a431f6 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.h @@ -138,12 +138,12 @@ class GPUDisplayFrontend : public 
GPUDisplayFrontendInterface std::unique_ptr mGUI; - void HandleKey(uint8_t key); // Callback for handling key presses - int32_t DrawGLScene(); // Callback to draw the GL scene - void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) + void HandleKey(uint8_t key); // Callback for handling key presses + int32_t DrawGLScene(); // Callback to draw the GL scene + void HandleSendKey(); // Optional callback to handle key press from external source (e.g. stdin by default) void ResizeScene(int32_t width, int32_t height); // Callback when GL window is resized int32_t InitDisplay(bool initFailure = false); // Callback to initialize the GL Display (to be called in StartDisplay) - void ExitDisplay(); // Callback to clean up the GL Display + void ExitDisplay(); // Callback to clean up the GL Display int32_t& drawTextFontSize(); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx index 8d48536e0a351..e511718e258f7 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWindows.cxx @@ -80,12 +80,12 @@ void KillGLWindow() // Properly Kill The Window BOOL CreateGLWindow(char* title, int32_t width, int32_t height, int32_t bits, bool fullscreenflag) { - GLuint PixelFormat; // Holds The Results After Searching For A Match - WNDCLASS wc; // Windows Class Structure - DWORD dwExStyle; // Window Extended Style - DWORD dwStyle; // Window Style - RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values - WindowRect.left = (int64_t)0; // Set Left Value To 0 + GLuint PixelFormat; // Holds The Results After Searching For A Match + WNDCLASS wc; // Windows Class Structure + DWORD dwExStyle; // Window Extended Style + DWORD dwStyle; // Window Style + RECT WindowRect; // Grabs Rectangle Upper Left / Lower Right Values + WindowRect.left = 
(int64_t)0; // Set Left Value To 0 WindowRect.right = (int64_t)width; // Set Right Value To Requested Width WindowRect.top = (int64_t)0; // Set Top Value To 0 WindowRect.bottom = (int64_t)height; // Set Bottom Value To Requested Height diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index 92da6bbac94e8..a7811c6fd55ed 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -91,7 +91,7 @@ class GPUTPCTrkLbl inline U computeLabel(float* labelWeight = nullptr, float* totalWeight = nullptr, int32_t* maxCount = nullptr) { if (mLabels.size() == 0) { - return U(); //default constructor creates NotSet label + return U(); // default constructor creates NotSet label } else { uint32_t bestLabelNum = 0, bestLabelCount = 0; for (uint32_t j = 0; j < mLabels.size(); j++) { @@ -133,10 +133,10 @@ struct GPUTPCTrkLbl_ret { template GPUTPCTrkLbl_ret(T){}; #ifdef GPUCA_TPC_GEOMETRY_O2 - GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()){}; + GPUTPCTrkLbl_ret(const MCCompLabel& a) : id(a.getTrackEventSourceID()) {}; #endif #ifdef GPUCA_STANDALONE - GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID){}; + GPUTPCTrkLbl_ret(const AliHLTTPCClusterMCWeight& a) : id(a.fMCID) {}; #endif void setFakeFlag() { diff --git a/GPU/GPUTracking/qa/genEvents.h b/GPU/GPUTracking/qa/genEvents.h index fb3c5f22d61ef..43b946e6238b2 100644 --- a/GPU/GPUTracking/qa/genEvents.h +++ b/GPU/GPUTracking/qa/genEvents.h @@ -31,7 +31,7 @@ class genEvents int32_t GenerateEvent(const GPUParam& sectorParam, char* filename) { return 1; } void FinishEventGenerator() {} - static void RunEventGenerator(GPUChainTracking* rec){}; + static void RunEventGenerator(GPUChainTracking* rec) {}; }; #else diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cd6267179c844..cdb41ec5813f2 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -32,8 +32,7 @@ namespace 
qConfig { #define QCONFIG_SETTING(name, type) \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ type v; \ constexpr qon_mxcat3(q, name, _t)(type s) : v(s) {} \ }; \ @@ -41,8 +40,7 @@ namespace qConfig #define QCONFIG_SETTING_TEMPLATE(name) \ template \ - struct qon_mxcat3(q, name, _t) \ - { \ + struct qon_mxcat3(q, name, _t) { \ T v; \ constexpr qon_mxcat3(q, name, _t)(const T& s) : v(s) {} \ }; \ @@ -68,7 +66,7 @@ static inline const char* getOptName(const char** argv, int32_t i) template struct qConfigSettings { - qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false){}; + qConfigSettings() : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(nullptr), allowEmpty(false) {}; template qConfigSettings(const qConfigSettings v) : checkMin(false), checkMax(false), doSet(false), doDefault(false), min(), max(), set(), message(v.message), allowEmpty(v.allowEmpty){}; bool checkMin, checkMax; diff --git a/GPU/GPUTracking/utils/threadserver.h b/GPU/GPUTracking/utils/threadserver.h index 606531f46f201..c8dfe831fd578 100644 --- a/GPU/GPUTracking/utils/threadserver.h +++ b/GPU/GPUTracking/utils/threadserver.h @@ -98,7 +98,7 @@ class qThreadCls qThreadParamCls& XthreadParam = *((qThreadParamCls*)&this->threadParam); XthreadParam.pCls = pCls; - XthreadParam.pFunc = (void (S::*)(void*))pFunc; + XthreadParam.pFunc = (void(S::*)(void*))pFunc; XthreadParam.threadNum = threadNum; XthreadParam.pinCPU = pinCPU; pthread_t thr; @@ -150,7 +150,7 @@ void* qThreadCls::qThreadWrapperCls(T* arg) sched_setaffinity(0, sizeof(tmp_mask), &tmp_mask); } - void (S::*pFunc)(T*) = (void (S::*)(T*))arg_A->pFunc; + void (S::*pFunc)(T*) = (void(S::*)(T*))arg_A->pFunc; (arg_A->pCls->*pFunc)(arg); arg_A->threadMutex[1].Unlock(); diff --git a/GPU/GPUbenchmark/cuda/Kernels.cu b/GPU/GPUbenchmark/cuda/Kernels.cu index 75799e4aa8c96..c309e7b2dbc5d 
100644 --- a/GPU/GPUbenchmark/cuda/Kernels.cu +++ b/GPU/GPUbenchmark/cuda/Kernels.cu @@ -666,9 +666,9 @@ void GPUbenchmark::runTest(Test test, Mode mode, KernelConfig config) } nThreads *= mOptions.threadPoolFraction; - void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value - void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a default value - void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value + void (*kernel)(chunk_t*, size_t) = &gpu::read_k; // Initialising to a default value + void (*kernel_distributed)(chunk_t**, size_t*) = &gpu::read_dist_k; // Initialising to a default value + void (*kernel_rand)(chunk_t*, size_t, int32_t) = &gpu::rand_read_k; // Initialising to a default value void (*kernel_rand_distributed)(chunk_t**, size_t*, int32_t) = &gpu::rand_read_dist_k; // Initialising to a default value bool is_random{false}; diff --git a/GPU/TPCFastTransformation/BandMatrixSolver.h b/GPU/TPCFastTransformation/BandMatrixSolver.h index f11f538e49275..7de44fe4b85e2 100644 --- a/GPU/TPCFastTransformation/BandMatrixSolver.h +++ b/GPU/TPCFastTransformation/BandMatrixSolver.h @@ -131,7 +131,7 @@ inline void BandMatrixSolver::triangulateBlock(double AA[], double b A[0] = c; // store 1/a[0][0] double* rowi = A + BandWidthT - 1; for (int32_t i = 1; i < m; i++) { // row 0+i - double ai = c * A[i]; // A[0][i] + double ai = c * A[i]; // A[0][i] for (int32_t j = i; j < m; j++) { rowi[j] -= ai * A[j]; // A[i][j] -= A[0][j]/A[0][0]*A[i][0] } diff --git a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx index d709e5b9af92d..3edd8f8f22e55 100644 --- a/GPU/TPCFastTransformation/ChebyshevFit1D.cxx +++ b/GPU/TPCFastTransformation/ChebyshevFit1D.cxx @@ -71,7 +71,7 @@ void ChebyshevFit1D::fit() mA[i * mN + j] = mA[j * mN + i]; } } - //print(); + // print(); { double* Ai = mA.data(); for (int32_t i = 0; i < mN; i++, Ai += mN) { @@ 
-88,7 +88,7 @@ void ChebyshevFit1D::fit() } mB[j] -= c * mB[i]; } - //print(); + // print(); } } { diff --git a/GPU/TPCFastTransformation/CorrectionMapsHelper.h b/GPU/TPCFastTransformation/CorrectionMapsHelper.h index 32ff6e1f06b10..46070b36e63b2 100644 --- a/GPU/TPCFastTransformation/CorrectionMapsHelper.h +++ b/GPU/TPCFastTransformation/CorrectionMapsHelper.h @@ -165,25 +165,25 @@ class CorrectionMapsHelper MapRefBit = 0x2, LumiBit = 0x4, MapMShapeBit = 0x10 }; - bool mOwner = false; // is content of pointers owned by the helper + bool mOwner = false; // is content of pointers owned by the helper bool mLumiCTPAvailable = false; // is CTP Lumi available // these 2 are global options, must be set by the workflow global options int32_t mLumiScaleType = -1; // use CTP Lumi (1) or TPCScaler (2) for the correction scaling, 0 - no scaling int32_t mLumiScaleMode = -1; // scaling-mode of the correciton maps int32_t mUpdatedFlags = 0; - float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) - float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling - float mMeanLumi = 0.; // mean luminosity of the map (a.u) used for TPC corrections scaling - float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference - float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi - float mMeanLumiOverride = -1.f; // optional value to override mean lumi - float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi - float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP - bool mEnableMShape = false; ///< use v shape correction - bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again - o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform - o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform - o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for 
v-shape distortions on A-side + float mInstLumiCTP = 0.; // instanteneous luminosity from CTP (a.u) + float mInstLumi = 0.; // instanteneous luminosity (a.u) used for TPC corrections scaling + float mMeanLumi = 0.; // mean luminosity of the map (a.u) used for TPC corrections scaling + float mMeanLumiRef = 0.; // mean luminosity of the ref map (a.u) used for TPC corrections scaling reference + float mLumiScale = 0.; // precalculated mInstLumi/mMeanLumi + float mMeanLumiOverride = -1.f; // optional value to override mean lumi + float mMeanLumiRefOverride = -1.f; // optional value to override ref mean lumi + float mInstCTPLumiOverride = -1.f; // optional value to override inst lumi from CTP + bool mEnableMShape = false; ///< use v shape correction + bool mScaleInverse{false}; // if set to false the inverse correction is already scaled and will not scaled again + o2::gpu::TPCFastTransform* mCorrMap{nullptr}; // current transform + o2::gpu::TPCFastTransform* mCorrMapRef{nullptr}; // reference transform + o2::gpu::TPCFastTransform* mCorrMapMShape{nullptr}; // correction map for v-shape distortions on A-side ClassDefNV(CorrectionMapsHelper, 6); }; diff --git a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h index 506cd39b519af..e750bffd28f4b 100644 --- a/GPU/TPCFastTransformation/NDPiecewisePolynomials.h +++ b/GPU/TPCFastTransformation/NDPiecewisePolynomials.h @@ -48,13 +48,13 @@ struct NDPiecewisePolynomialContainer { /// for ROOT I/O NDPiecewisePolynomialContainer() = default; - const uint32_t mDim{}; ///< number of dimensions of the polynomial - const uint32_t mDegree{}; ///< degree of the polynomials - const std::vector mParams{}; ///< parameters of the polynomial - const bool mInteractionOnly{}; ///< consider only interaction terms - const std::vector mMin{}; ///< min vertices positions of the grid - const std::vector mMax{}; ///< max vertices positions of the grid - const std::vector mN{}; ///< number of 
vertices for each dimension + const uint32_t mDim{}; ///< number of dimensions of the polynomial + const uint32_t mDegree{}; ///< degree of the polynomials + const std::vector mParams{}; ///< parameters of the polynomial + const bool mInteractionOnly{}; ///< consider only interaction terms + const std::vector mMin{}; ///< min vertices positions of the grid + const std::vector mMax{}; ///< max vertices positions of the grid + const std::vector mN{}; ///< number of vertices for each dimension }; #endif diff --git a/GPU/TPCFastTransformation/Spline1DHelper.cxx b/GPU/TPCFastTransformation/Spline1DHelper.cxx index 938604bb9172d..9177c67d8b87b 100644 --- a/GPU/TPCFastTransformation/Spline1DHelper.cxx +++ b/GPU/TPCFastTransformation/Spline1DHelper.cxx @@ -606,7 +606,7 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) Spline1D spline2(spline1); spline1.approximateFunction(0., TMath::Pi(), F, nAuxiliaryPoints); - //if (itry == 0) + // if (itry == 0) { TFile outf("testSpline1D.root", "recreate"); if (outf.IsZombie()) { @@ -731,9 +731,9 @@ int32_t Spline1DHelper::test(const bool draw, const bool drawDataPoints) } } // draw } - //delete canv; - //delete nt; - //delete knots; + // delete canv; + // delete nt; + // delete knots; statDf1 = sqrt(statDf1 / statN); statDf2 = sqrt(statDf2 / statN); diff --git a/GPU/TPCFastTransformation/Spline1DHelperOld.h b/GPU/TPCFastTransformation/Spline1DHelperOld.h index fc8d33ad64f87..971541e03258e 100644 --- a/GPU/TPCFastTransformation/Spline1DHelperOld.h +++ b/GPU/TPCFastTransformation/Spline1DHelperOld.h @@ -41,13 +41,13 @@ class Spline1DHelperOld /// \brief Helper structure for 1D spline construction /// struct DataPoint { - double u; ///< u coordinate - double cS0; ///< a coefficient for s0 - double cZ0; ///< a coefficient for s'0 - double cS1; ///< a coefficient for s1 - double cZ1; ///< a coefficient for s'1 + double u; ///< u coordinate + double cS0; ///< a coefficient for s0 + double cZ0; ///< a coefficient 
for s'0 + double cS1; ///< a coefficient for s1 + double cZ1; ///< a coefficient for s'1 int32_t iKnot; ///< index of the left knot of the segment - bool isKnot; ///< is the point placed at a knot + bool isKnot; ///< is the point placed at a knot }; /// _____________ Constructors / destructors __________________________ @@ -160,11 +160,11 @@ class Spline1DHelperOld /// helpers for the construction of 1D spline - Spline1D mSpline; ///< copy of the spline - int32_t mFdimensions; ///< n of F dimensions - std::vector mDataPoints; ///< measurement points + Spline1D mSpline; ///< copy of the spline + int32_t mFdimensions; ///< n of F dimensions + std::vector mDataPoints; ///< measurement points std::vector mKnotDataPoints; ///< which measurement points are at knots - std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method + std::vector mLSMmatrixFull; ///< a matrix to convert the measurements into the spline parameters with the LSM method std::vector mLSMmatrixSderivatives; std::vector mLSMmatrixSvalues; diff --git a/GPU/TPCFastTransformation/Spline1DSpec.cxx b/GPU/TPCFastTransformation/Spline1DSpec.cxx index 603013d5e0808..0d33cdc88010a 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline1DSpec.cxx @@ -144,7 +144,7 @@ void Spline1DContainer::recreate(int32_t nYdim, int32_t numberOfKnots, co } } -#endif //GPUCA_GPUCODE +#endif // GPUCA_GPUCODE template void Spline1DContainer::print() const diff --git a/GPU/TPCFastTransformation/Spline1DSpec.h b/GPU/TPCFastTransformation/Spline1DSpec.h index 1ed1cc322ede3..6462f291d1136 100644 --- a/GPU/TPCFastTransformation/Spline1DSpec.h +++ b/GPU/TPCFastTransformation/Spline1DSpec.h @@ -211,13 +211,13 @@ class Spline1DContainer : public FlatObject /// _____________ Data members ____________ - int32_t mYdim = 0; ///< dimentionality of F - int32_t mNumberOfKnots = 0; ///< n knots on the grid - int32_t mUmax = 0; ///< U of the last knot 
- DataT mXmin = 0; ///< X of the first knot - DataT mXtoUscale = 0; ///< a scaling factor to convert X to U + int32_t mYdim = 0; ///< dimentionality of F + int32_t mNumberOfKnots = 0; ///< n knots on the grid + int32_t mUmax = 0; ///< U of the last knot + DataT mXmin = 0; ///< X of the first knot + DataT mXtoUscale = 0; ///< a scaling factor to convert X to U int32_t* mUtoKnotMap = nullptr; //! (transient!!) pointer to (integer U -> knot index) map inside the mFlatBufferPtr array - DataT* mParameters = nullptr; //! (transient!!) pointer to F-dependent parameters inside the mFlatBufferPtr array + DataT* mParameters = nullptr; //! (transient!!) pointer to F-dependent parameters inside the mFlatBufferPtr array ClassDefNV(Spline1DContainer, 1); }; diff --git a/GPU/TPCFastTransformation/Spline2DSpec.cxx b/GPU/TPCFastTransformation/Spline2DSpec.cxx index 4571110bdedaa..055530b9314c2 100644 --- a/GPU/TPCFastTransformation/Spline2DSpec.cxx +++ b/GPU/TPCFastTransformation/Spline2DSpec.cxx @@ -66,7 +66,7 @@ void Spline2DContainer::setActualBufferAddress(char* actualFlatBufferPtr) mParameters = nullptr; parametersOffset = alignSize(u2Offset + mGridX2.getFlatBufferSize(), getParameterAlignmentBytes()); - //bufferSize = parametersOffset + getSizeOfParameters(); + // bufferSize = parametersOffset + getSizeOfParameters(); mParameters = reinterpret_cast(mFlatBufferPtr + parametersOffset); mGridX1.setActualBufferAddress(mFlatBufferPtr); diff --git a/GPU/TPCFastTransformation/SplineHelper.cxx b/GPU/TPCFastTransformation/SplineHelper.cxx index b0d1f4348ca60..6e1b53510e0d0 100644 --- a/GPU/TPCFastTransformation/SplineHelper.cxx +++ b/GPU/TPCFastTransformation/SplineHelper.cxx @@ -64,8 +64,8 @@ int32_t SplineHelper::pointstoarray(const int32_t indices[], const int32_ } //////////////// -//arraytopoints -// HILFSFUNKTION +// arraytopoints +// HILFSFUNKTION template int32_t SplineHelper::arraytopoints(int32_t point, int32_t result[], const int32_t numbers[], int32_t dim) { @@ -133,8 
+133,8 @@ void SplineHelper::approximateFunction( } // end for all DataPoints d // END MY VERSION - //std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); - //DUMYY VERSION Commented out + // std::vector dataPointF(getNumberOfDataPoints() * mFdimensions); + // DUMYY VERSION Commented out /* for (int32_t i = 0; i < getNumberOfDataPoints() * mFdimensions; i++) { dataPointF[i] = 1.; } */ @@ -250,11 +250,11 @@ void SplineHelper::approximateFunction( // TO BE REMOVED TEST: // LOG(info) << "number of paramtertypes per knot : " << numberOfParameterTypes << ", "; - std::unique_ptr allParameters[numberOfParameterTypes]; //Array for the different parametertypes s, s'u, s'v, s''uv,... + std::unique_ptr allParameters[numberOfParameterTypes]; // Array for the different parametertypes s, s'u, s'v, s''uv,... for (int32_t i = 0; i < numberOfParameterTypes; i++) { - allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); //To-Do:Fdim!! + allParameters[i] = std::unique_ptr(new double[numberOfAllDataPoints * mFdimensions]); // To-Do:Fdim!! } - //filling allParameters[0] and FParameters with s: + // filling allParameters[0] and FParameters with s: for (int32_t i = 0; i < numberOfAllDataPoints; i++) { for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions allParameters[0][i * mFdimensions + f] = DataPointF[i * mFdimensions + f]; // TO DO - Just get the pointer adress there PLEASE! 
@@ -273,24 +273,24 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions // WORKAROUND Getting Knotindices: knotindices[j] = p0indices[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(p0indices[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { // for all f-dimensions get function values into Fparameters - Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + f] = DataPointF[i * mFdimensions + f]; /// write derivatives in FParameters } } // end if isKnot } // end i (filling DataPointF Values into allParameters[0] and FParameters) // now: allParameters[0] = dataPointF; - //Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); + // Array for input DataPointF-values for Spline1D::approximateFunctionGradually(...); std::unique_ptr dataPointF1D[mXdimensions]; for (int32_t i = 0; i < mXdimensions; i++) { dataPointF1D[i] = std::unique_ptr(new double[numberOfDataPoints[i] * mFdimensions]); // To-Do:Fdim!! For s and derivetives at all knots. 
} - //Array to be filled by Spline1D::approximateFunctionGradually(...); + // Array to be filled by Spline1D::approximateFunctionGradually(...); std::unique_ptr par[mXdimensions]; std::unique_ptr parD[mXdimensions]; @@ -301,7 +301,7 @@ void SplineHelper::approximateFunction( // LOG(info) << "NumberOfParameters: " << mNumberOfParameters ; - //STARTING MAIN-LOOP, for all Parametertypes: + // STARTING MAIN-LOOP, for all Parametertypes: for (int32_t p = 1; p < numberOfParameterTypes; p++) { // p = 1!! Wir kriegen s (p0) durch approximateFunction()oben int32_t dimension = 0; // find the dimension for approximation for (int32_t i = (int32_t)(log2f((float)p)); i >= 0; i--) { @@ -366,9 +366,9 @@ void SplineHelper::approximateFunction( for (int32_t i = 0; i < mXdimensions; i++) { redistributionindex[i] = startpoint[i]; } - //redistributing the derivatives at dimension-Knots into array p + // redistributing the derivatives at dimension-Knots into array p for (int32_t i = 0; i < numberOfKnots[dimension]; i++) { // for all dimension-Knots - redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); //find the indices + redistributionindex[dimension] = mHelpers[dimension].getKnotDataPoint(i); // find the indices int32_t finalposition = pointstoarray(redistributionindex, numberOfDataPoints, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { @@ -380,7 +380,7 @@ void SplineHelper::approximateFunction( if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; break; - } //noch mal checken!! Das muss noch anders!! + } // noch mal checken!! Das muss noch anders!! 
} if (isKnot) { // for all knots @@ -388,20 +388,20 @@ void SplineHelper::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate Knotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } // get the knotindexvalue for FParameters: int32_t knotind = pointstoarray(knotindices, numberOfKnots, mXdimensions); for (int32_t f = 0; f < mFdimensions; f++) { - Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; ///write derivatives in FParameters + Fparameters[knotind * numberOfParameterTypes * mFdimensions + p * mFdimensions + f] = par[dimension][2 * i * mFdimensions + mFdimensions + f]; /// write derivatives in FParameters } } } // end for all fknots (for redistribution) // recalculation: for (int32_t i = 0; i < numberOfDataPoints[dimension]; i++) { // this is somehow still redundant// TO DO: ONLY PART OF approximateFunction WHERE NDIM is considerd!! - redistributionindex[dimension] = i; // getting current datapointindices - bool isKnot = 1; // check is current datapoint a knot? + redistributionindex[dimension] = i; // getting current datapointindices + bool isKnot = 1; // check is current datapoint a knot? 
for (int32_t j = 0; j < mXdimensions; j++) { if (!mHelpers[j].getDataPoint(redistributionindex[j]).isKnot) { isKnot = 0; @@ -410,7 +410,7 @@ void SplineHelper::approximateFunction( } double splineF[mFdimensions]; double u = mHelpers[dimension].getDataPoint(i).u; - mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); //recalculate at all datapoints of dimension + mHelpers[dimension].getSpline().interpolateU(mFdimensions, parD[dimension].get(), u, splineF); // recalculate at all datapoints of dimension for (int32_t dim = 0; dim < mFdimensions; dim++) { // writing it in allParameters // LOG(info)<::approximateFunction( for (int32_t j = 0; j < mXdimensions; j++) { // calculate KNotindices for all dimensions knotindices[j] = redistributionindex[j] / ((numberOfDataPoints[j] - 1) / (numberOfKnots[j] - 1)); - //knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück + // knotindices[j] = mHelpers[j].getDataPoint(redistributionindex[j]).iKnot; //in der Annahme der wert ist ein Knotenindex und falls der datapoint ein knoten ist, gibt er seinen eigenen knotenindex zurück } int32_t currentknotarrayindex = pointstoarray(knotindices, numberOfKnots, mXdimensions); // getting the recalculated value into FParameters: @@ -433,7 +433,7 @@ void SplineHelper::approximateFunction( } // end recalculation } // end of all1DSplines } // end of for parametertypes -} //end of approxymateFunction MYVERSION! +} // end of approxymateFunction MYVERSION! 
template int32_t SplineHelper::test(const bool draw, const bool drawDataPoints) diff --git a/GPU/TPCFastTransformation/SplineHelper.h b/GPU/TPCFastTransformation/SplineHelper.h index 986297e368aab..8c99e8113864f 100644 --- a/GPU/TPCFastTransformation/SplineHelper.h +++ b/GPU/TPCFastTransformation/SplineHelper.h @@ -106,9 +106,9 @@ class SplineHelper /// Stores an error message int32_t storeError(Int_t code, const char* msg); - TString mError = ""; ///< error string - int32_t mXdimensions; ///< number of X dimensions - int32_t mFdimensions; ///< number of F dimensions + TString mError = ""; ///< error string + int32_t mXdimensions; ///< number of X dimensions + int32_t mFdimensions; ///< number of F dimensions int32_t mNumberOfParameters; ///< number of parameters int32_t mNumberOfDataPoints; ///< number of data points std::vector> mHelpers; diff --git a/GPU/TPCFastTransformation/SplineSpec.h b/GPU/TPCFastTransformation/SplineSpec.h index dae17b22f42ea..1af427dee503b 100644 --- a/GPU/TPCFastTransformation/SplineSpec.h +++ b/GPU/TPCFastTransformation/SplineSpec.h @@ -299,7 +299,7 @@ class SplineSpec : public SplineContainer DataT iParameters[(1 << (2 * maxXdim)) * maxYdim]; // Array for all parameters - //get the indices of the "most left" Knot: + // get the indices of the "most left" Knot: int32_t indices[maxXdim]; // indices of the 'most left' knot for (int32_t i = 0; i < nXdim; i++) { @@ -309,7 +309,7 @@ class SplineSpec : public SplineContainer int32_t indicestmp[maxXdim]; for (int32_t i = 0; i < nKnotParametersPerY; i++) { // for every necessary Knot for (int32_t k = 0; k < nXdim; k++) { - indicestmp[k] = indices[k] + (i / (1 << k)) % 2; //get the knot-indices in every dimension (mirrored order binary counting) + indicestmp[k] = indices[k] + (i / (1 << k)) % 2; // get the knot-indices in every dimension (mirrored order binary counting) } int32_t index = TBase::getKnotIndex(indicestmp); // get index of the current Knot @@ -317,7 +317,7 @@ class SplineSpec : 
public SplineContainer iParameters[i * nKnotParameters + j] = Parameters[index * nKnotParameters + j]; } } - //now start with the interpolation loop: + // now start with the interpolation loop: constexpr auto maxInterpolations = (1 << (2 * maxXdim - 2)) * maxYdim; @@ -329,10 +329,10 @@ class SplineSpec : public SplineContainer int32_t nInterpolations = (1 << (2 * nXdim - 2)) * nYdim; int32_t nKnots = 1 << (nXdim); - for (int32_t d = 0; d < nXdim; d++) { // for every dimension - DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart - for (int32_t i = 0; i < nKnots; i++) { // for every knot - for (int32_t j = 0; j < nKnots; j++) { // for every parametertype + for (int32_t d = 0; d < nXdim; d++) { // for every dimension + DataT* pointer[4] = {S0, D0, S1, D1}; // pointers for interpolation arrays S0, D0, S1, D1 point to Arraystart + for (int32_t i = 0; i < nKnots; i++) { // for every knot + for (int32_t j = 0; j < nKnots; j++) { // for every parametertype int32_t pointernr = 2 * (i % 2) + (j % 2); // to which array should it be delivered for (int32_t k = 0; k < nYdim; k++) { pointer[pointernr][0] = iParameters[(i * nKnots + j) * nYdim + k]; diff --git a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h index c353f3f3329e7..9589ecbfc1fc4 100644 --- a/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h +++ b/GPU/TPCFastTransformation/TPCFastSpaceChargeCorrection.h @@ -43,7 +43,7 @@ class TPCFastSpaceChargeCorrection : public FlatObject /// \brief The struct contains necessary info for TPC padrow /// struct RowInfo { - int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) + int32_t splineScenarioID{0}; ///< scenario index (which of Spline2D splines to use) size_t dataOffsetBytes[3]{0}; ///< offset for the spline data withing a TPC slice ClassDefNV(RowInfo, 1); }; diff --git 
a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx index 27500d12d9d5d..7eea34c19ec25 100644 --- a/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx +++ b/GPU/TPCFastTransformation/devtools/IrregularSpline2D3DCalibrator.cxx @@ -428,7 +428,7 @@ double IrregularSpline2D3DCalibrator::getIntegralDeviationLine(const IrregularSp double d2 = dx * dx + dy * dy + dz * dz; sum += sqrt(d2 / 3.); } - //sum = sqrt(sum/3.); + // sum = sqrt(sum/3.); return sum; } diff --git a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h index 2398ff4cd1cbc..885b2e12eea0b 100644 --- a/GPU/TPCFastTransformation/devtools/RegularSpline1D.h +++ b/GPU/TPCFastTransformation/devtools/RegularSpline1D.h @@ -97,11 +97,11 @@ inline T RegularSpline1D::getSpline(const int32_t iknot1, T f0, T f1, T f2, T f3 /// The polynom is constructed with function values f0,f1,f2,f3 at knots {iknot0,iknot1,iknot2,iknot3} /// The u value supposed to be inside the [knot1,knot2] region, but also may be any. - ///f0 = f value at iknot1-1 - ///f1 = f value at iknot1 - ///f2 = f value at iknot1+1 - ///f3 = f value at iknot1+2 - ///u = u value where f(u) is searched for. + /// f0 = f value at iknot1-1 + /// f1 = f value at iknot1 + /// f2 = f value at iknot1+1 + /// f3 = f value at iknot1+2 + /// u = u value where f(u) is searched for. f0 -= f1; f2 -= f1; @@ -153,7 +153,7 @@ inline double RegularSpline1D::knotIndexToU(int32_t iknot) const inline int32_t RegularSpline1D::getKnotIndex(float u) const { - //index is just u elem [0, 1] * numberOfKnots and then floored. (so the "left" coordinate beside u gets chosen) + // index is just u elem [0, 1] * numberOfKnots and then floored. 
(so the "left" coordinate beside u gets chosen) int32_t index = (int32_t)(u * (mNumberOfKnots - 1)); if (index <= 1) { index = 1; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx index 076e4ee0ed780..c030bae650414 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.cxx @@ -110,7 +110,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int FlatObject::startConstruction(); - //construct regular grid for v + // construct regular grid for v mGridV.construct(numberOfRows); // For each x element numbersOfKnots may be a single RegularSpline1D with x knots. @@ -128,7 +128,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this is the space which is taken just by the RegularSpline1D's mDataIndexMapOffset = numberOfRows * sizeof(RegularSpline1D); - //The buffer size is the size of the array + // The buffer size is the size of the array FlatObject::finishConstruction(mDataIndexMapOffset + numberOfRows * sizeof(int32_t)); // Array for the 1D-Splines inside the buffer @@ -146,7 +146,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int numberOfKnots += knotsU; } - //save the numberOfRows and numberOfKnots + // save the numberOfRows and numberOfKnots mNumberOfRows = numberOfRows; mNumberOfKnots = numberOfKnots; @@ -156,7 +156,7 @@ void SemiregularSpline2D3D::construct(const int32_t numberOfRowsInput, const int // this will count the amount of u-knots "under" a v-coordinate int32_t uSum = 0; - //count the amount of knots which are in gridU's lower than i + // count the amount of knots which are in gridU's lower than i for (int32_t dv = 0; dv < mNumberOfRows; dv++) { dataIndexMap[dv] = uSum; uSum += numbersOfKnots[dv]; diff --git a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h 
b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h index 4da954c8096ac..954738fa74f1b 100644 --- a/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h +++ b/GPU/TPCFastTransformation/devtools/SemiregularSpline2D3D.h @@ -122,7 +122,7 @@ class SemiregularSpline2D3D : public FlatObject const RegularSpline1D& getGridV() const { return mGridV; } /// Get 1-D grid for V coordinate - //const RegularSpline1D& getGridV() const { return mGridV; } + // const RegularSpline1D& getGridV() const { return mGridV; } const RegularSpline1D& getGridU(const int32_t i) const { return getSplineArray()[i]; } /// Get u,v of i-th knot @@ -131,7 +131,7 @@ class SemiregularSpline2D3D : public FlatObject /// Get size of the mFlatBuffer data size_t getFlatBufferSize() const { return mFlatBufferSize; } - ///Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space + /// Gets the knot index which is the i-th knot in v-space and the j-th knot in u-space int32_t getDataIndex(int32_t i, int32_t j) const; int32_t getDataIndex0(int32_t i, int32_t j) const; @@ -212,16 +212,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) // the searched u-v-coordinates have to be in this spline. 
if (iKnot <= nk - 1) { - //in that case v is the current index + // in that case v is the current index v = mGridV.knotIndexToU(i); - //and u the coordinate of the given index + // and u the coordinate of the given index u = gridU.knotIndexToU(iKnot); break; } - //if iKnot is greater than number of knots the searched u-v cannot be in the current gridU - //so we search for nk less indizes and continue with the next v-coordinate + // if iKnot is greater than number of knots the searched u-v cannot be in the current gridU + // so we search for nk less indizes and continue with the next v-coordinate iKnot -= nk; } } @@ -229,16 +229,16 @@ inline void SemiregularSpline2D3D::getKnotUV(int32_t iKnot, float& u, float& v) template inline void SemiregularSpline2D3D::correctEdges(T* data) const { - //Regular v-Grid (vertical) + // Regular v-Grid (vertical) const RegularSpline1D& gridV = getGridV(); int32_t nv = mNumberOfRows; - //EIGENTLICH V VOR U!!! - //Wegen Splines aber U vor V + // EIGENTLICH V VOR U!!! + // Wegen Splines aber U vor V { // ==== left edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { T* f0 = data + getDataIndex(0, iv); T* f1 = f0 + 3; @@ -251,7 +251,7 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const } { // ==== right edge of U ==== - //loop through all gridUs + // loop through all gridUs for (int32_t iv = 1; iv < mNumberOfRows - 1; iv++) { const RegularSpline1D& gridU = getGridU(iv); int32_t nu = gridU.getNumberOfKnots(); @@ -270,8 +270,8 @@ inline void SemiregularSpline2D3D::correctEdges(T* data) const int32_t nu = gridU.getNumberOfKnots(); for (int32_t iu = 0; iu < nu; iu++) { - //f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. - //Since there are no knots because of the irregularity you can get this by using the getSplineMethod. + // f0 to f3 are the x,y,z values of 4 points in the grid along the v axis. 
+ // Since there are no knots because of the irregularity you can get this by using the getSplineMethod. T* f0 = data + getDataIndex(iu, 0); float u = gridU.knotIndexToU(iu); @@ -387,7 +387,7 @@ inline void SemiregularSpline2D3D::getSpline(const T* correctedData, float u, fl dataVx[vxIndex + 2] = gridU.getSpline(ui, correctedData[dataOffset + 2], correctedData[dataOffset + 5], correctedData[dataOffset + 8], correctedData[dataOffset + 11], u); } - //return results + // return results x = mGridV.getSpline(iknotv, dataVx[0], dataVx[3], dataVx[6], dataVx[9], v); y = mGridV.getSpline(iknotv, dataVx[1], dataVx[4], dataVx[7], dataVx[10], v); z = mGridV.getSpline(iknotv, dataVx[2], dataVx[5], dataVx[8], dataVx[11], v); @@ -426,7 +426,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa */ - //workaround 1: + // workaround 1: int32_t vGridi = mGridV.getKnotIndex(v); float dataU[12]; @@ -464,7 +464,7 @@ inline void SemiregularSpline2D3D::getSplineVec(const float* correctedData, floa y = res[1]; z = res[2]; -//getSpline( correctedData, u, v, x, y, z ); +// getSpline( correctedData, u, v, x, y, z ); #else getSpline(correctedData, u, v, x, y, z); #endif diff --git a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx index a9c39e8528354..f77a55ffcc894 100644 --- a/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx +++ b/GPU/TPCFastTransformation/test/testMultivarPolynomials.cxx @@ -95,7 +95,7 @@ BOOST_AUTO_TEST_CASE(Polynomials5D_InteractionOnly) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const float abstolerance = 0.0001f; // abosulte difference between refernce to polynomial class + const float abstolerance = 0.0001f; // abosulte difference between refernce to polynomial class const bool interactionOnly = true; MultivariatePolynomial polCT; // compile time polynomial @@ 
-142,7 +142,7 @@ BOOST_AUTO_TEST_CASE(Piecewise_polynomials) const int32_t nPar5D5DegInteraction = 32; // number of parameters const int32_t nDim = 5; // dimensions const int32_t nDegree = 5; // degree - const bool interactionOnly = true; // consider only interaction terms + const bool interactionOnly = true; // consider only interaction terms // reference polynomial which will be approximated by the NDPiecewisePolynomials MultivariatePolynomial polCT; diff --git a/GPU/Utils/FlatObject.h b/GPU/Utils/FlatObject.h index eba81a2ba06a2..8e13a8dedb868 100644 --- a/GPU/Utils/FlatObject.h +++ b/GPU/Utils/FlatObject.h @@ -28,7 +28,7 @@ #include "GPUCommonRtypes.h" #include "GPUCommonLogger.h" -//#define GPUCA_GPUCODE // uncomment to test "GPU" mode +// #define GPUCA_GPUCODE // uncomment to test "GPU" mode namespace o2 { @@ -319,10 +319,10 @@ class FlatObject InProgress = 0x2 ///< construction started: temporary memory is reserved }; - int32_t mFlatBufferSize = 0; ///< size of the flat buffer - uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class - char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer - char* mFlatBufferPtr = nullptr; //! Pointer to the flat buffer + int32_t mFlatBufferSize = 0; ///< size of the flat buffer + uint32_t mConstructionMask = ConstructionState::NotConstructed; ///< mask for constructed object members, first two bytes are used by this class + char* mFlatBufferContainer = nullptr; //[mFlatBufferSize] Optional container for the flat buffer + char* mFlatBufferPtr = nullptr; //! 
Pointer to the flat buffer ClassDefNV(FlatObject, 1); }; @@ -569,7 +569,7 @@ inline void FlatObject::setFutureBufferAddress(char* futureFlatBufferPtr) mFlatBufferContainer = nullptr; } -#endif //GPUCA_GPUCODE_DEVICE +#endif // GPUCA_GPUCODE_DEVICE } // namespace gpu } // namespace o2 diff --git a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx index 52c3421fa8eb5..a9c9b78e9847e 100644 --- a/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx +++ b/GPU/Workflow/helper/src/GPUWorkflowHelper.cxx @@ -51,7 +51,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsClusterMC = ITSClsLabels; } } - //LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; + // LOG(info) << "Got " << ioPtr.nItsClusters << " ITS Clusters"; } if (maskTrk[GID::ITS] && ioPtr.nItsTracks == 0) { const auto& ITSTracksArray = recoCont.getITSTracks(); @@ -68,7 +68,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.itsTrackMC = ITSTrkLabels.data(); } } - //LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; + // LOG(info) << "Got " << ioPtr.nItsTracks << " ITS Tracks"; } if (maskTrk[GID::ITSTPC] && ioPtr.nTracksTPCITSO2 == 0) { @@ -77,7 +77,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTracksTPCITSO2 = trkITSTPC.size(); ioPtr.tracksTPCITSO2 = trkITSTPC.data(); } - //LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nTracksTPCITSO2 << " ITS-TPC Tracks"; } if (maskCl[GID::TOF] && ioPtr.nTOFClusters == 0) { @@ -86,7 +86,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTOFClusters = tofClusters.size(); ioPtr.tofClusters = tofClusters.data(); } - //LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; + // LOG(info) << "Got " << ioPtr.nTOFClusters << " TOF Clusters"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTOF]) && ioPtr.nITSTPCTOFMatches == 0) { @@ -95,7 +95,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTOFMatches = itstpctofMatches.size(); ioPtr.itstpctofMatches = 
itstpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTOFMatches << " ITS-TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::ITSTPCTRDTOF]) && ioPtr.nITSTPCTRDTOFMatches == 0) { @@ -104,7 +104,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nITSTPCTRDTOFMatches = itstpctrdtofMatches.size(); ioPtr.itstpctrdtofMatches = itstpctrdtofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nITSTPCTRDTOFMatches << " ITS-TPC-TRD-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTOF]) && ioPtr.nTPCTOFMatches == 0) { @@ -113,7 +113,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTOFMatches = tpctofMatches.size(); ioPtr.tpctofMatches = tpctofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if ((maskMatch[GID::TOF] || maskMatch[GID::TPCTRDTOF]) && ioPtr.nTPCTRDTOFMatches == 0) { @@ -122,12 +122,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTPCTRDTOFMatches = tpctrdtofMatches.size(); ioPtr.tpctrdtofMatches = tpctrdtofMatches.data(); } - //LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; + // LOG(info) << "Got " << ioPtr.nTPCTOFMatches << " TPC-TOF Matches"; } if (maskCl[GID::TRD]) { recoCont.inputsTRD->fillGPUIOPtr(&ioPtr); - //LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; + // LOG(info) << "Got " << ioPtr.nTRDTracklets << " TRD Tracklets"; } if (maskTrk[GID::ITSTPCTRD] && ioPtr.nTRDTracksITSTPCTRD == 0) { @@ -136,7 +136,7 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksITSTPCTRD = trdTracks.size(); ioPtr.trdTracksITSTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksITSTPCTRD << " ITS-TPC-TRD Tracks"; } if 
(maskTrk[GID::TPCTRD] && ioPtr.nTRDTracksTPCTRD == 0) { @@ -145,12 +145,12 @@ std::shared_ptr GPUWorkflowHelper::fi ioPtr.nTRDTracksTPCTRD = trdTracks.size(); ioPtr.trdTracksTPCTRD = trdTracks.data(); } - //LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; + // LOG(info) << "Got " << ioPtr.nTRDTracksTPCTRD << " TPC-TRD Tracks"; } if (maskCl[GID::TPC] && ioPtr.clustersNative == nullptr) { ioPtr.clustersNative = &recoCont.getTPCClusters(); - //LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; + // LOG(info) << "Got " << ioPtr.clustersNative->nClustersTotal << " TPC Clusters"; } if (maskTrk[GID::TPC] && ioPtr.nOutputTracksTPCO2 == 0) { @@ -176,7 +176,7 @@ std::shared_ptr GPUWorkflowHelper::fi retVal->tpcLinkTRD.resize(ioPtr.nOutputTracksTPCO2, -1); ioPtr.tpcLinkTRD = retVal->tpcLinkTRD.data(); } - //LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; + // LOG(info) << "Got " << ioPtr.nOutputTracksTPCO2 << " TPC Tracks"; } auto creator = [maskTrk, &ioPtr, &recoCont, &retVal](auto& trk, GID gid, float time, float) { From 1838f8fa4746d146815b9e3855e3ebcc69802ab6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 14 Mar 2025 22:34:57 +0100 Subject: [PATCH 0227/1914] GPU dpl-worflow: Remove HSA_NO_SCRATCH_RECLAIM=1 optimization for MI50, with latest ROCm actually decreases performance... --- prodtests/full-system-test/dpl-workflow.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 76235d127037a..5259bbf951d73 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -259,7 +259,6 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? 
($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi - [[ ${EPN_NODE_MI100:-} != "1" ]] && export HSA_NO_SCRATCH_RECLAIM=1 [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else From 07cdaf8b104c494953904574f06fd3a12bcac879 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 09:09:14 +0100 Subject: [PATCH 0228/1914] GPU HIP: #define no longer needed, rocthrust does it internally --- GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h index c7a1b4f55e501..5506e3925bf80 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesHost.h @@ -15,8 +15,6 @@ #ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDES_H #define O2_GPU_RECONSTRUCTIONHIPINCLUDES_H -#define __HIP_ENABLE_DEVICE_MALLOC__ 1 // Fix SWDEV-239120 - #include #include #include From e4d7d36915b667590cb11139a4d972fbb2df9e1b Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 17 Mar 2025 10:55:51 +0100 Subject: [PATCH 0229/1914] ITS-GPU: remove debug options in production --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt index e2fc1f1388ad0..3cdb107e07438 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt @@ -13,7 +13,7 @@ if(CUDA_ENABLED) find_package(CUDAToolkit) message(STATUS "Building ITS CUDA tracker") -add_compile_options(-O0 -g -lineinfo -fPIC) +# add_compile_options(-O0 -g -lineinfo 
-fPIC) # add_compile_definitions(ITS_MEASURE_GPU_TIME) o2_add_library(ITStrackingCUDA SOURCES ClusterLinesGPU.cu From 203973d51038cd46734b6b8ce3a232b5ff913753 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 17 Mar 2025 12:05:52 +0100 Subject: [PATCH 0230/1914] Drop unneeded minimum requirement (#14075) --- Framework/Foundation/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Framework/Foundation/CMakeLists.txt b/Framework/Foundation/CMakeLists.txt index 65a43073ef9d8..dc6d7238c60ac 100644 --- a/Framework/Foundation/CMakeLists.txt +++ b/Framework/Foundation/CMakeLists.txt @@ -8,8 +8,6 @@ # In applying this license CERN does not waive the privileges and immunities # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -cmake_minimum_required(VERSION 3.5) - install(DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/include/Framework DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) From 900e6245e8a977bd9dfc0d96df1fbd5b3fd15850 Mon Sep 17 00:00:00 2001 From: pillot Date: Sun, 16 Mar 2025 13:49:31 +0100 Subject: [PATCH 0231/1914] fix compiler warning about printf format --- .../MCH/Conditions/src/scan-hvlv-ccdb.cxx | 56 ++++++++++--------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx index 307759c97a0c3..236effc4b1182 100644 --- a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -56,6 +56,7 @@ using DPBMAP = std::map; using ISSUE = std::tuple; using ISSUELIST = std::vector; using ISSUEMAP = std::map; +using ULL = unsigned long long; //---------------------------------------------------------------------------- bool containsAKey(std::string data, const std::set& Keys) @@ -238,14 +239,14 @@ void checkRunBoundaries(const RBMAP& runBoundaries) for (const auto& [run, boundaries] : 
runBoundaries) { if (boundaries.second <= boundaries.first) { printf("error: run %d EOR <= SOR: %llu - %llu (%s - %s)\n", - run, boundaries.first, boundaries.second, + run, (ULL)boundaries.first, (ULL)boundaries.second, getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); error = true; } if (boundaries.first <= endOfPreviousRun) { printf("error: SOR run %d <= EOR run %d: %llu (%s) <= %llu (%s)\n", - run, previousRun, boundaries.first, getTime(boundaries.first).c_str(), - endOfPreviousRun, getTime(endOfPreviousRun).c_str()); + run, previousRun, (ULL)boundaries.first, getTime(boundaries.first).c_str(), + (ULL)endOfPreviousRun, getTime(endOfPreviousRun).c_str()); error = true; } previousRun = run; @@ -266,7 +267,7 @@ void printRunBoundaries(const RBMAP& runBoundaries) printf("------------------------------------\n"); for (const auto& [run, boundaries] : runBoundaries) { - printf("%d: %llu - %llu (%s - %s)\n", run, boundaries.first, boundaries.second, + printf("%d: %llu - %llu (%s - %s)\n", run, (ULL)boundaries.first, (ULL)boundaries.second, getTime(boundaries.first).c_str(), getTime(boundaries.second).c_str()); } @@ -324,7 +325,7 @@ DPBMAP getDPBoundaries(ccdb::CcdbApi const& api, std::string what, if (dpBoundaries.empty()) { printf("\e[0;31merror: no file found in %s in time range %llu - %llu (%s - %s) --> use the default one\e[0m\n", - what.c_str(), tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + what.c_str(), (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); dpBoundaries.emplace(1, 9999999999999); } @@ -340,13 +341,13 @@ void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, if (dpBoundaries.begin()->first > tStart) { printf("error: the beginning of the time range is not covered: %llu > %llu (%s > %s)\n", - dpBoundaries.begin()->first, tStart, + (ULL)dpBoundaries.begin()->first, (ULL)tStart, getTime(dpBoundaries.begin()->first).c_str(), getTime(tStart).c_str()); error = true; } 
if (dpBoundaries.rbegin()->second < tStop) { printf("error: the end of the time range is not covered: %llu < %llu (%s < %s)\n", - dpBoundaries.rbegin()->second, tStop, + (ULL)dpBoundaries.rbegin()->second, (ULL)tStop, getTime(dpBoundaries.rbegin()->second).c_str(), getTime(tStop).c_str()); error = true; } @@ -355,13 +356,13 @@ void checkDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t tStart, for (auto [tStart, tStop] : dpBoundaries) { if (tStop <= tStart) { printf("error: EOF <= SOF: %llu - %llu (%s - %s)\n", - tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); error = true; } if (tStart != previousTStop) { printf("error: end of %s file != start of next %s file: %llu (%s) != %llu (%s))\n", scanHV ? "HV" : "LV", scanHV ? "HV" : "LV", - previousTStop, getTime(previousTStop).c_str(), tStart, getTime(tStart).c_str()); + (ULL)previousTStop, getTime(previousTStop).c_str(), (ULL)tStart, getTime(tStart).c_str()); error = true; } previousTStop = tStop; @@ -381,10 +382,10 @@ void printDPBoundaries(const DPBMAP& dpBoundaries, bool scanHV, uint64_t timeInt printf("------------------------------------\n"); for (auto [tStart, tStop] : dpBoundaries) { - printf("%llu - %llu (%s - %s)", tStart, tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); + printf("%llu - %llu (%s - %s)", (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getTime(tStop).c_str()); if (tStop - tStart < 60000 * (timeInterval - 1) || tStop - tStart > 60000 * (timeInterval + 1)) { printf("\e[0;31m ! 
warning: validity range %s != %llu±1 min\e[0m\n", - getDuration(tStart, tStop).c_str(), timeInterval); + getDuration(tStart, tStop).c_str(), (ULL)timeInterval); } else { printf("\n"); } @@ -478,14 +479,15 @@ void fillDataPoints(const std::vector& dps, std::map& d auto previousTS = dps2.rbegin()->first; if (ts != previousTS || getValue(*itDP) != dps2.rbegin()->second) { if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + printf("error: wrong data point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); exit(1); } if (printWarning) { printf("%s%s missing the previous data point (dt = %s%llu ms)", color.c_str(), header.c_str(), - (previousTS < tMin) ? "-" : "+", (previousTS < tMin) ? tMin - previousTS : previousTS - tMin); + (previousTS < tMin) ? "-" : "+", + (ULL)((previousTS < tMin) ? tMin - previousTS : previousTS - tMin)); if (ts <= tMin) { - printf(" but get one at dt = -%llu ms\e[0m\n", tMin - ts); + printf(" but get one at dt = -%llu ms\e[0m\n", (ULL)(tMin - ts)); } else { printf("\e[0m\n"); } @@ -496,11 +498,11 @@ void fillDataPoints(const std::vector& dps, std::map& d // add the first data point (should be before the start of validity of the file) if (ts >= tMax) { - printf("error: first data point exceeding file validity range (dt = +%llu ms)\n", ts - tMax); + printf("error: first data point exceeding file validity range (dt = +%llu ms)\n", (ULL)(ts - tMax)); exit(1); } else if (ts > tMin && printWarning) { printf("%s%s missing data point prior file start of validity (dt = +%llu ms)\e[0m\n", - color.c_str(), header.c_str(), ts - tMin); + color.c_str(), header.c_str(), (ULL)(ts - tMin)); header = " "; } dps2.emplace(ts, getValue(*itDP)); @@ -510,15 +512,15 @@ void fillDataPoints(const std::vector& dps, std::map& d for (++itDP; itDP < dps.end(); ++itDP) { ts = itDP->get_epoch_time(); if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", ts, previousTS); + printf("error: wrong data 
point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); exit(1); } if (ts < tMin && (warningLevel > 1 || (warningLevel == 1 && ts + tolerance < tMin))) { printf("%s%s data point outside of file validity range (dt = -%llu ms)\e[0m\n", - (ts + tolerance < tMin) ? "\e[0;31m" : "\e[0;34m", header.c_str(), tMin - ts); + (ts + tolerance < tMin) ? "\e[0;31m" : "\e[0;34m", header.c_str(), (ULL)(tMin - ts)); } else if (ts >= tMax && warningLevel >= 1) { printf("\e[0;31m%s data point outside of file validity range (dt = +%llu ms)\e[0m\n", - header.c_str(), ts - tMax); + header.c_str(), (ULL)(ts - tMax)); } dps2.emplace(ts, getValue(*itDP)); previousTS = ts; @@ -580,13 +582,13 @@ void printDataPoints(const DPMAP2 dpsMapsPerCh[10], std::string hvlvFormat, bool for (const auto& [alias, dps] : dpsMapsPerCh[ch]) { - printf("- %s: %lu values", alias.c_str(), dps.size()); + printf("- %s: %zu values", alias.c_str(), dps.size()); if (all) { printf("\n"); for (const auto& [ts, val] : dps) { - printf(format1.c_str(), ts, getTime(ts).c_str(), val); + printf(format1.c_str(), (ULL)ts, getTime(ts).c_str(), val); } } else if (!dps.empty()) { @@ -594,8 +596,8 @@ void printDataPoints(const DPMAP2 dpsMapsPerCh[10], std::string hvlvFormat, bool const auto firstdt = dps.begin(); const auto lastdt = dps.rbegin(); printf(format2.c_str(), - firstdt->first, getTime(firstdt->first).c_str(), firstdt->second, - lastdt->first, getTime(lastdt->first).c_str(), lastdt->second); + (ULL)firstdt->first, getTime(firstdt->first).c_str(), firstdt->second, + (ULL)lastdt->first, getTime(lastdt->first).c_str(), lastdt->second); } else { printf("\n"); @@ -719,7 +721,7 @@ void fillO2Issues(const std::vector& o2issues, // exclude issues fully outside of the DP file boudaries if (itIssue->end <= tMin || itIssue->begin >= tMax) { printf("\e[0;35mwarning: skipping O2 issue outside of file boundaries (%llu - %llu)\e[0m\n", - itIssue->begin, itIssue->end); + (ULL)itIssue->begin, (ULL)itIssue->end); continue; } @@ -727,14 
+729,14 @@ void fillO2Issues(const std::vector& o2issues, if (itIssue->begin < tMin - mch::StatusMapCreatorParam::Instance().timeMargin && (itIssue != o2issues.begin() || itIssue->begin != 0)) { printf("\e[0;35mwarning: O2 returns an issue with uncommon start time (%llu < %llu)\e[0m\n", - itIssue->begin, tMin - mch::StatusMapCreatorParam::Instance().timeMargin); + (ULL)itIssue->begin, (ULL)(tMin - mch::StatusMapCreatorParam::Instance().timeMargin)); } // only the last issue could in principle extend beyond the end of the DP file, to infinity if (itIssue->end >= tMax + mch::StatusMapCreatorParam::Instance().timeMargin && (itIssue != std::prev(o2issues.end()) || itIssue->end != std::numeric_limits::max())) { printf("\e[0;35mwarning: O2 returns an issue with uncommon end time (%llu >= %llu)\e[0m\n", - itIssue->end, tMax + mch::StatusMapCreatorParam::Instance().timeMargin); + (ULL)itIssue->end, (ULL)(tMax + mch::StatusMapCreatorParam::Instance().timeMargin)); } // extend the last issue in case of continuity accross the DP files or add a new one, @@ -897,7 +899,7 @@ void printIssues(const ISSUEMAP issuesPerCh[10], const ISSUEMAP o2IssuesPerCh[10 auto printIssue = [&format](ISSUE issue, std::string color) { const auto& [tStart, tStop, min, mean, runs] = issue; printf("%s", color.c_str()); - printf(format.c_str(), tStart, tStop, + printf(format.c_str(), (ULL)tStart, (ULL)tStop, getTime(tStart).c_str(), getDuration(tStart, tStop).c_str(), min, mean, runs.c_str()); printf("\e[0m"); }; From c39a87024ea2bd73db083972deae4c58ef8350ed Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 3 Feb 2025 10:32:02 +0100 Subject: [PATCH 0232/1914] Promoting critical Error logs to ILG Ops level --- Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx b/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx index b625ab344e112..d7a7282a78684 100644 --- 
a/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx +++ b/Utilities/EPNMonitoring/src/EPNstderrMonitor.cxx @@ -77,7 +77,7 @@ class EPNMonitor std::unordered_map mFiles; std::string mPath; std::vector mFilters; - std::unordered_map> mMapRootLogTypes; + std::unordered_map> mMapLogTypes; volatile unsigned int mRunNumber; std::string mPartition; unsigned int nLines = 0; @@ -95,12 +95,11 @@ EPNMonitor::EPNMonitor(std::string path, bool infoLogger, int runNumber, std::st mFilters.emplace_back("^Warning in {InfoLogger::InfoLogger::Severity::Info, 13}); - mMapRootLogTypes.emplace("Print in <", std::pair{InfoLogger::InfoLogger::Severity::Info, 13}); - mMapRootLogTypes.emplace("Warning in <", std::pair{InfoLogger::InfoLogger::Severity::Warning, 11}); - mMapRootLogTypes.emplace("Error in <", std::pair{InfoLogger::InfoLogger::Severity::Error, 2}); - mMapRootLogTypes.emplace("Fatal in <", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); - mMapRootLogTypes.emplace("*** Break ***", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); + mMapLogTypes.emplace("(core dumped)", std::pair{InfoLogger::InfoLogger::Severity::Error, 1}); + mMapLogTypes.emplace("Warning in <", std::pair{InfoLogger::InfoLogger::Severity::Warning, 11}); + mMapLogTypes.emplace("Error in <", std::pair{InfoLogger::InfoLogger::Severity::Error, 2}); + mMapLogTypes.emplace("Fatal in <", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); + mMapLogTypes.emplace("*** Break ***", std::pair{InfoLogger::InfoLogger::Severity::Fatal, 1}); mInfoLoggerActive = infoLogger; mPath = path; mRunNumber = runNumber; @@ -214,7 +213,7 @@ void EPNMonitor::thread() // assign proper severity / level for remaining ROOT log messages auto severity{InfoLogger::InfoLogger::Severity::Error}; int level{3}; - for (const auto& logType : mMapRootLogTypes) { + for (const auto& logType : mMapLogTypes) { if (line.find(logType.first) != std::string::npos) { severity = std::get(logType.second); level = std::get(logType.second); From 
79ea4b7a5091d0613f7a5b49fd96e9e385c20b21 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 12 Feb 2025 15:31:14 +0100 Subject: [PATCH 0233/1914] Promoting DPL errors relevant to the ECS shifter to critical, leaving messages to be ignored at error --- Detectors/TPC/workflow/src/IDCToVectorSpec.cxx | 6 +++--- Framework/Core/src/CommonServices.cxx | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx b/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx index 27dbcf5d85bbf..e9433c775a02a 100644 --- a/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx +++ b/Detectors/TPC/workflow/src/IDCToVectorSpec.cxx @@ -405,7 +405,7 @@ class IDCToVectorDevice : public o2::framework::Task for (const auto& inf : infVec) { if (!inf.hasBothEPs()) { - LOGP(error, "IDC CRU {:3}: data missing at ({:8}, {:4}) for one or both end points {:02b} in TF {}", cru, inf.heartbeatOrbit, inf.heartbeatBC, inf.epSeen, tfCounter); + LOGP(critical, "IDC CRU {:3}: data missing at ({:8}, {:4}) for one or both end points {:02b} in TF {}", cru, inf.heartbeatOrbit, inf.heartbeatBC, inf.epSeen, tfCounter); hasErrors = true; } } @@ -417,12 +417,12 @@ class IDCToVectorDevice : public o2::framework::Task } if (packetsInTF != infVec.size()) { - LOGP(error, "IDC CRU {:3}: number of IDC packets {} does not match max over all CRUs {} in TF {}", cru, packetsInTF, infVec.size(), tfCounter); + LOGP(critical, "IDC CRU {:3}: number of IDC packets {} does not match max over all CRUs {} in TF {}", cru, packetsInTF, infVec.size(), tfCounter); hasErrors = true; } if (!std::equal(infVecComp->begin(), infVecComp->end(), infVec.begin())) { - LOGP(error, "IDC CRU {:3}: mismatch in orbit numbers", cru); + LOGP(critical, "IDC CRU {:3}: mismatch in orbit numbers", cru); hasErrors = true; } } diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index 95836adc02171..cc3c920bc7be1 100644 --- a/Framework/Core/src/CommonServices.cxx +++ 
b/Framework/Core/src/CommonServices.cxx @@ -579,7 +579,7 @@ auto decongestionCallbackOrdered = [](AsyncTask& task, size_t id) -> void { if (state.transitionHandling != TransitionHandlingState::NoTransition && DefaultsHelpers::onlineDeploymentMode()) { O2_SIGNPOST_EVENT_EMIT_WARN(async_queue, cid, "oldest_possible_timeslice", "Stop transition requested. Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } else { - O2_SIGNPOST_EVENT_EMIT_ERROR(async_queue, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); + O2_SIGNPOST_EVENT_EMIT_CRITICAL(async_queue, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } timesliceIndex.rescan(); } @@ -650,7 +650,7 @@ o2::framework::ServiceSpec if (state.transitionHandling != TransitionHandlingState::NoTransition && DefaultsHelpers::onlineDeploymentMode()) { O2_SIGNPOST_EVENT_EMIT_WARN(data_processor_context, cid, "oldest_possible_timeslice", "Stop transition requested. 
Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } else { - O2_SIGNPOST_EVENT_EMIT_ERROR(data_processor_context, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); + O2_SIGNPOST_EVENT_EMIT_CRITICAL(data_processor_context, cid, "oldest_possible_timeslice", "Some Lifetime::Timeframe data got dropped starting at %" PRIi64, oldNextTimeslice); } timesliceIndex.rescan(); } From 74c2ec27488b2c0874884cb016b3d36771ffd7d9 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 17 Mar 2025 17:22:54 +0100 Subject: [PATCH 0234/1914] Bump nvToolsExt (#14074) --- Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt | 1 - Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt index 3cdb107e07438..531cf2b0dcd33 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/CMakeLists.txt @@ -33,7 +33,6 @@ o2_add_library(ITStrackingCUDA O2::SimulationDataFormat O2::ReconstructionDataFormats O2::GPUCommon - CUDA::nvToolsExt PRIVATE_LINK_LIBRARIES O2::GPUTrackingCUDAExternalProvider TARGETVARNAME targetName) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu index 0bca6360d268c..7c42658242231 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TracerGPU.cu @@ -13,7 +13,7 @@ #include "ITStrackingGPU/TracerGPU.h" #if !defined(__HIPCC__) && defined(__USE_GPU_TRACER__) -#include "nvToolsExt.h" +#include constexpr uint32_t colors[] = {0xff00ff00, 0xff0000ff, 0xffffff00, 0xffff00ff, 0xff00ffff, 0xffff0000, 0xffffffff}; constexpr int num_colors = sizeof(colors) / sizeof(uint32_t); From 64abf5c5e675edbef70cf21ae4a71b499246fb73 Mon Sep 17 00:00:00 
2001 From: David Rohr Date: Mon, 17 Mar 2025 15:16:10 +0100 Subject: [PATCH 0235/1914] GPU: Fix GPUChkErr macro, GPU::Common should not use GPU::GPUTracking internals --- GPU/Common/GPUCommonHelpers.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h index ad876db0d6c3a..915d93c9bc791 100644 --- a/GPU/Common/GPUCommonHelpers.h +++ b/GPU/Common/GPUCommonHelpers.h @@ -33,6 +33,7 @@ #define GPUChkErrSI(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, false) #include "GPUCommonDef.h" +#include "GPUCommonLogger.h" #include namespace o2::gpu::internal @@ -43,7 +44,7 @@ extern int32_t GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)( inline int32_t GPUReconstructionCPUChkErr(const int64_t error, const char* file, int32_t line) { if (error) { - GPUError("GPUCommon Error Code %d (%s:%d)", error, file, line); + LOGF(error, "GPUCommon Error Code %ld (%s:%d)", (long)error, file, line); } return error != 0; } From 50bf6546f14e71cd62a01b634737af44c24bc265 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 09:23:17 +0100 Subject: [PATCH 0236/1914] GPU TPC CF: Fix rounding error in qMax cut --- GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx | 5 +---- GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index b3b3c64095017..f876270a34358 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -98,10 +98,7 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, t if (cn.qTot <= param.rec.tpc.cfQTotCutoff) { return false; } - cn.qMax = q; - if (cn.qMax <= param.rec.tpc.cfQMaxCutoff) { - return false; - } + cn.qMax = q; // cfQMaxCutoff check already done at 
PeakFinder level if (mTimeMean < param.rec.tpc.clustersShiftTimebinsClusterizer) { return false; } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx index 30fdac92e8607..1de922f716c14 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx @@ -42,7 +42,7 @@ GPUdii() bool GPUTPCCFPeakFinder::isPeak( { uint16_t ll = get_local_id(0); - bool belowThreshold = (q <= calib.tpc.cfQMaxCutoff); + bool belowThreshold = (uint32_t)q <= calib.tpc.cfQMaxCutoff; uint16_t lookForPeaks; uint16_t partId = CfUtils::partition( From 1e625e72171876fbb02e0e75c9c6b73a2c6e1ad5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 10:35:55 +0100 Subject: [PATCH 0237/1914] Fix CMake warnings --- Steer/CMakeLists.txt | 2 ++ run/CMakeLists.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Steer/CMakeLists.txt b/Steer/CMakeLists.txt index 70f50f4ab8823..8e2706d31bb0a 100644 --- a/Steer/CMakeLists.txt +++ b/Steer/CMakeLists.txt @@ -34,11 +34,13 @@ o2_target_root_dictionary(Steer include/Steer/MCKinematicsReader.h include/Steer/MaterialBudgetMap.h) o2_add_test(InteractionSampler + COMPONENT_NAME steer PUBLIC_LINK_LIBRARIES O2::SimulationDataFormat SOURCES test/testInteractionSampler.cxx LABELS steer) o2_add_test(HitProcessingManager + COMPONENT_NAME steer PUBLIC_LINK_LIBRARIES O2::Steer SOURCES test/testHitProcessingManager.cxx LABELS steer) diff --git a/run/CMakeLists.txt b/run/CMakeLists.txt index 662716901ed0a..fd43207f92d1e 100644 --- a/run/CMakeLists.txt +++ b/run/CMakeLists.txt @@ -229,6 +229,7 @@ set_property(TEST o2sim_G4 APPEND PROPERTY ENVIRONMENT ${G4ENV}) o2_add_test(CheckStackG4 + COMPONENT_NAME sim SOURCES checkStack.cxx NAME o2sim_checksimkinematics_G4 WORKING_DIRECTORY ${SIMTESTDIR} @@ -276,6 +277,7 @@ set_tests_properties(o2sim_G3 G3) o2_add_test(CheckStackG3 + COMPONENT_NAME sim SOURCES checkStack.cxx NAME 
o2sim_checksimkinematics_G3 WORKING_DIRECTORY ${SIMTESTDIR} From 40a883754789b3630fff732076136123a695ae3a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 10:49:01 +0100 Subject: [PATCH 0238/1914] GPU: Drop obsolete minimum version setting --- GPU/GPUTracking/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index d5a90dbd65ea3..5ce96d450f765 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -10,7 +10,6 @@ # or submit itself to any jurisdiction. set(MODULE GPUTracking) -cmake_minimum_required(VERSION 3.27 FATAL_ERROR) # set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0") # to uncomment if needed, tired of typing this... # set(GPUCA_BUILD_DEBUG 1) From 134f5ea21c5edd15c2836829b5ec629925342643 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 10:52:03 +0100 Subject: [PATCH 0239/1914] GPU TPC: Remove tpcGeometry instance, use constexpr GPUTPCGeometry:: --- .../study/src/SVStudy.cxx | 3 +- .../study/src/TrackingStudy.cxx | 3 +- .../TPC/workflow/src/EntropyEncoderSpec.cxx | 4 +-- GPU/GPUTracking/Base/GPUParam.cxx | 6 ++-- GPU/GPUTracking/Base/GPUParam.h | 2 -- GPU/GPUTracking/Base/GPUParam.inc | 19 ++++++------ .../Base/GPUReconstructionConvert.cxx | 27 +++++++++-------- .../GPUTPCCompressionKernels.cxx | 14 ++++----- .../TPCClusterDecompressionCore.inc | 18 ++++++----- GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx | 2 +- GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 13 ++++---- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 2 +- .../Merger/GPUTPCGMSectorTrack.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 30 +++++++++---------- .../SectorTracker/GPUTPCTrackingData.cxx | 5 ++-- .../GPUTPCTrackletConstructor.cxx | 6 ++-- .../TPCClusterFinder/ClusterAccumulator.cxx | 8 ++--- .../GPUTPCCFCheckPadBaseline.cxx | 3 +- 
.../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 7 +++-- .../GPUTPCNNClusterizerKernels.cxx | 19 ++++++------ .../GPUTPCNNClusterizerKernels.h | 4 +-- GPU/GPUTracking/dEdx/GPUdEdx.cxx | 1 - GPU/GPUTracking/dEdx/GPUdEdx.h | 21 ++++++------- .../display/render/GPUDisplayImportEvent.cxx | 4 +-- GPU/GPUTracking/qa/genEvents.cxx | 2 +- 26 files changed, 118 insertions(+), 109 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx index c28048a1f9503..1e141a29d3f55 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/SVStudy.cxx @@ -48,6 +48,7 @@ // #include "GPUSettingsO2.h" #include "GPUParam.h" #include "GPUParam.inc" +#include "GPUTPCGeometry.h" #include "GPUO2InterfaceRefit.h" #include "GPUO2InterfaceUtils.h" @@ -254,7 +255,7 @@ o2::dataformats::V0Ext SVStudySpec::processV0(int iv, o2::globaltracking::RecoCo tpcTr.getClusterReference(clRefs, tpcTr.getNClusterReferences() - 1, clSect, clRow, clIdx); const auto& clus = recoData.getTPCClusters().clusters[clSect][clRow][clIdx]; prInfo.lowestRow = clRow; - int npads = mParam->tpcGeometry.NPads(clRow); + int npads = o2::gpu::GPUTPCGeometry::NPads(clRow); prInfo.padFromEdge = uint8_t(clus.getPad()); if (prInfo.padFromEdge > npads / 2) { prInfo.padFromEdge = npads - 1 - prInfo.padFromEdge; diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index db57ad5f8a7eb..f206c43f7f57a 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -47,6 +47,7 @@ #include "GPUO2Interface.h" // Needed for propper settings in GPUParam.h #include "GPUParam.h" #include "GPUParam.inc" +#include "GPUTPCGeometry.h" #include "Steer/MCKinematicsReader.h" #include "MathUtils/fit.h" #include @@ -301,7 +302,7 @@ void 
TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) trExt.rowMinTPC = clRow; const auto& clus = tpcClusAcc.clusters[clSect][clRow][clIdx]; trExt.padFromEdge = uint8_t(clus.getPad()); - int npads = mTPCRefitter->getParam()->tpcGeometry.NPads(clRow); + int npads = o2::gpu::GPUTPCGeometry::NPads(clRow); if (trExt.padFromEdge > npads / 2) { trExt.padFromEdge = npads - 1 - trExt.padFromEdge; } diff --git a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx index cc964ade0d87c..2efa7077be125 100644 --- a/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx +++ b/Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx @@ -230,9 +230,9 @@ void EntropyEncoderSpec::run(ProcessingContext& pc) int myThread = 0; #endif unsigned int count = 0; - const float x = mParam->tpcGeometry.Row2X(j); + const float x = GPUTPCGeometry::Row2X(j); auto checker = [i, j, firstIR, totalT, x, this, &preCl, &count, &outBuffer = tmpBuffer[myThread], &rejectHits, &clustersFiltered](const o2::tpc::ClusterNative& cl, unsigned int k) { - const float y = mParam->tpcGeometry.LinearPad2Y(i, j, cl.getPad()); + const float y = GPUTPCGeometry::LinearPad2Y(i, j, cl.getPad()); const float r = sqrtf(x * x + y * y); const float maxz = r * mEtaFactor + mMaxZ; const unsigned int deltaBC = std::max(0.f, totalT - mFastTransform->convDeltaZtoDeltaTimeInTimeFrameAbs(maxz)) * constants::LHCBCPERTIMEBIN; diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index 192e46c36dc68..bbca150df405a 100644 --- a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -21,6 +21,7 @@ #include "GPUDataTypes.h" #include "GPUConstantMem.h" #include "DetectorsBase/Propagator.h" +#include "GPUTPCGeometry.h" using namespace o2::gpu; @@ -32,7 +33,6 @@ using namespace o2::gpu; void GPUParam::SetDefaults(float solenoidBz) { memset((void*)this, 0, sizeof(*this)); - new (&tpcGeometry) GPUTPCGeometry; new (&rec) GPUSettingsRec; 
occupancyMap = nullptr; occupancyTotal = 0; @@ -178,8 +178,8 @@ void GPUParam::UpdateRun3ClusterErrors(const float* yErrorParam, const float* zE for (int32_t rowType = 0; rowType < 4; rowType++) { constexpr int32_t regionMap[4] = {0, 4, 6, 8}; ParamErrors[yz][rowType][0] = param[0] * param[0]; - ParamErrors[yz][rowType][1] = param[1] * param[1] * tpcGeometry.PadHeightByRegion(regionMap[rowType]); - ParamErrors[yz][rowType][2] = param[2] * param[2] / tpcGeometry.TPCLength() / tpcGeometry.PadHeightByRegion(regionMap[rowType]); + ParamErrors[yz][rowType][1] = param[1] * param[1] * GPUTPCGeometry::PadHeightByRegion(regionMap[rowType]); + ParamErrors[yz][rowType][2] = param[2] * param[2] / GPUTPCGeometry::TPCLength() / GPUTPCGeometry::PadHeightByRegion(regionMap[rowType]); ParamErrors[yz][rowType][3] = param[3] * param[3] * rec.tpc.clusterErrorOccupancyScaler * rec.tpc.clusterErrorOccupancyScaler; } } diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index fbce6246de112..4b77628c88775 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -19,7 +19,6 @@ #include "GPUCommonMath.h" #include "GPUDef.h" #include "GPUSettings.h" -#include "GPUTPCGeometry.h" #include "GPUTPCGMPolynomialField.h" #if !defined(GPUCA_GPUCODE) @@ -59,7 +58,6 @@ struct GPUParam_t { int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; - GPUTPCGeometry tpcGeometry; // TPC Geometry GPUTPCGMPolynomialField polynomialField; // Polynomial approx. 
of magnetic field for TPC GM const uint32_t* occupancyMap; // Ptr to TPC occupancy map uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf) diff --git a/GPU/GPUTracking/Base/GPUParam.inc b/GPU/GPUTracking/Base/GPUParam.inc index 19dc1fc4a3578..a118a8f639fe9 100644 --- a/GPU/GPUTracking/Base/GPUParam.inc +++ b/GPU/GPUTracking/Base/GPUParam.inc @@ -18,6 +18,7 @@ #include "GPUParam.h" #include "GPUTPCGMMergedTrackHit.h" #include "GPUTPCClusterOccupancyMap.h" +#include "GPUTPCGeometry.h" namespace o2::gpu { @@ -42,14 +43,14 @@ GPUdi() void GPUParam::Global2Sector(int32_t iSector, float X, float Y, float Z, GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { - const int32_t rowType = tpcGeometry.GetROC(iRow); - z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); + const int32_t rowType = GPUTPCGeometry::GetROC(iRow); + z = CAMath::Abs(GPUTPCGeometry::TPCLength() - CAMath::Abs(z)); const float s2 = CAMath::Min(sinPhi * sinPhi, 0.95f * 0.95f); const float sec2 = 1.f / (1.f - s2); const float angleY2 = s2 * sec2; // dy/dx const float angleZ2 = DzDs * DzDs * sec2; // dz/dx - const float unscaledMult = time >= 0.f ? GetUnscaledMult(time) / tpcGeometry.Row2X(iRow) : 0.f; + const float unscaledMult = time >= 0.f ? 
GetUnscaledMult(time) / GPUTPCGeometry::Row2X(iRow) : 0.f; ErrY2 = GetClusterErrorSeeding(0, rowType, z, angleY2, unscaledMult); // Returns Err2 ErrZ2 = GetClusterErrorSeeding(1, rowType, z, angleZ2, unscaledMult); // Returns Err2 @@ -132,8 +133,8 @@ GPUdi() float GPUParam::GetClusterErrorSeeding(int32_t yz, int32_t type, float z GPUdi() void GPUParam::GetClusterErrorsSeeding2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float& ErrY2, float& ErrZ2) const { - int32_t rowType = tpcGeometry.GetROC(iRow); - z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); + int32_t rowType = GPUTPCGeometry::GetROC(iRow); + z = CAMath::Abs(GPUTPCGeometry::TPCLength() - CAMath::Abs(z)); const float s2 = CAMath::Min(sinPhi * sinPhi, 0.95f * 0.95f); float sec2 = 1.f / (1.f - s2); float angleY2 = s2 * sec2; // dy/dx @@ -172,14 +173,14 @@ GPUdi() float GPUParam::GetSystematicClusterErrorC122(float trackX, float trackY GPUdi() void GPUParam::GetClusterErrors2(uint8_t sector, int32_t iRow, float z, float sinPhi, float DzDs, float time, float avgInvCharge, float invCharge, float& ErrY2, float& ErrZ2) const { - const int32_t rowType = tpcGeometry.GetROC(iRow); - z = CAMath::Abs(tpcGeometry.TPCLength() - CAMath::Abs(z)); + const int32_t rowType = GPUTPCGeometry::GetROC(iRow); + z = CAMath::Abs(GPUTPCGeometry::TPCLength() - CAMath::Abs(z)); const float s2 = CAMath::Min(sinPhi * sinPhi, 0.95f * 0.95f); const float sec2 = 1.f / (1.f - s2); const float angleY2 = s2 * sec2; // dy/dx const float angleZ2 = DzDs * DzDs * sec2; // dz/dx - const float unscaledMult = time >= 0.f ? GetUnscaledMult(time) / tpcGeometry.Row2X(iRow) : 0.f; + const float unscaledMult = time >= 0.f ? GetUnscaledMult(time) / GPUTPCGeometry::Row2X(iRow) : 0.f; const float scaledInvAvgCharge = avgInvCharge * rec.tpc.clusterErrorChargeScaler > 0.f ? avgInvCharge * rec.tpc.clusterErrorChargeScaler : 1.f; const float scaledInvCharge = invCharge * rec.tpc.clusterErrorChargeScaler > 0.f ? 
invCharge * rec.tpc.clusterErrorChargeScaler : 1.f; @@ -218,7 +219,7 @@ GPUdi() float GPUParam::GetUnscaledMult(float time) const GPUdi() bool GPUParam::rejectEdgeClusterByY(float uncorrectedY, int32_t iRow, float trackSigmaY) const { - return CAMath::Abs(uncorrectedY) > (tpcGeometry.NPads(iRow) - 1) * 0.5f * tpcGeometry.PadWidth(iRow) + rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; + return CAMath::Abs(uncorrectedY) > (GPUTPCGeometry::NPads(iRow) - 1) * 0.5f * GPUTPCGeometry::PadWidth(iRow) + rec.tpc.rejectEdgeClustersMargin + trackSigmaY * rec.tpc.rejectEdgeClustersSigmaMargin; } } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index bc760f6188caa..e12ca7ec601ad 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -24,6 +24,7 @@ #include "GPUTPCClusterData.h" #include "GPUO2DataTypes.h" #include "GPUDataTypes.h" +#include "GPUTPCGeometry.h" #include "AliHLTTPCRawCluster.h" #include "GPUParam.h" #include "GPULogging.h" @@ -275,10 +276,10 @@ struct zsEncoderRow : public zsEncoder { inline bool zsEncoderRow::sort(const o2::tpc::Digit a, const o2::tpc::Digit b) { - int32_t endpointa = param->tpcGeometry.GetRegion(a.getRow()); - int32_t endpointb = param->tpcGeometry.GetRegion(b.getRow()); - endpointa = 2 * endpointa + (a.getRow() >= param->tpcGeometry.GetRegionStart(endpointa) + param->tpcGeometry.GetRegionRows(endpointa) / 2); - endpointb = 2 * endpointb + (b.getRow() >= param->tpcGeometry.GetRegionStart(endpointb) + param->tpcGeometry.GetRegionRows(endpointb) / 2); + int32_t endpointa = GPUTPCGeometry::GetRegion(a.getRow()); + int32_t endpointb = GPUTPCGeometry::GetRegion(b.getRow()); + endpointa = 2 * endpointa + (a.getRow() >= GPUTPCGeometry::GetRegionStart(endpointa) + GPUTPCGeometry::GetRegionRows(endpointa) / 2); + endpointb = 2 * endpointb + (b.getRow() >= 
GPUTPCGeometry::GetRegionStart(endpointb) + GPUTPCGeometry::GetRegionRows(endpointb) / 2); if (endpointa != endpointb) { return endpointa <= endpointb; } @@ -295,11 +296,11 @@ bool zsEncoderRow::checkInput(std::vector& tmpBuffer, uint32_t k { seqLen = 1; if (lastRow != tmpBuffer[k].getRow()) { - endpointStart = param->tpcGeometry.GetRegionStart(curRegion); + endpointStart = GPUTPCGeometry::GetRegionStart(curRegion); endpoint = curRegion * 2; - if (tmpBuffer[k].getRow() >= endpointStart + param->tpcGeometry.GetRegionRows(curRegion) / 2) { + if (tmpBuffer[k].getRow() >= endpointStart + GPUTPCGeometry::GetRegionRows(curRegion) / 2) { endpoint++; - endpointStart += param->tpcGeometry.GetRegionRows(curRegion) / 2; + endpointStart += GPUTPCGeometry::GetRegionRows(curRegion) / 2; } } for (uint32_t l = k + 1; l < tmpBuffer.size(); l++) { @@ -408,7 +409,7 @@ void zsEncoderRow::decodePage(std::vector& outputBuffer, const z if ((uint32_t)region != decEndpoint / 2) { throw std::runtime_error("CRU ID / endpoint mismatch"); } - int32_t nRowsRegion = param->tpcGeometry.GetRegionRows(region); + int32_t nRowsRegion = GPUTPCGeometry::GetRegionRows(region); int32_t timeBin = (decHDR->timeOffset + (uint64_t)(o2::raw::RDHUtils::getHeartBeatOrbit(*rdh) - firstOrbit) * o2::constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN; for (int32_t l = 0; l < decHDR->nTimeBinSpan; l++) { @@ -420,7 +421,7 @@ void zsEncoderRow::decodePage(std::vector& outputBuffer, const z if (tbHdr->rowMask != 0 && ((upperRows) ^ ((decEndpoint & 1) != 0))) { throw std::runtime_error("invalid endpoint"); } - const int32_t rowOffset = param->tpcGeometry.GetRegionStart(region) + (upperRows ? (nRowsRegion / 2) : 0); + const int32_t rowOffset = GPUTPCGeometry::GetRegionStart(region) + (upperRows ? (nRowsRegion / 2) : 0); const int32_t nRows = upperRows ? (nRowsRegion - nRowsRegion / 2) : (nRowsRegion / 2); const int32_t nRowsUsed = __builtin_popcount((uint32_t)(tbHdr->rowMask & 0x7FFF)); decPagePtr += nRowsUsed ? 
(2 * nRowsUsed) : 2; @@ -513,7 +514,7 @@ void zsEncoderLinkBased::createBitmask(std::vector& tmpBuffer, u uint32_t l; for (l = k; l < tmpBuffer.size(); l++) { const auto& a = tmpBuffer[l]; - int32_t cruinsector = param->tpcGeometry.GetRegion(a.getRow()); + int32_t cruinsector = GPUTPCGeometry::GetRegion(a.getRow()); o2::tpc::GlobalPadNumber pad = mapper.globalPadNumber(o2::tpc::PadPos(a.getRow(), a.getPad())); o2::tpc::FECInfo fec = mapper.fecInfo(pad); o2::tpc::CRU cru = cruinsector; @@ -535,8 +536,8 @@ void zsEncoderLinkBased::createBitmask(std::vector& tmpBuffer, u bool zsEncoderLinkBased::sort(const o2::tpc::Digit a, const o2::tpc::Digit b) { // Fixme: this is blasphemy... one shoult precompute all values and sort an index array - int32_t cruinsectora = param->tpcGeometry.GetRegion(a.getRow()); - int32_t cruinsectorb = param->tpcGeometry.GetRegion(b.getRow()); + int32_t cruinsectora = GPUTPCGeometry::GetRegion(a.getRow()); + int32_t cruinsectorb = GPUTPCGeometry::GetRegion(b.getRow()); if (cruinsectora != cruinsectorb) { return cruinsectora < cruinsectorb; } @@ -1124,7 +1125,7 @@ inline uint32_t zsEncoderRun::run(std::vector* buffer, std::vectortpcGeometry.GetRegion(tmpBuffer[k].getRow()); + curRegion = GPUTPCGeometry::GetRegion(tmpBuffer[k].getRow()); } mustWriteSubPage = checkInput(tmpBuffer, k); } else { diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 4831be9b12bcc..445c03113cd39 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -18,7 +18,6 @@ #include "GPUParam.h" #include "GPUCommonAlgorithm.h" #include "GPUTPCCompressionTrackModel.h" -#include "GPUTPCGeometry.h" #include "GPUTPCClusterRejection.h" #include "GPUTPCCompressionKernels.inc" @@ -68,9 +67,10 @@ GPUdii() void GPUTPCCompressionKernels::Thread clusters[hit.sector][hit.row][hit.num - 
clusters->clusterOffset[hit.sector][hit.row]]; - float x = param.tpcGeometry.Row2X(hit.row); - float y = track.LinearPad2Y(hit.sector, orgCl.getPad(), param.tpcGeometry.PadWidth(hit.row), param.tpcGeometry.NPads(hit.row)); - float z = param.tpcGeometry.LinearTime2Z(hit.sector, orgCl.getTime()); + constexpr GPUTPCGeometry geo; + float x = geo.Row2X(hit.row); + float y = track.LinearPad2Y(hit.sector, orgCl.getPad(), geo.PadWidth(hit.row), geo.NPads(hit.row)); + float z = geo.LinearTime2Z(hit.sector, orgCl.getTime()); if (nClustersStored) { if ((hit.sector < GPUCA_NSECTORS) ^ (lastSector < GPUCA_NSECTORS)) { break; @@ -78,7 +78,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread @@ -80,6 +81,7 @@ class TPCClusterDecompressionCore uint32_t slice = cmprClusters.sliceA[trackIndex]; uint32_t row = cmprClusters.rowA[trackIndex]; GPUTPCCompressionTrackModel track; + constexpr GPUTPCGeometry geo; uint32_t clusterIndex; for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) { uint32_t pad = 0, time = 0; @@ -105,23 +107,23 @@ class TPCClusterDecompressionCore if (changeLeg && track.Mirror()) { break; } - if (track.Propagate(param.tpcGeometry.Row2X(row), param.SectorParam[slice].Alpha)) { + if (track.Propagate(geo.Row2X(row), param.SectorParam[slice].Alpha)) { break; } uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1]; if (timeTmp & 800000) { timeTmp |= 0xFF000000; } - time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset))); - float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(slice, track.Y(), param.tpcGeometry.PadWidth(row), param.tpcGeometry.NPads(row)))); + time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, geo.LinearZ2Time(slice, track.Z() + zOffset))); + float tmpPad = CAMath::Max(0.f, CAMath::Min((float)geo.NPads(GPUCA_ROW_COUNT - 1), track.LinearY2Pad(slice, track.Y(), 
geo.PadWidth(row), geo.NPads(row)))); pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad); time = time & 0xFFFFFF; pad = (uint16_t)pad; - if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) { + if (pad >= geo.NPads(row) * ClusterNative::scalePadPacked) { if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2 pad = 0; } else { - pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1; + pad = geo.NPads(row) * ClusterNative::scalePadPacked - 1; } } if (param.continuousMaxTimeBin > 0 && time >= maxTime) { @@ -136,11 +138,11 @@ class TPCClusterDecompressionCore pad = cmprClusters.padA[trackIndex]; } const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...); - float y = track.LinearPad2Y(slice, cluster.getPad(), param.tpcGeometry.PadWidth(row), param.tpcGeometry.NPads(row)); - float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime()); + float y = track.LinearPad2Y(slice, cluster.getPad(), geo.PadWidth(row), geo.NPads(row)); + float z = geo.LinearTime2Z(slice, cluster.getTime()); if (clusterIndex == 0) { zOffset = z; - track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SectorParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); + track.Init(geo.Row2X(row), y, z - zOffset, param.SectorParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param); } if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) { break; diff --git a/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx b/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx index 40ce8c0ccda81..0b8e67fbe495e 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx +++ b/GPU/GPUTracking/DataTypes/TPCPadBitMap.cxx @@ -21,7 +21,7 @@ using namespace o2::gpu; TPCPadBitMap::TPCPadBitMap() { - GPUTPCGeometry geo{}; + constexpr GPUTPCGeometry geo; int32_t offset = 0; for (int32_t r = 0; r < GPUCA_ROW_COUNT; r++) { 
mPadOffsetPerRow[r] = offset; diff --git a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx index 41f0ad819d1b6..a20f3dc8aac1d 100644 --- a/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx +++ b/GPU/GPUTracking/DataTypes/TPCPadGainCalib.cxx @@ -21,7 +21,7 @@ using namespace o2::gpu; TPCPadGainCalib::TPCPadGainCalib() { - GPUTPCGeometry geo{}; + constexpr GPUTPCGeometry geo{}; int32_t offset = 0; for (int32_t r = 0; r < GPUCA_ROW_COUNT; r++) { mPadOffsetPerRow[r] = offset; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 1c2a8e2b29a9c..f373d56ea0395 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -36,6 +36,7 @@ #include "GPUO2DataTypes.h" #include "TPCFastTransform.h" #include "GPUTPCConvertImpl.h" +#include "GPUTPCGeometry.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" @@ -601,13 +602,13 @@ GPUd() void GPUTPCGMMerger::MergeSectorsPrepareStep2(int32_t nBlocks, int32_t nT } else if (iBorder == 1) { // transport to the right edge of the sector and rotate horizontally dAlpha = -dAlpha - CAMath::Pi() / 2; } else if (iBorder == 2) { // transport to the middle of the sector and rotate vertically to the border on the left - x0 = Param().tpcGeometry.Row2X(63); + x0 = GPUTPCGeometry::Row2X(63); } else if (iBorder == 3) { // transport to the middle of the sector and rotate vertically to the border on the right dAlpha = -dAlpha; - x0 = Param().tpcGeometry.Row2X(63); + x0 = GPUTPCGeometry::Row2X(63); } else if (iBorder == 4) { // transport to the middle of the sßector, w/o rotation dAlpha = 0; - x0 = Param().tpcGeometry.Row2X(63); + x0 = GPUTPCGeometry::Row2X(63); } const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); @@ -955,7 +956,7 @@ template GPUdni() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int GPUd() void GPUTPCGMMerger::MergeWithinSectorsPrepare(int32_t nBlocks, int32_t 
nThreads, int32_t iBlock, int32_t iThread) { - float x0 = Param().tpcGeometry.Row2X(63); + float x0 = GPUTPCGeometry::Row2X(63); const float maxSin = CAMath::Sin(60.f / 180.f * CAMath::Pi()); for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { @@ -1295,7 +1296,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const int32_t sector = track->Sector(); for (int32_t attempt = 0; attempt < 2; attempt++) { GPUTPCGMBorderTrack b; - const float x0 = Param().tpcGeometry.Row2X(attempt == 0 ? 63 : cls.row); + const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row); if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) { b.SetTrackID(itr); b.SetNClusters(mOutputTracks[itr].NClusters()); @@ -1759,7 +1760,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : Param().tpcGeometry.Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index ae413aaa98648..1e4cc633eb4ca 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -353,7 +353,7 @@ void GPUTPCGMMerger::DebugStreamerUpdate(int32_t iTrk, int32_t ihit, float xx, f auto uncorrectedYZ = StreamerUncorrectedZY(cluster.sector, cluster.row, track, prop); float invCharge = 1.f / clusterNative.qMax; int32_t iRow = cluster.row; - float unscaledMult = (time >= 0.f ? Param().GetUnscaledMult(time) / Param().tpcGeometry.Row2X(iRow) : 0.f); + float unscaledMult = (time >= 0.f ? 
Param().GetUnscaledMult(time) / GPUTPCGeometry::Row2X(iRow) : 0.f); const float clAlpha = Param().Alpha(cluster.sector); uint32_t occupancyTotal = Param().occupancyTotal; o2::utils::DebugStreamer::instance()->getStreamer("debug_update_track", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_update_track").data() diff --git a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx index a439e6e653039..11b153c7f0d8b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMSectorTrack.cxx @@ -95,7 +95,7 @@ GPUd() void GPUTPCGMSectorTrack::SetParam2(const GPUTPCGMTrackParam& trk) GPUd() bool GPUTPCGMSectorTrack::FilterErrors(const GPUTPCGMMerger* merger, int32_t iSector, float maxSinPhi, float sinPhiMargin) { float lastX; - // float lastX = merger->Param().tpcGeometry.Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: Why is this needed to be set below, Row2X should work, but looses some tracks + // float lastX = GPUTPCGeometry::Row2X(mOrigTrack->Cluster(mOrigTrack->NClusters() - 1).GetRow()); // TODO: Why is this needed to be set below, Row2X should work, but looses some tracks float y, z; int32_t row, index; const GPUTPCTracker& trk = merger->GetConstantMem()->tpcTrackers[iSector]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index be1d3803312fe..3bd2257d02e01 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -219,7 +219,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ bool dodEdx = param.par.dodEdx && param.dodEdxDownscaled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == 
clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { - dEdx.fillSubThreshold(lastRow - wayDirection, param); + dEdx.fillSubThreshold(lastRow - wayDirection); } } @@ -384,7 +384,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ pad /= clusterCount; relTime /= clusterCount; relTime = relTime - CAMath::Round(relTime); - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], param, merger->GetConstantMem()->calibObjects, zz, pad, relTime); + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } else if (retVal >= GPUTPCGMPropagator::updateErrorClusterRejected) { // cluster far away form the track if (allowModification) { @@ -650,7 +650,7 @@ GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GP return dodEdx; } int32_t step = toRow > lastRow ? 1 : -1; - float xx = mX - Merger->Param().tpcGeometry.Row2X(lastRow); + float xx = mX - GPUTPCGeometry::Row2X(lastRow); for (int32_t iRow = lastRow + step; iRow != toRow; iRow += step) { if (CAMath::Abs(mP[2]) > maxSinPhi) { return dodEdx; @@ -658,15 +658,15 @@ GPUd() bool GPUTPCGMTrackParam::AttachClustersPropagate(const GPUTPCGMMerger* GP if (CAMath::Abs(mP[0]) > CAMath::Abs(mX) * CAMath::Tan(kSectAngle / 2.f)) { return dodEdx; } - int32_t err = prop.PropagateToXAlpha(xx + Merger->Param().tpcGeometry.Row2X(iRow), prop.GetAlpha(), inFlyDirection); + int32_t err = prop.PropagateToXAlpha(xx + GPUTPCGeometry::Row2X(iRow), prop.GetAlpha(), inFlyDirection); if (err) { return dodEdx; } if (dodEdx && iRow + step == toRow) { float yUncorrected, zUncorrected; Merger->GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(sector, iRow, mP[0], mP[1], yUncorrected, zUncorrected); - uint32_t pad = CAMath::Float2UIntRn(Merger->Param().tpcGeometry.LinearY2Pad(sector, iRow, yUncorrected)); - if (pad >= 
Merger->Param().tpcGeometry.NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(sector, iRow, pad))) { + uint32_t pad = CAMath::Float2UIntRn(GPUTPCGeometry::LinearY2Pad(sector, iRow, yUncorrected)); + if (pad >= GPUTPCGeometry::NPads(iRow) || (Merger->GetConstantMem()->calibObjects.dEdxCalibContainer && Merger->GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(sector, iRow, pad))) { dodEdx = false; } } @@ -782,7 +782,7 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr } CADEBUG(printf("\tPropagated to y = %f: X %f Z %f SinPhi %f\n", mX, mP[0], mP[1], mP[2])); for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - float rowX = Merger->Param().tpcGeometry.Row2X(j); + float rowX = GPUTPCGeometry::Row2X(j); if (CAMath::Abs(rowX - (-mP[0] * lrFactor)) < 1.5f) { CADEBUG(printf("\t\tAttempt row %d (Y %f Z %f)\n", j, mX * lrFactor, mP[1])); AttachClusters(Merger, sector, j, iTrack, false, mX * lrFactor, mP[1]); @@ -823,18 +823,18 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr prop.Rotate180(); CADEBUG(printf("\tMirrored position: Alpha %f X %f Y %f Z %f SinPhi %f DzDs %f\n", prop.GetAlpha(), mX, mP[0], mP[1], mP[2], mP[3])); iRow = toRow; - float dx = toX - Merger->Param().tpcGeometry.Row2X(toRow); + float dx = toX - GPUTPCGeometry::Row2X(toRow); if (up ^ (toX > mX)) { if (up) { - while (iRow < GPUCA_ROW_COUNT - 2 && Merger->Param().tpcGeometry.Row2X(iRow + 1) + dx <= mX) { + while (iRow < GPUCA_ROW_COUNT - 2 && GPUTPCGeometry::Row2X(iRow + 1) + dx <= mX) { iRow++; } } else { - while (iRow > 1 && Merger->Param().tpcGeometry.Row2X(iRow - 1) + dx >= mX) { + while (iRow > 1 && GPUTPCGeometry::Row2X(iRow - 1) + dx >= mX) { iRow--; } } - prop.PropagateToXAlpha(Merger->Param().tpcGeometry.Row2X(iRow) + dx, prop.GetAlpha(), inFlyDirection); + prop.PropagateToXAlpha(GPUTPCGeometry::Row2X(iRow) + dx, prop.GetAlpha(), 
inFlyDirection); AttachClustersPropagate(Merger, sector, iRow, toRow, iTrack, false, prop, inFlyDirection); } if (prop.PropagateToXAlpha(toX, prop.GetAlpha(), inFlyDirection)) { @@ -875,7 +875,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU return; } float dx = (toX - X) / count; - const float myRowX = Merger->Param().tpcGeometry.Row2X(iRow); + const float myRowX = GPUTPCGeometry::Row2X(iRow); // printf("AttachMirror\n"); // printf("X %f Y %f Z %f SinPhi %f toY %f -->\n", mX, mP[0], mP[1], mP[2], toY); // printf("X %f Y %f Z %f SinPhi %f, count %d dx %f (to: %f)\n", X, Y, Z, SinPhi, count, dx, X + count * dx); @@ -905,7 +905,7 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU int32_t step = paramX >= mX ? 1 : -1; int32_t found = 0; for (int32_t j = iRow; j >= 0 && j < GPUCA_ROW_COUNT && found < 3; j += step) { - float rowX = mX + Merger->Param().tpcGeometry.Row2X(j) - myRowX; + float rowX = mX + GPUTPCGeometry::Row2X(j) - myRowX; if (CAMath::Abs(rowX - paramX) < 1.5f) { // printf("Attempt row %d\n", j); AttachClusters(Merger, sector, j, iTrack, false, mP[2] > 0 ? 
X : -X, Z); @@ -930,8 +930,8 @@ GPUd() void GPUTPCGMTrackParam::ShiftZ2(const GPUTPCGMMergedTrackHit* clusters, const auto& GPUrestrict() cls = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear; tzInner = cls[clusters[N - 1].num].getTime(); tzOuter = cls[clusters[0].num].getTime(); - xInner = merger->Param().tpcGeometry.Row2X(clusters[N - 1].row); - xOuter = merger->Param().tpcGeometry.Row2X(clusters[0].row); + xInner = GPUTPCGeometry::Row2X(clusters[N - 1].row); + xOuter = GPUTPCGeometry::Row2X(clusters[0].row); } ShiftZ(merger, clusters[0].sector, tzInner, tzOuter, xInner, xOuter); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx index a3e73c377ed44..9a4d2eebcb953 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.cxx @@ -19,6 +19,7 @@ #include "GPUProcessor.h" #include "GPUO2DataTypes.h" #include "GPUTPCConvertImpl.h" +#include "GPUTPCGeometry.h" #include "GPUCommonMath.h" #ifndef GPUCA_GPUCODE_DEVICE @@ -39,7 +40,7 @@ void GPUTPCTrackingData::InitializeRows(const GPUParam& p) new (&mRows[i]) GPUTPCRow; } for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { - mRows[i].mX = p.tpcGeometry.Row2X(i); + mRows[i].mX = GPUTPCGeometry::Row2X(i); mRows[i].mMaxY = CAMath::Tan(p.par.dAlpha / 2.f) * mRows[i].mX; } } @@ -101,7 +102,7 @@ void* GPUTPCTrackingData::SetPointersRows(void* mem) GPUd() void GPUTPCTrackingData::GetMaxNBins(GPUconstantref() const GPUConstantMem* mem, GPUTPCRow* GPUrestrict() row, int32_t& maxY, int32_t& maxZ) { maxY = row->mMaxY * 2.f / GPUCA_MIN_BIN_SIZE + 1; - maxZ = (mem->param.continuousMaxTimeBin > 0 ? (mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : mem->param.tpcGeometry.TPCLength()) + 50; + maxZ = (mem->param.continuousMaxTimeBin > 0 ? 
(mem->calibObjects.fastTransformHelper->getCorrMap()->convTimeToZinTimeFrame(0, 0, mem->param.continuousMaxTimeBin)) : GPUTPCGeometry::TPCLength()) + 50; maxZ = maxZ / GPUCA_MIN_BIN_SIZE + 1; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 3aac31c87498c..5a7df0ba8b874 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -377,8 +377,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } while (false); (void)found; if (!found && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer) { - uint32_t pad = CAMath::Float2UIntRn(tracker.Param().tpcGeometry.LinearY2Pad(tracker.ISector(), iRow, yUncorrected)); - if (pad < tracker.Param().tpcGeometry.NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISector(), iRow, pad)) { + uint32_t pad = CAMath::Float2UIntRn(GPUTPCGeometry::LinearY2Pad(tracker.ISector(), iRow, yUncorrected)); + if (pad < GPUTPCGeometry::NPads(iRow) && tracker.GetConstantMem()->calibObjects.dEdxCalibContainer->isDead(tracker.ISector(), iRow, pad)) { r.mNMissed--; rowHit = CALINK_DEAD_CHANNEL; } @@ -395,7 +395,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, const float z1 = row1.Grid().ZMin() + hh1.y * row1.HstepZ(); const float z2 = row2.Grid().ZMin() + hh2.y * row2.HstepZ(); float oldOffset = tParam.ZOffset(); - tParam.ShiftZ(z1, z2, tracker.Param().tpcGeometry.Row2X(r.mFirstRow), tracker.Param().tpcGeometry.Row2X(r.mLastRow), tracker.Param().bzCLight, tracker.Param().rec.tpc.defaultZOffsetOverR); + tParam.ShiftZ(z1, z2, GPUTPCGeometry::Row2X(r.mFirstRow), GPUTPCGeometry::Row2X(r.mLastRow), tracker.Param().bzCLight, tracker.Param().rec.tpc.defaultZOffsetOverR); r.mLastZ -= tParam.ZOffset() - oldOffset; CADEBUG(printf("Shifted z from %f to %f\n", oldOffset, 
tParam.ZOffset())); } diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index f876270a34358..622da856af805 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -13,9 +13,9 @@ /// \author Felix Weiglhofer #include "ClusterAccumulator.h" -#include "GPUTPCGeometry.h" #include "CfUtils.h" #include "GPUParam.h" +#include "GPUTPCGeometry.h" #include "DataFormatsTPC/ClusterNative.h" using namespace o2::gpu; @@ -81,17 +81,17 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, t bool isEdgeCluster; if (param.rec.tpc.cfEdgeTwoPads) { - isEdgeCluster = pad < 2 || pad >= param.tpcGeometry.NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge + isEdgeCluster = pad < 2 || pad >= GPUTPCGeometry::NPads(pos.row()) - 2; // Geometrical edge check, peak within 2 pads of sector edge if (isEdgeCluster) { bool leftEdge = (pad < 2); - if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (param.tpcGeometry.NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { + if (leftEdge ? (pad == 1 && chargeMap[pos.delta({-1, 0})].unpack() < 1) : (pad == (GPUTPCGeometry::NPads(pos.row()) - 2) && chargeMap[pos.delta({1, 0})].unpack() < 1)) { isEdgeCluster = false; // No edge cluster if peak is close to edge but no charge at the edge. } else if (leftEdge ? 
(pad < mPadMean) : (pad > mPadMean)) { mPadMean = pad; // Correct to peak position if COG is close to middle of pad than peak } } } else { - isEdgeCluster = pad == 0 || pad == param.tpcGeometry.NPads(pos.row()) - 1; + isEdgeCluster = pad == 0 || pad == GPUTPCGeometry::NPads(pos.row()) - 1; } cn.qTot = CAMath::Float2UIntRn(mQtot); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 4a167b7d53890..1e76860331de6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -15,6 +15,7 @@ #include "GPUTPCCFCheckPadBaseline.h" #include "Array2D.h" #include "PackedCharge.h" +#include "GPUTPCGeometry.h" #include "clusterFinderDefs.h" #ifndef GPUCA_GPUCODE @@ -151,7 +152,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThread GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer) { - const GPUTPCGeometry& geo = clusterer.Param().tpcGeometry; + constexpr GPUTPCGeometry geo; int32_t padOffset = 0; for (Row r = 0; r < GPUCA_ROW_COUNT; r++) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index f1fd95d696f5d..6662b93eccb78 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -22,6 +22,7 @@ #include "GPUCommonAlgorithm.h" #include "TPCPadGainCalib.h" #include "TPCZSLinkMapping.h" +#include "GPUTPCGeometry.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -57,8 +58,8 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared const size_t nDigits = clusterer.mPzsOffsets[iBlock].offset; if (iThread == 0) { const int32_t region = endpoint / 2; - s.nRowsRegion = clusterer.Param().tpcGeometry.GetRegionRows(region); - s.regionStartRow = 
clusterer.Param().tpcGeometry.GetRegionStart(region); + s.nRowsRegion = GPUTPCGeometry::GetRegionRows(region); + s.regionStartRow = GPUTPCGeometry::GetRegionStart(region); s.nThreadsPerRow = CAMath::Max(1u, nThreads / ((s.nRowsRegion + (endpoint & 1)) / 2)); s.rowStride = nThreads / s.nThreadsPerRow; s.rowOffsetCounter = 0; @@ -524,7 +525,7 @@ GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorTy { #ifdef GPUCA_TPC_GEOMETRY_O2 // Ported from tpc::Mapper (Not available on GPU...) - const GPUTPCGeometry& geo = clusterer.Param().tpcGeometry; + constexpr GPUTPCGeometry geo; const int32_t regionIter = cru % 2; const int32_t istreamm = ((rawFECChannel % 10) / 2); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 25cd2497fbf62..379ea27443fea 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -14,6 +14,7 @@ #include "GPUTPCNNClusterizerKernels.h" #include "GPUTPCCFClusterizer.h" +#include "GPUTPCGeometry.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -102,9 +103,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread 62 ? global_shift : 0); } -GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift, const GPUTPCGeometry& geo) +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) { if (pad < 0 || row < 0) { // Faster short-circuit return true; } else if (row < 63) { - return (pad >= static_cast(geo.NPads(row))); + return (pad >= static_cast(GPUTPCGeometry::NPads(row))); } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. 
Charge will be set to -1 in order to signal boundary to the neural network return true; } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { - return (pad >= static_cast(geo.NPads(row - global_shift))); + return (pad >= static_cast(GPUTPCGeometry::NPads(row - global_shift))); } else { return true; } @@ -152,9 +153,9 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n #endif for (int r = -clustererNN.nnClusterizerSizeInputRow; r <= clustererNN.nnClusterizerSizeInputRow; r++) { bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); - int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r, clusterer.Param().tpcGeometry); + int pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); for (int p = -clustererNN.nnClusterizerSizeInputPad + pad_offset; p <= clustererNN.nnClusterizerSizeInputPad + pad_offset; p++) { - bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow, clusterer.Param().tpcGeometry); + bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); for (int t = -clustererNN.nnClusterizerSizeInputTime; t <= clustererNN.nnClusterizerSizeInputTime; t++) { if (!is_boundary) { ChargePos tmp_pos(row + r, pad + p, time + t); @@ -183,11 +184,11 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n if (dtype == 0) { clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISector / 36.f); clustererNN.inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row)); + clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / 
GPUTPCGeometry::NPads(row)); } else { clustererNN.inputData32[write_idx] = clusterer.mISector / 36.f; clustererNN.inputData32[write_idx + 1] = row / 152.f; - clustererNN.inputData32[write_idx + 2] = static_cast(pad) / clusterer.Param().tpcGeometry.NPads(row); + clustererNN.inputData32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); } } } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index c7bd18115d61f..e6c1dc508d6e4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -67,9 +67,9 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); - static GPUd() int padOffset(int, int, const GPUTPCGeometry&); + static GPUd() int padOffset(int, int); static GPUd() int rowOffset(int, int); - static GPUd() bool isBoundary(int, int, int, const GPUTPCGeometry&); + static GPUd() bool isBoundary(int, int, int); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index 2e67ddda7c99c..b7da0de4c0e29 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -13,7 +13,6 @@ /// \author David Rohr #include "GPUdEdx.h" -#include "GPUTPCGeometry.h" #include "GPUdEdxInfo.h" #include "GPUCommonAlgorithm.h" #include "GPUParam.h" diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 6c0a96d3adb75..bcd75af468c28 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -16,12 +16,12 @@ #define GPUDEDX_H #include "GPUDef.h" -#include "GPUTPCGeometry.h" #include "GPUCommonMath.h" #include "GPUParam.h" #include "GPUdEdxInfo.h" #include 
"DataFormatsTPC/Defs.h" #include "CalibdEdxContainer.h" +#include "GPUTPCGeometry.h" #include "GPUDebugStreamer.h" namespace o2::gpu @@ -32,8 +32,8 @@ class GPUdEdx public: // The driver must call clear(), fill clusters row by row outside-in, then run computedEdx() to get the result GPUd() void clear(); - GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); - GPUd() void fillSubThreshold(int32_t padRow, const GPUParam& param); + GPUd() void fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUCalibObjectsConst& calib, float z, float pad, float relTime); + GPUd() void fillSubThreshold(int32_t padRow); GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); private: @@ -92,7 +92,7 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) mLastROC = roc; } -GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUParam& GPUrestrict() param, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) +GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint8_t sector, float trackSnp, float trackTgl, const GPUCalibObjectsConst& calib, float z, float pad, float relTime) { if (mCount >= MAX_NCL) { return; @@ -100,8 +100,9 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint // container containing all the dE/dx corrections auto calibContainer = calib.dEdxCalibContainer; + constexpr GPUTPCGeometry geo; - const int32_t roc = param.tpcGeometry.GetROC(padRow); + const int32_t roc = geo.GetROC(padRow); checkSubThresh(roc); float snp2 = trackSnp * trackSnp; if (snp2 > GPUCA_MAX_SIN_PHI_LOW) { @@ -119,7 +120,7 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint // getting the topology correction 
const uint32_t padPos = CAMath::Float2UIntRn(pad); // position of the pad is shifted half a pad ( pad=3 -> centre position of third pad) const float absRelPad = CAMath::Abs(pad - padPos); - const int32_t region = param.tpcGeometry.GetRegion(padRow); + const int32_t region = geo.GetRegion(padRow); z = CAMath::Abs(z); const float threshold = calibContainer->getZeroSupressionThreshold(sector, padRow, padPos); // TODO: Use the mean zero supresion threshold of all pads in the cluster? const bool useFullGainMap = calibContainer->isUsageOfFullGainMap(); @@ -161,8 +162,8 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint } GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamdEdx)) { - float padlx = param.tpcGeometry.Row2X(padRow); - float padly = param.tpcGeometry.LinearPad2Y(sector, padRow, padPos); + float padlx = geo.Row2X(padRow); + float padly = geo.LinearPad2Y(sector, padRow, padPos); o2::utils::DebugStreamer::instance()->getStreamer("debug_dedx", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_dedx").data() << "qTot=" << mChargeTot[mCount - 1] << "qMax=" << mChargeMax[mCount - 1] @@ -189,9 +190,9 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint }) } -GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow, const GPUParam& GPUrestrict() param) +GPUdi() void GPUdEdx::fillSubThreshold(int32_t padRow) { - const int32_t roc = param.tpcGeometry.GetROC(padRow); + const int32_t roc = GPUTPCGeometry::GetROC(padRow); checkSubThresh(roc); mNSubThresh++; } diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index 6fd70354c9486..0a780732273db 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -150,13 +150,13 @@ void GPUDisplay::DrawGLScene_updateEventData() float4* ptr = 
&mGlobalPos[cid]; if (mParam->par.earlyTpcTransform) { const auto& cl = mIOPtrs->clusterData[iSector][i]; - mParam->Sector2Global(iSector, (mCfgH.clustersOnNominalRow ? mParam->tpcGeometry.Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); + mParam->Sector2Global(iSector, (mCfgH.clustersOnNominalRow ? GPUTPCGeometry::Row2X(row) : cl.x) + mCfgH.xAdd, cl.y, cl.z, &ptr->x, &ptr->y, &ptr->z); } else { float x, y, z; const auto& cln = mIOPtrs->clustersNative->clusters[iSector][0][i]; GPUTPCConvertImpl::convert(*mCalib->fastTransform, *mParam, iSector, row, cln.getPad(), cln.getTime(), x, y, z); if (mCfgH.clustersOnNominalRow) { - x = mParam->tpcGeometry.Row2X(row); + x = GPUTPCGeometry::Row2X(row); } mParam->Sector2Global(iSector, x + mCfgH.xAdd, y, z, &ptr->x, &ptr->y, &ptr->z); } diff --git a/GPU/GPUTracking/qa/genEvents.cxx b/GPU/GPUTracking/qa/genEvents.cxx index 627cfc5f9909a..2e1bc1c5c64b2 100644 --- a/GPU/GPUTracking/qa/genEvents.cxx +++ b/GPU/GPUTracking/qa/genEvents.cxx @@ -222,7 +222,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) for (int32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) { // if( iRow>=50 ) break; //SG!!! 
- float xRow = param.tpcGeometry.Row2X(iRow); + float xRow = GPUTPCGeometry::Row2X(iRow); // transport to row int32_t err = 0; for (int32_t itry = 0; itry < 1; itry++) { From 9070674d026b2540656e18760bb3ed1f8517835b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 17:54:41 +0100 Subject: [PATCH 0240/1914] GPU: Improve memory usage debug printout --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 15 +++++++-------- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 8 ++++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 2bd4c0e937c20..b715b08f52b32 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -811,11 +811,9 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str()); } if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) { - if (IsGPU()) { - printf("Allocated Device memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase), mDeviceMemoryPoolBlocked ? 
ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolBlocked) : 0); - } - printf("Allocated Host memory after %30s (%8s): %'13zd (non temporary %'13zd, blocked %'13zd)\n", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mHostMemoryPool, mHostMemoryBase), mHostMemoryPoolBlocked ? ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolBlocked) : 0); - printf("%16s", ""); + printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); + PrintMemoryOverview(); + printf("%76s", ""); PrintMemoryMax(); } mHostMemoryPoolEnd = std::get<0>(mNonPersistentMemoryStack.back()); @@ -888,9 +886,10 @@ void GPUReconstruction::PrintMemoryMax() void GPUReconstruction::PrintMemoryOverview() { if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { - printf("Memory Allocation: Host %'zd / %'zu (Permanent %'zd), Device %'zd / %'zu, (Permanent %'zd) %zu chunks\n", - ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), - ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), mMemoryResources.size()); + printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu, (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n", + ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, 
ptrDiff(mHostMemoryPermanent, mHostMemoryBase), ptrDiff(mHostMemoryPool, mHostMemoryPermanent), ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), + ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), ptrDiff(mDeviceMemoryPool, mDeviceMemoryPermanent), ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), + mMemoryResources.size()); } } diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index f397fc51bd407..abf2d55c95db7 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -215,6 +215,10 @@ int32_t GPUReconstructionCPU::RunChains() mStatNEvents++; mNEventsProcessed++; + if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + printf("Allocated memory when starting processing %34s", ""); + PrintMemoryOverview(); + } mTimerTotal.Start(); const std::clock_t cpuTimerStart = std::clock(); if (mProcessingSettings.doublePipeline) { @@ -235,6 +239,10 @@ int32_t GPUReconstructionCPU::RunChains() } mTimerTotal.Stop(); mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; + if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + printf("Allocated memory when ending processing %36s", ""); + PrintMemoryOverview(); + } mStatWallTime = (mTimerTotal.GetElapsedTime() * 1000000. 
/ mStatNEvents); std::string nEventReport; From 435b17ba0828727f3962032c4aeefd4e27891bbd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 18:02:01 +0100 Subject: [PATCH 0241/1914] GPU TPC: Add option to clear all non-external-output TPC memory at end of processing --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 3 +++ GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 4 insertions(+) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index abf2d55c95db7..a4074282da30f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -236,6 +236,9 @@ int32_t GPUReconstructionCPU::RunChains() return retVal; } } + if (GetProcessingSettings().tpcFreeAllocatedMemoryAfterProcessing) { + ClearAllocatedMemory(); + } } mTimerTotal.Stop(); mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b7f761c73ffc0..f7004d76c726c 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -326,6 +326,7 @@ AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the back AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included spirv code") AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") +AddOption(tpcFreeAllocatedMemoryAfterProcessing, bool, false, "", 0, "Clean all memory allocated by TPC when TPC processing done, only data written to external output resources will remain") AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) 
AddSubConfig(GPUSettingsProcessingParam, param) From 1c4d839ae9281bc727ba91d12886b12cf1dfc4af Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 17 Mar 2025 15:32:52 +0100 Subject: [PATCH 0242/1914] GPU: Remove bogus placement new leading to memory leak --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 -- GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx | 1 - GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index b715b08f52b32..35e44d99d5c0c 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -86,8 +86,6 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos mMaster = cfg.master; cfg.master->mSlaves.emplace_back(this); } - new (&mProcessingSettings) GPUSettingsProcessing; - new (&mGRPSettings) GPUSettingsGRP; param().SetDefaults(&mGRPSettings); mMemoryScalers.reset(new GPUMemorySizeScalers); for (uint32_t i = 0; i < NSECTORS; i++) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index c47bd488d96ef..aa01d26446b56 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -34,7 +34,6 @@ using namespace o2::gpu; GPUReconstruction* GPUReconstruction::CreateInstance(DeviceType type, bool forceType, GPUReconstruction* master) { GPUSettingsDeviceBackend cfg; - new (&cfg) GPUSettingsDeviceBackend; cfg.deviceType = type; cfg.forceDeviceType = forceType; cfg.master = master; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index f7004d76c726c..40a7fc71cbb4d 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -607,7 +607,7 @@ EndConfig() // Derrived parameters used in GPUParam 
BeginHiddenConfig(GPUSettingsParam, param) -AddVariableRTC(dAlpha, float, 0.f) // angular size +AddVariableRTC(dAlpha, float, 0.f) // angular size AddVariableRTC(assumeConstantBz, int8_t, 0) // Assume a constant magnetic field AddVariableRTC(toyMCEventsFlag, int8_t, 0) // events were build with home-made event generator AddVariableRTC(continuousTracking, int8_t, 0) // Continuous tracking, estimate bz and errors for abs(z) = 125cm during seeding From 4060a20febe97646422c8e89b1abc1037aca0788 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 10 Mar 2025 17:01:04 +0530 Subject: [PATCH 0243/1914] explicitly add CCDB to some CCDB fatal error strings --- CCDB/include/CCDB/BasicCCDBManager.h | 2 +- Framework/CCDBSupport/src/CCDBHelpers.cxx | 6 +++--- Framework/Core/src/DataRefUtils.cxx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CCDB/include/CCDB/BasicCCDBManager.h b/CCDB/include/CCDB/BasicCCDBManager.h index 678bedf24e551..8af1817718fa2 100644 --- a/CCDB/include/CCDB/BasicCCDBManager.h +++ b/CCDB/include/CCDB/BasicCCDBManager.h @@ -333,7 +333,7 @@ T* CCDBManagerInstance::getSpecificForRun(std::string const& path, int runNumber auto [start, stop] = getRunDuration(runNumber); if (start < 0 || stop < 0) { if (mFatalWhenNull) { - reportFatal(std::string("Failed to get run duration for run ") + std::to_string(runNumber)); + reportFatal(std::string("Failed to get run duration for run ") + std::to_string(runNumber) + std::string(" from CCDB")); } return nullptr; } diff --git a/Framework/CCDBSupport/src/CCDBHelpers.cxx b/Framework/CCDBSupport/src/CCDBHelpers.cxx index 29d446403e1c8..0db4cbb5ac71d 100644 --- a/Framework/CCDBSupport/src/CCDBHelpers.cxx +++ b/Framework/CCDBSupport/src/CCDBHelpers.cxx @@ -219,7 +219,7 @@ auto populateCacheWith(std::shared_ptr const& helper, } else if (meta.defaultValue.get() == 2) { timestampToUse = std::stoi(dtc.runNumber); } else { - LOGP(fatal, "Undefined run-dependent option {} for spec {}/{}/{}", 
meta.defaultValue.get(), concrete.origin.as(), concrete.description.as(), int(concrete.subSpec)); + LOGP(fatal, "Undefined ccdb-run-dependent option {} for spec {}/{}/{}", meta.defaultValue.get(), concrete.origin.as(), concrete.description.as(), int(concrete.subSpec)); } } else if (isPrefix(ccdbMetadataPrefix, meta.name)) { std::string key = meta.name.substr(ccdbMetadataPrefix.size()); @@ -252,7 +252,7 @@ auto populateCacheWith(std::shared_ptr const& helper, LOGP(detail, "Loading {} for timestamp {}", path, timestampToUse); api.loadFileToMemory(v, path, metadata, timestampToUse, &headers, etag, helper->createdNotAfter, helper->createdNotBefore); if ((headers.count("Error") != 0) || (etag.empty() && v.empty())) { - LOGP(fatal, "Unable to find object {}/{}", path, timestampToUse); + LOGP(fatal, "Unable to find CCDB object {}/{}", path, timestampToUse); // FIXME: I should send a dummy message. continue; } @@ -394,7 +394,7 @@ AlgorithmSpec CCDBHelpers::fetchFromCCDB() helper->lastCheckedTFCounterOrbReset = timingInfo.tfCounter; api.loadFileToMemory(v, path, metadata, timingInfo.creation, &headers, etag, helper->createdNotAfter, helper->createdNotBefore); if ((headers.count("Error") != 0) || (etag.empty() && v.empty())) { - LOGP(fatal, "Unable to find object {}/{}", path, timingInfo.creation); + LOGP(fatal, "Unable to find CCDB object {}/{}", path, timingInfo.creation); // FIXME: I should send a dummy message. 
return; } diff --git a/Framework/Core/src/DataRefUtils.cxx b/Framework/Core/src/DataRefUtils.cxx index f092429d9c5a0..69eb1dc7faba6 100644 --- a/Framework/Core/src/DataRefUtils.cxx +++ b/Framework/Core/src/DataRefUtils.cxx @@ -94,7 +94,7 @@ void* DataRefUtils::decodeCCDB(DataRef const& ref, std::type_info const& tinfo) headerSize = *reinterpret_cast(buff + dh->payloadSize - Offset); } if (headerSize < 0) { - LOGP(fatal, "Anomalous flattened header size {} extracted", headerSize); + LOGP(fatal, "Anomalous flattened header size {} extracted for CCDB object {}/{}", headerSize, dh->dataOrigin.as(), dh->dataDescription.as()); } TMemFile memFile("name", const_cast(ref.payload), dh->payloadSize - headerSize, "READ"); gErrorIgnoreLevel = previousErrorLevel; @@ -128,7 +128,7 @@ std::map DataRefUtils::extractCCDBHeaders(DataRef cons } if (headerSize < 0) { - LOGP(fatal, "Anomalous flattened header size {} extracted", headerSize); + LOGP(fatal, "Anomalous flattened header size {} extracted for CCDB object {}/{}", headerSize, dh->dataOrigin.as(), dh->dataDescription.as()); } buff += dh->payloadSize - headerSize; // jump to the start of flattened header From caeafb5126c157792dbde64ca9310a86f7cd65b2 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 17 Mar 2025 10:53:31 +0100 Subject: [PATCH 0244/1914] itsresponse: remove incompatible arg in CMake --- Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt index ad26d9e658e16..381e4f4b54c01 100644 --- a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt +++ b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt @@ -29,7 +29,6 @@ endif() add_custom_command(TARGET O2exe-alpide-response-generator POST_BUILD COMMAND ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator -i ${ITSRESPONSE_DIR}/response/AlpideResponseData/ 
-o ${CMAKE_CURRENT_BINARY_DIR}/ BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/AlpideResponseData.root - DEPENDS alpide-response-generator COMMENT "Generating AlpideResponseData.root" ) From 8c1e88b840a1910ff8a9138c6ed7bde87519a08c Mon Sep 17 00:00:00 2001 From: afurs Date: Sat, 15 Mar 2025 11:59:47 +0100 Subject: [PATCH 0245/1914] DataFormatsFIT: hotfix for LUT, excluded CCDB API from header --- .../Detectors/FIT/common/CMakeLists.txt | 1 + .../include/DataFormatsFIT/LookUpTable.h | 17 +++++------- .../Detectors/FIT/common/src/LookUpTable.cxx | 26 +++++++++++++++++++ 3 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 DataFormats/Detectors/FIT/common/src/LookUpTable.cxx diff --git a/DataFormats/Detectors/FIT/common/CMakeLists.txt b/DataFormats/Detectors/FIT/common/CMakeLists.txt index fc8d975a34023..61dbcabc7f087 100644 --- a/DataFormats/Detectors/FIT/common/CMakeLists.txt +++ b/DataFormats/Detectors/FIT/common/CMakeLists.txt @@ -13,6 +13,7 @@ o2_add_library(DataFormatsFIT SOURCES src/RawEventData.cxx src/Triggers.cxx src/RawDataMetric.cxx + src/LookUpTable.cxx PUBLIC_LINK_LIBRARIES O2::CommonDataFormat O2::DetectorsCommonDataFormats O2::CCDB) diff --git a/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h b/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h index 64ae3dc9653d0..aa4bb1fba8d41 100644 --- a/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h +++ b/DataFormats/Detectors/FIT/common/include/DataFormatsFIT/LookUpTable.h @@ -15,8 +15,9 @@ // Look Up Table FIT ////////////////////////////////////////////// -#include "CCDB/BasicCCDBManager.h" #include "DetectorsCommonDataFormats/DetID.h" +#include "CommonUtils/NameConf.h" +#include "Framework/Logger.h" #define BOOST_BIND_GLOBAL_PLACEHOLDERS #include #include @@ -158,8 +159,7 @@ enum class EModuleType : int { kUnknown, kTCM }; template , - typename MapEntryPM2ChannelID = std::unordered_map, - typename = typename std::enable_if_t::value>> + 
typename MapEntryPM2ChannelID = std::unordered_map> class LookupTableBase { public: @@ -174,7 +174,7 @@ class LookupTableBase typedef EntryPM_t Topo_t; // temporary for common interface LookupTableBase() = default; - LookupTableBase(const Table_t& vecEntryFEE) { initFromTable(vecEntryFEE); } + LookupTableBase(const Table_t* vecEntryFEE) { initFromTable(vecEntryFEE); } LookupTableBase(const std::string& pathToFile) { initFromFile(pathToFile); } LookupTableBase(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp = -1) { initCCDB(urlCCDB, pathToStorageInCCDB, timestamp); } // Map of str module names -> enum types @@ -243,13 +243,7 @@ class LookupTableBase prepareEntriesFEE(filepath); prepareLUT(); } - void initCCDB(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp = -1) - { - auto& mgr = o2::ccdb::BasicCCDBManager::instance(); - mgr.setURL(urlCCDB); - mVecEntryFEE = *(mgr.getForTimeStamp(pathToStorageInCCDB, timestamp)); - prepareLUT(); - } + void initCCDB(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp = -1); void initFromTable(const Table_t* vecEntryFEE) { mVecEntryFEE = *vecEntryFEE; @@ -419,6 +413,7 @@ class LookupTableBase Table_t mVecEntryFEE; MapEntryCRU2ModuleType_t mMapEntryCRU2ModuleType; MapEntryPM2ChannelID_t mMapEntryPM2ChannelID; + typedef std::enable_if_t::value> CheckChannelIDtype; // should be integral }; // Singleton for LookUpTable, coomon for all three FIT detectors diff --git a/DataFormats/Detectors/FIT/common/src/LookUpTable.cxx b/DataFormats/Detectors/FIT/common/src/LookUpTable.cxx new file mode 100644 index 0000000000000..73c0b1bf1bb9e --- /dev/null +++ b/DataFormats/Detectors/FIT/common/src/LookUpTable.cxx @@ -0,0 +1,26 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "DataFormatsFIT/LookUpTable.h" +#include "CCDB/BasicCCDBManager.h" +#include +using namespace o2::fit; +template +void LookupTableBase::initCCDB(const std::string& urlCCDB, const std::string& pathToStorageInCCDB, long timestamp) +{ + + auto& mgr = o2::ccdb::BasicCCDBManager::instance(); + mgr.setURL(urlCCDB); + mVecEntryFEE = *(mgr.getForTimeStamp::Table_t>(pathToStorageInCCDB, timestamp)); + prepareLUT(); +} +template class o2::fit::LookupTableBase, + std::unordered_map>; From 940e2e25ac2830b683ce0d5630b4370596b7a886 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Tue, 18 Mar 2025 10:48:12 +0100 Subject: [PATCH 0246/1914] Add empty skeleton for the TRK digitization (#13959) --- Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt | 3 +- .../ALICE3/TRK/simulation/CMakeLists.txt | 8 +- .../include/TRKSimulation/DPLDigitizerParam.h | 69 +++ .../include/TRKSimulation/Digitizer.h | 128 +++++ .../TRK/simulation/src/DPLDigitizerParam.cxx | 23 + .../ALICE3/TRK/simulation/src/Digitizer.cxx | 467 ++++++++++++++++++ .../TRK/simulation/src/TRKSimulationLinkDef.h | 6 + .../ALICE3/TRK/workflow/CMakeLists.txt | 32 ++ .../include/TRKWorkflow/DigitReaderSpec.h | 87 ++++ .../include/TRKWorkflow/DigitWriterSpec.h | 26 + .../TRK/workflow/src/DigitReaderSpec.cxx | 139 ++++++ .../TRK/workflow/src/DigitWriterSpec.cxx | 110 +++++ Steer/DigitizerWorkflow/CMakeLists.txt | 5 +- .../src/SimpleDigitizerWorkflow.cxx | 13 + .../src/TRKDigitizerSpec.cxx | 303 ++++++++++++ .../DigitizerWorkflow/src/TRKDigitizerSpec.h | 24 + 16 files changed, 1439 insertions(+), 4 deletions(-) create mode 100644 
Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx create mode 100644 Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx create mode 100644 Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h diff --git a/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt index 83838a01d13f1..645e3149e4ab7 100644 --- a/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/CMakeLists.txt @@ -10,4 +10,5 @@ # or submit itself to any jurisdiction. 
add_subdirectory(base) -add_subdirectory(simulation) \ No newline at end of file +add_subdirectory(simulation) +add_subdirectory(workflow) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt index c21b7b9aebbf6..856fd310fe5a2 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt @@ -12,16 +12,20 @@ o2_add_library(TRKSimulation SOURCES src/TRKLayer.cxx src/Detector.cxx + src/Digitizer.cxx src/TRKServices.cxx + src/DPLDigitizerParam.cxx src/TRKPetalCase.cxx src/TRKPetalLayer.cxx src/TRKPetalDisk.cxx PUBLIC_LINK_LIBRARIES O2::TRKBase O2::FT3Simulation - O2::ITSMFTSimulation) + O2::ITSMFTSimulation + O2::SimulationDataFormat) o2_target_root_dictionary(TRKSimulation - HEADERS include/TRKSimulation/Detector.h + HEADERS include/TRKSimulation/Digitizer.h + include/TRKSimulation/Detector.h include/TRKSimulation/TRKLayer.h include/TRKSimulation/TRKServices.h include/TRKSimulation/TRKPetalCase.h diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h new file mode 100644 index 0000000000000..59b3551ecbd32 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/DPLDigitizerParam.h @@ -0,0 +1,69 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef ALICEO2_TRKDPLDIGITIZERPARAM_H_ +#define ALICEO2_TRKDPLDIGITIZERPARAM_H_ + +#include "DetectorsCommonDataFormats/DetID.h" +#include "CommonUtils/ConfigurableParam.h" +#include "CommonUtils/ConfigurableParamHelper.h" +#include + +namespace o2 +{ +namespace trk +{ +template +struct DPLDigitizerParam : public o2::conf::ConfigurableParamHelper> { + static_assert(N == o2::detectors::DetID::TRK || N == o2::detectors::DetID::FT3, "only DetID::TRK or DetID::FT3 are allowed"); + + static constexpr std::string_view getParamName() + { + return N == o2::detectors::DetID::TRK ? ParamName[0] : ParamName[1]; + } + + bool continuous = true; ///< flag for continuous simulation + float noisePerPixel = DEFNoisePerPixel(); ///< ALPIDE Noise per channel + float strobeFlatTop = 7500.; ///< strobe shape flat top + float strobeMaxRiseTime = 1100.; ///< strobe max rise time + float strobeQRiseTime0 = 450.; ///< q @ which strobe rise time is 0 + + double timeOffset = 0.; ///< time offset (in seconds!) to calculate ROFrame from hit time + int chargeThreshold = 150; ///< charge threshold in Nelectrons + int minChargeToAccount = 15; ///< minimum charge contribution to account + int nSimSteps = 7; ///< number of steps in response simulation + float energyToNElectrons = 1. / 3.6e-9; // conversion of eloss to Nelectrons + + float Vbb = 0.0; ///< back bias absolute value for MFT (in Volt) + float IBVbb = 0.0; ///< back bias absolute value for ITS Inner Barrel (in Volt) + float OBVbb = 0.0; ///< back bias absolute value for ITS Outer Barrel (in Volt) + + std::string noiseFilePath{}; ///< optional noise masks file path. FIXME to be removed once switch to CCDBFetcher + + // boilerplate stuff + make principal key + O2ParamDef(DPLDigitizerParam, getParamName().data()); + + private: + static constexpr float DEFNoisePerPixel() + { + return N == o2::detectors::DetID::TRK ? 1e-8 : 1e-8; // ITS/MFT values here!!
+ } + + static constexpr std::string_view ParamName[2] = {"TRKDigitizerParam", "FT3DigitizerParam"}; +}; + +template +DPLDigitizerParam DPLDigitizerParam::sInstance; + +} // namespace trk +} // namespace o2 + +#endif diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h new file mode 100644 index 0000000000000..6863c5392cae3 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/Digitizer.h @@ -0,0 +1,128 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file Digitizer.h +/// \brief Definition of the TRK digitizer +#ifndef ALICEO2_TRK_DIGITIZER_H +#define ALICEO2_TRK_DIGITIZER_H + +#include +#include +#include + +#include "Rtypes.h" // for Digitizer::Class +#include "TObject.h" // for TObject + +#include "ITSMFTSimulation/ChipDigitsContainer.h" +// #include "ITSMFTSimulation/AlpideSimResponse.h" +#include "ITSMFTSimulation/DigiParams.h" +#include "ITSMFTSimulation/Hit.h" +#include "TRKBase/GeometryTGeo.h" +// #include "ITS3Base/SegmentationSuperAlpide.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "CommonDataFormat/InteractionRecord.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/MCTruthContainer.h" +#endif + +namespace o2::trk +{ + +class Digitizer : public TObject +{ + using ExtraDig = std::vector; ///< container for extra contributions to PreDigits + + public: + void setDigits(std::vector* dig) { mDigits = dig; } + void setMCLabels(o2::dataformats::MCTruthContainer* mclb) { mMCLabels = mclb; } + void setROFRecords(std::vector* rec) { mROFRecords = rec; } + + o2::itsmft::DigiParams& getParams() { return (o2::itsmft::DigiParams&)mParams; } + const o2::itsmft::DigiParams& getParams() const { return mParams; } + + void init(); + + /// Steer conversion of hits to digits + void process(const std::vector* hits, int evID, int srcID); + void setEventTime(const o2::InteractionTimeRecord& irt); + double getEndTimeOfROFMax() const + { + ///< return the time corresponding to end of the last reserved ROFrame : mROFrameMax + return mParams.getROFrameLength() * (mROFrameMax + 1) + mParams.getTimeOffset(); + } + + void setContinuous(bool v) { mParams.setContinuous(v); } + bool isContinuous() const { return mParams.isContinuous(); } + void fillOutputContainer(uint32_t maxFrame = 0xffffffff); + + void setDigiParams(const o2::itsmft::DigiParams& par) { mParams = par; } + const o2::itsmft::DigiParams& getDigitParams() const { return mParams; } + 
+ // provide the common itsmft::GeometryTGeo to access matrices and segmentation + void setGeometry(const o2::trk::GeometryTGeo* gm) { mGeometry = gm; } + + uint32_t getEventROFrameMin() const { return mEventROFrameMin; } + uint32_t getEventROFrameMax() const { return mEventROFrameMax; } + void resetEventROFrames() + { + mEventROFrameMin = 0xffffffff; + mEventROFrameMax = 0; + } + + void setDeadChannelsMap(const o2::itsmft::NoiseMap* mp) { mDeadChanMap = mp; } + + private: + void processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID, int srcID); + void registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, + uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl); + + ExtraDig* getExtraDigBuffer(uint32_t roFrame) + { + if (mROFrameMin > roFrame) { + return nullptr; // nothing to do + } + int ind = roFrame - mROFrameMin; + while (ind >= int(mExtraBuff.size())) { + mExtraBuff.emplace_back(std::make_unique()); + } + return mExtraBuff[ind].get(); + } + + static constexpr float sec2ns = 1e9; + + o2::itsmft::DigiParams mParams; ///< digitization parameters + o2::InteractionTimeRecord mEventTime; ///< global event time and interaction record + o2::InteractionRecord mIRFirstSampledTF; ///< IR of the 1st sampled IR, noise-only ROFs will be inserted till this IR only + double mCollisionTimeWrtROF{}; + uint32_t mROFrameMin = 0; ///< lowest RO frame of current digits + uint32_t mROFrameMax = 0; ///< highest RO frame of current digits + uint32_t mNewROFrame = 0; ///< ROFrame corresponding to provided time + + uint32_t mEventROFrameMin = 0xffffffff; ///< lowest RO frame for processed events (w/o automatic noise ROFs) + uint32_t mEventROFrameMax = 0; ///< highest RO frame for processed events (w/o automatic noise ROFs) + + o2::itsmft::AlpideSimResponse* mAlpSimResp = nullptr; // simulated response + + const o2::trk::GeometryTGeo* mGeometry = nullptr; ///< TRK geometry + + std::vector mChips; ///< Array of chips digits
containers + std::deque> mExtraBuff; ///< buffer (per roFrame) for extra digits + + std::vector* mDigits = nullptr; //! output digits + std::vector* mROFRecords = nullptr; //! output ROF records + o2::dataformats::MCTruthContainer* mMCLabels = nullptr; //! output labels + + const o2::itsmft::NoiseMap* mDeadChanMap = nullptr; + + ClassDef(Digitizer, 1); +}; +} // namespace o2::trk \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx new file mode 100644 index 0000000000000..a13f2e58bd3a4 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/DPLDigitizerParam.cxx @@ -0,0 +1,23 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction.
+ +#include "TRKSimulation/DPLDigitizerParam.h" + +namespace o2 +{ +namespace trk +{ +// this makes sure that the constructor of the parameters is statically called +// so that these params are part of the parameter database +static auto& sDigitizerParamITS = o2::trk::DPLDigitizerParam::Instance(); +static auto& sDigitizerParamMFT = o2::trk::DPLDigitizerParam::Instance(); +} // namespace trk +} // namespace o2 diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx new file mode 100644 index 0000000000000..21e6e629ec418 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/Digitizer.cxx @@ -0,0 +1,467 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file Digitizer.cxx + +#include "DataFormatsITSMFT/Digit.h" +// #include "ITSMFTBase/SegmentationAlpide.h" +#include "TRKSimulation/DPLDigitizerParam.h" +#include "TRKSimulation/Digitizer.h" +// #include "MathUtils/Cartesian.h" +// #include "SimulationDataFormat/MCTruthContainer.h" +// #include "DetectorsRaw/HBFUtils.h" + +// #include +// #include +// #include +// #include +#include // for LOG + +using o2::itsmft::Digit; +using o2::itsmft::Hit; +// using Segmentation = o2::itsmft::SegmentationAlpide; + +using namespace o2::trk; +// using namespace o2::base; + +//_______________________________________________________________________ +void Digitizer::init() +{ + // mNumberOfChips = mGeometry->getNumberOfChips(); + // mChips.resize(mNumberOfChips); + // for (int i = mNumberOfChips; i--;) { + // mChips[i].setChipIndex(i); + // if (mNoiseMap) { + // mChips[i].setNoiseMap(mNoiseMap); + // } + // if (mDeadChanMap) { + // mChips[i].disable(mDeadChanMap->isFullChipMasked(i)); + // mChips[i].setDeadChanMap(mDeadChanMap); + // } + // } + // initializing for both collection tables + /*for (int i = 0; i < 2; i++) { + mAlpSimResp[i].initData(i); + }*/ + + // importing the charge collection tables + // (initialized while building O2) + // auto file = TFile::Open(mResponseFile.data()); + // if (!file) { + // LOG(fatal) << "Cannot open response file " << mResponseFile; + // } + /*std::string response = "response"; + for (int i=0; i<2; i++) { + response.append(std::to_string(i)); + mAlpSimResp[i] = *(o2::itsmft::AlpideSimResponse*)file->Get(response.data()); + }*/ + // mAlpSimResp[0] = *(o2::itsmft::AlpideSimResponse*)file->Get("response0"); + // mAlpSimResp[1] = *(o2::itsmft::AlpideSimResponse*)file->Get("response1"); + + // importing the parameters from DPLDigitizerParam.h + auto& dOptTRK = DPLDigitizerParam::Instance(); + + LOGP(info, "TRK Digitizer is initalised."); +} + +// auto Digitizer::getChipResponse(int chipID) +// { +// if (mNumberOfChips < 10000) { // in MFT +// 
return mAlpSimRespMFT; +// } + +// if (chipID < 432) { // in ITS Inner Barrel +// return mAlpSimRespIB; +// } else { // in ITS Outter Barrel +// return mAlpSimRespOB; +// } +// } + +//_______________________________________________________________________ +void Digitizer::process(const std::vector* hits, int evID, int srcID) +{ + // digitize single event, the time must have been set beforehand + + // LOG(info) << "Digitizing " << mGeometry->getName() << " hits of entry " << evID << " from source " + // << srcID << " at time " << mEventTime << " ROFrame= " << mNewROFrame << ")" + // << " cont.mode: " << isContinuous() + // << " Min/Max ROFrames " << mROFrameMin << "/" << mROFrameMax; + + // // is there something to flush ? + // if (mNewROFrame > mROFrameMin) { + // fillOutputContainer(mNewROFrame - 1); // flush out all frame preceding the new one + // } + + // int nHits = hits->size(); + // std::vector hitIdx(nHits); + // std::iota(std::begin(hitIdx), std::end(hitIdx), 0); + // // sort hits to improve memory access + // std::sort(hitIdx.begin(), hitIdx.end(), + // [hits](auto lhs, auto rhs) { + // return (*hits)[lhs].GetDetectorID() < (*hits)[rhs].GetDetectorID(); + // }); + // for (int i : hitIdx) { + // processHit((*hits)[i], mROFrameMax, evID, srcID); + // } + // // in the triggered mode store digits after every MC event + // // TODO: in the real triggered mode this will not be needed, this is actually for the + // // single event processing only + // if (!mParams.isContinuous()) { + // fillOutputContainer(mROFrameMax); + // } +} + +//_______________________________________________________________________ +void Digitizer::setEventTime(const o2::InteractionTimeRecord& irt) +{ + // // assign event time in ns + // mEventTime = irt; + // if (!mParams.isContinuous()) { + // mROFrameMin = 0; // in triggered mode reset the frame counters + // mROFrameMax = 0; + // } + // // RO frame corresponding to provided time + // mCollisionTimeWrtROF = mEventTime.timeInBCNS; // in 
triggered mode the ROF starts at BC (is there a delay?) + // if (mParams.isContinuous()) { + // auto nbc = mEventTime.differenceInBC(mIRFirstSampledTF); + // if (mCollisionTimeWrtROF < 0 && nbc > 0) { + // nbc--; + // } + + // // we might get interactions to digitize from before + // // the first sampled IR + // if (nbc < 0) { + // mNewROFrame = 0; + // // this event is before the first RO + // mIsBeforeFirstRO = true; + // } else { + // mNewROFrame = nbc / mParams.getROFrameLengthInBC(); + // mIsBeforeFirstRO = false; + // } + // LOG(info) << " NewROFrame " << mNewROFrame << " nbc " << nbc; + + // // in continuous mode depends on starts of periodic readout frame + // mCollisionTimeWrtROF += (nbc % mParams.getROFrameLengthInBC()) * o2::constants::lhc::LHCBunchSpacingNS; + // } else { + // mNewROFrame = 0; + // } + + // if (mNewROFrame < mROFrameMin) { + // LOG(error) << "New ROFrame " << mNewROFrame << " (" << irt << ") precedes currently cashed " << mROFrameMin; + // throw std::runtime_error("deduced ROFrame precedes already processed one"); + // } + + // if (mParams.isContinuous() && mROFrameMax < mNewROFrame) { + // mROFrameMax = mNewROFrame - 1; // all frames up to this are finished + // } +} + +//_______________________________________________________________________ +void Digitizer::fillOutputContainer(uint32_t frameLast) +{ + // // fill output with digits from min.cached up to requested frame, generating the noise beforehand + // if (frameLast > mROFrameMax) { + // frameLast = mROFrameMax; + // } + // // make sure all buffers for extra digits are created up to the maxFrame + // getExtraDigBuffer(mROFrameMax); + + // LOG(info) << "Filling " << mGeometry->getName() << " digits output for RO frames " << mROFrameMin << ":" + // << frameLast; + + // o2::itsmft::ROFRecord rcROF; + + // // we have to write chips in RO increasing order, therefore have to loop over the frames here + // for (; mROFrameMin <= frameLast; mROFrameMin++) { + // 
rcROF.setROFrame(mROFrameMin); + // rcROF.setFirstEntry(mDigits->size()); // start of current ROF in digits + + // auto& extra = *(mExtraBuff.front().get()); + // for (auto& chip : mChips) { + // if (chip.isDisabled()) { + // continue; + // } + // chip.addNoise(mROFrameMin, mROFrameMin, &mParams); + // auto& buffer = chip.getPreDigits(); + // if (buffer.empty()) { + // continue; + // } + // auto itBeg = buffer.begin(); + // auto iter = itBeg; + // ULong64_t maxKey = chip.getOrderingKey(mROFrameMin + 1, 0, 0) - 1; // fetch digits with key below that + // for (; iter != buffer.end(); ++iter) { + // if (iter->first > maxKey) { + // break; // is the digit ROFrame from the key > the max requested frame + // } + // auto& preDig = iter->second; // preDigit + // if (preDig.charge >= mParams.getChargeThreshold()) { + // int digID = mDigits->size(); + // mDigits->emplace_back(chip.getChipIndex(), preDig.row, preDig.col, preDig.charge); + // mMCLabels->addElement(digID, preDig.labelRef.label); + // auto& nextRef = preDig.labelRef; // extra contributors are in extra array + // while (nextRef.next >= 0) { + // nextRef = extra[nextRef.next]; + // mMCLabels->addElement(digID, nextRef.label); + // } + // } + // } + // buffer.erase(itBeg, iter); + // } + // // finalize ROF record + // rcROF.setNEntries(mDigits->size() - rcROF.getFirstEntry()); // number of digits + // if (isContinuous()) { + // rcROF.getBCData().setFromLong(mIRFirstSampledTF.toLong() + mROFrameMin * mParams.getROFrameLengthInBC()); + // } else { + // rcROF.getBCData() = mEventTime; // RSTODO do we need to add trigger delay? 
+ // } + // if (mROFRecords) { + // mROFRecords->push_back(rcROF); + // } + // extra.clear(); // clear container for extra digits of the mROFrameMin ROFrame + // // and move it as a new slot in the end + // mExtraBuff.emplace_back(mExtraBuff.front().release()); + // mExtraBuff.pop_front(); + // } +} + +//_______________________________________________________________________ +void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID, int srcID) +{ + // // convert single hit to digits + // int chipID = hit.GetDetectorID(); + // auto& chip = mChips[chipID]; + // if (chip.isDisabled()) { + // LOG(debug) << "skip disabled chip " << chipID; + // return; + // } + // float timeInROF = hit.GetTime() * sec2ns; + // if (timeInROF > 20e3) { + // const int maxWarn = 10; + // static int warnNo = 0; + // if (warnNo < maxWarn) { + // LOG(warning) << "Ignoring hit with time_in_event = " << timeInROF << " ns" + // << ((++warnNo < maxWarn) ? "" : " (suppressing further warnings)"); + // } + // return; + // } + // if (isContinuous()) { + // timeInROF += mCollisionTimeWrtROF; + // } + // if (mIsBeforeFirstRO && timeInROF < 0) { + // // disregard this hit because it comes from an event before readout starts and it does not effect this RO + // return; + // } + + // // calculate RO Frame for this hit + // if (timeInROF < 0) { + // timeInROF = 0.; + // } + // float tTot = mParams.getSignalShape().getMaxDuration(); + // // frame of the hit signal start wrt event ROFrame + // int roFrameRel = int(timeInROF * mParams.getROFrameLengthInv()); + // // frame of the hit signal end wrt event ROFrame: in the triggered mode we read just 1 frame + // uint32_t roFrameRelMax = mParams.isContinuous() ? 
(timeInROF + tTot) * mParams.getROFrameLengthInv() : roFrameRel; + // int nFrames = roFrameRelMax + 1 - roFrameRel; + // uint32_t roFrameMax = mNewROFrame + roFrameRelMax; + // if (roFrameMax > maxFr) { + // maxFr = roFrameMax; // if signal extends beyond current maxFrame, increase the latter + // } + + // // here we start stepping in the depth of the sensor to generate charge diffusion + // float nStepsInv = mParams.getNSimStepsInv(); + // int nSteps = mParams.getNSimSteps(); + // const auto& matrix = mGeometry->getMatrixL2G(hit.GetDetectorID()); + // math_utils::Vector3D xyzLocS(matrix ^ (hit.GetPosStart())); // start position in sensor frame + // math_utils::Vector3D xyzLocE(matrix ^ (hit.GetPos())); // end position in sensor frame + + // math_utils::Vector3D step(xyzLocE); + // step -= xyzLocS; + // step *= nStepsInv; // position increment at each step + // // the electrons will injected in the middle of each step + // math_utils::Vector3D stepH(step * 0.5); + // xyzLocS += stepH; + // xyzLocE -= stepH; + + // int rowS = -1, colS = -1, rowE = -1, colE = -1, nSkip = 0; + // // get entrance pixel row and col + // while (!Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + // if (++nSkip >= nSteps) { + // return; // did not enter to sensitive matrix + // } + // xyzLocS += step; + // } + // // get exit pixel row and col + // while (!Segmentation::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? 
+ // if (++nSkip >= nSteps) { + // return; // did not enter to sensitive matrix + // } + // xyzLocE -= step; + // } + // // estimate the limiting min/max row and col where the non-0 response is possible + // if (rowS > rowE) { + // std::swap(rowS, rowE); + // } + // if (colS > colE) { + // std::swap(colS, colE); + // } + // rowS -= AlpideRespSimMat::NPix / 2; + // rowE += AlpideRespSimMat::NPix / 2; + // if (rowS < 0) { + // rowS = 0; + // } + // if (rowE >= Segmentation::NRows) { + // rowE = Segmentation::NRows - 1; + // } + // colS -= AlpideRespSimMat::NPix / 2; + // colE += AlpideRespSimMat::NPix / 2; + // if (colS < 0) { + // colS = 0; + // } + // if (colE >= Segmentation::NCols) { + // colE = Segmentation::NCols - 1; + // } + // int rowSpan = rowE - rowS + 1, colSpan = colE - colS + 1; // size of plaquet where some response is expected + + // float respMatrix[rowSpan][colSpan]; // response accumulated here + // std::fill(&respMatrix[0][0], &respMatrix[0][0] + rowSpan * colSpan, 0.f); + + // float nElectrons = hit.GetEnergyLoss() * mParams.getEnergyToNElectrons(); // total number of deposited electrons + // nElectrons *= nStepsInv; // N electrons injected per step + // if (nSkip) { + // nSteps -= nSkip; + // } + // // + // int rowPrev = -1, colPrev = -1, row, col; + // float cRowPix = 0.f, cColPix = 0.f; // local coordinated of the current pixel center + + // const o2::itsmft::AlpideSimResponse* resp = getChipResponse(chipID); + + // // take into account that the AlpideSimResponse depth defintion has different min/max boundaries + // // although the max should coincide with the surface of the epitaxial layer, which in the chip + // // local coordinates has Y = +SensorLayerThickness/2 + + // xyzLocS.SetY(xyzLocS.Y() + resp->getDepthMax() - Segmentation::SensorLayerThickness / 2.); + + // // collect charge in every pixel which might be affected by the hit + // for (int iStep = nSteps; iStep--;) { + // // Get the pixel ID + // 
Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + // if (row != rowPrev || col != colPrev) { // update pixel and coordinates of its center + // if (!Segmentation::detectorToLocal(row, col, cRowPix, cColPix)) { + // continue; // should not happen + // } + // rowPrev = row; + // colPrev = col; + // } + // bool flipCol, flipRow; + // // note that response needs coordinates along column row (locX) (locZ) then depth (locY) + // auto rspmat = resp->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol); + + // xyzLocS += step; + // if (!rspmat) { + // continue; + // } + + // for (int irow = AlpideRespSimMat::NPix; irow--;) { + // int rowDest = row + irow - AlpideRespSimMat::NPix / 2 - rowS; // destination row in the respMatrix + // if (rowDest < 0 || rowDest >= rowSpan) { + // continue; + // } + // for (int icol = AlpideRespSimMat::NPix; icol--;) { + // int colDest = col + icol - AlpideRespSimMat::NPix / 2 - colS; // destination column in the respMatrix + // if (colDest < 0 || colDest >= colSpan) { + // continue; + // } + // respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, flipRow, flipCol); + // } + // } + // } + + // // fire the pixels assuming Poisson(n_response_electrons) + // o2::MCCompLabel lbl(hit.GetTrackID(), evID, srcID, false); + // auto roFrameAbs = mNewROFrame + roFrameRel; + // for (int irow = rowSpan; irow--;) { + // uint16_t rowIS = irow + rowS; + // for (int icol = colSpan; icol--;) { + // float nEleResp = respMatrix[irow][icol]; + // if (!nEleResp) { + // continue; + // } + // int nEle = gRandom->Poisson(nElectrons * nEleResp); // total charge in given pixel + // // ignore charge which have no chance to fire the pixel + // if (nEle < mParams.getMinChargeToAccount()) { + // continue; + // } + // uint16_t colIS = icol + colS; + // if (mNoiseMap && mNoiseMap->isNoisy(chipID, rowIS, colIS)) { + // continue; + // } + // if (mDeadChanMap && mDeadChanMap->isNoisy(chipID, rowIS, colIS)) { + // 
continue; + // } + // // + // registerDigits(chip, roFrameAbs, timeInROF, nFrames, rowIS, colIS, nEle, lbl); + // } + // } +} + +//________________________________________________________________________________ +void Digitizer::registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, + uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl) +{ + // Register digits for given pixel, accounting for the possible signal contribution to + // multiple ROFrame. The signal starts at time tInROF wrt the start of provided roFrame + // In every ROFrame we check the collected signal during strobe + + // float tStrobe = mParams.getStrobeDelay() - tInROF; // strobe start wrt signal start + // for (int i = 0; i < nROF; i++) { + // uint32_t roFr = roFrame + i; + // int nEleROF = mParams.getSignalShape().getCollectedCharge(nEle, tStrobe, tStrobe + mParams.getStrobeLength()); + // tStrobe += mParams.getROFrameLength(); // for the next ROF + + // // discard too small contributions, they have no chance to produce a digit + // if (nEleROF < mParams.getMinChargeToAccount()) { + // continue; + // } + // if (roFr > mEventROFrameMax) { + // mEventROFrameMax = roFr; + // } + // if (roFr < mEventROFrameMin) { + // mEventROFrameMin = roFr; + // } + // auto key = chip.getOrderingKey(roFr, row, col); + // PreDigit* pd = chip.findDigit(key); + // if (!pd) { + // chip.addDigit(key, roFr, row, col, nEleROF, lbl); + // } else { // there is already a digit at this slot, account as PreDigitExtra contribution + // pd->charge += nEleROF; + // if (pd->labelRef.label == lbl) { // don't store the same label twice + // continue; + // } + // ExtraDig* extra = getExtraDigBuffer(roFr); + // int& nxt = pd->labelRef.next; + // bool skip = false; + // while (nxt >= 0) { + // if ((*extra)[nxt].label == lbl) { // don't store the same label twice + // skip = true; + // break; + // } + // nxt = (*extra)[nxt].next; + // } + // if (skip) { + // continue; + // } + // // new 
predigit will be added in the end of the chain + // nxt = extra->size(); + // extra->emplace_back(lbl); + // } + // } +} diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h index b82d8879e7dad..876810b5bef9d 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h @@ -22,5 +22,11 @@ #pragma link C++ class o2::trk::TRKServices + ; #pragma link C++ class o2::trk::Detector + ; #pragma link C++ class o2::base::DetImpl < o2::trk::Detector> + ; +#pragma link C++ class o2::trk::Digitizer + ; + +// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; +// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; +// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK>> + ; +// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3>> + ; #endif diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt new file mode 100644 index 0000000000000..c9f4099017717 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt @@ -0,0 +1,32 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. +# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. 
+ +o2_add_library(TRKWorkflow + SOURCES src/DigitReaderSpec.cxx + src/DigitWriterSpec.cxx + # src/RecoWorkflow.cxx + # src/ClusterWriterWorkflow.cxx + # src/ClustererSpec.cxx + # src/ClusterWriterSpec.cxx + # src/TrackerSpec.cxx + # src/TrackWriterSpec.cxx + # src/TrackReaderSpec.cxx + # src/VertexReaderSpec.cxx + PUBLIC_LINK_LIBRARIES O2::Framework + O2::SimConfig + O2::DataFormatsITSMFT + O2::SimulationDataFormat + O2::DPLUtils) + +# o2_add_executable(reco-workflow +# SOURCES src/trk-reco-workflow.cxx +# COMPONENT_NAME alice3-trk +# PUBLIC_LINK_LIBRARIES O2::TRKWorkflow) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h new file mode 100644 index 0000000000000..2a0acd792f4a9 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitReaderSpec.h @@ -0,0 +1,87 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef O2_TRK_DIGITREADER +#define O2_TRK_DIGITREADER + +#include "TFile.h" +#include "TTree.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/GBTCalibData.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "Framework/DataProcessorSpec.h" +#include "Framework/Task.h" +#include "Headers/DataHeader.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "DetectorsCommonDataFormats/DetID.h" + +using namespace o2::framework; + +namespace o2 +{ +namespace trk +{ + +class DigitReader : public Task +{ + public: + DigitReader() = delete; + DigitReader(o2::detectors::DetID id, bool useMC, bool useCalib); + ~DigitReader() override = default; + void init(InitContext& ic) final; + void run(ProcessingContext& pc) final; + + protected: + void connectTree(const std::string& filename); + + std::vector mDigits, *mDigitsPtr = &mDigits; + std::vector mCalib, *mCalibPtr = &mCalib; + std::vector mDigROFRec, *mDigROFRecPtr = &mDigROFRec; + std::vector mDigMC2ROFs, *mDigMC2ROFsPtr = &mDigMC2ROFs; + + o2::header::DataOrigin mOrigin = o2::header::gDataOriginInvalid; + + std::unique_ptr mFile; + std::unique_ptr mTree; + + bool mUseMC = true; // use MC truth + bool mUseCalib = true; // send calib data + + std::string mDetName = ""; + std::string mDetNameLC = ""; + std::string mFileName = ""; + std::string mDigTreeName = "o2sim"; + std::string mDigitBranchName = "Digit"; + std::string mDigROFBranchName = "DigitROF"; + std::string mCalibBranchName = "Calib"; + + std::string mDigtMCTruthBranchName = "DigitMCTruth"; + std::string mDigtMC2ROFBranchName = "DigitMC2ROF"; +}; + +class TRKDigitReader : public DigitReader +{ + public: + TRKDigitReader(bool useMC = true, bool useCalib = false) + : DigitReader(o2::detectors::DetID::TRK, useMC, useCalib) + { + mOrigin = o2::header::gDataOriginTRK; + } +}; + +/// create a processor spec +/// read TRK digit data from a ROOT file +framework::DataProcessorSpec getTRKDigitReaderSpec(bool useMC = true, bool useCalib = false,
std::string defname = "trkdigits.root"); + +} // namespace trk +} // namespace o2 + +#endif /* O2_TRK_DIGITREADER */ diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h new file mode 100644 index 0000000000000..9c37d4318bb0f --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/DigitWriterSpec.h @@ -0,0 +1,26 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef STEER_TRKDIGITWRITER_H_ +#define STEER_TRKDIGITWRITER_H_ + +#include "Framework/DataProcessorSpec.h" + +namespace o2 +{ +namespace trk +{ + +o2::framework::DataProcessorSpec getTRKDigitWriterSpec(bool mctruth = true, bool dec = false, bool calib = false); +} // namespace trk +} // end namespace o2 + +#endif /* STEER_TRKDIGITWRITER_H_ */ diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx new file mode 100644 index 0000000000000..09bb1f12a48e4 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitReaderSpec.cxx @@ -0,0 +1,139 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved.
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include + +#include "TTree.h" + +#include "Framework/ControlService.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/Logger.h" +#include "TRKWorkflow/DigitReaderSpec.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "SimulationDataFormat/IOMCTruthContainerView.h" +#include + +using namespace o2::framework; +using namespace o2::itsmft; + +namespace o2 +{ +namespace trk +{ + +DigitReader::DigitReader(o2::detectors::DetID id, bool useMC, bool useCalib) +{ + assert(id == o2::detectors::DetID::TRK); + mDetNameLC = mDetName = id.getName(); + mDigTreeName = "o2sim"; + + mDigitBranchName = mDetName + mDigitBranchName; + mDigROFBranchName = mDetName + mDigROFBranchName; + mCalibBranchName = mDetName + mCalibBranchName; + + mDigtMCTruthBranchName = mDetName + mDigtMCTruthBranchName; + mDigtMC2ROFBranchName = mDetName + mDigtMC2ROFBranchName; + + mUseMC = useMC; + mUseCalib = useCalib; + std::transform(mDetNameLC.begin(), mDetNameLC.end(), mDetNameLC.begin(), ::tolower); +} + +void DigitReader::init(InitContext& ic) +{ + mFileName = ic.options().get((mDetNameLC + "-digit-infile").c_str()); + connectTree(mFileName); +} + +void DigitReader::run(ProcessingContext& pc) +{ + auto ent = mTree->GetReadEntry() + 1; + assert(ent < mTree->GetEntries()); // this should not happen + + o2::dataformats::IOMCTruthContainerView* plabels = nullptr; + if (mUseMC) { + mTree->SetBranchAddress(mDigtMCTruthBranchName.c_str(), &plabels); + } + mTree->GetEntry(ent); + LOG(info) << mDetName << "DigitReader pushes " << mDigROFRec.size() << " ROFRecords, " + << 
mDigits.size() << " digits at entry " << ent; + + // This is a very ugly way of providing DataDescription, which anyway does not need to contain detector name. + // To be fixed once the names-definition class is ready + pc.outputs().snapshot(Output{mOrigin, "DIGITSROF", 0}, mDigROFRec); + pc.outputs().snapshot(Output{mOrigin, "DIGITS", 0}, mDigits); + if (mUseCalib) { + pc.outputs().snapshot(Output{mOrigin, "GBTCALIB", 0}, mCalib); + } + + if (mUseMC) { + auto& sharedlabels = pc.outputs().make>(Output{mOrigin, "DIGITSMCTR", 0}); + plabels->copyandflatten(sharedlabels); + delete plabels; + pc.outputs().snapshot(Output{mOrigin, "DIGITSMC2ROF", 0}, mDigMC2ROFs); + } + + if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { + pc.services().get().endOfStream(); + pc.services().get().readyToQuit(QuitRequest::Me); + } +} + +void DigitReader::connectTree(const std::string& filename) +{ + mTree.reset(nullptr); // in case it was already loaded + mFile.reset(TFile::Open(filename.c_str())); + assert(mFile && !mFile->IsZombie()); + mTree.reset((TTree*)mFile->Get(mDigTreeName.c_str())); + assert(mTree); + + mTree->SetBranchAddress(mDigROFBranchName.c_str(), &mDigROFRecPtr); + mTree->SetBranchAddress(mDigitBranchName.c_str(), &mDigitsPtr); + if (mUseCalib) { + if (!mTree->GetBranch(mCalibBranchName.c_str())) { + throw std::runtime_error("GBT calibration data requested but not found in the tree"); + } + mTree->SetBranchAddress(mCalibBranchName.c_str(), &mCalibPtr); + } + if (mUseMC) { + if (!mTree->GetBranch(mDigtMC2ROFBranchName.c_str()) || !mTree->GetBranch(mDigtMCTruthBranchName.c_str())) { + throw std::runtime_error("MC data requested but not found in the tree"); + } + mTree->SetBranchAddress(mDigtMC2ROFBranchName.c_str(), &mDigMC2ROFsPtr); + } + LOG(info) << "Loaded tree from " << filename << " with " << mTree->GetEntries() << " entries"; +} + +DataProcessorSpec getTRKDigitReaderSpec(bool useMC, bool useCalib, std::string defname) +{ + std::vector outputSpec; + 
outputSpec.emplace_back("TRK", "DIGITS", 0, Lifetime::Timeframe); + outputSpec.emplace_back("TRK", "DIGITSROF", 0, Lifetime::Timeframe); + if (useCalib) { + outputSpec.emplace_back("TRK", "GBTCALIB", 0, Lifetime::Timeframe); + } + if (useMC) { + outputSpec.emplace_back("TRK", "DIGITSMCTR", 0, Lifetime::Timeframe); + outputSpec.emplace_back("TRK", "DIGITSMC2ROF", 0, Lifetime::Timeframe); + } + + return DataProcessorSpec{ + "trk-digit-reader", + Inputs{}, + outputSpec, + AlgorithmSpec{adaptFromTask(useMC, useCalib)}, + Options{ + {"trk-digit-infile", VariantType::String, defname, {"Name of the input digit file"}}}}; +} + +} // namespace trk +} // namespace o2 diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx new file mode 100644 index 0000000000000..2a743551adddb --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/DigitWriterSpec.cxx @@ -0,0 +1,110 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// @brief Processor spec for a ROOT file writer for ALICE 3 TRK digits + +#include "TRKWorkflow/DigitWriterSpec.h" +#include "DPLUtils/MakeRootTreeWriterSpec.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/GBTCalibData.h" +#include "Headers/DataHeader.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "SimulationDataFormat/IOMCTruthContainerView.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include +#include +#include + +using namespace o2::framework; +using SubSpecificationType = o2::framework::DataAllocator::SubSpecificationType; + +namespace o2 +{ +namespace trk +{ + +template +using BranchDefinition = MakeRootTreeWriterSpec::BranchDefinition; +using MCCont = o2::dataformats::ConstMCTruthContainer; + +/// create the processor spec +/// describing a processor receiving digits of the detector given by detOrig/detId and writing them to file +DataProcessorSpec getDigitWriterSpec(bool mctruth, bool dec, bool calib, o2::header::DataOrigin detOrig, o2::detectors::DetID detId) +{ + std::string detStr = o2::detectors::DetID::getName(detId); + std::string detStrL = dec ?
"o2_" : ""; // for decoded digits prepend by o2 + detStrL += detStr; + std::transform(detStrL.begin(), detStrL.end(), detStrL.begin(), ::tolower); + auto logger = [](std::vector const& inDigits) { + LOG(info) << "RECEIVED DIGITS SIZE " << inDigits.size(); + }; + + // the callback to be set as hook for custom action when the writer is closed + auto finishWriting = [](TFile* outputfile, TTree* outputtree) { + const auto* brArr = outputtree->GetListOfBranches(); + int64_t nent = 0; + for (const auto* brc : *brArr) { + int64_t n = ((const TBranch*)brc)->GetEntries(); + if (nent && (nent != n)) { + LOG(error) << "Branches have different number of entries"; + } + nent = n; + } + outputtree->SetEntries(nent); + outputtree->Write("", TObject::kOverwrite); + outputfile->Close(); + }; + + // handler for labels + // This is necessary since we can't store the original label buffer in a ROOT entry -- as is -- if it exceeds a certain size. + // We therefore convert it to a special split class. + auto fillLabels = [](TBranch& branch, std::vector const& labelbuffer, DataRef const& /*ref*/) { + o2::dataformats::ConstMCTruthContainerView labels(labelbuffer); + LOG(info) << "WRITING " << labels.getNElements() << " LABELS "; + + o2::dataformats::IOMCTruthContainerView outputcontainer; + auto ptr = &outputcontainer; + auto br = framework::RootTreeWriter::remapBranch(branch, &ptr); + outputcontainer.adopt(labelbuffer); + br->Fill(); + br->ResetAddress(); + }; + + return MakeRootTreeWriterSpec((detStr + "DigitWriter" + (dec ? "_dec" : "")).c_str(), + (detStrL + "digits.root").c_str(), + MakeRootTreeWriterSpec::TreeAttributes{"o2sim", "Digits tree"}, + MakeRootTreeWriterSpec::CustomClose(finishWriting), + // in case of labels we first read them as std::vector and process them correctly in the fillLabels hook + BranchDefinition>{InputSpec{"digitsMCTR", detOrig, "DIGITSMCTR", 0}, + (detStr + "DigitMCTruth").c_str(), + (mctruth ? 
1 : 0), fillLabels}, + BranchDefinition>{InputSpec{"digitsMC2ROF", detOrig, "DIGITSMC2ROF", 0}, + (detStr + "DigitMC2ROF").c_str(), + (mctruth ? 1 : 0)}, + BranchDefinition>{InputSpec{"digits", detOrig, "DIGITS", 0}, + (detStr + "Digit").c_str(), + logger}, + BranchDefinition>{InputSpec{"calib", detOrig, "GBTCALIB", 0}, + (detStr + "Calib").c_str(), + (calib ? 1 : 0)}, + BranchDefinition>{InputSpec{"digitsROF", detOrig, "DIGITSROF", 0}, + (detStr + "DigitROF").c_str()})(); +} + +DataProcessorSpec getTRKDigitWriterSpec(bool mctruth, bool dec, bool calib) +{ + return getDigitWriterSpec(mctruth, dec, calib, o2::header::gDataOriginTRK, o2::detectors::DetID::TRK); +} + +} // end namespace trk +} // end namespace o2 diff --git a/Steer/DigitizerWorkflow/CMakeLists.txt b/Steer/DigitizerWorkflow/CMakeLists.txt index 1b839ba462b63..babc5fce4d864 100644 --- a/Steer/DigitizerWorkflow/CMakeLists.txt +++ b/Steer/DigitizerWorkflow/CMakeLists.txt @@ -29,6 +29,7 @@ o2_add_executable(digitizer-workflow src/ZDCDigitizerSpec.cxx src/TOFDigitizerSpec.cxx $<$:src/ITS3DigitizerSpec.cxx> + $<$:src/TRKDigitizerSpec.cxx> PUBLIC_LINK_LIBRARIES O2::Framework O2::Steer O2::CommonConstants @@ -67,7 +68,9 @@ o2_add_executable(digitizer-workflow O2::DetectorsRaw $<$:O2::ITS3Simulation> $<$:O2::ITS3Workflow> - $<$:O2::ITS3Align>) + $<$:O2::ITS3Align> + $<$:O2::TRKSimulation> + $<$:O2::TRKWorkflow>) o2_add_executable(mctruth-testworkflow diff --git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index a30294a240fb0..75141425f7c49 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -44,6 +44,10 @@ // for ITS3 #include "ITS3DigitizerSpec.h" #include "ITS3Workflow/DigitWriterSpec.h" + +// for alice 3 TRK +#include "TRKDigitizerSpec.h" +#include "TRKWorkflow/DigitWriterSpec.h" #endif // for TOF @@ -632,6 +636,15 @@ WorkflowSpec 
defineDataProcessing(ConfigContext const& configcontext) // // connect ITS digit writer specs.emplace_back(o2::its3::getITS3DigitWriterSpec(mctruth)); } + + // the ALICE 3 TRK part + if (isEnabled(o2::detectors::DetID::TRK)) { + detList.emplace_back(o2::detectors::DetID::TRK); + // connect the ALICE 3 TRK digitization + specs.emplace_back(o2::trk::getTRKDigitizerSpec(fanoutsize++, mctruth)); + // connect the ALICE 3 TRK digit writer + specs.emplace_back(o2::trk::getTRKDigitWriterSpec(mctruth)); + } #endif // the MFT part diff --git a/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx new file mode 100644 index 0000000000000..f35b53a58fae4 --- /dev/null +++ b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.cxx @@ -0,0 +1,303 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "TRKDigitizerSpec.h" +#include "Framework/ControlService.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/CCDBParamSpec.h" +#include "Framework/DataProcessorSpec.h" +#include "Framework/DataRefUtils.h" +#include "Framework/Lifetime.h" +#include "Framework/Task.h" +#include "Steer/HitProcessingManager.h" +#include "DataFormatsITSMFT/Digit.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "DetectorsBase/BaseDPLDigitizer.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DetectorsCommonDataFormats/SimTraits.h" +#include "DataFormatsParameters/GRPObject.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "TRKSimulation/Digitizer.h" +#include "TRKSimulation/DPLDigitizerParam.h" +#include "ITSMFTBase/DPLAlpideParam.h" +#include "TRKBase/GeometryTGeo.h" +#include "TRKBase/TRKBaseParam.h" + +#include +#include + +#include + +using namespace o2::framework; +using SubSpecificationType = o2::framework::DataAllocator::SubSpecificationType; + +namespace +{ +std::vector makeOutChannels(o2::header::DataOrigin detOrig, bool mctruth) +{ + std::vector outputs; + outputs.emplace_back(detOrig, "DIGITS", 0, Lifetime::Timeframe); + outputs.emplace_back(detOrig, "DIGITSROF", 0, Lifetime::Timeframe); + if (mctruth) { + outputs.emplace_back(detOrig, "DIGITSMC2ROF", 0, Lifetime::Timeframe); + outputs.emplace_back(detOrig, "DIGITSMCTR", 0, Lifetime::Timeframe); + } + outputs.emplace_back(detOrig, "ROMode", 0, Lifetime::Timeframe); + return outputs; +} +} // namespace + +namespace o2::trk +{ +using namespace o2::base; +class TRKDPLDigitizerTask : BaseDPLDigitizer +{ + public: + using BaseDPLDigitizer::init; + + TRKDPLDigitizerTask(bool mctruth = true) : BaseDPLDigitizer(InitServices::FIELD | InitServices::GEOM), mWithMCTruth(mctruth) {} + + void initDigitizerTask(framework::InitContext& ic) override + { + mDisableQED = ic.options().get("disable-qed"); + } + + void run(framework::ProcessingContext& pc) + { + if (mFinished) { + 
return; + } + updateTimeDependentParams(pc); + + // read collision context from input + auto context = pc.inputs().get("collisioncontext"); + context->initSimChains(mID, mSimChains); + const bool withQED = context->isQEDProvided() && !mDisableQED; + auto& timesview = context->getEventRecords(withQED); + LOG(info) << "GOT " << timesview.size() << " COLLISION TIMES"; + LOG(info) << "SIMCHAINS " << mSimChains.size(); + + // if there is nothing to do ... return + if (timesview.empty()) { + return; + } + TStopwatch timer; + timer.Start(); + LOG(info) << " CALLING TRK DIGITIZATION "; + + // mDigitizer.setDigits(&mDigits); + mDigitizer.setROFRecords(&mROFRecords); + mDigitizer.setMCLabels(&mLabels); + + // digits are directly put into DPL owned resource + auto& digitsAccum = pc.outputs().make>(Output{mOrigin, "DIGITS", 0}); + + auto accumulate = [this, &digitsAccum]() { + // accumulate result of single event processing, called after processing every event supplied + // AND after the final flushing via digitizer::fillOutputContainer + if (mDigits.empty()) { + return; // no digits were flushed, nothing to accumulate + } + auto ndigAcc = digitsAccum.size(); + std::copy(mDigits.begin(), mDigits.end(), std::back_inserter(digitsAccum)); + + // fix ROFrecords references on ROF entries + auto nROFRecsOld = mROFRecordsAccum.size(); + + for (int i = 0; i < mROFRecords.size(); i++) { + auto& rof = mROFRecords[i]; + rof.setFirstEntry(ndigAcc + rof.getFirstEntry()); + rof.print(); + + if (mFixMC2ROF < mMC2ROFRecordsAccum.size()) { // fix ROFRecord entry in MC2ROF records + for (int m2rid = mFixMC2ROF; m2rid < mMC2ROFRecordsAccum.size(); m2rid++) { + // need to register the ROFRecords entry for MC event starting from this entry + auto& mc2rof = mMC2ROFRecordsAccum[m2rid]; + if (rof.getROFrame() == mc2rof.minROF) { + mFixMC2ROF++; + mc2rof.rofRecordID = nROFRecsOld + i; + mc2rof.print(); + } + } + } + } + + std::copy(mROFRecords.begin(), mROFRecords.end(),
std::back_inserter(mROFRecordsAccum)); + if (mWithMCTruth) { + mLabelsAccum.mergeAtBack(mLabels); + } + LOG(info) << "Added " << mDigits.size() << " digits "; + // clean containers from already accumulated stuff + mLabels.clear(); + mDigits.clear(); + mROFRecords.clear(); + }; // and accumulate lambda + + auto& eventParts = context->getEventParts(withQED); + // loop over all composite collisions given from context (aka loop over all the interaction records) + const int bcShift = mDigitizer.getParams().getROFrameBiasInBC(); + // loop over all composite collisions given from context (aka loop over all the interaction records) + for (size_t collID = 0; collID < timesview.size(); ++collID) { + auto irt = timesview[collID]; + if (irt.toLong() < bcShift) { // due to the ROF misalignment the collision would go to negative ROF ID, discard + continue; + } + irt -= bcShift; // account for the ROF start shift + + mDigitizer.setEventTime(irt); + mDigitizer.resetEventROFrames(); // to estimate min/max ROF for this collID + // for each collision, loop over the constituents event and source IDs + // (background signal merging is basically taking place here) + for (auto& part : eventParts[collID]) { + + // get the hits for this event and this source + mHits.clear(); + context->retrieveHits(mSimChains, o2::detectors::SimTraits::DETECTORBRANCHNAMES[mID][0].c_str(), part.sourceID, part.entryID, &mHits); + + if (!mHits.empty()) { + LOG(debug) << "For collision " << collID << " eventID " << part.entryID + << " found " << mHits.size() << " hits "; + mDigitizer.process(&mHits, part.entryID, part.sourceID); // call actual digitization procedure + } + } + mMC2ROFRecordsAccum.emplace_back(collID, -1, mDigitizer.getEventROFrameMin(), mDigitizer.getEventROFrameMax()); + accumulate(); + } + mDigitizer.fillOutputContainer(); + accumulate(); + + // here we have all digits and labels and we can send them to consumer (aka snapshot it onto output) + + pc.outputs().snapshot(Output{mOrigin, 
"DIGITSROF", 0}, mROFRecordsAccum); + if (mWithMCTruth) { + pc.outputs().snapshot(Output{mOrigin, "DIGITSMC2ROF", 0}, mMC2ROFRecordsAccum); + auto& sharedlabels = pc.outputs().make>(Output{mOrigin, "DIGITSMCTR", 0}); + mLabelsAccum.flatten_to(sharedlabels); + // free space of existing label containers + mLabels.clear_andfreememory(); + mLabelsAccum.clear_andfreememory(); + } + LOG(info) << mID.getName() << ": Sending ROMode= " << mROMode << " to GRPUpdater"; + pc.outputs().snapshot(Output{mOrigin, "ROMode", 0}, mROMode); + + timer.Stop(); + LOG(info) << "Digitization took " << timer.CpuTime() << "s"; + + // we should be only called once; tell DPL that this process is ready to exit + pc.services().get().readyToQuit(QuitRequest::Me); + + mFinished = true; + } + + void updateTimeDependentParams(ProcessingContext& pc) + { + static bool initOnce{false}; + if (!initOnce) { + initOnce = true; + auto& digipar = mDigitizer.getParams(); + + // configure digitizer + o2::trk::GeometryTGeo* geom = o2::trk::GeometryTGeo::Instance(); + geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::L2G)); // make sure L2G matrices are loaded + mDigitizer.setGeometry(geom); + + const auto& dopt = o2::trk::DPLDigitizerParam::Instance(); + pc.inputs().get*>("ITS_alppar"); + const auto& aopt = o2::itsmft::DPLAlpideParam::Instance(); + digipar.setContinuous(dopt.continuous); + digipar.setROFrameBiasInBC(aopt.roFrameBiasInBC); + if (dopt.continuous) { + auto frameNS = aopt.roFrameLengthInBC * o2::constants::lhc::LHCBunchSpacingNS; + digipar.setROFrameLengthInBC(aopt.roFrameLengthInBC); + digipar.setROFrameLength(frameNS); // RO frame in ns + digipar.setStrobeDelay(aopt.strobeDelay); // Strobe delay wrt beginning of the RO frame, in ns + digipar.setStrobeLength(aopt.strobeLengthCont > 0 ? 
aopt.strobeLengthCont : frameNS - aopt.strobeDelay); // Strobe length in ns + } else { + digipar.setROFrameLength(aopt.roFrameLengthTrig); // RO frame in ns + digipar.setStrobeDelay(aopt.strobeDelay); // Strobe delay wrt beginning of the RO frame, in ns + digipar.setStrobeLength(aopt.strobeLengthTrig); // Strobe length in ns + } + // parameters of signal time response: flat-top duration, max rise time and q @ which rise time is 0 + digipar.getSignalShape().setParameters(dopt.strobeFlatTop, dopt.strobeMaxRiseTime, dopt.strobeQRiseTime0); + digipar.setChargeThreshold(dopt.chargeThreshold); // charge threshold in electrons + digipar.setNoisePerPixel(dopt.noisePerPixel); // noise level + digipar.setTimeOffset(dopt.timeOffset); + digipar.setNSimSteps(dopt.nSimSteps); + + mROMode = digipar.isContinuous() ? o2::parameters::GRPObject::CONTINUOUS : o2::parameters::GRPObject::PRESENT; + LOG(info) << mID.getName() << " simulated in " + << ((mROMode == o2::parameters::GRPObject::CONTINUOUS) ? "CONTINUOUS" : "TRIGGERED") + << " RO mode"; + + // if (oTRKParams::Instance().useDeadChannelMap) { + // pc.inputs().get("TRK_dead"); // trigger final ccdb update + // } + + // init digitizer + mDigitizer.init(); + } + // Other time-dependent parameters can be added below + } + + void finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) + { + if (matcher == ConcreteDataMatcher(detectors::DetID::ITS, "ALPIDEPARAM", 0)) { + LOG(info) << mID.getName() << " Alpide param updated"; + const auto& par = o2::itsmft::DPLAlpideParam::Instance(); + par.printKeyValues(); + return; + } + // if (matcher == ConcreteDataMatcher(mOrigin, "DEADMAP", 0)) { + // LOG(info) << mID.getName() << " static dead map updated"; + // mDigitizer.setDeadChannelsMap((o2::itsmft::NoiseMap*)obj); + // return; + // } + } + + private: + bool mWithMCTruth{true}; + bool mFinished{false}; + bool mDisableQED{false}; + const o2::detectors::DetID mID{o2::detectors::DetID::TRK}; + const o2::header::DataOrigin 
mOrigin{o2::header::gDataOriginTRK}; + o2::trk::Digitizer mDigitizer{}; + std::vector mDigits{}; + std::vector mROFRecords{}; + std::vector mROFRecordsAccum{}; + std::vector mHits{}; + std::vector* mHitsP{&mHits}; + o2::dataformats::MCTruthContainer mLabels{}; + o2::dataformats::MCTruthContainer mLabelsAccum{}; + std::vector mMC2ROFRecordsAccum{}; + std::vector mSimChains{}; + + int mFixMC2ROF = 0; // 1st entry in mc2rofRecordsAccum to be fixed for ROFRecordID + o2::parameters::GRPObject::ROMode mROMode = o2::parameters::GRPObject::PRESENT; // readout mode +}; + +DataProcessorSpec getTRKDigitizerSpec(int channel, bool mctruth) +{ + std::string detStr = o2::detectors::DetID::getName(o2::detectors::DetID::TRK); + auto detOrig = o2::header::gDataOriginTRK; + std::vector inputs; + inputs.emplace_back("collisioncontext", "SIM", "COLLISIONCONTEXT", static_cast(channel), Lifetime::Timeframe); + inputs.emplace_back("ITS_alppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + // if (oTRKParams::Instance().useDeadChannelMap) { + // inputs.emplace_back("TRK_dead", "TRK", "DEADMAP", 0, Lifetime::Condition, ccdbParamSpec("TRK/Calib/DeadMap")); + // } + + return DataProcessorSpec{detStr + "Digitizer", + inputs, makeOutChannels(detOrig, mctruth), + AlgorithmSpec{adaptFromTask(mctruth)}, + Options{{"disable-qed", o2::framework::VariantType::Bool, false, {"disable QED handling"}}}}; +} + +} // namespace o2::trk diff --git a/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h new file mode 100644 index 0000000000000..5a1a59c3b9f5e --- /dev/null +++ b/Steer/DigitizerWorkflow/src/TRKDigitizerSpec.h @@ -0,0 +1,24 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef STEER_DIGITIZERWORKFLOW_TRKDIGITIZER_H_ +#define STEER_DIGITIZERWORKFLOW_TRKDIGITIZER_H_ + +#include "Framework/DataProcessorSpec.h" + +namespace o2::trk +{ +o2::framework::DataProcessorSpec getTRKDigitizerSpec(int channel, bool mctruth = true); +} +// namespace o2::trk +// end namespace o2 + +#endif From 5fad059651ea1d743e8d338c7229b5b7d7af1f0a Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 14 Mar 2025 16:21:22 +0100 Subject: [PATCH 0247/1914] Fix round-robin reading in DigitizationContext::retrieveHits --- .../include/SimulationDataFormat/DigitizationContext.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h b/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h index 5e1f5f66b3f65..b718b2d5eb804 100644 --- a/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h +++ b/DataFormats/simulation/include/SimulationDataFormat/DigitizationContext.h @@ -218,6 +218,10 @@ inline void DigitizationContext::retrieveHits(std::vector const& chains return; } br->SetAddress(&hits); + auto maxEntries = br->GetEntries(); + if (maxEntries) { + entryID %= maxEntries; + } br->GetEntry(entryID); } From 433615988f7250854c926fbe98d79f51c9377c7f Mon Sep 17 00:00:00 2001 From: apalasciano Date: Mon, 17 Feb 2025 11:51:21 +0100 Subject: [PATCH 0248/1914] Add DStar(2007)0 pdg code --- Common/Constants/include/CommonConstants/PhysicsConstants.h | 2 ++ Common/Constants/include/CommonConstants/make_pdg_header.py | 1 + 2 files changed, 3 insertions(+) diff --git 
a/Common/Constants/include/CommonConstants/PhysicsConstants.h b/Common/Constants/include/CommonConstants/PhysicsConstants.h index 6a8a791cffd22..c6fa3cddcdad5 100644 --- a/Common/Constants/include/CommonConstants/PhysicsConstants.h +++ b/Common/Constants/include/CommonConstants/PhysicsConstants.h @@ -52,6 +52,7 @@ enum Pdg { kDS1 = 10433, kDS2Star = 435, kDStar = 413, + kDStar0 = 423, kChiC1 = 20443, kJPsi = 443, kLambdaB0 = 5122, @@ -101,6 +102,7 @@ constexpr double MassDSStar = 2.1122; constexpr double MassDS1 = 2.53511; constexpr double MassDS2Star = 2.5691; constexpr double MassDStar = 2.01026; +constexpr double MassDStar0 = 2.00685; constexpr double MassChiC1 = 3.51067; constexpr double MassJPsi = 3.0969; constexpr double MassLambdaB0 = 5.6196; diff --git a/Common/Constants/include/CommonConstants/make_pdg_header.py b/Common/Constants/include/CommonConstants/make_pdg_header.py index 5c1e4602a9fbb..e4f92e6e8b62d 100755 --- a/Common/Constants/include/CommonConstants/make_pdg_header.py +++ b/Common/Constants/include/CommonConstants/make_pdg_header.py @@ -107,6 +107,7 @@ class Pdg(Enum): kDS1 = 10433 kDS2Star = 435 kDStar = 413 + kDStar0 = 423 kChiC1 = 20443 kJPsi = 443 kLambdaB0 = 5122 From ed0781538b248a920150cdf3afb5fe1d1ee41aa3 Mon Sep 17 00:00:00 2001 From: Andreas Molander Date: Wed, 4 Sep 2024 15:58:37 +0300 Subject: [PATCH 0249/1914] FIT: add RecPoint reader and writer workflows for FV0 and FDD The main purpose of the commit: - Adding workflows for reading RecPoints for FV0 and FDD, these are needed for MC aQC Secondary purposes that arose during development: - ROOT macro for comparing two RecPoint files - Workflows for writing RecPoints added for FT0, FV0 and FDD (i.e. 
simply standalone RecPoint writers, not related to reconstruction) - The two items above were needed to verify the new RecPoint reader workflows, namely by: reconstructing CTFs -> writing RecPoints to file -> Reading RecPoints from file -> Writing RecPoints to new file -> Comparing the two files - Minor cosmetics (e.g. reordering of includes) and utility functions (print and == operator functions) --- DataFormats/Detectors/FIT/FDD/CMakeLists.txt | 1 + .../FIT/FDD/include/DataFormatsFDD/RecPoint.h | 7 +- .../Detectors/FIT/FDD/src/RecPoint.cxx | 33 ++++++ .../FT0/include/DataFormatsFT0/RecPoints.h | 6 +- .../Detectors/FIT/FT0/src/RecPoints.cxx | 14 ++- .../FV0/include/DataFormatsFV0/RecPoints.h | 4 + .../Detectors/FIT/FV0/src/RecPoints.cxx | 19 ++- Detectors/FIT/FDD/workflow/CMakeLists.txt | 10 ++ .../include/FDDWorkflow/RecPointReaderSpec.h | 4 +- .../FDD/workflow/src/RecPointReaderSpec.cxx | 8 +- .../src/recpoints-reader-workflow.cxx | 57 +++++++++ .../src/recpoints-writer-workflow.cxx | 47 ++++++++ Detectors/FIT/FT0/workflow/CMakeLists.txt | 5 + .../src/recpoints-reader-workflow.cxx | 37 +++--- .../src/recpoints-writer-workflow.cxx | 47 ++++++++ Detectors/FIT/FV0/workflow/CMakeLists.txt | 10 ++ .../src/recpoints-reader-workflow.cxx | 58 +++++++++ .../src/recpoints-writer-workflow.cxx | 47 ++++++++ Detectors/FIT/macros/CMakeLists.txt | 5 + Detectors/FIT/macros/compareRecPoints.C | 110 ++++++++++++++++++ 20 files changed, 493 insertions(+), 36 deletions(-) create mode 100644 DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx create mode 100644 Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx create mode 100644 Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx create mode 100644 Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx create mode 100644 Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx create mode 100644 Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx create mode 100644
Detectors/FIT/macros/compareRecPoints.C diff --git a/DataFormats/Detectors/FIT/FDD/CMakeLists.txt b/DataFormats/Detectors/FIT/FDD/CMakeLists.txt index 6cf2deb3f988e..140ba1165bff8 100644 --- a/DataFormats/Detectors/FIT/FDD/CMakeLists.txt +++ b/DataFormats/Detectors/FIT/FDD/CMakeLists.txt @@ -11,6 +11,7 @@ o2_add_library(DataFormatsFDD SOURCES src/RawEventData.cxx + src/RecPoint.cxx src/CTF.cxx src/LookUpTable.cxx PUBLIC_LINK_LIBRARIES O2::FDDBase diff --git a/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h b/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h index 6615dc322180b..f784d99145728 100644 --- a/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h +++ b/DataFormats/Detectors/FIT/FDD/include/DataFormatsFDD/RecPoint.h @@ -9,8 +9,9 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file RecPoint.h +/// \file RecPoint.h /// \brief Definition of the FDD RecPoint class + #ifndef ALICEO2_FDD_RECPOINT_H #define ALICEO2_FDD_RECPOINT_H @@ -42,6 +43,7 @@ struct ChannelDataFloat { } void print() const; + bool operator==(const ChannelDataFloat&) const = default; ClassDefNV(ChannelDataFloat, 1); }; @@ -80,6 +82,9 @@ class RecPoint int getFirstEntry() const { return mRef.getFirstEntry(); } int getEntriesInCurrentBC() const { return mRef.getEntries(); } + void print() const; + bool operator==(const RecPoint&) const = default; + private: o2::dataformats::RangeReference mRef; o2::InteractionRecord mIntRecord; diff --git a/DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx b/DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx new file mode 100644 index 0000000000000..854a09088a2f4 --- /dev/null +++ b/DataFormats/Detectors/FIT/FDD/src/RecPoint.cxx @@ -0,0 +1,33 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file RecPoint.cxx
+/// \brief Implementation of the FDD RecPoint class
+/// \author Andreas Molander andreas.molander@cern.ch
+
+#include "DataFormatsFDD/RecPoint.h"
+#include "Framework/Logger.h"
+
+using namespace o2::fdd;
+
+void ChannelDataFloat::print() const // logs channel ID, time, charge and QTC chain of this entry at info level
+{
+  LOG(info) << "ChannelDataFloat data:";
+  LOG(info) << "Channel ID: " << mPMNumber << ", Time (ps): " << mTime << ", Charge (ADC): " << mChargeADC << ", QTC chain: " << adcId;
+}
+
+void RecPoint::print() const // logs collision times, channel-data reference range and triggers at info level
+{
+  LOG(info) << "RecPoint data:";
+  LOG(info) << "Collision times: A: " << getCollisionTimeA() << ", C: " << getCollisionTimeC();
+  LOG(info) << "Ref first: " << mRef.getFirstEntry() << ", Ref entries: " << mRef.getEntries();
+  LOG(info) << "Triggers: " << mTriggers.print();
+}
diff --git a/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h b/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h
index 1178cc20a4da0..d688e076489b5 100644
--- a/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h
+++ b/DataFormats/Detectors/FIT/FT0/include/DataFormatsFT0/RecPoints.h
@@ -47,6 +47,7 @@ struct ChannelDataFloat { } void print() const; + bool operator==(const ChannelDataFloat&) const = default; ClassDefNV(ChannelDataFloat, 1); }; @@ -74,8 +75,6 @@ class RecPoints } ~RecPoints() = default; - void print() const; - short getCollisionTime(int side) const { return mCollisionTime[side]; } short getCollisionTimeMean() const { return getCollisionTime(TimeMean); } short getCollisionTimeA() const { return getCollisionTime(TimeA); } @@ -96,6 +95,9 @@ class RecPoints gsl::span getBunchChannelData(const gsl::span tfdata)
const; short static constexpr sDummyCollissionTime = 32767; + void print() const; + bool operator==(const RecPoints&) const = default; + private: std::array mCollisionTime = {sDummyCollissionTime, sDummyCollissionTime, diff --git a/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx b/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx index f580d0dd1ea8c..afd244f977f71 100644 --- a/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx +++ b/DataFormats/Detectors/FIT/FT0/src/RecPoints.cxx @@ -21,14 +21,22 @@ using namespace o2::ft0; +void ChannelDataFloat::print() const +{ + printf(" ChID% d | CFDtime=%f | QTCampl=%f QTC chain %d\n", ChId, CFDTime, QTCAmpl, ChainQTC); +} + gsl::span RecPoints::getBunchChannelData(const gsl::span tfdata) const { // extract the span of channel data for this bunch from the whole TF data return ref.getEntries() ? gsl::span(tfdata).subspan(ref.getFirstEntry(), ref.getEntries()) : gsl::span(); } -void ChannelDataFloat::print() const +void RecPoints::print() const { - - printf(" ChID% d | CFDtime=%f | QTCampl=%f QTC chain %d\n", ChId, CFDTime, QTCAmpl, ChainQTC); + LOG(info) << "RecPoint data:"; + LOG(info) << "Collision times: mean: " << getCollisionTimeMean() << ", A: " << getCollisionTimeA() << ", C: " << getCollisionTimeC(); + LOG(info) << "Vertex: " << getVertex(); + LOG(info) << "Ref first: " << ref.getFirstEntry() << ", Ref entries: " << ref.getEntries(); + LOG(info) << "Triggers: " << mTriggers.print(); } diff --git a/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h b/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h index d7ee2e67613fc..b3527fdd049d2 100644 --- a/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h +++ b/DataFormats/Detectors/FIT/FV0/include/DataFormatsFV0/RecPoints.h @@ -42,6 +42,7 @@ struct ChannelDataFloat { } void print() const; + bool operator==(const ChannelDataFloat&) const = default; ClassDefNV(ChannelDataFloat, 1); }; @@ -77,6 +78,9 @@ class RecPoints gsl::span 
getBunchChannelData(const gsl::span tfdata) const; short static constexpr sDummyCollissionTime = 32767; + void print() const; + bool operator==(const RecPoints&) const = default; + private: o2::dataformats::RangeReference mRef; o2::InteractionRecord mIntRecord; diff --git a/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx b/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx index 18f2effc281e8..ef1554acf5419 100644 --- a/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx +++ b/DataFormats/Detectors/FIT/FV0/src/RecPoints.cxx @@ -14,13 +14,22 @@ using namespace o2::fv0; -gsl::span RecPoints::getBunchChannelData(const gsl::span tfdata) const +void ChannelDataFloat::print() const { - // extract the span of channel data for this bunch from the whole TF data - return mRef.getEntries() ? gsl::span(tfdata).subspan(mRef.getFirstEntry(), mRef.getEntries()) : gsl::span(); + printf(" Channel=%d | time=%f | charge=%f | adcId=%d\n", channel, time, charge, adcId); } -void ChannelDataFloat::print() const +void RecPoints::print() const { - printf(" Channel=%d | time=%f | charge=%f | adcId=%d\n", channel, time, charge, adcId); + printf("RecPoint data:"); + printf("Collision times: first: %f, global mean: %f, selected mean: %f\n", getCollisionFirstTime(), getCollisionGlobalMeanTime(), getCollisionSelectedMeanTime()); + printf("Ref first: %d, Ref entries: %d\n", mRef.getFirstEntry(), mRef.getEntries()); + printf("Triggers: "); + mTriggers.print(); +} + +gsl::span RecPoints::getBunchChannelData(const gsl::span tfdata) const +{ + // extract the span of channel data for this bunch from the whole TF data + return mRef.getEntries() ? 
gsl::span(tfdata).subspan(mRef.getFirstEntry(), mRef.getEntries()) : gsl::span(); } diff --git a/Detectors/FIT/FDD/workflow/CMakeLists.txt b/Detectors/FIT/FDD/workflow/CMakeLists.txt index 4f7c7f44bc31b..a4bcc6f0de6fb 100644 --- a/Detectors/FIT/FDD/workflow/CMakeLists.txt +++ b/Detectors/FIT/FDD/workflow/CMakeLists.txt @@ -52,6 +52,16 @@ o2_add_executable(flp-dpl-workflow PUBLIC_LINK_LIBRARIES O2::FDDWorkflow O2::FDDRaw O2::FITWorkflow TARGETVARNAME fddflpexe) +o2_add_executable(recpoints-reader-workflow + SOURCES src/recpoints-reader-workflow.cxx + COMPONENT_NAME fdd + PUBLIC_LINK_LIBRARIES O2::FDDWorkflow) + +o2_add_executable(recpoints-writer-workflow + SOURCES src/recpoints-writer-workflow.cxx + COMPONENT_NAME fdd + PUBLIC_LINK_LIBRARIES O2::FDDWorkflow) + o2_add_executable(integrate-cluster-workflow SOURCES src/cluster-integrator.cxx COMPONENT_NAME fdd diff --git a/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h b/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h index 500883d5badfa..6c3c9694f3e1c 100644 --- a/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h +++ b/Detectors/FIT/FDD/workflow/include/FDDWorkflow/RecPointReaderSpec.h @@ -31,7 +31,7 @@ namespace fdd class RecPointReader : public Task { public: - RecPointReader(bool useMC = true); + RecPointReader(bool useMC = false); ~RecPointReader() override = default; void init(InitContext& ic) final; void run(ProcessingContext& pc) final; @@ -42,7 +42,7 @@ class RecPointReader : public Task std::unique_ptr mFile; std::unique_ptr mTree; - bool mUseMC = true; // use MC truth + bool mUseMC = false; // use MC truth o2::header::DataOrigin mOrigin = o2::header::gDataOriginFDD; std::vector* mRecPoints = nullptr; diff --git a/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx b/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx index 9b612c31d28e6..3c4812c75b251 100644 --- a/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx +++ 
b/Detectors/FIT/FDD/workflow/src/RecPointReaderSpec.cxx @@ -11,16 +11,14 @@ /// @file RecPointReaderSpec.cxx -#include - -#include "TTree.h" - -#include "Framework/ControlService.h" #include "Framework/ConfigParamRegistry.h" +#include "Framework/ControlService.h" #include "Framework/Logger.h" #include "FDDWorkflow/RecPointReaderSpec.h" #include "CommonUtils/NameConf.h" +#include + using namespace o2::framework; using namespace o2::fdd; diff --git a/Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx b/Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx new file mode 100644 index 0000000000000..fcef4cc46901f --- /dev/null +++ b/Detectors/FIT/FDD/workflow/src/recpoints-reader-workflow.cxx @@ -0,0 +1,57 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+
+/// \file recpoints-reader-workflow.cxx
+/// \brief FDD RecPoints reader workflow
+///
+/// \author Andreas Molander andreas.molander@cern.ch
+
+#include "FDDWorkflow/RecPointReaderSpec.h"
+
+#include "CommonUtils/ConfigurableParam.h"
+#include "DetectorsRaw/HBFUtilsInitializer.h"
+#include "Framework/CallbacksPolicy.h"
+#include "Framework/ConfigParamSpec.h"
+#include "Framework/Variant.h"
+
+#include <vector>
+
+using namespace o2::framework;
+
+void customize(std::vector<CallbacksPolicy>& policies) // passes the callback policies on to HBFUtilsInitializer
+{
+  o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies);
+}
+
+// we need to add workflow options before including Framework/runDataProcessing
+void customize(std::vector<ConfigParamSpec>& workflowOptions)
+{
+  std::vector<ConfigParamSpec> options{
+    {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}},
+    {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}};
+  o2::raw::HBFUtilsInitializer::addConfigOption(options);
+  std::swap(workflowOptions, options); // workflowOptions takes over the list built above
+}
+
+#include "Framework/runDataProcessing.h"
+
+WorkflowSpec defineDataProcessing(const ConfigContext& ctx)
+{
+  o2::conf::ConfigurableParam::updateFromString(ctx.options().get<std::string>("configKeyValues"));
+  bool disableMC = ctx.options().get<bool>("disable-mc");
+  WorkflowSpec specs;
+  DataProcessorSpec producer = o2::fdd::getFDDRecPointReaderSpec(!disableMC); // the spec gets the negation of --disable-mc
+  specs.push_back(producer);
+
+  // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit
+  o2::raw::HBFUtilsInitializer hbfIni(ctx, specs);
+  return specs;
+}
diff --git a/Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx b/Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx
new file mode 100644
index 0000000000000..e53ccd14c30ab
--- /dev/null
+++ b/Detectors/FIT/FDD/workflow/src/recpoints-writer-workflow.cxx
@@ -0,0 +1,47 @@
+// Copyright 2019-2024 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file recpoints-writer-workflow.cxx
+/// \brief FDD RecPoints writer workflow
+///
+/// \author Andreas Molander andreas.molander@cern.ch
+
+#include "FDDWorkflow/RecPointWriterSpec.h"
+
+#include "CommonUtils/ConfigurableParam.h"
+#include "Framework/ConfigParamSpec.h"
+#include "Framework/Variant.h"
+
+#include <vector>
+
+using namespace o2::framework;
+
+// we need to add workflow options before including Framework/runDataProcessing
+void customize(std::vector<ConfigParamSpec>& workflowOptions)
+{
+  std::vector<ConfigParamSpec> options{
+    {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}},
+    {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}};
+  workflowOptions.insert(workflowOptions.end(), options.begin(), options.end()); // appended, existing options are kept
+}
+
+#include "Framework/runDataProcessing.h"
+
+WorkflowSpec defineDataProcessing(const ConfigContext& ctx)
+{
+  o2::conf::ConfigurableParam::updateFromString(ctx.options().get<std::string>("configKeyValues"));
+  bool disableMC = ctx.options().get<bool>("disable-mc");
+
+  WorkflowSpec specs;
+  DataProcessorSpec producer = o2::fdd::getFDDRecPointWriterSpec(!disableMC); // the spec gets the negation of --disable-mc
+  specs.push_back(producer);
+  return specs;
+}
diff --git a/Detectors/FIT/FT0/workflow/CMakeLists.txt b/Detectors/FIT/FT0/workflow/CMakeLists.txt
index 2dbbbae41e261..123a29293e2fb 100644
--- a/Detectors/FIT/FT0/workflow/CMakeLists.txt
+++ b/Detectors/FIT/FT0/workflow/CMakeLists.txt
@@ -98,6 +98,11 @@ o2_add_executable(recpoints-reader-workflow COMPONENT_NAME ft0
PUBLIC_LINK_LIBRARIES O2::FT0Workflow) +o2_add_executable(recpoints-writer-workflow + SOURCES src/recpoints-writer-workflow.cxx + COMPONENT_NAME ft0 + PUBLIC_LINK_LIBRARIES O2::FT0Workflow) + o2_add_executable(integrate-cluster-workflow SOURCES src/cluster-integrator.cxx COMPONENT_NAME ft0 diff --git a/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx b/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx index d323b4135d7ea..b1d824e10687e 100644 --- a/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx +++ b/Detectors/FIT/FT0/workflow/src/recpoints-reader-workflow.cxx @@ -9,23 +9,24 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file recpoints-reader-workflow.cxx -/// \brief Implementation of FT0 digits reader +/// \file recpoints-reader-workflow.cxx +/// \brief FT0 RecPoints reader workflow /// -/// \author ruben.shahoyan@cern.ch +/// \author ruben.shahoyan@cern.ch, Andreas Molander andreas.molander@cern.ch -#include "Framework/CallbackService.h" -#include "Framework/ControlService.h" -#include "Framework/CallbacksPolicy.h" -#include "Framework/ConfigParamRegistry.h" -#include "Framework/Task.h" -#include "FT0Workflow/RecPointReaderSpec.h" #include "CommonUtils/ConfigurableParam.h" #include "DetectorsRaw/HBFUtilsInitializer.h" +#include "Framework/CallbacksPolicy.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include "FT0Workflow/RecPointReaderSpec.h" + +#include using namespace o2::framework; -void customize(std::vector& policies) +void customize(std::vector& policies) { o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies); } @@ -33,12 +34,9 @@ void customize(std::vector& policies) // we need to add workflow options before including Framework/runDataProcessing void customize(std::vector& workflowOptions) { - // option allowing to set parameters - - std::vector options{ - {"disable-mc", 
o2::framework::VariantType::Bool, false, {"disable MC propagation even if available"}}}; - std::string keyvaluehelp("Semicolon separated key=value strings"); - options.push_back(ConfigParamSpec{"configKeyValues", VariantType::String, "", {keyvaluehelp}}); + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; o2::raw::HBFUtilsInitializer::addConfigOption(options); std::swap(workflowOptions, options); } @@ -47,10 +45,13 @@ void customize(std::vector& workflowOptions) WorkflowSpec defineDataProcessing(const ConfigContext& ctx) { - WorkflowSpec specs; o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); - DataProcessorSpec producer = o2::ft0::getRecPointReaderSpec(ctx.options().get("disable-mc")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::ft0::getRecPointReaderSpec(!disableMC); specs.push_back(producer); + // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit o2::raw::HBFUtilsInitializer hbfIni(ctx, specs); return specs; diff --git a/Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx b/Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx new file mode 100644 index 0000000000000..c8baef76b5ccd --- /dev/null +++ b/Detectors/FIT/FT0/workflow/src/recpoints-writer-workflow.cxx @@ -0,0 +1,47 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+/// \file recpoints-writer-workflow.cxx
+/// \brief FT0 RecPoints writer workflow
+///
+/// \author Andreas Molander andreas.molander@cern.ch
+
+#include "CommonUtils/ConfigurableParam.h"
+#include "Framework/ConfigParamSpec.h"
+#include "Framework/Variant.h"
+
+#include "FT0Workflow/RecPointWriterSpec.h"
+
+#include <vector>
+
+using namespace o2::framework;
+
+// we need to add workflow options before including Framework/runDataProcessing
+void customize(std::vector<ConfigParamSpec>& workflowOptions)
+{
+  std::vector<ConfigParamSpec> options{
+    {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}},
+    {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}};
+  workflowOptions.insert(workflowOptions.end(), options.begin(), options.end()); // appended, existing options are kept
+}
+
+#include "Framework/runDataProcessing.h"
+
+WorkflowSpec defineDataProcessing(const ConfigContext& ctx)
+{
+  o2::conf::ConfigurableParam::updateFromString(ctx.options().get<std::string>("configKeyValues"));
+  bool disableMC = ctx.options().get<bool>("disable-mc");
+
+  WorkflowSpec specs;
+  DataProcessorSpec producer = o2::ft0::getRecPointWriterSpec(!disableMC); // the spec gets the negation of --disable-mc
+  specs.push_back(producer);
+  return specs;
+}
diff --git a/Detectors/FIT/FV0/workflow/CMakeLists.txt b/Detectors/FIT/FV0/workflow/CMakeLists.txt
index eec745d5fdf1e..a304adc61b5fd 100644
--- a/Detectors/FIT/FV0/workflow/CMakeLists.txt
+++ b/Detectors/FIT/FV0/workflow/CMakeLists.txt
@@ -53,6 +53,16 @@ o2_add_executable(flp-dpl-workflow PUBLIC_LINK_LIBRARIES O2::FV0Workflow O2::FITWorkflow O2::FV0Raw TARGETVARNAME fv0flpexe) +o2_add_executable(recpoints-reader-workflow + SOURCES src/recpoints-reader-workflow.cxx + COMPONENT_NAME fv0 + PUBLIC_LINK_LIBRARIES O2::FV0Workflow) + +o2_add_executable(recpoints-writer-workflow + SOURCES src/recpoints-writer-workflow.cxx
+ COMPONENT_NAME fv0 + PUBLIC_LINK_LIBRARIES O2::FV0Workflow) + o2_add_executable(integrate-cluster-workflow SOURCES src/cluster-integrator.cxx COMPONENT_NAME fv0 diff --git a/Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx b/Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx new file mode 100644 index 0000000000000..ecbe89b8bbed3 --- /dev/null +++ b/Detectors/FIT/FV0/workflow/src/recpoints-reader-workflow.cxx @@ -0,0 +1,58 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file recpoints-reader-workflow.cxx +/// \brief FV0 RecPoints reader workflow +/// +/// \author Andreas Molander andreas.molander@cern.ch + +#include "CommonUtils/ConfigurableParam.h" +#include "DetectorsRaw/HBFUtilsInitializer.h" +#include "Framework/CallbacksPolicy.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include "FV0Workflow/RecPointReaderSpec.h" + +#include + +using namespace o2::framework; + +void customize(std::vector& policies) +{ + o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies); +} + +// we need to add workflow options before including Framework/runDataProcessing +void customize(std::vector& workflowOptions) +{ + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; + o2::raw::HBFUtilsInitializer::addConfigOption(options); + std::swap(workflowOptions, options); +} + +#include "Framework/runDataProcessing.h" + +WorkflowSpec defineDataProcessing(const ConfigContext& ctx) +{ + o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::fv0::getRecPointReaderSpec(!disableMC); + specs.push_back(producer); + + // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit + o2::raw::HBFUtilsInitializer hbfIni(ctx, specs); + return specs; +} diff --git a/Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx b/Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx new file mode 100644 index 0000000000000..0fd3bd3bef2e8 --- /dev/null +++ b/Detectors/FIT/FV0/workflow/src/recpoints-writer-workflow.cxx @@ -0,0 +1,47 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file recpoints-writer-workflow.cxx +/// \brief FV0 RecPoints writer workflow +/// +/// \author Andreas Molander andreas.molander@cern.ch + +#include "FV0Workflow/RecPointWriterSpec.h" + +#include "CommonUtils/ConfigurableParam.h" +#include "Framework/ConfigParamSpec.h" +#include "Framework/Variant.h" + +#include + +using namespace o2::framework; + +// we need to add workflow options before including Framework/runDataProcessing +void customize(std::vector& workflowOptions) +{ + std::vector options{ + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}}; + workflowOptions.insert(workflowOptions.end(), options.begin(), options.end()); +} + +#include "Framework/runDataProcessing.h" + +WorkflowSpec defineDataProcessing(const ConfigContext& ctx) +{ + o2::conf::ConfigurableParam::updateFromString(ctx.options().get("configKeyValues")); + bool disableMC = ctx.options().get("disable-mc"); + + WorkflowSpec specs; + DataProcessorSpec producer = o2::fv0::getRecPointWriterSpec(!disableMC); + specs.push_back(producer); + return specs; +} diff --git a/Detectors/FIT/macros/CMakeLists.txt b/Detectors/FIT/macros/CMakeLists.txt index 81f2cc05e0b25..e7debb4184325 100644 --- a/Detectors/FIT/macros/CMakeLists.txt +++ b/Detectors/FIT/macros/CMakeLists.txt @@ -40,5 +40,10 @@ o2_add_test_root_macro(readFITDCSdata.C O2::CCDB LABELS fit) +o2_add_test_root_macro(compareRecPoints.C + PUBLIC_LINK_LIBRARIES O2::DataFormatsFT0 + O2::DataFormatsFIT + LABELS fit) + 
o2_data_file(COPY readFITDCSdata.C DESTINATION Detectors/FIT/macros/) o2_data_file(COPY readFITDeadChannelMap.C DESTINATION Detectors/FIT/macros/) \ No newline at end of file diff --git a/Detectors/FIT/macros/compareRecPoints.C b/Detectors/FIT/macros/compareRecPoints.C new file mode 100644 index 0000000000000..0ce077bc616ba --- /dev/null +++ b/Detectors/FIT/macros/compareRecPoints.C @@ -0,0 +1,110 @@ +// Copyright 2019-2024 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file compareRecPoints.C +/// \brief ROOT macro to compare two trees with RecPoints +/// +/// \author Artur Furs artur.furs@cern.ch, Andreas Molander andreas.molander@cern.ch + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include "DataFormatsFT0/RecPoints.h" +#include "DataFormatsFV0/RecPoints.h" +#include "DataFormatsFDD/RecPoint.h" + +#include "TFile.h" +#include "TTree.h" + +#include +#include +#include +#include +#endif + +void compareRecPoints(std::string filename1, std::string filename2) +{ + std::unique_ptr file1(TFile::Open(filename1.c_str(), "READ")); + TTree* tree1 = (TTree*)file1->Get("o2sim"); + + std::unique_ptr file2(TFile::Open(filename2.c_str(), "READ")); + TTree* tree2 = (TTree*)file2->Get("o2sim"); + + if (tree1->GetEntries() != tree2->GetEntries()) { + std::cout << "Non equal number of entries in trees!" 
<< std::endl; + return; + } + + typedef typename o2::ft0::RecPoints RecPoint; + typedef typename o2::ft0::ChannelDataFloat ChannelDataFloat; + + std::vector vecRecPoints1; + std::vector* ptrVecRecPoints1 = &vecRecPoints1; + + std::vector vecChannelDataFloat1; + std::vector* ptrVecChannelDataFloat1 = &vecChannelDataFloat1; + + tree1->SetBranchAddress("FT0Cluster", &ptrVecRecPoints1); + tree1->SetBranchAddress("FT0RecChData", &ptrVecChannelDataFloat1); + + std::vector vecRecPoints2; + std::vector* ptrVecRecPoints2 = &vecRecPoints2; + + std::vector vecChannelDataFloat2; + std::vector* ptrVecChannelDataFloat2 = &vecChannelDataFloat2; + + tree2->SetBranchAddress("FT0Cluster", &ptrVecRecPoints2); + tree2->SetBranchAddress("FT0RecChData", &ptrVecChannelDataFloat2); + + for (int iEntry = 0; iEntry < tree1->GetEntries(); iEntry++) { + tree1->GetEntry(iEntry); + tree2->GetEntry(iEntry); + + if (vecRecPoints1 != vecRecPoints2) { + std::cout << "Non equal RecPoints vector!" << std::endl; + + if (vecRecPoints1.size() == vecRecPoints2.size()) { + for (int iEvent = 0; iEvent < vecRecPoints1.size(); iEvent++) { + const auto& recPoint1 = vecRecPoints1[iEvent]; + const auto& recPoint2 = vecRecPoints2[iEvent]; + + if (!(recPoint1 == recPoint2)) { + std::cout << "First RecPoint" << std::endl; + recPoint1.print(); + std::cout << "Second RecPoint" << std::endl; + recPoint2.print(); + } + } + } else { + std::cout << "Non equal number of RecPoints!" << std::endl; + } + } + if (vecChannelDataFloat1 != vecChannelDataFloat2) { + std::cout << "Non equal ChannelDataFloat vector!" 
<< std::endl; + + if (vecChannelDataFloat1.size() == vecChannelDataFloat2.size()) { + for (int iEvent = 0; iEvent < vecChannelDataFloat1.size(); iEvent++) { + const auto& channelDataFloat1 = vecChannelDataFloat1[iEvent]; + const auto& channelDataFloat2 = vecChannelDataFloat2[iEvent]; + + if (!(channelDataFloat1 == channelDataFloat2)) { + std::cout << "First ChannelDataFloat" << std::endl; + channelDataFloat1.print(); + std::cout << "Second ChannelDataFloat" << std::endl; + channelDataFloat2.print(); + } + } + } else { + std::cout << "Non equal number of ChannelDataFloat!" << std::endl; + } + } + } + + return; +} \ No newline at end of file From 95d50444d9d733b58f8f7b4cc03c43bcefb64316 Mon Sep 17 00:00:00 2001 From: swenzel Date: Tue, 18 Mar 2025 17:04:26 +0100 Subject: [PATCH 0250/1914] Fix BC filling in per TF digi contexts --- DataFormats/simulation/src/DigitizationContext.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/DataFormats/simulation/src/DigitizationContext.cxx b/DataFormats/simulation/src/DigitizationContext.cxx index e875cb61951a9..499a478bc5148 100644 --- a/DataFormats/simulation/src/DigitizationContext.cxx +++ b/DataFormats/simulation/src/DigitizationContext.cxx @@ -679,6 +679,7 @@ DigitizationContext DigitizationContext::extractSingleTimeframe(int timeframeid, } r.mSimPrefixes = mSimPrefixes; r.mMuBC = mMuBC; + r.mBCFilling = mBCFilling; try { auto tf_ranges = timeframeindices.at(timeframeid); From dcb02ef906d422fcdaae4dff21e335f6e8bba914 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 10:21:46 +0100 Subject: [PATCH 0251/1914] GPU Display: suppress compiler warning with sanitizers enabled --- GPU/GPUTracking/display/GPUDisplay.h | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index 73f65b6b24241..bb270cda23565 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -161,6 +161,7 @@ class GPUDisplay : public 
GPUDisplayInterface { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wformat-security" +#pragma GCC diagnostic ignored "-Wformat-truncation" snprintf(mInfoText2, 1024, args...); #pragma GCC diagnostic pop GPUInfo("%s", mInfoText2); From c903618bcbda071dbd6e1538276176d1197bc6fc Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 10:22:09 +0100 Subject: [PATCH 0252/1914] GPU Standalone can compile with sanitizers also without debug build --- GPU/GPUTracking/Standalone/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index b9620b9385c73..de245a71845c3 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -50,9 +50,6 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") - if (GPUCA_BUILD_DEBUG_SANITIZE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang - endif() set(CMAKE_BUILD_TYPE DEBUG) else() set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector") @@ -67,6 +64,9 @@ else() set(CMAKE_BUILD_TYPE RELEASE) add_definitions(-DNDEBUG) endif() +if (GPUCA_BUILD_DEBUG_SANITIZE) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang +endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") From 5b6c4855a4a38b1960186dd0d64ea3c72220ed3d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:05:52 +0100 Subject: [PATCH 0253/1914] GPU: Disable clang warnings when using C variable 
length array extension in C++ --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index de245a71845c3..8fa8f0c2b68c9 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -67,7 +67,7 @@ endif() if (GPUCA_BUILD_DEBUG_SANITIZE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") # Find mandatory packages From 7a68fc21493361c9f9acb79d6411f60d6d8b1d88 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:20:26 +0100 Subject: [PATCH 0254/1914] GPU Standalone: Fix build using clang compiler with sanitizers --- GPU/GPUTracking/Standalone/CMakeLists.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 8fa8f0c2b68c9..6e536727a0c67 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -65,7 +65,10 @@ else() add_definitions(-DNDEBUG) endif() if (GPUCA_BUILD_DEBUG_SANITIZE) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") #TODO: Check why this does not work with clang + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address,undefined -fno-sanitize=vptr") + if(CMAKE_CXX_COMPILER 
MATCHES "clang\\+\\+") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") + endif() endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings -Wno-vla-cxx-extension") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") @@ -247,6 +250,13 @@ if(GPUCA_CONFIG_ROOT) endif() target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) +if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" DIRECTORY) + get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" ABSOLUTE) + target_link_options(ca PRIVATE "-Wl,-rpath,${CLANG_ASAN_SO_PATH}") +endif() + # Installation install(TARGETS ca TPCFastTransformation standalone_support) install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}") From 1eebd48731f33ec692a7d866908abcb9b498a4f1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 11:38:06 +0100 Subject: [PATCH 0255/1914] GPU: Suppress another clang warning --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index a4074282da30f..a8a83fdbd9203 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -112,7 +112,12 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) { +#pragma GCC diagnostic push +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway 
captured below +#endif std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); +#pragma GCC diagnostic pop } template From ce9dd396d2e570fb8897a351c588bedac11a2e1c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 13:11:30 +0100 Subject: [PATCH 0256/1914] GPU: Fix some minor issues indicated by clang sanitizer --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 22 +++++++++++++------ .../Global/GPUChainTrackingSectorTracker.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 11 +++++----- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 35e44d99d5c0c..8bae1df267412 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -147,8 +147,12 @@ int32_t GPUReconstruction::Init() if (InitDevice()) { return 1; } - mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; - mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; + mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; + } else { + mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } if (InitPhasePermanentMemory()) { return 1; } @@ -860,14 +864,18 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) FreeRegisteredMemory(i); } } - mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); - mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); mUnmanagedChunks.clear(); - mVolatileMemoryStart = nullptr; mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); - mHostMemoryPoolEnd = mHostMemoryPoolBlocked ?
mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); - mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + mVolatileMemoryStart = nullptr; + if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); + mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); + mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); + mDeviceMemoryPoolEnd = mDeviceMemoryPoolBlocked ? mDeviceMemoryPoolBlocked : ((char*)mDeviceMemoryBase + mDeviceMemorySize); + } else { + mHostMemoryPool = mDeviceMemoryPool = mHostMemoryPoolEnd = mDeviceMemoryPoolEnd = nullptr; + } } void GPUReconstruction::UpdateMaxMemoryUsed() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index dd71a797f2744..e161f74a31032 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -150,7 +150,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (param().rec.tpc.occupancyMapTimeBins || param().rec.tpc.sysClusErrorC12Norm) { uint32_t& occupancyTotal = *mInputsHost->mTPCClusterOccupancyMap; occupancyTotal = CAMath::Float2UIntRn(mRec->MemoryScalers()->nTPCHits / (mIOPtrs.settingsTF && mIOPtrs.settingsTF->hasNHBFPerTF ? mIOPtrs.settingsTF->nHBFPerTF : 128)); - mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, param().rec.tpc.occupancyMapTimeBins ? mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); + mRec->UpdateParamOccupancyMap(param().rec.tpc.occupancyMapTimeBins ? mInputsHost->mTPCClusterOccupancyMap + 2 : nullptr, doGPU && param().rec.tpc.occupancyMapTimeBins ? 
mInputsShadow->mTPCClusterOccupancyMap + 2 : nullptr, occupancyTotal, streamInitAndOccMap); } int32_t streamMap[NSECTORS]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f373d56ea0395..eb1df3f37b6b5 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1706,20 +1706,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); + const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); continue; } - GPUTPCGMMergedTrackHit* cl = mClusters + iOutTrackFirstCluster; - GPUTPCGMMergedTrackHitXYZ* clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1760,7 +1760,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX = Param().par.earlyTpcTransform ? clXYZ[0].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? 
mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1791,12 +1791,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], &clXYZ[CEside ? (nHits - 1) : 0], iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3bd2257d02e01..d235b3398c062 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -1091,7 +1091,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr GPUTPCGMTrackParam t = track.Param(); float Alpha = track.Alpha(); CADEBUG(int32_t nTrackHitsOld = nTrackHits; float ptOld = t.QPt()); - bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->ClustersXYZ() + track.FirstClusterRef(), nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); + bool ok = t.Fit(merger, iTrk, merger->Clusters() + track.FirstClusterRef(), merger->Param().par.earlyTpcTransform ? 
merger->ClustersXYZ() + track.FirstClusterRef() : nullptr, nTrackHits, NTolerated, Alpha, attempt, GPUCA_MAX_SIN_PHI, &track.OuterParam()); CADEBUG(printf("Finished Fit Track %d\n", iTrk)); CADEBUG(printf("OUTPUT hits %d -> %d+%d = %d, QPt %f -> %f, SP %f, ok %d chi2 %f chi2ndf %f\n", nTrackHitsOld, nTrackHits, NTolerated, nTrackHits + NTolerated, ptOld, t.QPt(), t.SinPhi(), (int32_t)ok, t.Chi2(), t.Chi2() / CAMath::Max(1, nTrackHits))); From 0064a3af79e6f92ee99725cf7a2d024b667bac2e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 18 Mar 2025 13:28:56 +0100 Subject: [PATCH 0257/1914] With -ffast-math, std::finite is UB and one should assume all float to be finite --- Common/MathUtils/include/MathUtils/detail/basicMath.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h index 3fc3fe374b380..3565764435a68 100644 --- a/Common/MathUtils/include/MathUtils/detail/basicMath.h +++ b/Common/MathUtils/include/MathUtils/detail/basicMath.h @@ -113,7 +113,11 @@ GPUdi() int nint(double x) template <> GPUdi() bool finite(double x) { +#ifdef __FAST_MATH__ + return true; /* fast-math: every value is assumed finite, so the check must succeed */ +#else return std::isfinite(x); +#endif } template <> GPUdi() double log(double x) From cb5e20f94f5717e0c00aa33d5f40f23f40dcb1d4 Mon Sep 17 00:00:00 2001 From: lietava Date: Tue, 18 Mar 2025 16:03:48 +0100 Subject: [PATCH 0258/1914] fix: debug removed --- Detectors/CTP/workflowScalers/src/RunManager.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx b/Detectors/CTP/workflowScalers/src/RunManager.cxx index 9af5b5e104120..ac3eda60094e9 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -126,10 +126,9 @@ int CTPRunManager::loadRun(const std::string& cfg) } int CTPRunManager::setRunConfigBK(uint32_t runNumber, const std::string& cfg) { - std::cout << "Printing cfg:"
<< cfg << std::endl; + std::cout << "Printing run:" << runNumber << " cfg:" << cfg << std::endl; if (mBKClient) { try { - uint32_t runNumber = 1; mBKClient->run()->setRawCtpTriggerConfiguration(runNumber, cfg); } catch (std::runtime_error& error) { std::cerr << "An error occurred: " << error.what() << std::endl; From c71128566f99d77103251fd8e07c2d9e4ba5a947 Mon Sep 17 00:00:00 2001 From: Maurice Coquet Date: Tue, 18 Mar 2025 16:05:11 +0100 Subject: [PATCH 0259/1914] Fixing CCDB host address for MFT noise scan --- Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx b/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx index 617e02ebb9d9c..a34d8cc5f2975 100644 --- a/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx +++ b/Detectors/ITSMFT/MFT/calibration/src/NoiseCalibratorSpec.cxx @@ -59,7 +59,7 @@ void NoiseCalibratorSpec::init(InitContext& ic) mPathDcs = ic.options().get("path-DCS"); mOutputType = ic.options().get("send-to-server"); mNoiseMapForDcs.clear(); - api.init("http://alice-ccdb.cern.ch"); + api.init(o2::base::NameConf::getCCDBServer()); } void NoiseCalibratorSpec::run(ProcessingContext& pc) From ad59c78eed8256990430777517aa6f4ca89cd954 Mon Sep 17 00:00:00 2001 From: pillot Date: Sat, 15 Mar 2025 10:29:56 +0100 Subject: [PATCH 0260/1914] turn error into warning and continue the scan --- Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx index 236effc4b1182..ab716bc6eb752 100644 --- a/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx +++ b/Detectors/MUON/MCH/Conditions/src/scan-hvlv-ccdb.cxx @@ -479,8 +479,7 @@ void fillDataPoints(const std::vector& dps, std::map& d auto previousTS = dps2.rbegin()->first; if (ts != 
previousTS || getValue(*itDP) != dps2.rbegin()->second) { if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); - exit(1); + printf("\e[0;31mwarning: wrong data point order (%llu <= %llu)\e[0m\n", (ULL)ts, (ULL)previousTS); } if (printWarning) { printf("%s%s missing the previous data point (dt = %s%llu ms)", color.c_str(), header.c_str(), @@ -512,8 +511,9 @@ void fillDataPoints(const std::vector& dps, std::map& d for (++itDP; itDP < dps.end(); ++itDP) { ts = itDP->get_epoch_time(); if (ts <= previousTS) { - printf("error: wrong data point order (%llu <= %llu)\n", (ULL)ts, (ULL)previousTS); - exit(1); + printf("\e[0;31mwarning: wrong data point order (%llu <= %llu)\e[0m\n", (ULL)ts, (ULL)previousTS); + } else { + previousTS = ts; } if (ts < tMin && (warningLevel > 1 || (warningLevel == 1 && ts + tolerance < tMin))) { printf("%s%s data point outside of file validity range (dt = -%llu ms)\e[0m\n", @@ -523,7 +523,6 @@ void fillDataPoints(const std::vector& dps, std::map& d header.c_str(), (ULL)(ts - tMax)); } dps2.emplace(ts, getValue(*itDP)); - previousTS = ts; } } From 1b5b9b672f89afd25af46309bce2147da41b685b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barth=C3=A9l=C3=A9my=20von=20Haller?= Date: Fri, 14 Mar 2025 16:30:49 +0100 Subject: [PATCH 0261/1914] Avoid fatal in CTPRateFetcher --- DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx index 43fa9dbe7f3f3..67e59aad3ea24 100644 --- a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx +++ b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx @@ -46,7 +46,8 @@ double CTPRateFetcher::fetchNoPuCorr(o2::ccdb::BasicCCDBManager* ccdb, uint64_t LOG(info) << "Trying different class"; ret = fetchCTPratesClassesNoPuCorr(timeStamp, "CMTVX-NONE"); if (ret < 0) { - LOG(fatal) << "None of the classes used for 
lumi found"; + LOG(error) << "None of the classes used for lumi found"; + return -1.; } } return ret; @@ -245,17 +246,19 @@ void CTPRateFetcher::setupRun(int runNumber, o2::ccdb::BasicCCDBManager* ccdb, u return; } mRunNumber = runNumber; - LOG(info) << "Setting up CTP scalers for run " << mRunNumber; + LOG(info) << "Setting up CTP scalers for run " << mRunNumber << " and timestamp : " << timeStamp; auto ptrLHCIFdata = ccdb->getSpecific("GLO/Config/GRPLHCIF", timeStamp); if (ptrLHCIFdata == nullptr) { - LOG(fatal) << "GRPLHCIFData not in database, timestamp:" << timeStamp; + LOG(error) << "GRPLHCIFData not in database, timestamp:" << timeStamp; + return; } mLHCIFdata = *ptrLHCIFdata; std::map metadata; metadata["runNumber"] = std::to_string(mRunNumber); auto ptrConfig = ccdb->getSpecific("CTP/Config/Config", timeStamp, metadata); if (ptrConfig == nullptr) { - LOG(fatal) << "CTPRunConfig not in database, timestamp:" << timeStamp; + LOG(error) << "CTPRunConfig not in database, timestamp:" << timeStamp; + return; } mConfig = *ptrConfig; if (initScalers) { @@ -264,7 +267,7 @@ void CTPRateFetcher::setupRun(int runNumber, o2::ccdb::BasicCCDBManager* ccdb, u mScalers = *ptrScalers; mScalers.convertRawToO2(); } else { - LOG(fatal) << "CTPRunScalers not in database, timestamp:" << timeStamp; + LOG(error) << "CTPRunScalers not in database, timestamp:" << timeStamp; } } } From af7c9d3a03dcf99e6ee587c311c05494cec27a35 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 19 Mar 2025 22:30:19 +0100 Subject: [PATCH 0262/1914] GPU Standalone: Simplify CMake script --- .../Standalone/Benchmark/CMakeLists.txt | 4 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 70 +++++++------------ GPU/GPUTracking/display/CMakeLists.txt | 3 + 3 files changed, 32 insertions(+), 45 deletions(-) diff --git a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt index fe583be02fe6b..de2d683036193 100644 --- 
a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt @@ -22,8 +22,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") TARGETVARNAME targetName PUBLIC_LINK_LIBRARIES O2::GPUO2Interface O2::GPUTracking SOURCES ${SRCS}) - - target_compile_definitions(${targetName} PUBLIC $) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -31,6 +29,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName ca) endif() +target_compile_definitions(${targetName} PUBLIC $) + if(ROOT_FOUND) target_sources(${targetName} PRIVATE ../../qa/genEvents.cxx) endif() diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 6e536727a0c67..5ed511173f53e 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -187,40 +187,33 @@ include_directories(${GPU_DIR}/Common # Create main targets add_subdirectory(../../ GPU) add_library(standalone_support SHARED ${GPUTRACKING_DIR}/utils/EmptyFile.cxx) +target_compile_definitions(standalone_support PUBLIC $) -target_link_libraries(GPUTracking PUBLIC TPCFastTransformation standalone_support dl) +target_link_libraries(standalone_support PUBLIC# + dl + pthread + Microsoft.GSL::GSL + TPCFastTransformation) +target_link_libraries(GPUTracking PUBLIC TPCFastTransformation standalone_support) target_link_libraries(ca PUBLIC GPUTracking) -target_link_libraries(standalone_support PUBLIC dl pthread) -target_compile_definitions(ca PUBLIC $) -target_compile_definitions(standalone_support PUBLIC $) # Add all sources and dependencies to to support based on Config File target_sources(standalone_support PRIVATE - ${O2_DIR}/Common/Field/src/MagFieldFast.cxx - ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx - ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - 
${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx - ${O2_DIR}/Detectors/Base/src/Ray.cxx - ${O2_DIR}/Detectors/Base/src/Propagator.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) -if(CONFIG_O2_ITS_TRAITS) - target_sources(standalone_support PRIVATE - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/PrimaryVertexContext.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ClusterLines.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/TrackerTraitsCPU.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/VertexerTraits.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/ROframe.cxx) - target_link_libraries(standalone_support PUBLIC Boost::boost) -endif() + ${O2_DIR}/Common/Field/src/MagFieldFast.cxx + ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx + ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx + ${O2_DIR}/Detectors/Base/src/Ray.cxx + ${O2_DIR}/Detectors/Base/src/Propagator.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) if(GPUCA_CONFIG_FMT) 
target_link_libraries(standalone_support PUBLIC fmt::fmt) @@ -232,23 +225,14 @@ if(GPUCA_CONFIG_VC) target_link_libraries(TPCFastTransformation PUBLIC Vc::Vc) endif() -if(GPUCA_BUILD_EVENT_DISPLAY) - if(GPUCA_CONFIG_GL3W) - target_sources(GPUTrackingDisplay PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/gl3w.c) - endif() - target_sources(GPUTracking PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/HandMadeMath/HandMadeMathImpl.cxx) - target_include_directories(GPUTracking SYSTEM PUBLIC ${GPUTRACKING_DIR}/display/3rdparty) -endif() - if(GPUCA_CONFIG_ROOT) target_link_libraries(standalone_support PUBLIC - ROOT::Core - ROOT::RIO - ROOT::Hist - ROOT::Gui - ROOT::Tree) + ROOT::Core + ROOT::RIO + ROOT::Hist + ROOT::Gui + ROOT::Tree) endif() -target_link_libraries(standalone_support PUBLIC Microsoft.GSL::GSL TPCFastTransformation) if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index 68385d7916234..b3107dbec8c79 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -179,9 +179,12 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() if(GPUCA_CONFIG_GL3W) target_compile_definitions(${targetName} PRIVATE GPUCA_DISPLAY_GL3W) + target_sources(${targetName} PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/gl3w.c) else() target_link_libraries(${targetName} PRIVATE ${GLEW_LIBRARIES}) endif() + target_sources(${targetName} PRIVATE ${GPUTRACKING_DIR}/display/3rdparty/HandMadeMath/HandMadeMathImpl.cxx) + target_include_directories(${targetName} SYSTEM PUBLIC ${GPUTRACKING_DIR}/display/3rdparty) endif() if (OPENGL_GLU_FOUND) From 9046e703929252511633b3ac75a06f618802e2b3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 19 Mar 2025 22:35:24 +0100 Subject: [PATCH 
0263/1914] GPU Standalone: trap on FPE by default only when not using ffast-math --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 40a7fc71cbb4d..a1f650a2bc56e 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -521,7 +521,7 @@ AddOption(outputcontrolmem, uint64_t, 0, "outputMemory", 0, "Use predefined outp AddOption(inputcontrolmem, uint64_t, 0, "inputMemory", 0, "Use predefined input buffer of this size", min(0ul), message("Using %s bytes as input memory")) AddOption(cpuAffinity, int32_t, -1, "", 0, "Pin CPU affinity to this CPU core", min(-1)) AddOption(fifoScheduler, bool, false, "", 0, "Use FIFO realtime scheduler", message("Setting FIFO scheduler: %s")) -AddOption(fpe, bool, true, "", 0, "Trap on floating point exceptions") +AddOption(fpe, int8_t, -1, "", 0, "Trap on floating point exceptions (-1 = if no ffast-math)") AddOption(flushDenormals, bool, true, "", 0, "Enable FTZ and DAZ (Flush all denormals to zero)") AddOption(solenoidBzNominalGPU, float, -1e6f, "", 0, "Field strength of solenoid Bz in kGaus") AddOption(constBz, bool, false, "", 0, "Force constand Bz") diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index d6279df7c9188..e9cb7c5179c59 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -142,7 +142,11 @@ int32_t ReadConfiguration(int argc, char** argv) return 1; } } +#ifdef __FAST_MATH__ + if (configStandalone.fpe == 1) { +#else if (configStandalone.fpe) { +#endif feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW); } if (configStandalone.flushDenormals) { @@ -158,7 +162,7 @@ int32_t ReadConfiguration(int argc, 
char** argv) printf("FIFO Scheduler setting not supported on Windows\n"); return 1; } - if (configStandalone.fpe) { + if (configStandalone.fpe == 1) { printf("FPE not supported on Windows\n"); return 1; } From 90267bb6455493078cb2e97139d7d8443320034f Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 20 Mar 2025 08:36:09 +0100 Subject: [PATCH 0264/1914] DPL: enable early forwarding for AODs (#14088) Should improve parallelism for long trains. Requires FairMQ 1.9.2 and one needs to pass `--early-forwarding-policy always` for this to take effect. --- .../Framework/CompletionPolicyHelpers.h | 5 ++++ Framework/Core/include/Framework/InputSpan.h | 15 +++++++++++- Framework/Core/src/CompletionPolicy.cxx | 4 ++++ .../Core/src/CompletionPolicyHelpers.cxx | 18 ++++++++++++++ Framework/Core/src/DataProcessingDevice.cxx | 15 +++++++++++- Framework/Core/src/DataRelayer.cxx | 24 +++++++++++++++++-- Framework/Core/src/InputSpan.cxx | 7 ++++-- .../Core/test/test_InputRecordWalker.cxx | 2 +- Framework/Core/test/test_InputSpan.cxx | 2 +- Framework/Utils/test/RawPageTestData.h | 6 +++-- 10 files changed, 88 insertions(+), 10 deletions(-) diff --git a/Framework/Core/include/Framework/CompletionPolicyHelpers.h b/Framework/Core/include/Framework/CompletionPolicyHelpers.h index 547add44560ea..aa336d040d30d 100644 --- a/Framework/Core/include/Framework/CompletionPolicyHelpers.h +++ b/Framework/Core/include/Framework/CompletionPolicyHelpers.h @@ -43,6 +43,11 @@ struct CompletionPolicyHelpers { /// When any of the parts of the record have been received, consume them. static CompletionPolicy consumeWhenAny(const char* name, CompletionPolicy::Matcher matcher); + +#if __has_include() + /// When any of the parts which has arrived has a refcount of 1. 
+ static CompletionPolicy consumeWhenAnyZeroCount(const char* name, CompletionPolicy::Matcher matcher); +#endif /// Default matcher applies for all devices static CompletionPolicy consumeWhenAny(CompletionPolicy::Matcher matcher = [](auto const&) -> bool { return true; }) { diff --git a/Framework/Core/include/Framework/InputSpan.h b/Framework/Core/include/Framework/InputSpan.h index c435276c7134f..cf8c8acda6796 100644 --- a/Framework/Core/include/Framework/InputSpan.h +++ b/Framework/Core/include/Framework/InputSpan.h @@ -46,7 +46,7 @@ class InputSpan /// index and the buffer associated. /// @nofPartsGetter is the getter for the number of parts associated with an index /// @a size is the number of elements in the span. - InputSpan(std::function getter, std::function nofPartsGetter, size_t size); + InputSpan(std::function getter, std::function nofPartsGetter, std::function refCountGetter, size_t size); /// @a i-th element of the InputSpan [[nodiscard]] DataRef get(size_t i, size_t partidx = 0) const @@ -66,6 +66,18 @@ class InputSpan return mNofPartsGetter(i); } + // Get the refcount for a given part + [[nodiscard]] int getRefCount(size_t i) const + { + if (i >= mSize) { + return 0; + } + if (!mRefCountGetter) { + return -1; + } + return mRefCountGetter(i); + } + /// Number of elements in the InputSpan [[nodiscard]] size_t size() const { @@ -236,6 +248,7 @@ class InputSpan private: std::function mGetter; std::function mNofPartsGetter; + std::function mRefCountGetter; size_t mSize; }; diff --git a/Framework/Core/src/CompletionPolicy.cxx b/Framework/Core/src/CompletionPolicy.cxx index 9d92fd07e6f5a..ec8997e32c5db 100644 --- a/Framework/Core/src/CompletionPolicy.cxx +++ b/Framework/Core/src/CompletionPolicy.cxx @@ -26,7 +26,11 @@ std::vector { return { CompletionPolicyHelpers::consumeWhenAllOrdered("internal-dpl-aod-writer"), +#if __has_include() + CompletionPolicyHelpers::consumeWhenAnyZeroCount("internal-dpl-injected-dummy-sink", [](DeviceSpec const& s) { return 
s.name.find("internal-dpl-injected-dummy-sink") != std::string::npos; }), +#else CompletionPolicyHelpers::consumeWhenAny("internal-dpl-injected-dummy-sink", [](DeviceSpec const& s) { return s.name.find("internal-dpl-injected-dummy-sink") != std::string::npos; }), +#endif CompletionPolicyHelpers::consumeWhenAll()}; } diff --git a/Framework/Core/src/CompletionPolicyHelpers.cxx b/Framework/Core/src/CompletionPolicyHelpers.cxx index 9dd895a6fed6d..e682f9a7c7dd6 100644 --- a/Framework/Core/src/CompletionPolicyHelpers.cxx +++ b/Framework/Core/src/CompletionPolicyHelpers.cxx @@ -19,6 +19,9 @@ #include "Framework/TimingInfo.h" #include "DecongestionService.h" #include "Framework/Signpost.h" +#if __has_include() +#include +#endif #include #include @@ -249,6 +252,21 @@ CompletionPolicy CompletionPolicyHelpers::consumeExistingWhenAny(const char* nam }}; } +#if __has_include() +CompletionPolicy CompletionPolicyHelpers::consumeWhenAnyZeroCount(const char* name, CompletionPolicy::Matcher matcher) +{ + auto callback = [](InputSpan const& inputs, std::vector const&, ServiceRegistryRef& ref) -> CompletionPolicy::CompletionOp { + for (size_t i = 0; i < inputs.size(); ++i) { + if (inputs.get(i).header != nullptr && inputs.getRefCount(i) == 1) { + return CompletionPolicy::CompletionOp::Consume; + } + } + return CompletionPolicy::CompletionOp::Wait; + }; + return CompletionPolicy{name, matcher, callback, false}; +} +#endif + CompletionPolicy CompletionPolicyHelpers::consumeWhenAny(const char* name, CompletionPolicy::Matcher matcher) { auto callback = [](InputSpan const& inputs, std::vector const&, ServiceRegistryRef& ref) -> CompletionPolicy::CompletionOp { diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index 7f42805cfdb1e..ae25d8d3a915c 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -57,6 +57,9 @@ #include #include #include +#if __has_include() +#include +#endif 
#include #include #include @@ -1214,12 +1217,14 @@ void DataProcessingDevice::fillContext(DataProcessorContext& context, DeviceCont if (forwarded.matcher.lifetime != Lifetime::Condition) { onlyConditions = false; } +#if !__has_include() if (strncmp(DataSpecUtils::asConcreteOrigin(forwarded.matcher).str, "AOD", 3) == 0) { context.canForwardEarly = false; overriddenEarlyForward = true; LOG(detail) << "Cannot forward early because of AOD input: " << DataSpecUtils::describe(forwarded.matcher); break; } +#endif if (DataSpecUtils::partialMatch(forwarded.matcher, o2::header::DataDescription{"RAWDATA"}) && mProcessingPolicies.earlyForward == EarlyForwardPolicy::NORAW) { context.canForwardEarly = false; overriddenEarlyForward = true; @@ -2230,7 +2235,15 @@ bool DataProcessingDevice::tryDispatchComputation(ServiceRegistryRef ref, std::v auto nofPartsGetter = [¤tSetOfInputs](size_t i) -> size_t { return currentSetOfInputs[i].getNumberOfPairs(); }; - return InputSpan{getter, nofPartsGetter, currentSetOfInputs.size()}; +#if __has_include() + auto refCountGetter = [¤tSetOfInputs](size_t idx) -> int { + auto& header = static_cast(*currentSetOfInputs[idx].header(0)); + return header.GetRefCount(); + }; +#else + std::function refCountGetter = nullptr; +#endif + return InputSpan{getter, nofPartsGetter, refCountGetter, currentSetOfInputs.size()}; }; auto markInputsAsDone = [ref](TimesliceSlot slot) -> void { diff --git a/Framework/Core/src/DataRelayer.cxx b/Framework/Core/src/DataRelayer.cxx index 385d9a6c50c4a..f30866dc0aa1b 100644 --- a/Framework/Core/src/DataRelayer.cxx +++ b/Framework/Core/src/DataRelayer.cxx @@ -44,6 +44,10 @@ #include #include +#include +#if __has_include() +#include +#endif #include #include #include @@ -209,7 +213,15 @@ DataRelayer::ActivityStats DataRelayer::processDanglingInputs(std::vector(partial.size())}; +#if __has_include() + auto refCountGetter = [&partial](size_t idx) -> int { + auto& header = static_cast(*partial[idx].header(0)); + return 
header.GetRefCount(); + }; +#else + std::function refCountGetter = nullptr; +#endif + InputSpan span{getter, nPartsGetter, refCountGetter, static_cast(partial.size())}; // Setup the input span if (expirator.checker(services, timestamp.value, span) == false) { @@ -755,7 +767,15 @@ void DataRelayer::getReadyToProcess(std::vector& comp auto nPartsGetter = [&partial](size_t idx) { return partial[idx].size(); }; - InputSpan span{getter, nPartsGetter, static_cast(partial.size())}; +#if __has_include() + auto refCountGetter = [&partial](size_t idx) -> int { + auto& header = static_cast(*partial[idx].header(0)); + return header.GetRefCount(); + }; +#else + std::function refCountGetter = nullptr; +#endif + InputSpan span{getter, nPartsGetter, refCountGetter, static_cast(partial.size())}; CompletionPolicy::CompletionOp action = mCompletionPolicy.callbackFull(span, mInputs, mContext); auto& variables = mTimesliceIndex.getVariablesForSlot(slot); diff --git a/Framework/Core/src/InputSpan.cxx b/Framework/Core/src/InputSpan.cxx index 510b55cd0b9b9..d1dffc85602a5 100644 --- a/Framework/Core/src/InputSpan.cxx +++ b/Framework/Core/src/InputSpan.cxx @@ -29,8 +29,11 @@ InputSpan::InputSpan(std::function getter, size_t size) { } -InputSpan::InputSpan(std::function getter, std::function nofPartsGetter, size_t size) - : mGetter{getter}, mNofPartsGetter{nofPartsGetter}, mSize{size} +InputSpan::InputSpan(std::function getter, + std::function nofPartsGetter, + std::function refCountGetter, + size_t size) + : mGetter{getter}, mNofPartsGetter{nofPartsGetter}, mRefCountGetter(refCountGetter), mSize{size} { } diff --git a/Framework/Core/test/test_InputRecordWalker.cxx b/Framework/Core/test/test_InputRecordWalker.cxx index 5b9004a1a9366..9af3c0dd2dbe2 100644 --- a/Framework/Core/test/test_InputRecordWalker.cxx +++ b/Framework/Core/test/test_InputRecordWalker.cxx @@ -42,7 +42,7 @@ struct DataSet { auto payload = static_cast(this->messages[i].second.at(2 * part + 1)->data()); return 
DataRef{nullptr, header, payload}; }, - [this](size_t i) { return i < this->messages.size() ? messages[i].second.size() / 2 : 0; }, this->messages.size()}, + [this](size_t i) { return i < this->messages.size() ? messages[i].second.size() / 2 : 0; }, nullptr, this->messages.size()}, record{schema, span, registry}, values{std::move(v)} { diff --git a/Framework/Core/test/test_InputSpan.cxx b/Framework/Core/test/test_InputSpan.cxx index 0622ad898d249..c5682aea80b6c 100644 --- a/Framework/Core/test/test_InputSpan.cxx +++ b/Framework/Core/test/test_InputSpan.cxx @@ -37,7 +37,7 @@ TEST_CASE("TestInputSpan") return inputs[i].size() / 2; }; - InputSpan span{getter, nPartsGetter, inputs.size()}; + InputSpan span{getter, nPartsGetter, nullptr, inputs.size()}; REQUIRE(span.size() == inputs.size()); routeNo = 0; for (; routeNo < span.size(); ++routeNo) { diff --git a/Framework/Utils/test/RawPageTestData.h b/Framework/Utils/test/RawPageTestData.h index 684fc4d0cf8a3..a6b800f7cba32 100644 --- a/Framework/Utils/test/RawPageTestData.h +++ b/Framework/Utils/test/RawPageTestData.h @@ -47,7 +47,9 @@ struct DataSet { auto payload = static_cast(this->messages[i].at(2 * part + 1)->data()); return DataRef{nullptr, header, payload}; }, - [this](size_t i) { return i < this->messages.size() ? messages[i].size() / 2 : 0; }, this->messages.size()}, + [this](size_t i) { return i < this->messages.size() ? 
messages[i].size() / 2 : 0; }, + nullptr, + this->messages.size()}, record{schema, span, registry}, values{std::move(v)} { @@ -63,5 +65,5 @@ struct DataSet { using AmendRawDataHeader = std::function; DataSet createData(std::vector const& inputspecs, std::vector const& dataheaders, AmendRawDataHeader amendRdh = nullptr); -} // namespace o2::framework +} // namespace o2::framework::test #endif // FRAMEWORK_UTILS_RAWPAGETESTDATA_H From 7263f799ce8c3b660d0595fb95efeaef9b8c2f6c Mon Sep 17 00:00:00 2001 From: Matthias Kleiner Date: Thu, 20 Mar 2025 09:17:59 +0100 Subject: [PATCH 0265/1914] TPC: Adding check for empty IDCs --- .../include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h index 5fe0d6a442dd5..35f51dd489115 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFourierTransformAggregatorSpec.h @@ -257,6 +257,11 @@ class TPCFourierTransformAggregatorSpec : public o2::framework::Task void makeTPCScaler(DataAllocator& output, const bool eos) { LOGP(info, "Making TPC scalers"); + if (mTPCScalerCont.idcs.empty()) { + LOGP(warning, "No IDCs received for TPC scaler creation"); + return; + } + // check if IDC scalers can be created - check length of continous received IDCs std::vector> times; times.reserve(mTPCScalerCont.idcs.size()); From 42d5ae19a64f14c858496dd77dd1f6b5520c3415 Mon Sep 17 00:00:00 2001 From: swenzel Date: Thu, 20 Mar 2025 15:07:16 +0100 Subject: [PATCH 0266/1914] remove deprecated confkey value --- Common/SimConfig/include/SimConfig/SimParams.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Common/SimConfig/include/SimConfig/SimParams.h b/Common/SimConfig/include/SimConfig/SimParams.h index 2c103f43b2b04..cf3ee2b01cf2e 100644 --- 
a/Common/SimConfig/include/SimConfig/SimParams.h +++ b/Common/SimConfig/include/SimConfig/SimParams.h @@ -36,7 +36,6 @@ struct SimCutParams : public o2::conf::ConfigurableParamHelper { float maxRTrackingZDC = 50; // R-cut applied in the tunnel leading to ZDC when z > beampipeZ (custom stepping function) float tunnelZ = 1900; // Z-value from where we apply maxRTrackingZDC (default value taken from standard "hall" dimensions) - float globalDensityFactor = 1.f; // global factor that scales all material densities for systematic studies bool lowneut = false; O2ParamDef(SimCutParams, "SimCutParams"); }; @@ -44,7 +43,7 @@ struct SimCutParams : public o2::conf::ConfigurableParamHelper { // parameter influencing material manager struct SimMaterialParams : public o2::conf::ConfigurableParamHelper { // Local density value takes precedence over global density value, i.e. local values overwrite the global value. - float globalDensityFactor = 1.f; + float globalDensityFactor = 1.f; // global factor that scales all material densities for systematic studies std::string localDensityFactor; // Expected format: "SimMaterialParams.localDensityFactor=:,:,..." 
O2ParamDef(SimMaterialParams, "SimMaterialParams"); From 97aeb5f7be62d251c196ea9c2d354b119bb8f29c Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 22 Mar 2025 02:26:50 +0100 Subject: [PATCH 0267/1914] Common: allow to write c-style arrays in TreeStream Signed-off-by: Felix Schlepper --- Common/Utils/include/CommonUtils/TreeStream.h | 162 +++++++++--------- Common/Utils/src/TreeStream.cxx | 38 +++- Common/Utils/test/testTreeStream.cxx | 42 ++++- 3 files changed, 148 insertions(+), 94 deletions(-) diff --git a/Common/Utils/include/CommonUtils/TreeStream.h b/Common/Utils/include/CommonUtils/TreeStream.h index 2aa02f6509d2c..d1d4527ffc99d 100644 --- a/Common/Utils/include/CommonUtils/TreeStream.h +++ b/Common/Utils/include/CommonUtils/TreeStream.h @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include "GPUCommonDef.h" class TBranch; @@ -39,10 +41,79 @@ namespace utils /// /// See testTreeStream.cxx for functional example /// +namespace details +{ +template +struct IsTrivialRootType { + static constexpr bool value = + std::is_same_v || // Float_t + std::is_same_v || // Double_t + std::is_same_v || std::is_same_v || // ULong64_t or ULong_t + std::is_same_v || std::is_same_v || // Long64_t or Long_t + std::is_same_v || // UInt_t + std::is_same_v || // Int_t + std::is_same_v || // UShort_t + std::is_same_v || // Short_t + std::is_same_v || // UChar_t + std::is_same_v || std::is_same_v || std::is_same_v; // Char_t, int8_t, or Bool_t +}; + +template +struct IsTrivialRootType { + static constexpr bool value = IsTrivialRootType::value; +}; + +template +struct IsTrivialRootType { + static constexpr bool value = IsTrivialRootType::value; +}; + +template +concept TrivialRootType = IsTrivialRootType::value; + +template +concept ComplexRootType = !IsTrivialRootType::value; + +template +static constexpr char getRootTypeCode() +{ + if constexpr (std::is_array_v) { + return getRootTypeCode>(); + } else if constexpr (std::is_same_v) { + return 'F'; + } else 
if constexpr (std::is_same_v) { + return 'D'; + } else if constexpr (std::is_same_v || + std::is_same_v) { + return 'l'; + } else if constexpr (std::is_same_v || + std::is_same_v) { + return 'L'; + } else if constexpr (std::is_same_v) { + return 'i'; + } else if constexpr (std::is_same_v) { + return 'I'; + } else if constexpr (std::is_same_v) { + return 's'; + } else if constexpr (std::is_same_v) { + return 'S'; + } else if constexpr (std::is_same_v) { + return 'b'; + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v) { + return 'B'; + } else { + static_assert(false, "unsupported type!"); + } +} +} // namespace details + class TreeStream { public: struct TreeDataElement { + int arsize = 1; ///< size of array char type = 0; ///< type of data element const TClass* cls = nullptr; ///< data type pointer const void* ptr = nullptr; ///< pointer to element @@ -64,87 +135,10 @@ class TreeStream void setID(int id) { mID = id; } int getID() const { return mID; } - TreeStream& operator<<(const Bool_t& b) - { - CheckIn('B', &b); - return *this; - } - - TreeStream& operator<<(const Char_t& c) - { - CheckIn('B', &c); - return *this; - } - - TreeStream& operator<<(const int8_t& i) - { - CheckIn('B', &i); - return *this; - } - - TreeStream& operator<<(const UChar_t& c) - { - CheckIn('b', &c); - return *this; - } - - TreeStream& operator<<(const Short_t& h) - { - CheckIn('S', &h); - return *this; - } - - TreeStream& operator<<(const UShort_t& h) - { - CheckIn('s', &h); - return *this; - } - - TreeStream& operator<<(const Int_t& i) - { - CheckIn('I', &i); - return *this; - } - - TreeStream& operator<<(const UInt_t& i) - { - CheckIn('i', &i); - return *this; - } - - TreeStream& operator<<(const Long_t& l) - { - CheckIn('L', &l); - return *this; - } - - TreeStream& operator<<(const ULong_t& l) - { - CheckIn('l', &l); - return *this; - } - - TreeStream& operator<<(const Long64_t& l) - { - CheckIn('L', &l); - return *this; - } - - TreeStream& operator<<(const 
ULong64_t& l) - { - CheckIn('l', &l); - return *this; - } - - TreeStream& operator<<(const Float_t& f) - { - CheckIn('F', &f); - return *this; - } - - TreeStream& operator<<(const Double_t& d) + template + TreeStream& operator<<(const T& t) { - CheckIn('D', &d); + CheckIn(details::getRootTypeCode(), &t); return *this; } @@ -157,7 +151,7 @@ class TreeStream return *this; } - template ::value, bool>::type* = nullptr> + template ::value, bool>::type* = nullptr> TreeStream& operator<<(const T& obj) { CheckIn(&obj); @@ -175,6 +169,7 @@ class TreeStream int mCurrentIndex = 0; ///< index of current element int mID = -1; ///< identifier of layout int mNextNameCounter = 0; ///< next name counter + int mNextArraySize = 0; ///< next array size int mStatus = 0; ///< status of the layout TString mNextName; ///< name for next entry @@ -191,8 +186,7 @@ Int_t TreeStream::CheckIn(const T* obj) } if (mCurrentIndex >= static_cast(mElements.size())) { - mElements.emplace_back(); - auto& element = mElements.back(); + auto& element = mElements.emplace_back(); element.cls = pClass; TString name = mNextName; if (name.Length()) { @@ -204,6 +198,8 @@ Int_t TreeStream::CheckIn(const T* obj) } element.name = name.Data(); element.ptr = obj; + element.arsize = mNextArraySize; + mNextArraySize = 1; // reset } else { auto& element = mElements[mCurrentIndex]; if (!element.cls) { diff --git a/Common/Utils/src/TreeStream.cxx b/Common/Utils/src/TreeStream.cxx index da25f25ad2eb1..cd0641a11d043 100644 --- a/Common/Utils/src/TreeStream.cxx +++ b/Common/Utils/src/TreeStream.cxx @@ -29,8 +29,7 @@ int TreeStream::CheckIn(Char_t type, const void* pointer) // Insert object if (mCurrentIndex >= static_cast(mElements.size())) { - mElements.emplace_back(); - auto& element = mElements.back(); + auto& element = mElements.emplace_back(); element.type = type; TString name = mNextName; if (name.Length()) { @@ -42,6 +41,8 @@ int TreeStream::CheckIn(Char_t type, const void* pointer) } element.name = name.Data(); 
element.ptr = pointer; + element.arsize = mNextArraySize; + mNextArraySize = 1; // reset } else { auto& element = mElements[mCurrentIndex]; if (element.type != type) { @@ -89,7 +90,13 @@ void TreeStream::BuildTree() } if (element.type > 0) { - TString nameC = TString::Format("%s/%c", name.Data(), element.type); + TString nameC; + if (element.arsize > 1) { + nameC = TString::Format("%s[%d]/%c", name.Data(), element.arsize, + element.type); + } else { + nameC = TString::Format("%s/%c", name.Data(), element.type); + } br = mTree.Branch(name.Data(), const_cast(element.ptr), nameC.Data()); if (entriesFilled) { br->SetAddress(nullptr); @@ -148,28 +155,43 @@ TreeStream& TreeStream::Endl() TreeStream& TreeStream::operator<<(const Char_t* name) { // Stream the branch name - // if (name[0] == '\n') { return Endl(); } - // + // if tree was already defined ignore if (mTree.GetEntries() > 0) { return *this; } + + int arsize = 1; + // check branch name if tree was not - // Int_t last = 0; for (last = 0;; last++) { if (name[last] == 0) { break; } } - if (last > 0 && name[last - 1] == '=') { mNextName = name; - mNextName[last - 1] = 0; + mNextName[last - 1] = 0; // remove '=' from string mNextNameCounter = 0; + + TString inName{name}; + auto brkStaPos = inName.Index('['); + + if (brkStaPos != kNPOS) { + auto brkEndPos = inName.Index(']'); + if (brkEndPos != kNPOS && brkEndPos > brkStaPos + 1) { + TString size = inName(brkStaPos + 1, brkEndPos - brkStaPos - 1); + arsize = size.Atoi(); + mNextName = inName(0, brkStaPos); // use parsed name + } + } } + + mNextArraySize = arsize; + return *this; } diff --git a/Common/Utils/test/testTreeStream.cxx b/Common/Utils/test/testTreeStream.cxx index 7ff6f165a1d37..2491fea7f6efd 100644 --- a/Common/Utils/test/testTreeStream.cxx +++ b/Common/Utils/test/testTreeStream.cxx @@ -53,12 +53,28 @@ BOOST_AUTO_TEST_CASE(TreeStream_test) tstStream << "TrackTreeR" << "id=" << i << "x=" << x << "track=" << trc << "\n"; } + + // test for c-arrays + int 
iArray[6] = {1, 2, 3, 4, 5, 6}; + float fArray[6] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f}; + for (int i{0}; i < nit; ++i) { + for (int j{0}; j < 6; ++j) { + iArray[j] += i; + fArray[j] += (float)i; + } + tstStream << "ArrayTree" + << "id=" << i + << "iArray[6]=" << iArray + << "fArray[6]=" << fArray + << "\n"; + } + // on destruction of tstTreem the trees will be stored, but we can also force it by tstStream.Close(); } // - LOG(info) << "Testing reading back tree maid by the TreeStream "; - // read back tracks + LOG(info) << "Testing reading back tree made by the TreeStream "; + // read back tracks and arrays { TFile inpf(outFName.data()); BOOST_CHECK(!inpf.IsZombie()); @@ -80,6 +96,27 @@ BOOST_AUTO_TEST_CASE(TreeStream_test) trc->printParam(); BOOST_CHECK(std::abs(x - trc->getX()) < 1e-4); } + + // check arrays + tree = (TTree*)inpf.GetObjectChecked("ArrayTree", "TTree"); + BOOST_CHECK(tree); + nent = tree->GetEntries(); + BOOST_CHECK(nent == nit); + int iArray[6]; + float fArray[6]; + BOOST_CHECK(!tree->SetBranchAddress("id", &id)); + BOOST_CHECK(!tree->SetBranchAddress("iArray", iArray)); + BOOST_CHECK(!tree->SetBranchAddress("fArray", fArray)); + for (int i = 0; i < nit; i++) { + BOOST_CHECK(tree->GetEntry(i) > 0); + BOOST_CHECK(id == i); + for (int j = 0; j < 6; j++) { + BOOST_CHECK(iArray[j] == (1 + j + i * (i + 1) / 2)); + } + for (int j = 0; j < 6; j++) { + BOOST_CHECK_CLOSE(fArray[j], (1.f + j + i * (i + 1) / 2.f + 0.1 * (j + 1)), 1e-5); + } + } } LOG(info) << "Testing loading tree via RootChain"; @@ -104,7 +141,6 @@ BOOST_AUTO_TEST_CASE(TreeStream_test) nit = 1000; BOOST_CHECK(UnitTestSparse(0.5, nit)); BOOST_CHECK(UnitTestSparse(0.1, nit)); - // } //_________________________________________________ From 6e8b562d7164cc2dd9892ac9a56847e98ef04cab Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sat, 22 Mar 2025 15:12:57 +0100 Subject: [PATCH 0268/1914] DPL: cleanup creation of DataProcessorInfo (#14096) - Use aggregate 
initialisation where possible - Drop unused bits Will simplify the plugins PR. --- Framework/Core/test/test_DataAllocator.cxx | 1 - .../Core/test/test_DeviceSpecHelpers.cxx | 31 +++++++++---------- .../Core/test/test_FrameworkDataFlowToDDS.cxx | 16 +++++----- .../test_FrameworkDataFlowToO2Control.cxx | 8 ++--- .../Core/test/test_WorkflowSerialization.cxx | 10 +++--- 5 files changed, 32 insertions(+), 34 deletions(-) diff --git a/Framework/Core/test/test_DataAllocator.cxx b/Framework/Core/test/test_DataAllocator.cxx index acdae51cab8e9..fefb6438b98d5 100644 --- a/Framework/Core/test/test_DataAllocator.cxx +++ b/Framework/Core/test/test_DataAllocator.cxx @@ -73,7 +73,6 @@ DataProcessorSpec getSourceSpec() { static_assert(enable_root_serialization::value, "enable_root_serialization must be true"); auto processingFct = [](ProcessingContext& pc) { - static int counter = 0; o2::test::TriviallyCopyable a(42, 23, 0xdead); o2::test::Polymorphic b(0xbeef); std::vector c{{0xaffe}, {0xd00f}}; diff --git a/Framework/Core/test/test_DeviceSpecHelpers.cxx b/Framework/Core/test/test_DeviceSpecHelpers.cxx index 6240e784d09d3..4184be848c5ec 100644 --- a/Framework/Core/test/test_DeviceSpecHelpers.cxx +++ b/Framework/Core/test/test_DeviceSpecHelpers.cxx @@ -16,7 +16,6 @@ #include "Framework/DriverConfig.h" #include "../src/DeviceSpecHelpers.h" #include -#include #include #include #include @@ -67,10 +66,10 @@ void check(const std::vector& arguments, std::vector dataProcessorInfos; for (auto& [name, _] : matrix) { dataProcessorInfos.push_back(DataProcessorInfo{ - name, - "executable-name", - arguments, - workflowOptions, + .name = name, + .executable = "executable-name", + .cmdLineArgs = arguments, + .workflowOptions = workflowOptions, }); } DriverConfig driverConfig{}; @@ -184,7 +183,7 @@ TEST_CASE("CheckOptionReworking") { { std::vector infos = { - {{}, {}, {"--driver-client-backend", "foo"}}, + {.cmdLineArgs = {"--driver-client-backend", "foo"}}, {}}; 
DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[1] == "foo"); @@ -192,30 +191,30 @@ TEST_CASE("CheckOptionReworking") } { std::vector infos = { - {{}, {}, {"--driver-client-backend", "foo"}}, - {{}, {}, {"--driver-client-backend", "bar"}}}; + {.cmdLineArgs = {"--driver-client-backend", "foo"}}, + {.cmdLineArgs = {"--driver-client-backend", "bar"}}}; REQUIRE_THROWS_AS(DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"), o2::framework::RuntimeErrorRef); } { std::vector infos = { - {{}, {}, {"--driver-client-backend", "foo"}}, - {{}, {}, {"--driver-client-backend", "foo"}}}; + {.cmdLineArgs = {"--driver-client-backend", "foo"}}, + {.cmdLineArgs = {"--driver-client-backend", "foo"}}}; DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[1] == "foo"); REQUIRE(infos[1].cmdLineArgs[1] == "foo"); } { std::vector infos = { - {{}, {}, {"foo", "bar"}}, - {{}, {}, {"fnjcnak", "foo"}}}; + {.cmdLineArgs = {"foo", "bar"}}, + {.cmdLineArgs = {"fnjcnak", "foo"}}}; DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[3] == "stdout://"); REQUIRE(infos[1].cmdLineArgs[3] == "stdout://"); } { std::vector infos = { - {{}, {}, {"foo", "bar", "--driver-client-backend", "bar"}}, - {{}, {}, {"fnjcnak", "foo"}}}; + {.cmdLineArgs = {"foo", "bar", "--driver-client-backend", "bar"}}, + {.cmdLineArgs = {"fnjcnak", "foo"}}}; DeviceSpecHelpers::reworkHomogeneousOption(infos, "--driver-client-backend", "stdout://"); REQUIRE(infos[0].cmdLineArgs[3] == "bar"); REQUIRE(infos[1].cmdLineArgs[3] == "bar"); @@ -277,8 +276,8 @@ TEST_CASE("CheckIntegerReworking") } { std::vector infos = { - {{}, {}, {"foo", "bar", "--readers", "3"}}, - {{}, {}, {"--readers", "2"}}}; + {.cmdLineArgs = {"foo", "bar", "--readers", "3"}}, + {.cmdLineArgs = {"--readers", "2"}}}; 
DeviceSpecHelpers::reworkIntegerOption( infos, "--readers", []() { return 1; }, 1, [](long long x, long long y) { return x > y ? x : y; }); REQUIRE(infos[0].cmdLineArgs.size() == 4); diff --git a/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx b/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx index 593728696e77a..dd3b2eb80d253 100644 --- a/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx +++ b/Framework/Core/test/test_FrameworkDataFlowToDDS.cxx @@ -142,10 +142,10 @@ TEST_CASE("TestDDS") std::vector dataProcessorInfos = { { - {"A", "bcsadc/foo", {}, workflowOptions}, - {"B", "foo", {}, workflowOptions}, - {"C", "foo", {}, workflowOptions}, - {"D", "foo", {}, workflowOptions}, + {.name = "A", .executable = "bcsadc/foo", .workflowOptions = workflowOptions}, + {.name = "B", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "C", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "D", .executable = "foo", .workflowOptions = workflowOptions}, }}; DriverConfig driverConfig = { .batch = true, @@ -406,10 +406,10 @@ TEST_CASE("TestDDSExpendable") std::vector dataProcessorInfos = { { - {"A", "bcsadc/foo", {}, workflowOptions}, - {"B", "foo", {}, workflowOptions}, - {"C", "foo", {}, workflowOptions}, - {"D", "foo", {}, workflowOptions}, + {.name = "A", .executable = "bcsadc/foo", .workflowOptions = workflowOptions}, + {.name = "B", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "C", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "D", .executable = "foo", .workflowOptions = workflowOptions}, }}; DriverConfig driverConfig = { .batch = true, diff --git a/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx b/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx index d5f402aa16caa..9cdbc357f9674 100644 --- a/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx +++ b/Framework/Core/test/test_FrameworkDataFlowToO2Control.cxx @@ -561,10 +561,10 @@ TEST_CASE("TestO2ControlDump") 
std::vector dataProcessorInfos = { { - {"A", "bcsadc/foo", {}, workflowOptions}, - {"B", "foo", {}, workflowOptions}, - {"C", "foo", {}, workflowOptions}, - {"D", "foo", {}, workflowOptions}, + {.name = "A", .executable = "bcsadc/foo", .workflowOptions = workflowOptions}, + {.name = "B", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "C", .executable = "foo", .workflowOptions = workflowOptions}, + {.name = "D", .executable = "foo", .workflowOptions = workflowOptions}, }}; DriverConfig driverConfig{ diff --git a/Framework/Core/test/test_WorkflowSerialization.cxx b/Framework/Core/test/test_WorkflowSerialization.cxx index 6e541f7d22f07..298956970713d 100644 --- a/Framework/Core/test/test_WorkflowSerialization.cxx +++ b/Framework/Core/test/test_WorkflowSerialization.cxx @@ -52,10 +52,10 @@ TEST_CASE("TestVerifyWorkflowSerialization") {{"key1", "v\"al'1"}, {"", "val2"}, {"key3", ""}, {"", ""}}}}; std::vector dataProcessorInfoOut{ - {"A", "test_Framework_test_SerializationWorkflow", {"foo"}, {ConfigParamSpec{"aBool", VariantType::Bool, true, {"A Bool"}}}}, - {"B", "test_Framework_test_SerializationWorkflow", {"b-bar", "bfoof", "fbdbfaso"}}, - {"C", "test_Framework_test_SerializationWorkflow", {}}, - {"D", "test_Framework_test_SerializationWorkflow", {}}, + {.name = "A", .executable = "test_Framework_test_SerializationWorkflow", .cmdLineArgs = {"foo"}, .workflowOptions = {ConfigParamSpec{"aBool", VariantType::Bool, true, {"A Bool"}}}}, + {.name = "B", .executable = "test_Framework_test_SerializationWorkflow", .cmdLineArgs = {"b-bar", "bfoof", "fbdbfaso"}}, + {.name = "C", .executable = "test_Framework_test_SerializationWorkflow"}, + {.name = "D", .executable = "test_Framework_test_SerializationWorkflow"}, }; CommandInfo commandInfoOut{"o2-dpl-workflow -b --option 1 --option 2"}; @@ -94,7 +94,7 @@ TEST_CASE("TestVerifyWildcard") }}; std::vector dataProcessorInfoOut{ - {"A", "test_Framework_test_SerializationWorkflow", {}}, + {.name = "A", .executable 
= "test_Framework_test_SerializationWorkflow"}, }; CommandInfo commandInfoOut{"o2-dpl-workflow -b --option 1 --option 2"}; From f882cd3f686a6dba7bc0993aca3e936b63f8cd96 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 20 Mar 2025 18:57:56 +0100 Subject: [PATCH 0269/1914] GPU Standalone: Modernize CMake, get rid of legacy include_directories(...) and add_definitions(...) --- GPU/GPUTracking/CMakeLists.txt | 54 ++++--- .../Standalone/Benchmark/CMakeLists.txt | 2 + GPU/GPUTracking/Standalone/CMakeLists.txt | 150 ++++++++---------- GPU/TPCFastTransformation/CMakeLists.txt | 2 + 4 files changed, 98 insertions(+), 110 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 5ce96d450f765..e69e11c91d157 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -262,6 +262,27 @@ string(REPLACE ".cxx" ".h" HDRS_TMP "${SRCS_DATATYPES}") set(HDRS_CINT_DATATYPES ${HDRS_CINT_DATATYPES} ${HDRS_TMP}) unset(HDRS_TMP) +set(INCDIRS + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/Definitions + ${CMAKE_CURRENT_SOURCE_DIR}/DataTypes + ${CMAKE_CURRENT_SOURCE_DIR}/Base + ${CMAKE_CURRENT_SOURCE_DIR}/SectorTracker + ${CMAKE_CURRENT_SOURCE_DIR}/TPCConvert + ${CMAKE_CURRENT_SOURCE_DIR}/dEdx + ${CMAKE_CURRENT_SOURCE_DIR}/ITS + ${CMAKE_CURRENT_SOURCE_DIR}/TRDTracking + ${CMAKE_CURRENT_SOURCE_DIR}/qa + ${CMAKE_CURRENT_SOURCE_DIR}/Global + ${CMAKE_CURRENT_SOURCE_DIR}/HLTHeaders + ${CMAKE_CURRENT_SOURCE_DIR}/Interface + ${CMAKE_CURRENT_SOURCE_DIR}/Merger + ${CMAKE_CURRENT_SOURCE_DIR}/Refit + ${CMAKE_CURRENT_SOURCE_DIR}/Debug + ${CMAKE_CURRENT_SOURCE_DIR}/DataCompression + ${CMAKE_CURRENT_SOURCE_DIR}/TPCClusterFinder + ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) + # Main CMake part for O2 if(ALIGPU_BUILD_TYPE STREQUAL "O2") o2_add_library(GPUDataTypes @@ -297,25 +318,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML - PUBLIC_INCLUDE_DIRECTORIES . 
- Definitions - DataTypes - Base - SectorTracker - TPCConvert - dEdx - ITS - TRDTracking - qa - Global - HLTHeaders - Interface - Merger - Refit - Debug - DataCompression - TPCClusterFinder - ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly + PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) target_include_directories( @@ -349,15 +352,18 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H} ${SRCS_DATATYPES}) set(targetName ${MODULE}) add_library(O2::${MODULE} ALIAS ${MODULE}) - install(TARGETS ${MODULE}) + install(TARGETS ${targetName}) + target_link_libraries(${targetName} PUBLIC TPCFastTransformation) + target_include_directories(${targetName} PUBLIC ${INCDIRS}) if(GPUCA_CONFIG_ROOT) - ROOT_GENERATE_DICTIONARY(G__${MODULE} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL} GPUTrackingLinkDef_Standalone.h) - target_sources(${MODULE} PRIVATE G__${MODULE}) + ROOT_GENERATE_DICTIONARY(G__${targetName} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_CINT_O2_ADDITIONAL} GPUTrackingLinkDef_Standalone.h MODULE ${targetName}) INSTALL(FILES - ${CMAKE_CURRENT_BINARY_DIR}/lib${MODULE}_rdict.pcm - ${CMAKE_CURRENT_BINARY_DIR}/lib${MODULE}.rootmap + ${CMAKE_CURRENT_BINARY_DIR}/lib${targetName}_rdict.pcm + ${CMAKE_CURRENT_BINARY_DIR}/lib${targetName}.rootmap DESTINATION .) 
+ else() + target_compile_definitions(${targetName} PRIVATE GPUCA_NO_ROOT) endif() endif() diff --git a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt index de2d683036193..e418d94b62cb2 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt @@ -27,6 +27,8 @@ endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_executable(ca ${SRCS}) set(targetName ca) + target_link_libraries(${targetName} PUBLIC GPUTracking) + endif() target_compile_definitions(${targetName} PUBLIC $) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 5ed511173f53e..cf5a447c26669 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -80,7 +80,6 @@ if(GPUCA_CONFIG_VC) find_package(Vc REQUIRED) else() set(Vc_FOUND OFF) - add_definitions(-DGPUCA_NO_VC) endif() if(GPUCA_BUILD_EVENT_DISPLAY) @@ -111,118 +110,97 @@ else() set(OpenGL_FOUND OFF) endif() -if(GPUCA_CONFIG_O2) - add_definitions(-DGPUCA_TPC_GEOMETRY_O2) -endif() - if(GPUCA_CONFIG_ROOT) find_package(ROOT CONFIG REQUIRED) -else() - add_definitions(-DGPUCA_NO_ROOT) endif() find_package(Microsoft.GSL REQUIRED HINTS "$ENV{MS_GSL_ROOT}/share/cmake") if(GPUCA_CONFIG_FMT) find_package(fmt REQUIRED HINTS $ENV{FMT_ROOT}) else() - add_definitions(-DGPUCA_NO_FMT) + set(fmt_FOUND OFF) endif() # Detect GPU Backends find_package(O2GPU) -# Global include directories -include_directories(${GPU_DIR}/Common - ${GPU_DIR}/Utils - ${GPU_DIR}/TPCFastTransformation - ${GPUTRACKING_DIR} - ${GPUTRACKING_DIR}/Debug - ${GPUTRACKING_DIR}/Definitions - ${GPUTRACKING_DIR}/DataTypes - ${GPUTRACKING_DIR}/Base - ${GPUTRACKING_DIR}/dEdx - ${GPUTRACKING_DIR}/TPCConvert - ${GPUTRACKING_DIR}/Global - ${GPUTRACKING_DIR}/HLTHeaders - ${GPUTRACKING_DIR}/Merger - ${GPUTRACKING_DIR}/Refit - ${GPUTRACKING_DIR}/qa - ${GPUTRACKING_DIR}/SectorTracker - 
${GPUTRACKING_DIR}/DataCompression - ${GPUTRACKING_DIR}/TRDTracking - ${GPUTRACKING_DIR}/TPCClusterFinder - ${GPUTRACKING_DIR}/ITS - ${GPUTRACKING_DIR}/Interface - ${O2_DIR}/Common/Field/include - ${O2_DIR}/Common/Constants/include - ${O2_DIR}/Common/MathUtils/include - ${O2_DIR}/Common/Utils/include - ${O2_DIR}/DataFormats/common/include - ${O2_DIR}/DataFormats/Detectors/Common/include - ${O2_DIR}/DataFormats/Detectors/ITSMFT/common/include - ${O2_DIR}/DataFormats/Detectors/ITSMFT/ITS/include - ${O2_DIR}/DataFormats/Detectors/TOF/include - ${O2_DIR}/DataFormats/Detectors/TPC/include - ${O2_DIR}/DataFormats/Detectors/TRD/include - ${O2_DIR}/DataFormats/Headers/include - ${O2_DIR}/DataFormats/MemoryResources/include - ${O2_DIR}/DataFormats/Reconstruction/include - ${O2_DIR}/DataFormats/Reconstruction/src - ${O2_DIR}/DataFormats/simulation/include - ${O2_DIR}/Detectors/Base/include - ${O2_DIR}/Detectors/Base/src - ${O2_DIR}/Detectors/ITSMFT/common/base/include - ${O2_DIR}/Detectors/ITSMFT/ITS/base/include - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/include - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/cuda - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/hip - ${O2_DIR}/Detectors/Raw/include - ${O2_DIR}/Detectors/TOF/base/include - ${O2_DIR}/Detectors/TPC/base/include - ${O2_DIR}/Detectors/TRD/base/include - ${O2_DIR}/Detectors/TRD/base/src - ${O2_DIR}/Framework/Foundation/3rdparty/include) - # Create main targets add_subdirectory(../../ GPU) -add_library(standalone_support SHARED ${GPUTRACKING_DIR}/utils/EmptyFile.cxx) +add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx + ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx + ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx + 
${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx + ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx + ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx + ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx + ${O2_DIR}/Detectors/Base/src/Ray.cxx + ${O2_DIR}/Detectors/Base/src/Propagator.cxx + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) + target_compile_definitions(standalone_support PUBLIC $) +target_include_directories(standalone_support PUBLIC $) + +target_include_directories(standalone_support PUBLIC + ${GPU_DIR}/Common + ${GPU_DIR}/Utils + ${O2_DIR}/Common/Field/include + ${O2_DIR}/Common/Constants/include + ${O2_DIR}/Common/MathUtils/include + ${O2_DIR}/Common/Utils/include + ${O2_DIR}/DataFormats/common/include + ${O2_DIR}/DataFormats/Detectors/Common/include + ${O2_DIR}/DataFormats/Detectors/ITSMFT/common/include + ${O2_DIR}/DataFormats/Detectors/ITSMFT/ITS/include + ${O2_DIR}/DataFormats/Detectors/TOF/include + ${O2_DIR}/DataFormats/Detectors/TPC/include + ${O2_DIR}/DataFormats/Detectors/TRD/include + ${O2_DIR}/DataFormats/Headers/include + ${O2_DIR}/DataFormats/MemoryResources/include + ${O2_DIR}/DataFormats/Reconstruction/include + ${O2_DIR}/DataFormats/Reconstruction/src + ${O2_DIR}/DataFormats/simulation/include + ${O2_DIR}/Detectors/Base/include + ${O2_DIR}/Detectors/Base/src + ${O2_DIR}/Detectors/ITSMFT/common/base/include + ${O2_DIR}/Detectors/ITSMFT/ITS/base/include + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/include + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/cuda + ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/GPU/hip + ${O2_DIR}/Detectors/Raw/include + ${O2_DIR}/Detectors/TOF/base/include + ${O2_DIR}/Detectors/TPC/base/include + ${O2_DIR}/Detectors/TRD/base/include + ${O2_DIR}/Detectors/TRD/base/src + ${O2_DIR}/Framework/Foundation/3rdparty/include) 
target_link_libraries(standalone_support PUBLIC# dl pthread - Microsoft.GSL::GSL - TPCFastTransformation) -target_link_libraries(GPUTracking PUBLIC TPCFastTransformation standalone_support) -target_link_libraries(ca PUBLIC GPUTracking) - -# Add all sources and dependencies to to support based on Config File -target_sources(standalone_support PRIVATE - ${O2_DIR}/Common/Field/src/MagFieldFast.cxx - ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx - ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackLTIntegral.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx - ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx - ${O2_DIR}/Detectors/TRD/base/src/GeometryBase.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCylSet.cxx - ${O2_DIR}/Detectors/Base/src/MatLayerCyl.cxx - ${O2_DIR}/Detectors/Base/src/Ray.cxx - ${O2_DIR}/Detectors/Base/src/Propagator.cxx - ${O2_DIR}/Detectors/ITSMFT/ITS/tracking/src/Road.cxx) + Microsoft.GSL::GSL) + +target_link_libraries(GPUTracking PUBLIC standalone_support) +target_link_libraries(TPCFastTransformation PUBLIC standalone_support) + +if(GPUCA_CONFIG_O2) + target_compile_definitions(GPUTracking PRIVATE GPUCA_TPC_GEOMETRY_O2) +endif() if(GPUCA_CONFIG_FMT) target_link_libraries(standalone_support PUBLIC fmt::fmt) - target_link_libraries(TPCFastTransformation PUBLIC fmt::fmt) +else() + target_compile_definitions(standalone_support PUBLIC GPUCA_NO_FMT) endif() if(GPUCA_CONFIG_VC) target_link_libraries(standalone_support PUBLIC Vc::Vc) - target_link_libraries(TPCFastTransformation PUBLIC Vc::Vc) +else() + target_compile_definitions(standalone_support PUBLIC GPUCA_NO_VC) endif() if(GPUCA_CONFIG_ROOT) diff --git 
a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index 133bf35281b55..c7869467d15b5 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -119,6 +119,8 @@ endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_library(${MODULE} SHARED ${SRCS}) + set(targetName ${MODULE}) + target_include_directories(${targetName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) endif() install (FILES macro/TPCFastTransformInit.C From 4f0674122bb8903ad679dbc4e0456687857d9977 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:15:59 +0100 Subject: [PATCH 0270/1914] GPU CMake: remove unnecessary compile definitions --- GPU/Common/CMakeLists.txt | 2 -- GPU/Utils/CMakeLists.txt | 2 -- 2 files changed, 4 deletions(-) diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index bacf4454c39fd..efac4fc436758 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -37,8 +37,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PUBLIC $ $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) - # cuda test, only compile if CUDA if(CUDA_ENABLED) o2_add_test(GPUsortCUDA NAME test_GPUsortCUDA diff --git a/GPU/Utils/CMakeLists.txt b/GPU/Utils/CMakeLists.txt index 01ca3eb59c029..bf23c792c2034 100644 --- a/GPU/Utils/CMakeLists.txt +++ b/GPU/Utils/CMakeLists.txt @@ -31,7 +31,5 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") HEADERS ${HDRS_CINT} LINKDEF GPUUtilsLinkDef.h) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) - install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include/GPU) endif() From 66174ae4805cb78c453c771dd1b24221380c2093 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:31:19 +0100 Subject: [PATCH 0271/1914] GPU Standalone: Fix compilation without FMT --- GPU/GPUTracking/Definitions/GPULogging.h | 2 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 2 
deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPULogging.h b/GPU/GPUTracking/Definitions/GPULogging.h index 4ad6b70b2fd8b..c8ba635d1af7a 100644 --- a/GPU/GPUTracking/Definitions/GPULogging.h +++ b/GPU/GPUTracking/Definitions/GPULogging.h @@ -69,7 +69,7 @@ #define GPUCritical(...) GPUWarning(__VA_ARGS__) #define GPUFatal(string, ...) \ { \ - fprintf(stderr, string "\n", __VA_ARGS__); \ + fprintf(stderr, string "\n", ##__VA_ARGS__); \ exit(1); \ } #endif diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index cf5a447c26669..5a39f082f2bbd 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -128,7 +128,6 @@ find_package(O2GPU) add_subdirectory(../../ GPU) add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx - ${O2_DIR}/DataFormats/simulation/src/MCCompLabel.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx ${O2_DIR}/DataFormats/Reconstruction/src/Vertex.cxx From 0c4029015067d8f381ec76d1d5dd612556f8b7a5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:33:14 +0100 Subject: [PATCH 0272/1914] GPU: Fix incorrect printf syntax --- GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx | 2 +- GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index a48050a6cacbc..4047dcae0a6b3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -124,7 +124,7 @@ void GPUChainTracking::TPCClusterizerEnsureZSOffsets(uint32_t iSector, const CfF } if (pagesEndpoint != nPagesExpected) { - GPUFatal("Sector %d, Endpoint 
%d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %lu", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected); + GPUFatal("Sector %d, Endpoint %d, Fragment %d: TPC raw page count mismatch: expected %d / buffered %u", iSector, endpoint, fragment.index, pagesEndpoint, nPagesExpected); } if (nAdcDecoded != nAdcsExpected) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 38e1cd0036c16..c42d9622f5332 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -258,13 +258,13 @@ void GPUChainTracking::SanityCheck() const auto& ref = trk.getClusterRef(); if (ref.getFirstEntry() > mIOPtrs.nOutputClusRefsTPCO2) { if (nErrors++ < 1000) { - GPUError("Invalid getFirst() entry in cluster reference: %u > $u", ref.getFirstEntry(), mIOPtrs.nOutputClusRefsTPCO2); + GPUError("Invalid getFirst() entry in cluster reference: %u > %u", ref.getFirstEntry(), mIOPtrs.nOutputClusRefsTPCO2); continue; } } if (ref.getFirstEntry() + (ref.getEntries() * 3 + 1) / 2 > mIOPtrs.nOutputClusRefsTPCO2) { if (nErrors++ < 1000) { - GPUError("Invalid getEntries() entry in cluster reference: %u > $u", ref.getFirstEntry() + (ref.getEntries() * 3 + 1) / 2, mIOPtrs.nOutputClusRefsTPCO2); + GPUError("Invalid getEntries() entry in cluster reference: %u > %u", ref.getFirstEntry() + (ref.getEntries() * 3 + 1) / 2, mIOPtrs.nOutputClusRefsTPCO2); continue; } } From 8df4ac28e698bd5294e442367a89e9d71bd0b48a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 22 Mar 2025 20:41:33 +0100 Subject: [PATCH 0273/1914] GPU Standalone: Add CMake check to require ROOT if building event display --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 5a39f082f2bbd..1f48b4fc5ade1 100644 --- 
a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -112,6 +112,8 @@ endif() if(GPUCA_CONFIG_ROOT) find_package(ROOT CONFIG REQUIRED) +elseif(GPUCA_BUILD_EVENT_DISPLAY) + message(FATAL_ERROR "Cannot build event display without ROOT") endif() find_package(Microsoft.GSL REQUIRED HINTS "$ENV{MS_GSL_ROOT}/share/cmake") From ee497d7867e8f9893dfa6437f2ebf483c7a3687c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 23 Mar 2025 19:30:12 +0100 Subject: [PATCH 0274/1914] GPU CMake: Shuffle stuff a bit and move all NO_FAST_MATH stuff to GPU/... and FindO2GPU.cmake --- GPU/CMakeLists.txt | 1 - dependencies/FindO2GPU.cmake | 26 +++++++++++++++----------- dependencies/O2CompileFlags.cmake | 7 ------- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 3c83c583eebfc..75cd5eddc0b24 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -16,7 +16,6 @@ if(NOT DEFINED GPUCA_NO_FAST_MATH) set(GPUCA_NO_FAST_MATH 0) endif() -set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") if(${GPUCA_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") endif() diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index f8d41c032078f..69241ea30a375 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -64,9 +64,20 @@ function(set_target_hip_arch target) endif() endfunction() -# Detect and enable CUDA -STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") # Need to strip c++17 imposed by alidist defaults +# Need to strip c++17 imposed by alidist defaults +STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") +# ---------------------------------- Fast Math / Deterministic Mode ---------------------------------- +if(GPUCA_NO_FAST_MATH_WHOLEO2) + set(GPUCA_NO_FAST_MATH 1) + add_definitions(-DGPUCA_NO_FAST_MATH) + 
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") + set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") +endif() +set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") +set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") + +# ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) @@ -124,7 +135,6 @@ if(ENABLE_CUDA) else() set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4") endif() - set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") if(DEFINED GPUCA_NO_FAST_MATH AND "${GPUCA_NO_FAST_MATH}") set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") @@ -146,7 +156,7 @@ if(ENABLE_CUDA) endif() endif() -# Detect and enable OpenCL 1.2 from AMD +# ---------------------------------- OpenCL ---------------------------------- if(ENABLE_OPENCL) find_package(OpenCL) if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO") @@ -154,11 +164,6 @@ if(ENABLE_OPENCL) else() set_package_properties(OpenCL PROPERTIES TYPE OPTIONAL) endif() -endif() - -# Detect and enable OpenCL 2.x -if(ENABLE_OPENCL) - find_package(OpenCL) find_package(LLVM) if(LLVM_FOUND) find_package(Clang) @@ -196,7 +201,7 @@ if(ENABLE_OPENCL) endif() endif() -# Detect and enable HIP +# ---------------------------------- HIP ---------------------------------- if(ENABLE_HIP) if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") @@ -303,7 +308,6
@@ if(ENABLE_HIP) endif() message(FATAL_ERROR "HIP requested but some of the above packages are not found") endif() - endif() # if we end up here without a FATAL, it means we have found the "O2GPU" package diff --git a/dependencies/O2CompileFlags.cmake b/dependencies/O2CompileFlags.cmake index 08dd388cbdf36..eeddc189e8897 100644 --- a/dependencies/O2CompileFlags.cmake +++ b/dependencies/O2CompileFlags.cmake @@ -138,11 +138,4 @@ if(DEFINED ENV{O2_CXXFLAGS_OVERRIDE}) message(STATUS "Setting CXXFLAGS Override $ENV{O2_CXXFLAGS_OVERRIDE}") endif() -if(GPUCA_NO_FAST_MATH_WHOLEO2) - set(GPUCA_NO_FAST_MATH 1) - add_definitions(-DGPUCA_NO_FAST_MATH) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") - set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") -endif() - message(STATUS "Using build type: ${CMAKE_BUILD_TYPE} - CXXFLAGS: ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") From e836e3fd219c12a40b306065d893680585d5aebf Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 23 Mar 2025 20:56:42 +0100 Subject: [PATCH 0275/1914] GPU: Replace GPUCA_NO_FAST_MATH by more fine-grain GPUCA_DETERMINISTIC_MODE --- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 4 +-- GPU/CMakeLists.txt | 7 ----- GPU/Common/GPUCommonMath.h | 6 ++-- GPU/GPUTracking/Base/GPUReconstruction.cxx | 4 +-- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 4 +-- GPU/GPUTracking/CMakeLists.txt | 15 ++++++---- .../Definitions/GPUDefGPUParameters.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 24 +++++++-------- GPU/GPUTracking/Standalone/CMakeLists.txt | 10 +------ GPU/GPUTracking/Standalone/cmake/config.cmake | 16 +++++----- dependencies/FindO2GPU.cmake | 30 ++++++++++++++----- 13 files changed, 66 insertions(+), 60 deletions(-) diff --git 
a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index ce93523319e99..a66eba7c3bacb 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -43,7 +43,7 @@ #define THRUST_NAMESPACE thrust::hip #endif -#ifdef GPUCA_NO_FAST_MATH +#ifdef GPUCA_DETERMINISTIC_MODE #define GPU_BLOCKS 1 #define GPU_THREADS 1 #else @@ -1452,4 +1452,4 @@ template void processNeighboursHandler<7>(const int startLayer, const o2::base::PropagatorF::MatCorrType matCorrType, const int nBlocks, const int nThreads); -} // namespace o2::its \ No newline at end of file +} // namespace o2::its diff --git a/GPU/CMakeLists.txt b/GPU/CMakeLists.txt index 75cd5eddc0b24..7e2b797ae714f 100644 --- a/GPU/CMakeLists.txt +++ b/GPU/CMakeLists.txt @@ -13,13 +13,6 @@ # CMake, variables are defined for Sources / Headers first. Then, the actual # CMake build scripts use these variables. -if(NOT DEFINED GPUCA_NO_FAST_MATH) - set(GPUCA_NO_FAST_MATH 0) -endif() -if(${GPUCA_NO_FAST_MATH}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") -endif() - add_subdirectory(Common) add_subdirectory(Utils) add_subdirectory(TPCFastTransformation) diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index c412662fc0c64..b7a44c1df0f38 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -250,7 +250,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) #endif } -#ifdef GPUCA_NO_FAST_MATH +#ifdef GPUCA_DETERMINISTIC_MODE GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } @@ -286,7 +286,7 @@ GPUdi() constexpr bool 
GPUCommonMath::IsNaN(float x) { return false; } GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) { -#if defined(GPUCA_NO_FAST_MATH) && !defined(__OPENCL__) +#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__) s = sin((double)x); c = cos((double)x); #elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__) @@ -392,7 +392,7 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, GPUdi() float GPUCommonMath::InvSqrt(float _x) { -#if defined(GPUCA_NO_FAST_MATH) || defined(__OPENCL__) +#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__) return 1.f / Sqrt(_x); #elif defined(__CUDACC__) || defined(__HIPCC__) return __frsqrt_rn(_x); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 8bae1df267412..5582084fd0e17 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -261,8 +261,8 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6; } if (mProcessingSettings.deterministicGPUReconstruction) { -#ifndef GPUCA_NO_FAST_MATH - GPUError("Warning, deterministicGPUReconstruction needs GPUCA_NO_FAST_MATH for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); +#ifndef GPUCA_DETERMINISTIC_MODE + GPUError("Warning, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 5b2e53179e50c..f8203c2dc5858 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -172,7 
+172,7 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel") TARGET_DIRECTORY ${targetName} PROPERTIES COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH") + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "CUDA RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 21a641c0cc7c0..fee43eb6d8b0d 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -230,7 +230,7 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") TARGET_DIRECTORY ${targetName} PROPERTIES COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH") + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "HIP RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 89d2f386f768f..3da5b77f80d86 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -24,10 +24,10 @@ set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) -if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH}) +if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() -set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt) + set(OCL_FLAGS ${OCL_FLAGS} 
-cl-fp32-correctly-rounded-divide-sqrt) endif() set(OCL_DEFINECL "-D$,$-D>" "-I$,EXCLUDE,^/usr/include/?>,$-I>" diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index e69e11c91d157..dedfcf5953394 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -14,8 +14,13 @@ set(MODULE GPUTracking) # set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0") # to uncomment if needed, tired of typing this... # set(GPUCA_BUILD_DEBUG 1) -if(NOT "${GPUCA_NO_FAST_MATH}" AND NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") +if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + endif() +elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() include(cmake/helpers.cmake) @@ -375,7 +380,7 @@ set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx TARGET_DIRECTORY ${targetName} PROPERTIES COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_NO_FAST_MATH") + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") # GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h) @@ -417,6 +422,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() endif() -if(${GPUCA_NO_FAST_MATH}) - target_compile_definitions(${targetName} PUBLIC GPUCA_NO_FAST_MATH) 
+if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) + target_compile_definitions(${targetName} PUBLIC GPUCA_DETERMINISTIC_MODE) endif() diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 55f2e76344bd5..6d6645850408f 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -566,7 +566,7 @@ #ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #endif -#ifdef GPUCA_NO_FAST_MATH +#ifdef GPUCA_DETERMINISTIC_MODE #undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #undef GPUCA_DEDX_STORAGE_TYPE diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index eb1df3f37b6b5..5ede29d7fd851 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -723,13 +723,13 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea if (iThread == 0) { if (iBlock == 0) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); }); #else GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; }); #endif } else if (iBlock == 1) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? 
(a.fMax < b.fMax) : (a.fId < b.fId); }); #else GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; }); @@ -749,7 +749,7 @@ namespace // anonymous struct MergeBorderTracks_compMax { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); #else return a.fMax < b.fMax; @@ -759,7 +759,7 @@ struct MergeBorderTracks_compMax { struct MergeBorderTracks_compMin { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); #else return a.fMin < b.fMin; @@ -906,7 +906,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea mTrackLinks[b1.TrackID()] = iBest2; if (mergeMode > 0) { -#if defined(GPUCA_NO_FAST_MATH) // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()); #else mTrackLinks[iBest2] = b1.TrackID(); @@ -1469,7 +1469,7 @@ struct GPUTPCGMMerger_CompareClusterIdsLooper { if (a1.row != b1.row) { return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a1.id != b1.id) { return (a1.id > b1.id); } @@ -1490,7 +1490,7 @@ struct GPUTPCGMMerger_CompareClusterIds { if (a.row != b.row) { return (a.row > b.row); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a.id != b.id) { return (a.id > b.id); } @@ -1569,7 +1569,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread // unpack and sort clusters if (nParts > 1 && leg == 0) { 
GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a->X() != b->X()) { return (a->X() > b->X()); } @@ -1834,7 +1834,7 @@ struct GPUTPCGMMergerSortTracks_comp { if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a.NClusters() != b.NClusters()) { return a.NClusters() > b.NClusters(); } @@ -1858,7 +1858,7 @@ struct GPUTPCGMMergerSortTracksQPt_comp { { const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); } @@ -1907,7 +1907,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (a.NClusters() != b.NClusters()) { return a.NClusters() > b.NClusters(); } @@ -1937,7 +1937,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; -#ifdef GPUCA_NO_FAST_MATH // TODO: Use a better define as swith +#ifdef GPUCA_DETERMINISTIC_MODE if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); } diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 
1f48b4fc5ade1..ed4fc5c9f7e2d 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -52,15 +52,7 @@ if(GPUCA_BUILD_DEBUG) set(CMAKE_CXX_FLAGS "-O0 -ggdb") set(CMAKE_BUILD_TYPE DEBUG) else() - set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb -minline-all-stringops -funroll-loops -fno-stack-protector") - if(DEFINED GPUCA_NO_FAST_MATH AND ${GPUCA_NO_FAST_MATH}) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-fast-math -ffp-contract=off") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") - endif() - if (NOT CMAKE_CXX_COMPILER STREQUAL "clang++") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ftracer -fprefetch-loop-arrays") - endif() + set(CMAKE_CXX_FLAGS "-O3 -march=native -ggdb") set(CMAKE_BUILD_TYPE RELEASE) add_definitions(-DNDEBUG) endif() diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 87716d700abc8..af7c96bb96fbb 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -27,13 +27,13 @@ set(GPUCA_CONFIG_GL3W 0) set(GPUCA_CONFIG_O2 1) set(GPUCA_BUILD_DEBUG 0) set(GPUCA_BUILD_DEBUG_SANITIZE 0) -set(GPUCA_NO_FAST_MATH 0) -#set(GPUCA_CUDA_GCCBIN c++-13) -#set(GPUCA_OPENCL_CLANGBIN clang-18) -#set(HIP_AMDGPUTARGET "gfx906;gfx908;gfx90a") -set(HIP_AMDGPUTARGET "default") -#set(CUDA_COMPUTETARGET 86 89) -set(CUDA_COMPUTETARGET "default") -#set(GPUCA_CUDA_COMPILE_MODE perkernel) +set(GPUCA_DETERMINISTIC_MODE 0) # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2 +#set(GPUCA_CUDA_GCCBIN c++-14) +#set(GPUCA_OPENCL_CLANGBIN clang-19) +set(HIP_AMDGPUTARGET "default") # "gfx906;gfx908;gfx90a" +set(CUDA_COMPUTETARGET "default") # 86 89 +#set(GPUCA_CUDA_COMPILE_MODE perkernel) # onefile / perkernel / rtc #set(GPUCA_HIP_COMPILE_MODE perkernel) #set(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE 1) +#set(GPUCA_CONFIG_COMPILER gcc) # gcc / clang +#add_definitions(-DGPUCA_GPU_DEBUG_PRINT) diff --git 
a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 69241ea30a375..650a269209d9b 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -68,14 +68,30 @@ endfunction() STRING(REGEX REPLACE "\-std=[^ ]*" "" O2_GPU_CMAKE_CXX_FLAGS_NOSTD "${CMAKE_CXX_FLAGS}") # ---------------------------------- Fast Math / Deterministic Mode ---------------------------------- -if(GPUCA_NO_FAST_MATH_WHOLEO2) - set(GPUCA_NO_FAST_MATH 1) - add_definitions(-DGPUCA_NO_FAST_MATH) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") - set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -fno-fast-math -ffp-contract=off") +# set(GPUCA_DETERMINISTIC_MODE WHOLEO2) # Override +set(GPUCA_DETERMINISTIC_MODE_MAP_OFF 0) +set(GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH 1) # No -ffast-math and similar compile flags for GPU folder +set(GPUCA_DETERMINISTIC_MODE_MAP_OPTO2 2) # In addition, -O2 optimization on host for GPU folder +set(GPUCA_DETERMINISTIC_MODE_MAP_GPU 3) # In addition, GPUCA_DETERMINISTIC_MODE define for GPU folder +set(GPUCA_DETERMINISTIC_MODE_MAP_ON 3) # Synonym for GPU +set(GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2 4) # As GPU but for whole O2 code +if(NOT DEFINED GPUCA_DETERMINISTIC_MODE) + set(GPUCA_DETERMINISTIC_MODE 0) +elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") + if(NOT DEFINED GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}) + message(FATAL_ERROR "Invalid setting ${GPUCA_DETERMINISTIC_MODE} for GPUCA_DETERMINISTIC_MODE") + endif() + set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}}) + message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") +if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL 
${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2}) + add_definitions(-DGPUCA_DETERMINISTIC_MODE) + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") +endif() + # ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) @@ -135,7 +151,7 @@ if(ENABLE_CUDA) else() set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4") endif() - if(DEFINED GPUCA_NO_FAST_MATH AND "${GPUCA_NO_FAST_MATH}") + if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -use_fast_math --ftz=true")# @@ -274,7 +290,7 @@ if(ENABLE_HIP) if(HIP_AMDGPUTARGET) set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection endif() - if(NOT DEFINED GPUCA_NO_FAST_MATH OR NOT ${GPUCA_NO_FAST_MATH}) + if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -fgpu-flush-denormals-to-zero -ffast-math") endif() set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS}") From b6ad4bc54e104ec8842eb77cfed4af2edbda6d85 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:26:00 +0100 Subject: [PATCH 0276/1914] DPL: Print error in case we receive data with bogus runNumber / tfCounter --- Framework/Core/src/ExternalFairMQDeviceProxy.cxx | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx 
b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx index 823ef8f5fd5a0..52e0413d48dab 100644 --- a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx +++ b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx @@ -493,6 +493,12 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL } } + int fmqRunNumber = -1; + try { + fmqRunNumber = atoi(device->fConfig->GetProperty("runNumber", "").c_str()); + } catch (...) { + } + for (int msgidx = 0; msgidx < parts.Size(); msgidx += 2) { if (parts.At(msgidx).get() == nullptr) { LOG(error) << "unexpected nullptr found. Skipping message pair."; @@ -521,6 +527,9 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL timingInfo.runNumber = dh->runNumber; timingInfo.tfCounter = dh->tfCounter; LOG(debug) << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); + if (dh->runNumber == 0 || dh->tfCounter == 0 || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { + LOG(error) << "INVALID runNumber / tfCounter: runNumber " << dh->runNumber << ", tfCounter " << dh->tfCounter << ", FMQ runNumber " << fmqRunNumber; + } OutputSpec query{dh->dataOrigin, dh->dataDescription, dh->subSpecification}; LOG(debug) << "processing " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " time slice " << dph->startTime << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts; From b83452a140d60b615fe88ee156f6311abe7aa72d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:26:34 +0100 Subject: [PATCH 0277/1914] GPU: Can insert marker earlier, no need to wait --- GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx 
b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 6c79d87e50465..36a947dda9dc3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -32,8 +32,8 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto } uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS; if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { - TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); RecordMarker(&mEvents->single, 0); + TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); for (uint32_t i = 0; i < n; i++) { int32_t stream = i % mRec->NStreams(); runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, stream && i < (uint32_t)mRec->NStreams() ? &mEvents->single : nullptr}}, i, withinSector, mergeMode); From fe2f93e7e42de2fcfbefd55a7e4246e85bd6813c Mon Sep 17 00:00:00 2001 From: shahoian Date: Mon, 24 Mar 2025 18:26:21 +0100 Subject: [PATCH 0278/1914] Fix method MCTrackInfo::getNITSClusCont --- .../GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx index 92107d90b48ed..204e0c741a675 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackMCStudyTypes.cxx @@ -25,7 +25,9 @@ int MCTrackInfo::getNITSClusCont() const int longest = 0, current = 0; for (int i = 0; i < 7; i++) { if (pattITSCl & (0x1 << i)) { - longest = ++current; + if (++current > longest) { + longest = current; + } } else { current = 0; } From e7d0f6ad02eb19bc6dab981b65bfa838234c865f Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Mon, 24 Mar 2025 17:40:10 +0100 Subject: [PATCH 0279/1914] ALICE3: Fix 
TRKConfigParam dictionary creation --- Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt | 3 ++- .../ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt index 856fd310fe5a2..a1cb0279efef8 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/CMakeLists.txt @@ -30,4 +30,5 @@ o2_target_root_dictionary(TRKSimulation include/TRKSimulation/TRKServices.h include/TRKSimulation/TRKPetalCase.h include/TRKSimulation/TRKPetalLayer.h - include/TRKSimulation/TRKPetalDisk.h) \ No newline at end of file + include/TRKSimulation/TRKPetalDisk.h + include/TRKSimulation/DPLDigitizerParam.h) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h index 876810b5bef9d..d80027593cef0 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKSimulationLinkDef.h @@ -24,9 +24,9 @@ #pragma link C++ class o2::base::DetImpl < o2::trk::Detector> + ; #pragma link C++ class o2::trk::Digitizer + ; -// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; -// #pragma link C++ class o2::itsmft::DPLDigitizerParam < o2::detectors::DetID::ITS> + ; -// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK>> + ; -// #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3>> + ; +#pragma link C++ class o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK> + ; +#pragma link C++ class o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3> + ; +#pragma link C++ class o2::conf::ConfigurableParamHelper < 
o2::trk::DPLDigitizerParam < o2::detectors::DetID::TRK>> + ; +#pragma link C++ class o2::conf::ConfigurableParamHelper < o2::trk::DPLDigitizerParam < o2::detectors::DetID::FT3>> + ; #endif From a31999e1aca0bd85c649a4995011a7317013a227 Mon Sep 17 00:00:00 2001 From: shahoian Date: Mon, 24 Mar 2025 15:48:44 +0100 Subject: [PATCH 0280/1914] Make ITS reco start layer configurable --- .../ITS/tracking/include/ITStracking/TrackingConfigParam.h | 1 + Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx | 3 +++ 2 files changed, 4 insertions(+) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h index ec96321765534..cb6512248bf0c 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingConfigParam.h @@ -62,6 +62,7 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper0, otherwise use code defaults + uint8_t startLayerMask[MaxIter] = {}; // mask of start layer for this iteration (if >0) float minPtIterLgt[MaxIter * (MaxTrackLength - MinTrackLength + 1)] = {}; // min.pT for given track length at this iteration, used only if >0, otherwise use code defaults float sysErrY2[7] = {0}; // systematic error^2 in Y per layer float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 2c94c9bdb1f46..613402ce56e97 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -78,6 +78,9 @@ void ITSTrackingInterface::initialise() param.TrackletsPerClusterLimit = 1.e3f; // check if something was overridden via configurable params if (ip < trackConf.MaxIter) { + if (trackConf.startLayerMask[ip] > 0) { + trackParams[2].StartLayerMask = trackConf.startLayerMask[ip]; + } if 
(trackConf.minTrackLgtIter[ip] > 0) { param.MinTrackLength = trackConf.minTrackLgtIter[ip]; } From c2aa2f542ded67b6f2c3bde53111241675e90501 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:57:07 +0100 Subject: [PATCH 0281/1914] GPU: Remove hack to disable synchronization in thrust::sort, which was not working any more --- GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h | 24 ++----------------- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 +++---- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 3 files changed, 7 insertions(+), 27 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h index fdc5c16d91f35..676610b5e4c52 100644 --- a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h +++ b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h @@ -22,12 +22,12 @@ namespace o2::gpu { -class ThrustVolatileAsyncAllocator +class ThrustVolatileAllocator { public: typedef char value_type; - ThrustVolatileAsyncAllocator(GPUReconstruction* r) : mRec(r) {} + ThrustVolatileAllocator(GPUReconstruction* r) : mRec(r) {} char* allocate(std::ptrdiff_t n) { return (char*)mRec->AllocateVolatileDeviceMemory(n); } void deallocate(char* ptr, size_t) {} @@ -38,24 +38,4 @@ class ThrustVolatileAsyncAllocator } // namespace o2::gpu -#ifndef __HIPCC__ -// Override synchronize call at end of thrust algorithm running on stream, just don't run cudaStreamSynchronize -namespace thrust::cuda_cub -{ - -typedef thrust::cuda_cub::execution_policy thrustStreamPolicy; -template <> -__host__ __device__ inline cudaError_t synchronize(thrustStreamPolicy& policy) -{ -#ifndef GPUCA_GPUCODE_DEVICE - // Do not synchronize! 
- return cudaSuccess; -#else - return synchronize_stream(derived_cast(policy)); -#endif -} - -} // namespace thrust::cuda_cub -#endif // __HIPCC__ - #endif // GPU_CUDATHRUSTHELPERS_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 5ede29d7fd851..92ecd380fa38f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -773,7 +773,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr p(range); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); if (cmpMax) { thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax()); } else { @@ -1878,7 +1878,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } @@ -1886,7 +1886,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), 
GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt @@ -2111,7 +2111,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 13f204d0f940a..c366133bbae21 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -105,7 +105,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); - ThrustVolatileAsyncAllocator alloc(this); + ThrustVolatileAllocator alloc(this); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread From d9b654e5df67ecd8b5a81aa3625f3c9bb6164261 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 14:28:46 +0100 Subject: [PATCH 0282/1914] GPU: Add getThrustVolatileDeviceAllocator function --- GPU/Common/GPUCommonHelpers.h | 19 +++++++++ GPU/GPUTracking/Base/GPUReconstruction.cxx | 10 +++++ GPU/GPUTracking/Base/GPUReconstruction.h | 2 + 
GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h | 41 ------------------- .../Base/cuda/GPUReconstructionCUDA.cu | 1 - .../GPUReconstructionCUDAExternalProvider.cu | 1 - .../cuda/GPUReconstructionCUDAHelpers.inc | 7 ++++ .../Base/cuda/GPUReconstructionCUDAKernels.cu | 1 - GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 ++-- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 3 +- 12 files changed, 47 insertions(+), 52 deletions(-) delete mode 100644 GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h diff --git a/GPU/Common/GPUCommonHelpers.h b/GPU/Common/GPUCommonHelpers.h index 915d93c9bc791..2927ddab6bd0c 100644 --- a/GPU/Common/GPUCommonHelpers.h +++ b/GPU/Common/GPUCommonHelpers.h @@ -35,6 +35,7 @@ #include "GPUCommonDef.h" #include "GPUCommonLogger.h" #include +#include namespace o2::gpu::internal { @@ -60,4 +61,22 @@ static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* f #undef GPUCOMMON_INTERNAL_CAT } // namespace o2::gpu::internal +namespace o2::gpu +{ +class GPUReconstruction; +class ThrustVolatileAllocator +{ + public: + typedef char value_type; + + char* allocate(std::ptrdiff_t n); + void deallocate(char* ptr, size_t); + + private: + ThrustVolatileAllocator(GPUReconstruction* r); + std::function mAlloc; + friend class GPUReconstruction; +}; +} // namespace o2::gpu + #endif diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 5582084fd0e17..d96d5aad74622 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -30,6 +30,7 @@ #include "GPUROOTDumpCore.h" #include "GPUConfigDump.h" #include "GPUChainTracking.h" +#include "GPUCommonHelpers.h" #include "GPUMemoryResource.h" #include "GPUChain.h" @@ -1193,3 +1194,12 @@ void GPUReconstruction::SetInputControl(void* ptr, size_t size) { mInputControl.set(ptr, size); } + 
+ThrustVolatileAllocator::ThrustVolatileAllocator(GPUReconstruction* r) +{ + mAlloc = [&r](size_t n) { return (char*)r->AllocateVolatileDeviceMemory(n); }; +} +ThrustVolatileAllocator GPUReconstruction::getThrustVolatileDeviceAllocator() +{ + return ThrustVolatileAllocator(this); +} diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 5e03c77f08230..18098396e1349 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -47,6 +47,7 @@ struct GPUMemorySizeScalers; struct GPUReconstructionPipelineContext; struct GPUReconstructionThreading; class GPUROOTDumpCore; +class ThrustVolatileAllocator; namespace gpu_reconstruction_kernels { @@ -165,6 +166,7 @@ class GPUReconstruction void ClearAllocatedMemory(bool clearOutputs = true); void ReturnVolatileDeviceMemory(); void ReturnVolatileMemory(); + ThrustVolatileAllocator getThrustVolatileDeviceAllocator(); void PushNonPersistentMemory(uint64_t tag); void PopNonPersistentMemory(RecoStep step, uint64_t tag); void BlockStackedMemory(GPUReconstruction* rec); diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index f8203c2dc5858..99c59afd2011a 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -18,7 +18,7 @@ endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) -set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h) +set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h) # -------------------------------- Prepare RTC ------------------------------------------------------- 
enable_language(ASM) if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h b/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h deleted file mode 100644 index 676610b5e4c52..0000000000000 --- a/GPU/GPUTracking/Base/cuda/CUDAThrustHelpers.h +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file CUDAThrustHelpers.h -/// \author David Rohr - -#ifndef GPU_CUDATHRUSTHELPERS_H -#define GPU_CUDATHRUSTHELPERS_H - -#include "GPULogging.h" -#include -#include - -namespace o2::gpu -{ - -class ThrustVolatileAllocator -{ - public: - typedef char value_type; - - ThrustVolatileAllocator(GPUReconstruction* r) : mRec(r) {} - char* allocate(std::ptrdiff_t n) { return (char*)mRec->AllocateVolatileDeviceMemory(n); } - - void deallocate(char* ptr, size_t) {} - - private: - GPUReconstruction* mRec; -}; - -} // namespace o2::gpu - -#endif // GPU_CUDATHRUSTHELPERS_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 202edd49bc44c..175fd205153ea 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -19,7 +19,6 @@ #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" -#include "CUDAThrustHelpers.h" #include "GPUReconstructionIncludes.h" #include "GPUParamRTC.h" #include "GPUReconstructionCUDAHelpers.inc" diff --git 
a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu index f341a778076b8..534f5e8606897 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAExternalProvider.cu @@ -16,7 +16,6 @@ #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" -#include "CUDAThrustHelpers.h" #include diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc index a34f940a1337a..c2b6f6d05dd7f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAHelpers.inc @@ -16,6 +16,7 @@ #define GPURECONSTRUCTIONCUDAHELPERS_INC_H #include "GPUCommonHelpers.h" +#include "GPUReconstruction.h" namespace o2::gpu::internal { @@ -28,4 +29,10 @@ int32_t __attribute__((weak)) GPUReconstructionCUDAChkErr(const int64_t error, c } } // namespace o2::gpu::internal +namespace o2::gpu +{ +char* __attribute__((weak)) ThrustVolatileAllocator::allocate(std::ptrdiff_t n) { return mAlloc(n); } +void __attribute__((weak)) ThrustVolatileAllocator::deallocate(char* ptr, size_t) {} +} // namespace o2::gpu + #endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 0c83223ba238a..f1f459fe021bc 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -16,7 +16,6 @@ #include "GPUReconstructionCUDA.h" #include "GPUReconstructionCUDAInternals.h" -#include "CUDAThrustHelpers.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index fee43eb6d8b0d..16e6e72d56e9a 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 
@@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu CUDAThrustHelpers.h GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesHost.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -63,7 +63,7 @@ endif() set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) -set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h ${GPUCA_HIP_SOURCE_DIR}/HIPThrustHelpers.h) +set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesHost.h) # -------------------------------- Prepare RTC ------------------------------------------------------- 
enable_language(ASM) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 92ecd380fa38f..c57ca9e5e1436 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -773,7 +773,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { thrust::device_ptr p(range); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); if (cmpMax) { thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax()); } else { @@ -1878,7 +1878,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } @@ -1886,7 +1886,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - 
Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt @@ -2111,7 +2111,7 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index c366133bbae21..9594391163586 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -105,7 +105,8 @@ template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); - ThrustVolatileAllocator alloc(this); + ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); + ; thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread From ac0408b22dc8acdd41a64308de4a24b2a32b6264 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 15:14:02 +0100 Subject: [PATCH 0283/1914] GPU: Add GPUCommonAlgorithm::sortOnDevice function for starting sort on device from host --- GPU/Common/GPUCommonAlgorithm.h | 4 ++++ GPU/Common/GPUCommonAlgorithmThrust.h | 8 ++++++++ .../Base/cuda/GPUReconstructionCUDA.h | 2 ++ .../Global/GPUChainTrackingMerger.cxx | 2 
+- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 18 +++++------------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 5 +---- 6 files changed, 21 insertions(+), 18 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index a733f0ff99f26..b82c0059b9e15 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -43,6 +43,10 @@ class GPUCommonAlgorithm GPUd() static void sortInBlock(T* begin, T* end, const S& comp); template GPUd() static void sortDeviceDynamic(T* begin, T* end, const S& comp); +#ifndef __OPENCL__ + template + GPUh() static void sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); +#endif template GPUd() static void swap(T& a, T& b); diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 0208c12f1cd08..f37445ede9c84 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -87,6 +87,14 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co thrust::sort(GPUCA_THRUST_NAMESPACE::par, thrustBegin, thrustEnd, comp); } +template +GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp) +{ + thrust::device_ptr p(begin); + auto alloc = rec->getThrustVolatileDeviceAllocator(); + thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp); +} + } // namespace gpu } // namespace o2 diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index f78270d40146c..30bbc76d4c415 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -54,6 +54,8 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase void getRTCKernelCalls(std::vector& kernels); + template + friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* 
begin, size_t N, const S& comp); GPUReconstructionCUDAInternals* mInternals; }; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 36a947dda9dc3..8fba328f8ac38 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -121,7 +121,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) for (uint32_t i = 0; i < NSECTORS; i++) { runKernel({{1, -WarpSize(), 0, deviceType}}, i); runKernel(GetGridAuto(0, deviceType), i); - runKernel(GetGridAuto(0, deviceType), i); + runKernel(GetGridAuto(0, deviceType), i); // TODO: Why all in stream 0? } if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({{1, -WarpSize(), 0, deviceType}}, NSECTORS); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index c57ca9e5e1436..c8c844eee748a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -772,12 +772,10 @@ struct MergeBorderTracks_compMin { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { - thrust::device_ptr p(range); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); if (cmpMax) { - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMax()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); } else { - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), p, p + N, MergeBorderTracks_compMin()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin()); } } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize MergeBorderTracks<3> @@ -1877,17 +1875,13 @@ 
struct GPUTPCGMMergerSortTracksQPt_comp { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackOrderProcess()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr trackSort((uint32_t*)mProcessorsShadow->tpcMerger.TrackSort()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt @@ -2110,9 +2104,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr params(mProcessorsShadow->tpcMerger.LooperCandidates()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - 
thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), params, params + processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9594391163586..8056f22484e70 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -104,10 +104,7 @@ struct GPUTPCGMO2OutputSort_comp { template <> inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) { - thrust::device_ptr trackSort(mProcessorsShadow->tpcMerger.TrackSortO2()); - ThrustVolatileAllocator alloc = getThrustVolatileDeviceAllocator(); - ; - thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(mInternals->Streams[_xyz.x.stream]), trackSort, trackSort + processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread From 15df642b632fea3b9bf45a47b785d3eac54da262 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 15:34:16 +0100 Subject: [PATCH 0284/1914] GPU: Switch to C++11 nested namespace style in more places --- GPU/Common/GPUCommonAlgorithm.h | 28 ++++++--------------------- GPU/Common/GPUCommonAlgorithmThrust.h | 8 ++------ GPU/Common/GPUCommonMath.h | 7 ++----- GPU/Common/GPUCommonTransform3D.h | 7 ++----- GPU/Common/GPUROOTCartesianFwd.h | 7 
++----- GPU/Common/GPUROOTSMatrixFwd.h | 7 ++----- 6 files changed, 16 insertions(+), 48 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index b82c0059b9e15..4970876f39e4c 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -24,9 +24,7 @@ // ----------------------------- SORTING ----------------------------- -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUCommonAlgorithm { @@ -75,13 +73,6 @@ class GPUCommonAlgorithm template GPUd() static void IterSwap(I a, I b) noexcept; }; -} // namespace gpu -} // namespace o2 - -namespace o2 -{ -namespace gpu -{ #ifndef GPUCA_ALGORITHM_STD template @@ -221,8 +212,7 @@ GPUdi() void GPUCommonAlgorithm::QuickSort(I f, I l) noexcept typedef GPUCommonAlgorithm CAAlgo; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #if (((defined(__CUDACC__) && !defined(__clang__)) || defined(__HIPCC__))) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_HOSTONLY) @@ -230,9 +220,7 @@ typedef GPUCommonAlgorithm CAAlgo; #else -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -251,15 +239,12 @@ GPUdi() void GPUCommonAlgorithm::sortDeviceDynamic(T* begin, T* end, const S& co GPUCommonAlgorithm::sort(begin, end, comp); } -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // THRUST // sort and sortInBlock below are not taken from Thrust, since our implementations are faster -namespace o2 -{ -namespace gpu +namespace o2::gpu { template @@ -332,8 +317,7 @@ GPUdi() void GPUCommonAlgorithm::swap(T& a, T& b) } #endif -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu // ----------------------------- WORK GROUP FUNCTIONS ----------------------------- diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index f37445ede9c84..2da4b6a4f965d 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -30,9 +30,7 @@ #define 
GPUCA_THRUST_NAMESPACE thrust::hip #endif -namespace o2 -{ -namespace gpu +namespace o2::gpu { // - Our quicksort and bubble sort implementations are faster @@ -94,8 +92,6 @@ GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begi auto alloc = rec->getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp); } - -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index b7a44c1df0f38..58f046161aa8b 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -42,9 +42,7 @@ #define GPUCA_CHOICE(c1, c2, c3) (c1) // Select first option for Host #endif // clang-format on -namespace o2 -{ -namespace gpu +namespace o2::gpu { class GPUCommonMath @@ -540,7 +538,6 @@ GPUdii() void GPUCommonMath::AtomicMinInternal(GPUglobalref() GPUgeneric() GPUAt #undef GPUCA_CHOICE -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif // GPUCOMMONMATH_H diff --git a/GPU/Common/GPUCommonTransform3D.h b/GPU/Common/GPUCommonTransform3D.h index 4c5cca1f00ddc..2f517aded7eed 100644 --- a/GPU/Common/GPUCommonTransform3D.h +++ b/GPU/Common/GPUCommonTransform3D.h @@ -17,9 +17,7 @@ #include "GPUCommonDef.h" -namespace o2 -{ -namespace gpu +namespace o2::gpu { class Transform3D { @@ -79,7 +77,6 @@ class Transform3D kZZ = 10, kDZ = 11 }; }; -} // namespace gpu -} // namespace o2 +} // namespace o2::gpu #endif diff --git a/GPU/Common/GPUROOTCartesianFwd.h b/GPU/Common/GPUROOTCartesianFwd.h index 89b0aa44eb78c..c631637a3bc6e 100644 --- a/GPU/Common/GPUROOTCartesianFwd.h +++ b/GPU/Common/GPUROOTCartesianFwd.h @@ -46,9 +46,7 @@ class DefaultCoordinateSystemTag; } // namespace Math } // namespace ROOT -namespace o2 -{ -namespace math_utils +namespace o2::math_utils { namespace detail @@ -79,7 +77,6 @@ template using Vector3D = detail::GPUPoint3D; #endif -} // namespace math_utils -} // 
namespace o2 +} // namespace o2::math_utils #endif diff --git a/GPU/Common/GPUROOTSMatrixFwd.h b/GPU/Common/GPUROOTSMatrixFwd.h index 44b2254949df2..0159cc8922140 100644 --- a/GPU/Common/GPUROOTSMatrixFwd.h +++ b/GPU/Common/GPUROOTSMatrixFwd.h @@ -35,9 +35,7 @@ class MatRepStd; } // namespace Math } // namespace ROOT -namespace o2 -{ -namespace math_utils +namespace o2::math_utils { namespace detail @@ -72,7 +70,6 @@ template using MatRepStd = detail::MatRepStdGPU; #endif -} // namespace math_utils -} // namespace o2 +} // namespace o2::math_utils #endif From 30efe2e219398cdefbac4ea06d431b140d042bed Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 13:56:25 +0100 Subject: [PATCH 0285/1914] GPUCommonAlgorithm: Cleanup preprocessor defines --- GPU/Common/GPUCommonAlgorithm.h | 4 ++++ GPU/Common/GPUCommonAlgorithmThrust.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index 4970876f39e4c..5c19dda27f593 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -446,4 +446,8 @@ GPUdi() T warp_broadcast(T v, int32_t i) #endif +#ifdef GPUCA_ALGORITHM_STD +#undef GPUCA_ALGORITHM_STD +#endif + #endif diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 2da4b6a4f965d..6bf605a370050 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -52,7 +52,7 @@ GPUdi() void GPUCommonAlgorithm::sort(T* begin, T* end, const S& comp) } template -GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end) +GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end) // TODO: Try cub::BlockMergeSort { if (get_local_id(0) == 0) { sortDeviceDynamic(begin, end); @@ -94,4 +94,6 @@ GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begi } } // namespace o2::gpu +#undef GPUCA_THRUST_NAMESPACE + #endif From 7a706ae53b64cfddf58b69f231c13246b11dba26 Mon 
Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 16:14:36 +0100 Subject: [PATCH 0286/1914] GPUCommonAlgorithm: Use CUB for soring on device instead of Thrust --- GPU/Common/GPUCommonAlgorithmThrust.h | 18 ++++++++++++++++-- GPU/GPUTracking/Base/GPUGeneralKernels.h | 14 +++++++------- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithmThrust.h b/GPU/Common/GPUCommonAlgorithmThrust.h index 6bf605a370050..049071227a58e 100644 --- a/GPU/Common/GPUCommonAlgorithmThrust.h +++ b/GPU/Common/GPUCommonAlgorithmThrust.h @@ -23,11 +23,16 @@ #pragma GCC diagnostic pop #include "GPUCommonDef.h" +#include "GPUCommonHelpers.h" -#ifdef __CUDACC__ +#ifndef __HIPCC__ // CUDA #define GPUCA_THRUST_NAMESPACE thrust::cuda -#else +#define GPUCA_CUB_NAMESPACE cub +#include +#else // HIP #define GPUCA_THRUST_NAMESPACE thrust::hip +#define GPUCA_CUB_NAMESPACE hipcub +#include #endif namespace o2::gpu @@ -89,11 +94,20 @@ template GPUhi() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp) { thrust::device_ptr p(begin); +#if 0 // Use Thrust auto alloc = rec->getThrustVolatileDeviceAllocator(); thrust::sort(GPUCA_THRUST_NAMESPACE::par(alloc).on(rec->mInternals->Streams[stream]), p, p + N, comp); +#else // Use CUB + size_t tempSize = 0; + void* tempMem = nullptr; + GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream])); + tempMem = rec->AllocateVolatileDeviceMemory(tempSize); + GPUChkErrS(GPUCA_CUB_NAMESPACE::DeviceMergeSort::SortKeys(tempMem, tempSize, begin, N, comp, rec->mInternals->Streams[stream])); +#endif } } // namespace o2::gpu #undef GPUCA_THRUST_NAMESPACE +#undef GPUCA_CUB_NAMESPACE #endif diff --git a/GPU/GPUTracking/Base/GPUGeneralKernels.h b/GPU/GPUTracking/Base/GPUGeneralKernels.h index ce93e2e5eead8..eb816c91f5909 100644 --- a/GPU/GPUTracking/Base/GPUGeneralKernels.h +++ 
b/GPU/GPUTracking/Base/GPUGeneralKernels.h @@ -27,9 +27,9 @@ #endif #if defined(__HIPCC__) -#define GPUCA_CUB hipcub +#define GPUCA_CUB_NAMESPACE hipcub #else -#define GPUCA_CUB cub +#define GPUCA_CUB_NAMESPACE cub #endif namespace o2::gpu @@ -54,7 +54,7 @@ class GPUKernelTemplate struct GPUSharedMemoryWarpScan64 { // Provides the shared memory resources for warp wide CUB collectives #if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY) - typedef GPUCA_CUB::WarpScan WarpScan; + typedef GPUCA_CUB_NAMESPACE::WarpScan WarpScan; union { typename WarpScan::TempStorage cubWarpTmpMem; }; @@ -65,9 +65,9 @@ class GPUKernelTemplate struct GPUSharedMemoryScan64 { // Provides the shared memory resources for CUB collectives #if (defined(__CUDACC__) || defined(__HIPCC__)) && defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_HOSTONLY) - typedef GPUCA_CUB::BlockScan BlockScan; - typedef GPUCA_CUB::BlockReduce BlockReduce; - typedef GPUCA_CUB::WarpScan WarpScan; + typedef GPUCA_CUB_NAMESPACE::BlockScan BlockScan; + typedef GPUCA_CUB_NAMESPACE::BlockReduce BlockReduce; + typedef GPUCA_CUB_NAMESPACE::WarpScan WarpScan; union { typename BlockScan::TempStorage cubTmpMem; typename BlockReduce::TempStorage cubReduceTmpMem; @@ -110,6 +110,6 @@ class GPUitoa : public GPUKernelTemplate } // namespace o2::gpu -#undef GPUCA_CUB +#undef GPUCA_CUB_NAMESPACE #endif From 3e56e5536e6e20e0ae89bc4dd3b55a60dd4fdf17 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 24 Mar 2025 18:24:23 +0100 Subject: [PATCH 0287/1914] GPU: Improve synchronization during track-merging, no need to serialize the last kernel --- .../Global/GPUChainTrackingMerger.cxx | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 8fba328f8ac38..ffab3ba0be063 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -50,19 +50,13 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto gputpcgmmergertypes::GPUTPCGMBorderRange* range2 = MergerShadow.BorderRange(jSector) + *processors()->tpcTrackers[jSector].NTracks(); runKernel({{1, -WarpSize(), stream, deviceType}}, range1, n1, 0); runKernel({{1, -WarpSize(), stream, deviceType}}, range2, n2, 1); - deviceEvent* e = nullptr; - int32_t ne = 0; - if (i == n - 1) { // Synchronize all execution on stream 0 with the last kernel - ne = std::min(n, mRec->NStreams()); - for (int32_t j = 1; j < ne; j++) { - RecordMarker(&mEvents->sector[j], j); - } - e = &mEvents->sector[1]; - ne--; - stream = 0; - } - runKernel({GetGridAuto(stream, deviceType), krnlRunRangeNone, {nullptr, e, ne}}, i, withinSector, mergeMode); + runKernel({GetGridAuto(stream, deviceType)}, i, withinSector, mergeMode); + } + int32_t ne = std::min(n, mRec->NStreams()) - 1; // Stream 0 must wait for all streams, Note n > 1 + for (int32_t j = 0; j < ne; j++) { + RecordMarker(&mEvents->sector[j], j + 1); } + StreamWaitForEvents(0, &mEvents->sector[0], ne); } else { for (uint32_t i = 0; i < n; i++) { runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); From 8f6726b8474012cf20bbda11ed3af125ec8b7c33 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 11:26:44 +0100 Subject: [PATCH 0288/1914] GPU: Change GPUCA_DETERMINISTIC_MODE define to GPUCA_DETERMINISTIC_CODE macro, that can be used also in RTC --- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 9 +- GPU/Common/GPUCommonDef.h | 10 +- GPU/Common/GPUCommonMath.h | 54 +++--- .../Definitions/GPUDefGPUParameters.h | 9 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 160 +++++++----------- GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- GPU/GPUTracking/dEdx/GPUdEdx.cxx | 4 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 14 +- 10 
files changed, 116 insertions(+), 150 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index a66eba7c3bacb..583452d0c429c 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -43,13 +43,8 @@ #define THRUST_NAMESPACE thrust::hip #endif -#ifdef GPUCA_DETERMINISTIC_MODE -#define GPU_BLOCKS 1 -#define GPU_THREADS 1 -#else -#define GPU_BLOCKS 99999 -#define GPU_THREADS 99999 -#endif +#define GPU_BLOCKS GPUCA_DETERMINISTIC_CODE(1, 99999) +#define GPU_THREADS GPUCA_DETERMINISTIC_CODE(1, 99999) // O2 track model #include "ReconstructionDataFormats/Track.h" diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index b4a788e66a81c..78da104a0c029 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -68,10 +68,18 @@ #define GPUCA_DEBUG_STREAMER_CHECK(...) #endif -#ifndef GPUCA_RTC_SPECIAL_CODE +#ifndef GPUCA_RTC_SPECIAL_CODE // By default, we ignore special RTC code #define GPUCA_RTC_SPECIAL_CODE(...) 
#endif +#ifndef GPUCA_DETERMINISTIC_CODE + #ifdef GPUCA_DETERMINISTIC_MODE + #define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path + #else + #define GPUCA_DETERMINISTIC_CODE(det, indet) indet // otherwise the fast default code path + #endif +#endif + // API Definitions for GPU Compilation #include "GPUCommonDefAPI.h" diff --git a/GPU/Common/GPUCommonMath.h b/GPU/Common/GPUCommonMath.h index 58f046161aa8b..6d97250e7f2f4 100644 --- a/GPU/Common/GPUCommonMath.h +++ b/GPU/Common/GPUCommonMath.h @@ -248,7 +248,7 @@ GPUdi() uint32_t GPUCommonMath::Float2UIntReint(const float& x) #endif } -#ifdef GPUCA_DETERMINISTIC_MODE +GPUCA_DETERMINISTIC_CODE( // clang-format off GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), roundf(x), round(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return (int32_t)Round(x); } GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), (float)sqrt((double)x), sqrt(x)); } @@ -264,7 +264,7 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return GPUCA_CHOICE((float GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE((float)exp((double)x), (float)exp((double)x), exp(x)); } GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return GPUCA_CHOICE(std::isfinite(x), isfinite(x), isfinite(x)); } GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return GPUCA_CHOICE(std::isnan(x), isnan(x), isnan(x)); } -#else +, // !GPUCA_DETERMINISTIC_CODE GPUdi() constexpr float GPUCommonMath::Round(float x) { return GPUCA_CHOICE(roundf(x), rintf(x), rint(x)); } GPUdi() constexpr int32_t GPUCommonMath::Float2IntRn(float x) { return GPUCA_CHOICE((int32_t)Round(x), __float2int_rn(x), (int32_t)Round(x)); } GPUhdi() constexpr float GPUCommonMath::Sqrt(float x) { return GPUCA_CHOICE(sqrtf(x), sqrtf(x), sqrt(x)); } @@ -280,20 +280,22 @@ GPUdi() constexpr float GPUCommonMath::Log(float x) { return 
GPUCA_CHOICE(logf(x GPUdi() constexpr float GPUCommonMath::Exp(float x) { return GPUCA_CHOICE(expf(x), expf(x), exp(x)); } GPUdi() constexpr bool GPUCommonMath::Finite(float x) { return true; } GPUdi() constexpr bool GPUCommonMath::IsNaN(float x) { return false; } -#endif +) // clang-format on GPUhdi() void GPUCommonMath::SinCos(float x, float& s, float& c) { -#if defined(GPUCA_DETERMINISTIC_MODE) && !defined(__OPENCL__) - s = sin((double)x); - c = cos((double)x); -#elif !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__) - __sincosf(x, &s, &c); + GPUCA_DETERMINISTIC_CODE( // clang-format off + s = sin((double)x); + c = cos((double)x); + , // !GPUCA_DETERMINISTIC_CODE +#if !defined(GPUCA_GPUCODE_DEVICE) && defined(__APPLE__) + __sincosf(x, &s, &c); #elif !defined(GPUCA_GPUCODE_DEVICE) && (defined(__GNU_SOURCE__) || defined(_GNU_SOURCE) || defined(GPUCA_GPUCODE)) - sincosf(x, &s, &c); + sincosf(x, &s, &c); #else - GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); + GPUCA_CHOICE((void)((s = sinf(x)) + (c = cosf(x))), sincosf(x, &s, &c), s = sincos(x, &c)); #endif + ) // clang-format on } GPUhdi() void GPUCommonMath::SinCosd(double x, double& s, double& c) @@ -390,22 +392,26 @@ GPUdi() T GPUCommonMath::MaxWithRef(T x, T y, T z, T w, S refX, S refY, S refZ, GPUdi() float GPUCommonMath::InvSqrt(float _x) { -#if defined(GPUCA_DETERMINISTIC_MODE) || defined(__OPENCL__) - return 1.f / Sqrt(_x); -#elif defined(__CUDACC__) || defined(__HIPCC__) - return __frsqrt_rn(_x); -#elif defined(__FAST_MATH__) - return 1.f / sqrtf(_x); + GPUCA_DETERMINISTIC_CODE( // clang-format off + return 1.f / Sqrt(_x); + , // !GPUCA_DETERMINISTIC_CODE +#if defined(__CUDACC__) || defined(__HIPCC__) + return __frsqrt_rn(_x); +#elif defined(__OPENCL__) && defined(__clang__) + return 1.f / sqrt(_x); +#elif !defined(__OPENCL__) && (defined(__FAST_MATH__) || defined(__clang__)) + return 1.f / sqrtf(_x); #else - union { - float f; - int32_t i; - } x = {_x}; - 
const float xhalf = 0.5f * x.f; - x.i = 0x5f3759df - (x.i >> 1); - x.f = x.f * (1.5f - xhalf * x.f * x.f); - return x.f; + union { + float f; + int32_t i; + } x = {_x}; + const float xhalf = 0.5f * x.f; + x.i = 0x5f3759df - (x.i >> 1); + x.f = x.f * (1.5f - xhalf * x.f * x.f); + return x.f; #endif + ) // clang-format on } template <> diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 6d6645850408f..5b5a89cc8bc39 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -25,6 +25,7 @@ #error Please include GPUDef.h #endif +#include "GPUCommonDef.h" #include "GPUDefMacros.h" // GPU Run Configuration @@ -566,12 +567,8 @@ #ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #endif -#ifdef GPUCA_DETERMINISTIC_MODE -#undef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float -#undef GPUCA_DEDX_STORAGE_TYPE -#define GPUCA_DEDX_STORAGE_TYPE float -#endif +#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) +#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) #ifndef GPUCA_WARP_SIZE #ifdef GPUCA_GPUCODE diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index c8c844eee748a..288a24dee5d99 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -723,17 +723,9 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea if (iThread == 0) { if (iBlock == 0) { -#ifdef GPUCA_DETERMINISTIC_MODE - GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId); }); -#else - GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; }); -#endif + GPUCommonAlgorithm::sortDeviceDynamic(range1, range1 + N1, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); } else if (iBlock == 1) { -#ifdef GPUCA_DETERMINISTIC_MODE - GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); }); -#else - GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; }); -#endif + GPUCommonAlgorithm::sortDeviceDynamic(range2, range2 + N2, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); } } #else @@ -749,21 +741,13 @@ namespace // anonymous struct MergeBorderTracks_compMax { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_DETERMINISTIC_MODE - return (a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId); -#else - return a.fMax < b.fMax; -#endif + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); } }; struct MergeBorderTracks_compMin { GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { -#ifdef GPUCA_DETERMINISTIC_MODE - return (a.fMin != b.fMin) ? (a.fMin < b.fMin) : (a.fId < b.fId); -#else - return a.fMin < b.fMin; -#endif + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); } }; } // anonymous namespace @@ -904,11 +888,7 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<2>(int32_t nBlocks, int32_t nThrea mTrackLinks[b1.TrackID()] = iBest2; if (mergeMode > 0) { -#ifdef GPUCA_DETERMINISTIC_MODE - CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()); -#else - mTrackLinks[iBest2] = b1.TrackID(); -#endif + GPUCA_DETERMINISTIC_CODE(CAMath::AtomicMax(&mTrackLinks[iBest2], b1.TrackID()), mTrackLinks[iBest2] = b1.TrackID()); } } // GPUInfo("STAT: sectors %d, %d: all %d merged %d", iSector1, iSector2, statAll, statMerged); @@ -1467,14 +1447,7 @@ struct GPUTPCGMMerger_CompareClusterIdsLooper { if (a1.row != b1.row) { return ((a1.row > b1.row) ^ ((a.leg - leg) & 1) ^ outwards); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a1.id != b1.id) { - return (a1.id > b1.id); - } - return aa > bb; -#else - return a1.id > b1.id; -#endif + return GPUCA_DETERMINISTIC_CODE((a1.id != b1.id) ? (a1.id > b1.id) : (aa > bb), a1.id > b1.id); } }; @@ -1488,14 +1461,7 @@ struct GPUTPCGMMerger_CompareClusterIds { if (a.row != b.row) { return (a.row > b.row); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a.id != b.id) { - return (a.id > b.id); - } - return aa > bb; -#else - return (a.id > b.id); -#endif + return GPUCA_DETERMINISTIC_CODE((a.id != b.id) ? 
(a.id > b.id) : (aa > bb), a.id > b.id); } }; } // anonymous namespace @@ -1567,20 +1533,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread // unpack and sort clusters if (nParts > 1 && leg == 0) { GPUCommonAlgorithm::sort(trackParts, trackParts + nParts, [](const GPUTPCGMSectorTrack* a, const GPUTPCGMSectorTrack* b) { -#ifdef GPUCA_DETERMINISTIC_MODE - if (a->X() != b->X()) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a->X() != b->X()) { + return (a->X() > b->X()); + } + if (a->Y() != b->Y()) { + return (a->Y() > b->Y()); + } + if (a->Z() != b->Z()) { + return (a->Z() > b->Z()); + } + return a->QPt() > b->QPt(); + , // !GPUCA_DETERMINISTIC_CODE return (a->X() > b->X()); - } - if (a->Y() != b->Y()) { - return (a->Y() > b->Y()); - } - if (a->Z() != b->Z()) { - return (a->Z() > b->Z()); - } - return a->QPt() > b->QPt(); -#else - return (a->X() > b->X()); -#endif + ) // clang-format on }); } @@ -1832,20 +1798,18 @@ struct GPUTPCGMMergerSortTracks_comp { if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a.NClusters() != b.NClusters()) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE return a.NClusters() > b.NClusters(); - } - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; -#else - return a.NClusters() > b.NClusters(); -#endif + ) // 
clang-format on } }; @@ -1856,17 +1820,16 @@ struct GPUTPCGMMergerSortTracksQPt_comp { { const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; -#ifdef GPUCA_DETERMINISTIC_MODE - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); -#else - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); -#endif + ) // clang-format on } }; } // anonymous namespace @@ -1901,20 +1864,18 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ if (a.Legs() != b.Legs()) { return a.Legs() > b.Legs(); } -#ifdef GPUCA_DETERMINISTIC_MODE - if (a.NClusters() != b.NClusters()) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE return a.NClusters() > b.NClusters(); - } - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > 
CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; -#else - return a.NClusters() > b.NClusters(); -#endif + ) // clang-format on }; GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp); @@ -1931,17 +1892,16 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; -#ifdef GPUCA_DETERMINISTIC_MODE - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } - if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); -#else - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); -#endif + ) // clang-format on }; GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 4e225a61661c2..238b04510862e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -32,7 +32,7 @@ enum attachTypes { attachAttached = 0x40000000, struct InterpolationErrorHit { float posY, posZ; - 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE errorY, errorZ; + GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; }; struct InterpolationErrors { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 0c171a74d4e42..f1aac3da9a7a2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -663,7 +663,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, GPUCA_DEBUG_STREAMER_CHECK(if (debugVals) { debugVals->err2Y = err2Y; debugVals->err2Z = err2Z; }); if (rejectChi2 >= rejectInterFill) { - if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)0) { + if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { rejectChi2 = rejectDirect; } else { int32_t retVal = InterpolateReject(param, posY, posZ, clusterState, rejectChi2, inter, err2Y, err2Z); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index d235b3398c062..29524fb80ace0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -308,7 +308,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? 
(GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 - if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)0) { + if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { const float Iz0 = interpolation.hit[ihit].posY - mP[0]; const float Iz1 = interpolation.hit[ihit].posZ - mP[1]; float Iw0 = mC[2] + (float)interpolation.hit[ihit].errorZ; diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index b7da0de4c0e29..fd2aeda2828e3 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -55,7 +55,7 @@ GPUd() void GPUdEdx::computedEdx(GPUdEdxInfo& GPUrestrict() output, const GPUPar output.NHitsSubThresholdOROC3 = countOROC3; } -GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) +GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) { trunclow = count * trunclow / 128; trunchigh = count * trunchigh / 128; @@ -65,7 +65,7 @@ GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE* GPUrestrict() ar CAAlgo::sort(array, array + count); float mean = 0; for (int32_t i = trunclow; i < trunchigh; i++) { - mean += (float)array[i] * (1.f / scalingFactor::factor); + mean += (float)array[i] * (1.f / scalingFactor::factor); } return (mean / (trunchigh - trunclow)); } diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index bcd75af468c28..4d3b652bdc5d1 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -37,7 +37,7 @@ class GPUdEdx GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); private: - GPUd() float GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE* array, int32_t count, int32_t trunclow, int32_t trunchigh); + 
GPUd() float GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); GPUd() void checkSubThresh(int32_t roc); template @@ -62,8 +62,8 @@ class GPUdEdx static constexpr int32_t MAX_NCL = GPUCA_ROW_COUNT; // Must fit in mNClsROC (uint8_t)! - GPUCA_DEDX_STORAGE_TYPE mChargeTot[MAX_NCL]; // No need for default, just some memory - GPUCA_DEDX_STORAGE_TYPE mChargeMax[MAX_NCL]; // No need for default, just some memory + GPUCA_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory + GPUCA_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory float mSubThreshMinTot = 0.f; float mSubThreshMinMax = 0.f; uint8_t mNClsROC[4] = {0}; @@ -78,8 +78,8 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) if (roc != mLastROC) { if (mNSubThresh && mCount + mNSubThresh <= MAX_NCL) { for (int32_t i = 0; i < mNSubThresh; i++) { - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); } mNClsROC[mLastROC] += mNSubThresh; mNClsROCSubThresh[mLastROC] += mNSubThresh; @@ -151,8 +151,8 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qmax /= residualGainMapGain; qtot /= residualGainMapGain; - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE)(qtot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE)(qmax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(qmax * 
scalingFactor::factor + scalingFactor::round); mNClsROC[roc]++; if (qtot < mSubThreshMinTot) { mSubThreshMinTot = qtot; From d4a5ca78d81244bad4709591011aed486600b248 Mon Sep 17 00:00:00 2001 From: swenzel Date: Tue, 25 Mar 2025 13:02:28 +0100 Subject: [PATCH 0289/1914] GeneratorHybrid: improve unit treatment So far, units are treated solely in the Generator::addTrack function. This works well for fundamental generators. However, the hybrid generator is a meta generator potentially consisting of a collection of underlying generators that may have completely different units. This may currently lead to wrong generator output, in certain cases. This commit fixes these bugs and introduces unit handling within GeneratorHybrid. --- Generators/include/Generators/Generator.h | 4 ++ .../include/Generators/GeneratorHybrid.h | 1 - Generators/src/GeneratorHybrid.cxx | 60 +++++++++++++++---- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/Generators/include/Generators/Generator.h b/Generators/include/Generators/Generator.h index 640cc80931862..bd35a00793e2d 100644 --- a/Generators/include/Generators/Generator.h +++ b/Generators/include/Generators/Generator.h @@ -78,9 +78,13 @@ class Generator : public FairGenerator /** setters **/ void setMomentumUnit(double val) { mMomentumUnit = val; }; + double getMomentumUnit() const { return mMomentumUnit; } void setEnergyUnit(double val) { mEnergyUnit = val; }; + double getEnergyUnit() const { return mEnergyUnit; } void setPositionUnit(double val) { mPositionUnit = val; }; + double getPositionUnit() const { return mPositionUnit; } void setTimeUnit(double val) { mTimeUnit = val; }; + double getTimeUnit() const { return mTimeUnit; } void setBoost(Double_t val) { mBoost = val; }; void setTriggerMode(ETriggerMode_t val) { mTriggerMode = val; }; void addTrigger(Trigger trigger) { mTriggers.push_back(trigger); }; diff --git a/Generators/include/Generators/GeneratorHybrid.h b/Generators/include/Generators/GeneratorHybrid.h index 
955240d6a28fa..b92437b02d874 100644 --- a/Generators/include/Generators/GeneratorHybrid.h +++ b/Generators/include/Generators/GeneratorHybrid.h @@ -54,7 +54,6 @@ class GeneratorHybrid : public Generator { public: - GeneratorHybrid() = default; GeneratorHybrid(const std::string& inputgens); ~GeneratorHybrid(); diff --git a/Generators/src/GeneratorHybrid.cxx b/Generators/src/GeneratorHybrid.cxx index 729d69527c384..83a694703c259 100644 --- a/Generators/src/GeneratorHybrid.cxx +++ b/Generators/src/GeneratorHybrid.cxx @@ -25,6 +25,12 @@ namespace eventgen GeneratorHybrid::GeneratorHybrid(const std::string& inputgens) { + // This generator has trivial unit conversions + setTimeUnit(1.); + setPositionUnit(1.); + setMomentumUnit(1.); + setEnergyUnit(1.); + if (!parseJSON(inputgens)) { LOG(fatal) << "Failed to parse JSON configuration from input generators"; exit(1); @@ -382,6 +388,27 @@ bool GeneratorHybrid::importParticles() } } } + + auto unit_transformer = [](auto& p, auto pos_unit, auto time_unit, auto en_unit, auto mom_unit) { + p.SetMomentum(p.Px() * mom_unit, p.Py() * mom_unit, p.Pz() * mom_unit, p.Energy() * en_unit); + p.SetProductionVertex(p.Vx() * pos_unit, p.Vy() * pos_unit, p.Vz() * pos_unit, p.T() * time_unit); + }; + + auto index_transformer = [](auto& p, int offset) { + for (int i = 0; i < 2; ++i) { + if (p.GetMother(i) != -1) { + const auto newindex = p.GetMother(i) + offset; + p.SetMother(i, newindex); + } + } + if (p.GetNDaughters() > 0) { + for (int i = 0; i < 2; ++i) { + const auto newindex = p.GetDaughter(i) + offset; + p.SetDaughter(i, newindex); + } + } + }; + // Clear particles and event header mParticles.clear(); mMCEventHeader.clearInfo(); @@ -391,23 +418,20 @@ bool GeneratorHybrid::importParticles() LOG(info) << "Importing particles for task " << subIndex; auto subParticles = gens[subIndex]->getParticles(); + auto time_unit = gens[subIndex]->getTimeUnit(); + auto pos_unit = gens[subIndex]->getPositionUnit(); + auto mom_unit = 
gens[subIndex]->getMomentumUnit(); + auto energy_unit = gens[subIndex]->getEnergyUnit(); + // The particles carry mother and daughter indices, which are relative // to the sub-generator. We need to adjust these indices to reflect that particles // are now embedded into a cocktail. auto offset = mParticles.size(); for (auto& p : subParticles) { - for (int i = 0; i < 2; ++i) { - if (p.GetMother(i) != -1) { - const auto newindex = p.GetMother(i) + offset; - p.SetMother(i, newindex); - } - } - if (p.GetNDaughters() > 0) { - for (int i = 0; i < 2; ++i) { - const auto newindex = p.GetDaughter(i) + offset; - p.SetDaughter(i, newindex); - } - } + // apply the mother-daugher index transformation + index_transformer(p, offset); + // apply unit transformation of sub-generator + unit_transformer(p, pos_unit, time_unit, energy_unit, mom_unit); } mParticles.insert(mParticles.end(), subParticles.begin(), subParticles.end()); @@ -420,6 +444,18 @@ bool GeneratorHybrid::importParticles() LOG(info) << "Importing particles for task " << genIndex; // at this moment the mIndex-th generator is ready to be used mParticles = gens[genIndex]->getParticles(); + + auto time_unit = gens[genIndex]->getTimeUnit(); + auto pos_unit = gens[genIndex]->getPositionUnit(); + auto mom_unit = gens[genIndex]->getMomentumUnit(); + auto energy_unit = gens[genIndex]->getEnergyUnit(); + + // transform units to units of the hybrid generator + for (auto& p : mParticles) { + // apply unit transformation + unit_transformer(p, pos_unit, time_unit, energy_unit, mom_unit); + } + // fetch the event Header information from the underlying generator gens[genIndex]->updateHeader(&mMCEventHeader); mInputTaskQueue.push(genIndex); From 614112ed251d225b96f7476104f427568b359f8e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 16:20:42 +0100 Subject: [PATCH 0290/1914] GPU RTC: Add deterministic mode --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 18 ++++++++++++------ .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx 
| 3 +++ .../Base/cuda/GPUReconstructionCUDArtc.cu | 1 + GPU/GPUTracking/Base/hip/CMakeLists.txt | 18 ++++++++++++------ GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 99c59afd2011a..10b37496a2a62 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -85,8 +85,7 @@ add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR} add_custom_command( OUTPUT ${GPU_RTC_BIN}.command COMMAND echo -n "${CMAKE_CUDA_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -fatbin" > ${GPU_RTC_BIN}.command - COMMAND_EXPAND_LISTS - VERBATIM + COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing CUDA RTC command file ${GPU_RTC_BIN}.command" ) create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) @@ -94,13 +93,20 @@ create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) add_custom_command( OUTPUT ${GPU_RTC_BIN}.command.arch COMMAND echo -n "${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing CUDA RTC ARCH file ${GPU_RTC_BIN}.command.arch" + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing CUDA RTC ARCH command file ${GPU_RTC_BIN}.command.arch" ) create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) -set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command.no_fast_math + COMMAND echo -n "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" > ${GPU_RTC_BIN}.command.no_fast_math + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing CUDA RTC NO_FAST_MATH command file ${GPU_RTC_BIN}.command.arch" +) +create_binary_resource(${GPU_RTC_BIN}.command.no_fast_math ${GPU_RTC_BIN}.command.no_fast_math.o) + +set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o 
${GPU_RTC_BIN}.command.arch.o ${GPU_RTC_BIN}.command.no_fast_math.o) # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 1a4721035818e..e789dc9b9ebc3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -29,11 +29,13 @@ using namespace o2::gpu; QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_src); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch); +QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math); int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + std::string(mProcessingSettings.rtc.deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n") + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; @@ -52,6 +54,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) std::string baseCommand = (mProcessingSettings.RTCprependCommand != "" ? (mProcessingSettings.RTCprependCommand + " ") : ""); baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len)); baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? 
mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); + baseCommand += mProcessingSettings.rtc.deterministic ? (std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len)) : std::string(""); char shasource[21], shaparam[21], shacmd[21], shakernels[21]; if (mProcessingSettings.rtc.cacheOutput) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 31f9c54c5e7f8..2b6ec52f25831 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -15,6 +15,7 @@ #define GPUCA_GPUCODE_GENRTC #define GPUCA_GPUCODE_COMPILEKERNELS #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) +#define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 16e6e72d56e9a..33963d72be9ab 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -123,8 +123,7 @@ add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ add_custom_command( OUTPUT ${GPU_RTC_BIN}.command COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command - COMMAND_EXPAND_LISTS - VERBATIM + COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" ) create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) @@ -132,13 +131,20 @@ create_binary_resource(${GPU_RTC_BIN}.command ${GPU_RTC_BIN}.command.o) add_custom_command( OUTPUT ${GPU_RTC_BIN}.command.arch COMMAND echo -n 
"${GPU_RTC_FLAGS_ARCH}" > ${GPU_RTC_BIN}.command.arch - COMMAND_EXPAND_LISTS - VERBATIM - COMMENT "Preparing HIP RTC ARCH file ${GPU_RTC_BIN}.command.arch" + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing HIP RTC ARCH command file ${GPU_RTC_BIN}.command.arch" ) create_binary_resource(${GPU_RTC_BIN}.command.arch ${GPU_RTC_BIN}.command.arch.o) -set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o) +add_custom_command( + OUTPUT ${GPU_RTC_BIN}.command.no_fast_math + COMMAND echo -n "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" > ${GPU_RTC_BIN}.command.no_fast_math + COMMAND_EXPAND_LISTS VERBATIM + COMMENT "Preparing HIP RTC NO_FAST_MATH command file ${GPU_RTC_BIN}.command.no_fast_math" +) +create_binary_resource(${GPU_RTC_BIN}.command.no_fast_math ${GPU_RTC_BIN}.command.no_fast_math.o) + +set(SRCS ${SRCS} ${GPU_RTC_BIN}.src.o ${GPU_RTC_BIN}.command.o ${GPU_RTC_BIN}.command.arch.o ${GPU_RTC_BIN}.command.no_fast_math.o) # -------------------------------- End RTC ------------------------------------------------------- if(ALIGPU_BUILD_TYPE STREQUAL "O2") diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index a1f650a2bc56e..e0c5f845a475e 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -208,6 +208,7 @@ BeginSubConfig(GPUSettingsProcessingRTC, rtc, configStandalone.proc, "RTC", 0, " AddOption(cacheOutput, bool, false, "", 0, "Cache RTC compilation results") AddOption(optConstexpr, bool, true, "", 0, "Replace constant variables by static constexpr expressions") AddOption(optSpecialCode, int8_t, -1, "", 0, "Insert GPUCA_RTC_SPECIAL_CODE special code during RTC") +AddOption(deterministic, bool, false, "", 0, "Compile RTC in deterministic mode, with NO_FAST_MATH flags and GPUCA_DETERMINISTIC_MODE define") AddOption(compilePerKernel, bool, true, "", 0, "Run one RTC compilation per kernel") AddOption(enable, bool, false, "", 0, 
"Use RTC to optimize GPU code") AddOption(runTest, int32_t, 0, "", 0, "Do not run the actual benchmark, but just test RTC compilation (1 full test, 2 test only compilation)") From 8459848bd63c60c36b8b455fd830785f1f239dfe Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 17:25:00 +0100 Subject: [PATCH 0291/1914] GPU RTC: Some cosmetic CMake changes to make RTC command lines more homogeneous --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu | 1 + GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 10b37496a2a62..c1cc63aa0fc02 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -84,7 +84,7 @@ add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR} add_custom_command( OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${CMAKE_CUDA_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -fatbin" > ${GPU_RTC_BIN}.command + COMMAND echo -n "${CMAKE_CUDA_COMPILER} -forward-unknown-to-host-compiler ${GPU_RTC_DEFINES} ${GPU_RTC_FLAGS_SEPARATED} -x cu -fatbin" > ${GPU_RTC_BIN}.command COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing CUDA RTC command file ${GPU_RTC_BIN}.command" ) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 2b6ec52f25831..919b5c11477ef 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -16,6 +16,7 @@ #define GPUCA_GPUCODE_COMPILEKERNELS #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) 
GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) +// GPUReconstructionCUDAIncludesHost.h auto-prepended without preprocessor running #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 33963d72be9ab..dd47f4fe8e78b 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -122,7 +122,7 @@ add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/ add_custom_command( OUTPUT ${GPU_RTC_BIN}.command - COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_FLAGS_SEPARATED} ${GPU_RTC_DEFINES} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command + COMMAND echo -n "${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_FLAGS_SEPARATED} -x hip --cuda-device-only" > ${GPU_RTC_BIN}.command COMMAND_EXPAND_LISTS VERBATIM COMMENT "Preparing HIP RTC command file ${GPU_RTC_BIN}.command" ) From 9e23cd32483a055c5770c572bc3bccb54d6be913 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 17:25:15 +0100 Subject: [PATCH 0292/1914] GPU RTC: Fix float precision for constexpr optimization --- GPU/GPUTracking/utils/qconfig.h | 12 ++++++------ GPU/GPUTracking/utils/qconfig_helpers.h | 16 +++++++++++----- GPU/GPUTracking/utils/qconfigrtc.h | 1 + 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/utils/qconfig.h b/GPU/GPUTracking/utils/qconfig.h index 79a9bd757b531..bc755e583c3b7 100644 --- a/GPU/GPUTracking/utils/qconfig.h +++ b/GPU/GPUTracking/utils/qconfig.h @@ -250,12 +250,12 @@ enum qConfigRetVal { qcrOK = 0, #define AddVariable(name, type, default) out << qon_mxstr(type) << " " << qon_mxstr(name) << ";\n"; #define AddOptionArray(name, type, count, default, optname, optnameshort, help, ...) out << qon_mxstr(type) << " " << qon_mxstr(name) << "[" << qon_mxstr(count) << "];\n"; #define AddOptionVec(name, type, optname, optnameshort, help, ...)
out << "std::vector<" << qon_mxstr(type) << "> " << qon_mxstr(name) << ";\n"; -#define AddVariableRTC(name, type, default) \ - if (useConstexpr) { \ - out << "static constexpr " << qon_mxstr(type) << " " << qon_mxstr(name) << " = " << qConfig::print_type(std::get(tSrc)->name) << ";\n"; \ - out << qon_mxstr(type) << " " << qon_mxstr(qon_mxcat(_dummy_, name)) << ";\n"; \ - } else { \ - AddOption(name, type, default, optname, optnameshort, help); \ +#define AddVariableRTC(name, type, default) \ + if (useConstexpr) { \ + out << "static constexpr " << qon_mxstr(type) << " " << qon_mxstr(name) << " = " << qConfig::print_type(std::get(tSrc)->name, true) << ";\n"; \ + out << qon_mxstr(type) << " " << qon_mxstr(qon_mxcat(_dummy_, name)) << ";\n"; \ + } else { \ + AddOption(name, type, default, optname, optnameshort, help); \ } #define AddOptionRTC(name, type, default, optname, optnameshort, help, ...) AddVariableRTC(name, type, default) #define AddOptionArrayRTC(name, type, count, default, optname, optnameshort, help, ...) 
\ diff --git a/GPU/GPUTracking/utils/qconfig_helpers.h b/GPU/GPUTracking/utils/qconfig_helpers.h index e721f08ccfa90..51c89b759e9cf 100644 --- a/GPU/GPUTracking/utils/qconfig_helpers.h +++ b/GPU/GPUTracking/utils/qconfig_helpers.h @@ -17,6 +17,7 @@ #include #include +#include #define qon_mcat(a, b) a##b #define qon_mxcat(a, b) qon_mcat(a, b) @@ -30,29 +31,34 @@ namespace qConfig { template -inline std::string print_type(T val) +inline std::string print_type(T val, bool precise = false) { std::ostringstream s; + if constexpr (std::is_same_v || std::is_same_v) { + if (precise) { + s << std::hexfloat; + } + } s << val; return s.str(); }; template <> -inline std::string print_type(char val) +inline std::string print_type(char val, bool precise) { return std::to_string(val); }; template <> -inline std::string print_type(int8_t val) +inline std::string print_type(int8_t val, bool precise) { return std::to_string(val); }; template <> -inline std::string print_type(uint8_t val) +inline std::string print_type(uint8_t val, bool precise) { return std::to_string(val); }; template <> -inline std::string print_type(bool val) +inline std::string print_type(bool val, bool precise) { return val ? 
"true" : "false"; }; diff --git a/GPU/GPUTracking/utils/qconfigrtc.h b/GPU/GPUTracking/utils/qconfigrtc.h index 97365a6412c5d..8d33ef0ac6355 100644 --- a/GPU/GPUTracking/utils/qconfigrtc.h +++ b/GPU/GPUTracking/utils/qconfigrtc.h @@ -31,6 +31,7 @@ template static std::string qConfigPrintRtc(const T& tSrc, bool useConstexpr) { std::stringstream out; + out << std::hexfloat; #define QCONFIG_PRINT_RTC #include "qconfig.h" #undef QCONFIG_PRINT_RTC From 2ddad04f95a74fda82460408df82a3ae8c35aae8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 25 Mar 2025 21:38:52 +0100 Subject: [PATCH 0293/1914] GPU CMake: Clean up and collect files / kernels to be compiled in deterministic mode in one place --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 14 +++------- GPU/GPUTracking/Base/hip/CMakeLists.txt | 14 +++------- GPU/GPUTracking/CMakeLists.txt | 26 +++++++++++-------- .../Standalone/Benchmark/CMakeLists.txt | 3 +-- GPU/GPUTracking/cmake/kernel_helpers.cmake | 23 ++++++++++++++++ GPU/GPUTracking/display/CMakeLists.txt | 3 ++- 6 files changed, 47 insertions(+), 36 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index c1cc63aa0fc02..de54f09fdc2e1 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -122,8 +122,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) - target_compile_definitions(${targetName} PUBLIC $) - install(FILES ${HDRS} DESTINATION include/GPU) endif() @@ -131,11 +129,14 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") set(TMP_BASELIB GPUTracking) add_library(${MODULE} SHARED ${SRCS}) + add_library(O2::${MODULE} ALIAS ${MODULE}) target_link_libraries(${MODULE} PUBLIC ${TMP_BASELIB}) install(TARGETS GPUTrackingCUDA) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) endif() +target_compile_definitions(${targetName} PRIVATE $) + # Setting target architecture and adding GPU libraries 
target_link_libraries(${targetName} PRIVATE cuda cudart nvrtc) set_target_cuda_arch(${targetName}) @@ -170,15 +171,6 @@ elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel") target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o) set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingCUDAKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true) - # Disable all non-deterministic floating point to make TPC track model encoding / decoding precise - set_source_files_properties(${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step0attached.cu - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step1unattached.cu - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step0attached.cu - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step1unattached.cu - TARGET_DIRECTORY ${targetName} - PROPERTIES - COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_CUDA_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "CUDA RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index dd47f4fe8e78b..43259decef956 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -160,8 +160,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) - target_compile_definitions(${targetName} PUBLIC $) - install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP @@ -175,11 +173,14 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName "${MODULE}") set(TMP_BASELIB GPUTracking) add_library(${MODULE} SHARED ${SRCS}) + add_library(O2::${MODULE} ALIAS ${MODULE}) target_link_libraries(${MODULE} PUBLIC ${TMP_BASELIB}) install(TARGETS GPUTrackingHIP) 
include_directories(${GPUCA_HIP_SOURCE_DIR}) endif() +target_compile_definitions(${targetName} PRIVATE $) + add_library(${MODULE}_CXX OBJECT ${SRCS_CXX}) # Adding a C++ library for the .cxx code of the HIP library, such that it does not link to HIP libraries, and CMake HIP Language doesn't add HIP compile flags. target_compile_definitions(${MODULE}_CXX PRIVATE $) target_include_directories(${MODULE}_CXX PRIVATE $) @@ -228,15 +229,6 @@ elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel") target_sources(${targetName} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o) set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/GPUTrackingHIPKernelModules.o PROPERTIES EXTERNAL_OBJECT true GENERATED true) - # Disable all non-deterministic floating point to make TPC track model encoding / decoding precise - set_source_files_properties(${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step0attached.hip - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCCompressionKernels_step1unattached.hip - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step0attached.hip - ${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_GPUTPCDecompressionKernels_step1unattached.hip - TARGET_DIRECTORY ${targetName} - PROPERTIES - COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") elseif(GPUCA_HIP_COMPILE_MODE STREQUAL "rdc") message(FATAL_ERROR "HIP RDC compilation of GPUReconstruction ios not yet working!") target_compile_definitions(${targetName} PRIVATE GPUCA_KERNEL_COMPILE_MODE=2) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index dedfcf5953394..ba2b9d05a3192 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -372,16 +372,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() endif() -# Disable all non-deterministic floating point to make TPC track model encoding / decoding precise -set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx - 
DataCompression/GPUTPCCompressionKernels.cxx - DataCompression/TPCClusterDecompressor.cxx - DataCompression/GPUTPCDecompressionKernels.cxx - TARGET_DIRECTORY ${targetName} - PROPERTIES - COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") - # GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h) set_source_files_properties(Base/GPUReconstructionLibrary.cxx @@ -423,5 +413,19 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2" OR ALIGPU_BUILD_TYPE STREQUAL "Standalone") endif() if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) - target_compile_definitions(${targetName} PUBLIC GPUCA_DETERMINISTIC_MODE) + target_compile_definitions(${targetName} PRIVATE GPUCA_DETERMINISTIC_MODE) endif() + +# Disable all non-deterministic floating point to make TPC track model encoding / decoding precise +set_source_files_properties(DataCompression/GPUTPCCompressionTrackModel.cxx + DataCompression/GPUTPCCompressionKernels.cxx + DataCompression/TPCClusterDecompressor.cxx + DataCompression/GPUTPCDecompressionKernels.cxx + TARGET_DIRECTORY ${targetName} + PROPERTIES + COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") +o2_gpu_kernel_set_deterministic(GPUTPCCompressionKernels_step0attached + GPUTPCCompressionKernels_step1unattached + GPUTPCDecompressionKernels_step0attached + GPUTPCDecompressionKernels_step1unattached) diff --git a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt index e418d94b62cb2..eeafcfc44142d 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/Benchmark/CMakeLists.txt @@ -28,10 +28,9 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") add_executable(ca ${SRCS}) set(targetName ca) 
target_link_libraries(${targetName} PUBLIC GPUTracking) - endif() -target_compile_definitions(${targetName} PUBLIC $) +target_compile_definitions(${targetName} PRIVATE $) if(ROOT_FOUND) target_sources(${targetName} PRIVATE ../../qa/genEvents.cxx) diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 1f35b6fc468b2..e63b915640e8a 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -142,3 +142,26 @@ function(o2_gpu_kernel_file_list list) list(REMOVE_DUPLICATES TMP_FILE_LIST) set_property(TARGET O2_GPU_KERNELS PROPERTY O2_GPU_KERNELS_FILE_LIST_${list} "${TMP_FILE_LIST}") endfunction() + +function(o2_gpu_kernel_set_deterministic) + if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu" + TARGET_DIRECTORY O2::GPUTrackingCUDA + PROPERTIES + COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip" + TARGET_DIRECTORY O2::GPUTrackingHIP + PROPERTIES + COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + endforeach() + endif() +endfunction() diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index b3107dbec8c79..592ba3b38ff30 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -131,7 +131,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") PUBLIC_INCLUDE_DIRECTORIES . 
SOURCES ${SRCS} ${SRCS_NO_H}) - target_compile_definitions(${targetName} PRIVATE $) target_compile_definitions(${targetName} PRIVATE GPUCA_BUILD_EVENT_DISPLAY_GLFW GPUCA_DISPLAY_GL3W GPUCA_DISPLAY_OPENGL_CORE) install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION include/GPU) @@ -158,6 +157,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION displayTrackFilter) endif() +target_compile_definitions(${targetName} PRIVATE $) + message(STATUS "Building GPU Event Display (Vulkan ${GPUCA_EVENT_DISPLAY_VULKAN}, Wayland ${GPUCA_EVENT_DISPLAY_WAYLAND}, Freetype ${GPUCA_EVENT_DISPLAY_FREETYPE}, Fontconfig ${Fontconfig_FOUND}, Qt ${GPUCA_EVENT_DISPLAY_QT})") target_link_libraries(${targetName} PUBLIC ${GLFW_LIBRARIES} OpenGL::GL) target_include_directories(${targetName} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) From 8b6d22efaaccadcd2ca2d3c991ce692d5529cfe8 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Wed, 26 Mar 2025 10:17:13 +0100 Subject: [PATCH 0294/1914] Update CODEOWNERS (#14110) * Update CODEOWNERS Add @fprino to the ITS-related code owners. 
--- CODEOWNERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 3f6f4a9e42600..a22b122d0e6cd 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -34,7 +34,7 @@ /DataFormats/Detectors/GlobalTracking @shahor02 /DataFormats/Detectors/GlobalTrackingWorkflow @shahor02 /DataFormats/Detectors/HMPID @gvolpe79 -/DataFormats/Detectors/ITSMFT @mcoquet642 @mconcas @shahor02 +/DataFormats/Detectors/ITSMFT @fprino @mcoquet642 @mconcas @shahor02 /DataFormats/Detectors/MUON @AliceO2Group/muon-experts @shahor02 /DataFormats/Detectors/PHOS @peressounko @kharlov /DataFormats/Detectors/Passive @sawenzel @@ -65,7 +65,7 @@ /Detectors/GlobalTracking @shahor02 /Detectors/GlobalTrackingWorkflow @shahor02 /Detectors/HMPID @gvolpe79 -/Detectors/ITSMFT @mcoquet642 @mconcas @shahor02 +/Detectors/ITSMFT @fprino @mcoquet642 @mconcas @shahor02 /Detectors/MUON @AliceO2Group/muon-experts @shahor02 /Detectors/PHOS @peressounko @kharlov /Detectors/Passive @sawenzel From f26e72529547ff5eeca7889f6e0e6ccf1a9bf71d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 08:32:53 +0100 Subject: [PATCH 0295/1914] GPU: Cleanup unused template parameter --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 1 + GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu | 8 +++----- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index d96d5aad74622..9b6562d8e77ee 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -273,6 +273,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() if (mProcessingSettings.createO2Output > 1) { mProcessingSettings.createO2Output = 1; } + mProcessingSettings.rtc.deterministic = 1; } if (mProcessingSettings.deterministicGPUReconstruction && 
mProcessingSettings.debugLevel >= 6) { mProcessingSettings.nTPCClustererLanes = 1; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 175fd205153ea..75ac0b5f18327 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -618,7 +618,7 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; #define GPUCA_KRNL(x_class, ...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ + getRTCkernelNum(mInternals->kernelFunctions.size()); \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ if (mProcessingSettings.debugLevel >= 3) { \ diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 30bbc76d4c415..9b80880b4fb3c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -49,7 +49,7 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase template class backendInternal; - template + template static int32_t getRTCkernelNum(int32_t k = -1); void getRTCKernelCalls(std::vector& kernels); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f1f459fe021bc..f0c9ba46c4a56 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -55,7 +55,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, 
nullptr)); + GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -111,7 +111,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs +template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) { static int32_t num = k; @@ -121,9 +121,7 @@ int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) return num; } -#define GPUCA_KRNL(x_class, ...) \ - template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); \ - template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); +#define GPUCA_KRNL(x_class, ...) template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL From 95ae41ee037afd6c167e95a6bb98c1f92c389811 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 13:15:36 +0100 Subject: [PATCH 0296/1914] GPU: Unify creation of list of kernel names and kernel numbers --- .../Base/GPUReconstructionProcessing.cxx | 36 +++++++++++++++++++ .../Base/GPUReconstructionProcessing.h | 16 ++++----- .../Base/cuda/GPUReconstructionCUDA.cu | 8 ++--- .../Base/cuda/GPUReconstructionCUDA.h | 5 --- .../cuda/GPUReconstructionCUDAInternals.h | 1 - .../Base/cuda/GPUReconstructionCUDAKernels.cu | 16 +-------- .../Base/opencl/GPUReconstructionOCL.cxx | 2 +- .../Base/opencl/GPUReconstructionOCL.h | 2 -- .../opencl/GPUReconstructionOCLIncludesHost.h | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 19 ++-------- 10 files changed, 51 insertions(+), 56 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 51c48ebbfc0b2..074c4faeb2926 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -124,3 +124,39 @@ std::unique_ptr GPUReconstructionProc 
gpu_reconstruction_kernels::threadContext::threadContext() = default; gpu_reconstruction_kernels::threadContext::~threadContext() = default; + +template +uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t k) +{ + static int32_t num = k; + if (num < 0) { + throw std::runtime_error("Internal Error - Kernel Number not Set"); + } + return num; +} + +namespace o2::gpu::internal +{ +static std::vector initKernelNames() +{ + std::vector retVal; +#define GPUCA_KRNL(x_class, ...) \ + GPUReconstructionProcessing::GetKernelNum(retVal.size()); \ + retVal.emplace_back(GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class))); +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + return retVal; +} +} // namespace o2::gpu::internal + +const std::vector GPUReconstructionProcessing::mKernelNames = o2::gpu::internal::initKernelNames(); + +#define GPUCA_KRNL(x_class, ...) \ + template uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t); \ + template <> \ + const char* GPUReconstructionProcessing::GetKernelName() \ + { \ + return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 43560616782db..4e763f07b4396 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -74,7 +74,10 @@ class GPUReconstructionProcessing : public GPUReconstruction // Interface to query name of a kernel template - constexpr static const char* GetKernelName(); + static const char* GetKernelName(); + const std::string& GetKernelName(int32_t i) const { return mKernelNames[i]; } + template + static uint32_t GetKernelNum(int32_t k = -1); // Public queries for timers auto& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)]; } @@ -100,6 +103,8 @@ class GPUReconstructionProcessing : public GPUReconstruction 
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + static const std::vector mKernelNames; + int32_t mActiveHostKernelThreads = 0; // Number of currently active threads on the host for kernels uint32_t mNActiveThreadsOuterLoop = 1; // Number of threads currently running an outer loop @@ -174,15 +179,6 @@ HighResTimer& GPUReconstructionProcessing::getTimer(const char* name, int32_t nu return timer->timer[num]; } -#define GPUCA_KRNL(x_class, ...) \ - template <> \ - constexpr const char* GPUReconstructionProcessing::GetKernelName() \ - { \ - return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 75ac0b5f18327..f87d5c8189cdc 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -610,7 +610,7 @@ void GPUReconstructionCUDABackend::PrintKernelOccupancies() GPUChkErr(cuOccupancyMaxActiveBlocksPerMultiprocessor(&maxBlocks, *mInternals->kernelFunctions[i], threads, 0)); GPUChkErr(cuFuncGetAttribute(&nRegs, CU_FUNC_ATTRIBUTE_NUM_REGS, *mInternals->kernelFunctions[i])); GPUChkErr(cuFuncGetAttribute(&sMem, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, *mInternals->kernelFunctions[i])); - GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", mInternals->kernelNames[i].c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); + GPUInfo("Kernel: %50s Block size: %4d, Maximum active blocks: %3d, Suggested blocks: %3d, Regs: %3d, smem: %3d", GetKernelName(i).c_str(), threads, maxBlocks, suggestedBlocks, nRegs, sMem); } } @@ -618,9 +618,10 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) { uint32_t j = 0; #define GPUCA_KRNL(x_class, 
...) \ - getRTCkernelNum(mInternals->kernelFunctions.size()); \ + if (GetKernelNum() != j) { \ + GPUFatal("kernel numbers out of sync"); \ + } \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ - mInternals->kernelNames.emplace_back(GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class)))); \ if (mProcessingSettings.debugLevel >= 3) { \ GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ } \ @@ -628,7 +629,6 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) j++; #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL - if (j != mInternals->kernelModules.size()) { GPUFatal("Did not load all kernels (%u < %u)", j, (uint32_t)mInternals->kernelModules.size()); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 9b80880b4fb3c..a98b14a873ca0 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -46,11 +46,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); template gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); - template - class backendInternal; - - template - static int32_t getRTCkernelNum(int32_t k = -1); void getRTCKernelCalls(std::vector& kernels); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index c85d98d85420e..f3fc21243ef0e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -30,7 +30,6 @@ namespace o2::gpu struct GPUReconstructionCUDAInternals { std::vector> kernelModules; // module for RTC compilation std::vector> kernelFunctions; // vector of ptrs to RTC kernels - std::vector kernelNames; // names of kernels cudaStream_t Streams[GPUCA_MAX_STREAMS]; // Pointer to array of CUDA Streams static void getArgPtrs(const void** pArgs) {} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f0c9ba46c4a56..4b3f8a767226c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -55,7 +55,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet #endif pArgs[arg_offset] = &y.index; GPUReconstructionCUDAInternals::getArgPtrs(&pArgs[arg_offset + 1], args...); - GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[getRTCkernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); + GPUChkErr(cuLaunchKernel(*mInternals->kernelFunctions[GetKernelNum()], x.nBlocks, 1, 1, x.nThreads, 1, 1, 0, mInternals->Streams[x.stream], (void**)pArgs, nullptr)); } } @@ -111,20 +111,6 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs -int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k) -{ - static int32_t num = k; - if (num < 0) { - throw 
std::runtime_error("Invalid kernel"); - } - return num; -} - -#define GPUCA_KRNL(x_class, ...) template int32_t GPUReconstructionCUDABackend::getRTCkernelNum(int32_t k); -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& kernels) { #define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e92205b9864e6..e724f0f2cbfcd 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -399,7 +399,7 @@ int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime() clReleaseMemObject(mInternals->mem_gpu); clReleaseMemObject(mInternals->mem_constant); for (uint32_t i = 0; i < mInternals->kernels.size(); i++) { - clReleaseKernel(mInternals->kernels[i].first); + clReleaseKernel(mInternals->kernels[i]); } mInternals->kernels.clear(); } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 2abae229c74bb..29951cd43f167 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -56,8 +56,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase template int32_t AddKernel(); - template - uint32_t FindKernel(); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); template diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 97316cf9aa32e..0bb2f25093789 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -49,7 +49,7 @@ struct GPUReconstructionOCLInternals { cl_mem mem_host; cl_program program; - std::vector> kernels; + std::vector kernels; }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index ce6b6553ae1f7..fff69038c056f 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -58,20 +58,6 @@ void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs(args.s, vals...); }, args.v); } -template -inline uint32_t GPUReconstructionOCLBackend::FindKernel() -{ - std::string name(GetKernelName()); - - for (uint32_t k = 0; k < mInternals->kernels.size(); k++) { - if (mInternals->kernels[k].second == name) { - return (k); - } - } - GPUError("Could not find OpenCL kernel %s", name.c_str()); - throw ::std::runtime_error("Requested unsupported OpenCL kernel"); -} - template int32_t GPUReconstructionOCLBackend::AddKernel() { @@ -84,15 +70,14 @@ int32_t GPUReconstructionOCLBackend::AddKernel() GPUError("Error creating OPENCL Kernel: %s", name.c_str()); return 1; } - mInternals->kernels.emplace_back(krnl, name); + mInternals->kernels.emplace_back(krnl); return 0; } template S& GPUReconstructionOCLBackend::getKernelObject() { - static uint32_t krnl = FindKernel(); - return mInternals->kernels[krnl].first; + return mInternals->kernels[GetKernelNum()]; } int32_t GPUReconstructionOCLBackend::AddKernels() From 5f90f0cc2e80b9f801a72fce3f7c8ba68a27275a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 13:18:16 +0100 Subject: [PATCH 
0297/1914] GPU: Solve a todo to make the timer atomic flag a member variable --- .../Base/GPUReconstructionProcessing.cxx | 40 ++++++++----------- .../Base/GPUReconstructionProcessing.h | 3 ++ 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 074c4faeb2926..bae95ac8d3f38 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -57,17 +57,24 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr } } -namespace o2::gpu -{ -namespace // anonymous +uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) { -static std::atomic_flag timerFlag = ATOMIC_FLAG_INIT; // TODO: Should be a class member not global, but cannot be moved to header due to ROOT limitation -} // anonymous namespace -} // namespace o2::gpu + if (condition && mProcessingSettings.inKernelParallel != 1) { + mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? 
std::min(max, mMaxHostThreads) : mMaxHostThreads; + } else { + mNActiveThreadsOuterLoop = 1; + } + if (mProcessingSettings.debugLevel >= 5) { + printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); + } + return mNActiveThreadsOuterLoop; +} + +std::atomic_flag GPUReconstructionProcessing::mTimerFlag = ATOMIC_FLAG_INIT; GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step) { - while (timerFlag.test_and_set()) { + while (mTimerFlag.test_and_set()) { } if (mTimers.size() <= id) { mTimers.resize(id + 1); @@ -81,20 +88,20 @@ GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::insertTimer mTimers[id]->count++; } timerMeta* retVal = mTimers[id].get(); - timerFlag.clear(); + mTimerFlag.clear(); return retVal; } GPUReconstructionProcessing::timerMeta* GPUReconstructionProcessing::getTimerById(uint32_t id, bool increment) { timerMeta* retVal = nullptr; - while (timerFlag.test_and_set()) { + while (mTimerFlag.test_and_set()) { } if (mTimers.size() > id && mTimers[id]) { retVal = mTimers[id].get(); retVal->count += increment; } - timerFlag.clear(); + mTimerFlag.clear(); return retVal; } @@ -104,19 +111,6 @@ uint32_t GPUReconstructionProcessing::getNextTimerId() return id.fetch_add(1); } -uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) -{ - if (condition && mProcessingSettings.inKernelParallel != 1) { - mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? 
std::min(max, mMaxHostThreads) : mMaxHostThreads; - } else { - mNActiveThreadsOuterLoop = 1; - } - if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); - } - return mNActiveThreadsOuterLoop; -} - std::unique_ptr GPUReconstructionProcessing::GetThreadContext() { return std::make_unique(); diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4e763f07b4396..b0466efceac24 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -20,6 +20,7 @@ #include "utils/timer.h" #include +#include namespace o2::gpu { @@ -135,6 +136,8 @@ class GPUReconstructionProcessing : public GPUReconstruction uint32_t getNextTimerId(); timerMeta* getTimerById(uint32_t id, bool increment = true); timerMeta* insertTimer(uint32_t id, std::string&& name, int32_t J, int32_t num, int32_t type, RecoStep step); + + static std::atomic_flag mTimerFlag; }; template From fa403b41e99557774185f49657b0910bb2dbd327 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 08:33:25 +0100 Subject: [PATCH 0298/1914] GPU: Propagate list of noFastMathKernels to GPU RTC and apply special compile settings --- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 13 ++++-- GPU/GPUTracking/CMakeLists.txt | 4 ++ .../cmake/GPUNoFastMathKernels.template.h | 23 +++++++++++ GPU/GPUTracking/cmake/kernel_helpers.cmake | 40 +++++++++---------- 4 files changed, 57 insertions(+), 23 deletions(-) create mode 100644 GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index e789dc9b9ebc3..51d3bd4044e8d 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -31,11 +31,12 @@ 
QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch); QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math); +#include "GPUNoFastMathKernels.h" + int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + - std::string(mProcessingSettings.rtc.deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n") + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; @@ -54,7 +55,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) std::string baseCommand = (mProcessingSettings.RTCprependCommand != "" ? (mProcessingSettings.RTCprependCommand + " ") : ""); baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len)); baseCommand += std::string(" ") + (mProcessingSettings.RTCoverrideArchitecture != "" ? mProcessingSettings.RTCoverrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); - baseCommand += mProcessingSettings.rtc.deterministic ? (std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len)) : std::string(""); char shasource[21], shaparam[21], shacmd[21], shakernels[21]; if (mProcessingSettings.rtc.cacheOutput) { @@ -169,13 +169,20 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) kernel += mProcessingSettings.rtc.compilePerKernel ? 
kernels[i] : kernelsall; kernel += "}"; - if (fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() || + bool deterministic = mProcessingSettings.rtc.deterministic || o2::gpu::internal::noFastMathKernels.find(GetKernelName(i)) != o2::gpu::internal::noFastMathKernels.end(); + const std::string deterministicStr = std::string(deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n"); + + if (fwrite(deterministicStr.c_str(), 1, deterministicStr.size(), fp) != deterministicStr.size() || + fwrite(rtcparam.c_str(), 1, rtcparam.size(), fp) != rtcparam.size() || fwrite(_binary_GPUReconstructionCUDArtc_src_start, 1, _binary_GPUReconstructionCUDArtc_src_len, fp) != _binary_GPUReconstructionCUDArtc_src_len || fwrite(kernel.c_str(), 1, kernel.size(), fp) != kernel.size()) { throw std::runtime_error("Error writing file"); } fclose(fp); std::string command = baseCommand; + if (deterministic) { + command += std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len); + } command += " -c " + filename + "_" + std::to_string(i) + mRtcSrcExtension + " -o " + filename + "_" + std::to_string(i) + mRtcBinExtension; if (mProcessingSettings.debugLevel < 0) { command += " &> /dev/null"; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index ba2b9d05a3192..631f9f0edff4f 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -242,6 +242,10 @@ file(GENERATE OUTPUT include_gpu_onthefly/GPUReconstructionIncludesDeviceAll.h INPUT Base/GPUReconstructionIncludesDeviceAll.template.h ) +file(GENERATE + OUTPUT include_gpu_onthefly/GPUNoFastMathKernels.h + INPUT cmake/GPUNoFastMathKernels.template.h +) if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2") include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) endif() diff --git 
a/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h new file mode 100644 index 0000000000000..dac93277d5ec9 --- /dev/null +++ b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h @@ -0,0 +1,23 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUNoFastMathKernels.h +/// \author David Rohr + +#include +#include + +namespace o2::gpu::internal +{ +// clang-format off +static const std::unordered_set noFastMathKernels = {$>,APPEND,">,PREPEND,">,$ >}; +// clang-format on +} // namespace o2::gpu::internal diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index e63b915640e8a..99699cc72e940 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -17,6 +17,7 @@ define_property(TARGET PROPERTY O2_GPU_KERNELS) define_property(TARGET PROPERTY O2_GPU_KERNEL_NAMES) define_property(TARGET PROPERTY O2_GPU_KERNEL_INCLUDES) define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES) +define_property(TARGET PROPERTY O2_GPU_KERNEL_NO_FAST_MATH) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly") file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") @@ -144,24 +145,23 @@ function(o2_gpu_kernel_file_list list) endfunction() function(o2_gpu_kernel_set_deterministic) - if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL 
${GPUCA_DETERMINISTIC_MODE_MAP_GPU}) - list(LENGTH ARGV n) - math(EXPR n "${n} - 1") - foreach(i RANGE 0 ${n}) - if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")) - set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu" - TARGET_DIRECTORY O2::GPUTrackingCUDA - PROPERTIES - COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") - endif() - if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")) - set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip" - TARGET_DIRECTORY O2::GPUTrackingHIP - PROPERTIES - COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" - COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") - endif() - endforeach() - endif() + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NO_FAST_MATH "${ARGV${i}}") + if(CUDA_ENABLED AND (NOT DEFINED GPUCA_CUDA_COMPILE_MODE OR GPUCA_CUDA_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.cu" + TARGET_DIRECTORY O2::GPUTrackingCUDA + PROPERTIES + COMPILE_FLAGS "${GPUCA_CUDA_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + if(HIP_ENABLED AND (NOT DEFINED GPUCA_HIP_COMPILE_MODE OR GPUCA_HIP_COMPILE_MODE STREQUAL "perkernel")) + set_source_files_properties("${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${ARGV${i}}.hip" + TARGET_DIRECTORY O2::GPUTrackingHIP + PROPERTIES + COMPILE_FLAGS "${GPUCA_CXX_NO_FAST_MATH_FLAGS}" + COMPILE_DEFINITIONS "GPUCA_DETERMINISTIC_MODE") + endif() + endforeach() endfunction() From 78d0f9c0d666f374acc8ed88efb383d75fdab388 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 14:11:58 +0100 Subject: [PATCH 0299/1914] GPU Compile Flags: Set denormals to zero unconditionally Before I kept them in NO_FAST_MATH 
mode, but this yields warning by nvcc, which for some stupid reason cannot be disabled. And in principle, with denormals globally disabled, it is also deterministic again, and disabling it everywhere is as simple as enabling them everywhere. --- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 2 +- GPU/GPUTracking/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Definitions/GPUDefGPUParameters.h | 12 ++++++------ dependencies/FindO2GPU.cmake | 12 +++++++++--- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 3da5b77f80d86..48f292a198b9c 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -27,7 +27,7 @@ set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() - set(OCL_FLAGS ${OCL_FLAGS} -cl-fp32-correctly-rounded-divide-sqrt) + set(OCL_FLAGS ${OCL_FLAGS} ${GPUCA_OCL_NO_FAST_MATH_FLAGS}) endif() set(OCL_DEFINECL "-D$,$-D>" "-I$,EXCLUDE,^/usr/include/?>,$-I>" diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 631f9f0edff4f..ad7dd9c210cd1 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -17,10 +17,10 @@ set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2 ${GPUCA_CLANG_FTZ}") endif() 
elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math ${GPUCA_CLANG_FTZ}") endif() include(cmake/helpers.cmake) diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h index 5b5a89cc8bc39..910907368e891 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h @@ -603,11 +603,11 @@ // #define GPUCA_KERNEL_DEBUGGER_OUTPUT // Some assertions to make sure out parameters are not invalid - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); - static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - #ifdef GPUCA_GPUCODE - static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); - #endif +static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); +static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); +#ifdef GPUCA_GPUCODE + static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); +#endif // Derived parameters #ifdef GPUCA_USE_TEXTURES @@ -621,5 +621,5 @@ #define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) #define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT - // clang-format on +// clang-format on #endif diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 650a269209d9b..56b53e1be8879 100644 --- a/dependencies/FindO2GPU.cmake +++ 
b/dependencies/FindO2GPU.cmake @@ -84,8 +84,14 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}}) message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() -set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") -set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=false --prec-div=true --prec-sqrt=true --fmad false") +if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") + set(GPUCA_CLANG_FTZ "") +else() + set(GPUCA_CLANG_FTZ "-mdaz-ftz") +endif() +set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off ${GPUCA_CLANG_FTZ}") +set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=true --prec-div=true --prec-sqrt=true --fmad false") +set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt -cl-denorms-are-zero) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2}) add_definitions(-DGPUCA_DETERMINISTIC_MODE) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") @@ -172,7 +178,7 @@ if(ENABLE_CUDA) endif() endif() -# ---------------------------------- HIP ---------------------------------- +# ---------------------------------- OpenCL ---------------------------------- if(ENABLE_OPENCL) find_package(OpenCL) if(ENABLE_OPENCL AND NOT ENABLE_OPENCL STREQUAL "AUTO") From 81dad271ef926e4af9809038b0aa32c0bcaf0f66 Mon Sep 17 00:00:00 2001 From: Andrea Sofia Triolo Date: Thu, 27 Mar 2025 08:37:46 +0100 Subject: [PATCH 0300/1914] ITS Efficiency study: modified cuts and code cleanup (#13995) * new method and checks * Rebinning of Z and Phi efficiency plots * ITS efficiency study: code cleanup * Please consider the following formatting changes * ITS efficiency study: raw pointers converted to smart pointers * Please consider the following formatting changes --------- Co-authored-by: Andrea Sofia Triolo Co-authored-by: 
ALICE Action Bot --- .../studies/include/ITSStudies/Efficiency.h | 10 +- .../postprocessing/studies/src/Efficiency.cxx | 1675 +++++++++-------- 2 files changed, 841 insertions(+), 844 deletions(-) diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h index 19df2279a2813..b6f43bb772390 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/include/ITSStudies/Efficiency.h @@ -28,15 +28,8 @@ namespace study using mask_t = o2::dataformats::GlobalTrackID::mask_t; o2::framework::DataProcessorSpec getEfficiencyStudy(mask_t srcTracksMask, mask_t srcClustersMask, bool useMC, std::shared_ptr kineReader); -////// phi cuts for B=0 -float mPhiCutsL0[10][2] = {{-122.5, -122}, {-91.8, -91.7}, {-61, -60}, {-30.1, -29.8}, {30, 30.2}, {59, 59.5}, {88, 89}, {117, 118.5}, {147, 147.8}, {176.5, 176.6}}; -float mPhiCutsL1[12][2] = {{-137, -136.5}, {-114, -113.5}, {-91.5, -91}, {-68.5, -68}, {-45.6, -45.4}, {-23.1, -22.9}, {45.4, 45.6}, {67.4, 67.6}, {89.4, 89.6}, {110.4, 110.6}, {132.4, 132.6}, {154.4, 154.6}}; -float mPhiCutsL2[17][2] = {{-162.85, -162.65}, {-145, -144.5}, {-127, -126.5}, {-109, -108.5}, {-91, -90.5}, {-73, -72.5}, {-55.1, -54.9}, {-37.35, -37.15}, {-19.5, -19}, {36.8, 37}, {54.4, 54.6}, {71.9, 72.1}, {89, 89.5}, {106.4, 106.6}, {123.65, 123.85}, {141.4, 141.6}, {158.9, 159.1}}; - float mEtaCuts[2] = {-1.0, 1.0}; -// float mPtCuts[2] = {1, 4.5}; //// for B=5 float mPtCuts[2] = {0, 10}; /// no cut for B=0 -int mChi2cut = 100; // values obtained from the dca study for B=5 // float dcaXY[3] = {-0.000326, -0.000217, -0.000187}; @@ -55,6 +48,9 @@ int dcaCut = 8; float mDCACutsXY[3][2] = {{dcaXY[0] - dcaCut * sigmaDcaXY[0], dcaXY[0] + dcaCut* sigmaDcaXY[0]}, {dcaXY[1] - dcaCut * sigmaDcaXY[1], dcaXY[1] + dcaCut* sigmaDcaXY[1]}, {dcaXY[2] - dcaCut * sigmaDcaXY[2], dcaXY[2] + dcaCut* 
sigmaDcaXY[2]}}; // cuts at 8 sigma for each layer for xy. The values represent m-8sigma and m+8sigma float mDCACutsZ[3][2] = {{dcaZ[0] - dcaCut * sigmaDcaZ[0], dcaZ[0] + dcaCut* sigmaDcaZ[0]}, {dcaZ[1] - dcaCut * sigmaDcaZ[1], dcaZ[1] + dcaCut* sigmaDcaZ[1]}, {dcaZ[2] - dcaCut * sigmaDcaZ[2], dcaZ[2] + dcaCut* sigmaDcaZ[2]}}; +/// excluding bad chips in MC that are not present in data: to be checked based on the anchoring +std::vector mExcludedChipMC = {66, 67, 68, 75, 76, 77, 84, 85, 86, 93, 94, 95, 102, 103, 104, 265, 266, 267, 274, 275, 276, 283, 284, 285, 413, 414, 415, 422, 423, 424, 431, 432, 433}; + } // namespace study } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx index 28e09e5d9a3be..bca1ec1e85001 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #define NLAYERS 3 @@ -75,8 +77,6 @@ class EfficiencyStudy : public Task void studyClusterSelectionMC(); void countDuplicatedAfterCuts(); void getEfficiency(bool isMC); - void getEfficiencyAndTrackInfo(bool isMC); - void saveDataInfo(); void process(o2::globaltracking::RecoContainer&); void setClusterDictionary(const o2::itsmft::TopologyDictionary* d) { mDict = d; } @@ -105,20 +105,14 @@ class EfficiencyStudy : public Task // Data GTrackID::mask_t mTracksSrc{}; std::shared_ptr mDataRequest; - unsigned short mMask = 0x7f; // Utils std::shared_ptr mGGCCDBRequest; std::unique_ptr mOutFile; int mDuplicated_layer[NLAYERS] = {0}; - const o2::parameters::GRPMagField* mGRPMagField = nullptr; //// Histos - // Distance betweeen original and duplicated clusters - std::unique_ptr mDistanceClustersX[NLAYERS]; - std::unique_ptr mDistanceClustersY[NLAYERS]; - std::unique_ptr mDistanceClustersZ[NLAYERS]; - std::unique_ptr 
mDistanceClusters[NLAYERS]; + // DCA betweeen track and original cluster std::unique_ptr mDCAxyOriginal[NLAYERS]; std::unique_ptr mDCAzOriginal[NLAYERS]; @@ -132,16 +126,12 @@ class EfficiencyStudy : public Task // phi, eta, pt of the cluster std::unique_ptr mPhiOriginal[NLAYERS]; - std::unique_ptr mPhiTrackOriginal[NLAYERS]; std::unique_ptr mEtaOriginal[NLAYERS]; std::unique_ptr mPtOriginal[NLAYERS]; - TH1D* mPtDuplicated[NLAYERS]; - TH1D* mEtaDuplicated[NLAYERS]; - TH1D* mPhiDuplicated[NLAYERS]; - TH1D* mPhiTrackDuplicated[NLAYERS]; - TH2D* mPhiTrackDuplicatedvsphiDuplicated[NLAYERS]; - TH2D* mPhiTrackoriginalvsphioriginal[NLAYERS]; - TH1D* mPhiOriginalIfDuplicated[NLAYERS]; + std::unique_ptr mPtDuplicated[NLAYERS]; + std::unique_ptr mEtaDuplicated[NLAYERS]; + std::unique_ptr mPhiDuplicated[NLAYERS]; + std::unique_ptr mPhiOriginalIfDuplicated[NLAYERS]; std::unique_ptr mZvsPhiDUplicated[NLAYERS]; @@ -151,13 +141,6 @@ class EfficiencyStudy : public Task std::unique_ptr m2DClusterOriginalPositions; std::unique_ptr m2DClusterDuplicatedPositions; - std::unique_ptr mXoriginal; - std::unique_ptr mYoriginal; - std::unique_ptr mZoriginal; - std::unique_ptr mXduplicated; - std::unique_ptr mYduplicated; - std::unique_ptr mZduplicated; - // Efficiency histos std::unique_ptr mEfficiencyGoodMatch; std::unique_ptr mEfficiencyFakeMatch; @@ -165,29 +148,37 @@ class EfficiencyStudy : public Task std::unique_ptr mEfficiencyGoodMatch_layer[NLAYERS]; std::unique_ptr mEfficiencyFakeMatch_layer[NLAYERS]; std::unique_ptr mEfficiencyTotal_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPt_layer[NLAYERS]; - TH2D* mEfficiencyFakeMatchPt_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchEta_layer[NLAYERS]; - TH2D* mEfficiencyFakeMatchEta_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPhi_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPhiTrack_layer[NLAYERS]; - TH2D* mEfficiencyGoodMatchPhiOriginal_layer[NLAYERS]; - TH2D* mEfficiencyFakeMatchPhi_layer[NLAYERS]; - TH2D* 
mEfficiencyFakeMatchPhiTrack_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchPt_layer[NLAYERS]; + std::unique_ptr mEfficiencyFakeMatchPt_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchEta_layer[NLAYERS]; + std::unique_ptr mEfficiencyFakeMatchEta_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchPhi_layer[NLAYERS]; + std::unique_ptr mEfficiencyGoodMatchPhiOriginal_layer[NLAYERS]; + std::unique_ptr mEfficiencyFakeMatchPhi_layer[NLAYERS]; + + // std::unique_ptr mEfficiencyColEta[NLAYERS]; + std::unique_ptr mDenColEta[NLAYERS]; + std::unique_ptr mNumColEta[NLAYERS]; + std::unique_ptr mDenRowPhi[NLAYERS]; + std::unique_ptr mNumRowPhi[NLAYERS]; + std::unique_ptr mDenRowCol[NLAYERS]; + std::unique_ptr mNumRowCol[NLAYERS]; // phi, eta, pt of the duplicated cluster per layer - TH2D* mPt_EtaDupl[NLAYERS]; + std::unique_ptr mPt_EtaDupl[NLAYERS]; // duplicated per layer and per cut std::unique_ptr mDuplicatedEtaAllPt[NLAYERS]; std::unique_ptr mDuplicatedEta[NLAYERS][3]; std::unique_ptr mDuplicatedPhiAllPt[NLAYERS]; std::unique_ptr mDuplicatedPhi[NLAYERS][3]; - TH1D* mDuplicatedPt[NLAYERS]; - TH1D* mDuplicatedRow[NLAYERS]; - TH2D* mDuplicatedPtEta[NLAYERS]; - TH2D* mDuplicatedPtPhi[NLAYERS]; - TH2D* mDuplicatedEtaPhi[NLAYERS]; + std::unique_ptr mDuplicatedPt[NLAYERS]; + std::unique_ptr mDuplicatedRow[NLAYERS]; + std::unique_ptr mDuplicatedCol[NLAYERS]; + std::unique_ptr mDuplicatedZ[NLAYERS]; + std::unique_ptr mDuplicatedPtEta[NLAYERS]; + std::unique_ptr mDuplicatedPtPhi[NLAYERS]; + std::unique_ptr mDuplicatedEtaPhi[NLAYERS]; // matches per layer and per cut std::unique_ptr mNGoodMatchesEtaAllPt[NLAYERS]; @@ -200,26 +191,36 @@ class EfficiencyStudy : public Task std::unique_ptr mNFakeMatchesPhiAllPt[NLAYERS]; std::unique_ptr mNFakeMatchesPhi[NLAYERS][3]; - TH1D* mNGoodMatchesPt[NLAYERS]; - TH1D* mNFakeMatchesPt[NLAYERS]; + std::unique_ptr mNGoodMatchesPt[NLAYERS]; + std::unique_ptr mNFakeMatchesPt[NLAYERS]; + + std::unique_ptr mNGoodMatchesRow[NLAYERS]; + 
std::unique_ptr mNFakeMatchesRow[NLAYERS]; + + std::unique_ptr mNGoodMatchesCol[NLAYERS]; + std::unique_ptr mNFakeMatchesCol[NLAYERS]; - TH1D* mNGoodMatchesRow[NLAYERS]; - TH1D* mNFakeMatchesRow[NLAYERS]; + std::unique_ptr mNGoodMatchesZ[NLAYERS]; + std::unique_ptr mNFakeMatchesZ[NLAYERS]; - TH2D* mNGoodMatchesPtEta[NLAYERS]; - TH2D* mNFakeMatchesPtEta[NLAYERS]; + std::unique_ptr mNGoodMatchesPtEta[NLAYERS]; + std::unique_ptr mNFakeMatchesPtEta[NLAYERS]; - TH2D* mNGoodMatchesPtPhi[NLAYERS]; - TH2D* mNFakeMatchesPtPhi[NLAYERS]; + std::unique_ptr mNGoodMatchesPtPhi[NLAYERS]; + std::unique_ptr mNFakeMatchesPtPhi[NLAYERS]; - TH2D* mNGoodMatchesEtaPhi[NLAYERS]; - TH2D* mNFakeMatchesEtaPhi[NLAYERS]; + std::unique_ptr mNGoodMatchesEtaPhi[NLAYERS]; + std::unique_ptr mNFakeMatchesEtaPhi[NLAYERS]; // calculating the efficiency with TEfficiency class std::unique_ptr mEffPtGood[NLAYERS]; std::unique_ptr mEffPtFake[NLAYERS]; std::unique_ptr mEffRowGood[NLAYERS]; std::unique_ptr mEffRowFake[NLAYERS]; + std::unique_ptr mEffColGood[NLAYERS]; + std::unique_ptr mEffColFake[NLAYERS]; + std::unique_ptr mEffZGood[NLAYERS]; + std::unique_ptr mEffZFake[NLAYERS]; std::unique_ptr mEffPtEtaGood[NLAYERS]; std::unique_ptr mEffPtEtaFake[NLAYERS]; std::unique_ptr mEffPtPhiGood[NLAYERS]; @@ -237,17 +238,15 @@ class EfficiencyStudy : public Task std::unique_ptr mEffPhiFakeAllPt[NLAYERS]; std::unique_ptr mEffPhiFake[NLAYERS][3]; - TH2D* mnGoodMatchesPt_layer[NLAYERS]; - TH2D* mnFakeMatchesPt_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesPt_layer[NLAYERS]; + std::unique_ptr mnFakeMatchesPt_layer[NLAYERS]; - TH2D* mnGoodMatchesEta_layer[NLAYERS]; - TH2D* mnFakeMatchesEta_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesEta_layer[NLAYERS]; + std::unique_ptr mnFakeMatchesEta_layer[NLAYERS]; - TH2D* mnGoodMatchesPhi_layer[NLAYERS]; - TH2D* mnGoodMatchesPhiTrack_layer[NLAYERS]; - TH2D* mnGoodMatchesPhiOriginal_layer[NLAYERS]; - TH2D* mnFakeMatchesPhi_layer[NLAYERS]; - TH2D* 
mnFakeMatchesPhiTrack_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesPhi_layer[NLAYERS]; + std::unique_ptr mnGoodMatchesPhiOriginal_layer[NLAYERS]; + std::unique_ptr mnFakeMatchesPhi_layer[NLAYERS]; std::unique_ptr DCAxyData[NLAYERS]; std::unique_ptr DCAzData[NLAYERS]; @@ -255,55 +254,77 @@ class EfficiencyStudy : public Task std::unique_ptr DCAxyRejected[NLAYERS]; std::unique_ptr DCAzRejected[NLAYERS]; - std::unique_ptr DistanceClustersX[NLAYERS]; - std::unique_ptr DistanceClustersY[NLAYERS]; - std::unique_ptr DistanceClustersZ[NLAYERS]; - std::unique_ptr DistanceClustersXAftercuts[NLAYERS]; - std::unique_ptr DistanceClustersYAftercuts[NLAYERS]; - std::unique_ptr DistanceClustersZAftercuts[NLAYERS]; - - TH1D* denPt[NLAYERS]; - TH1D* numPt[NLAYERS]; - TH1D* numPtGood[NLAYERS]; - TH1D* numPtFake[NLAYERS]; - - TH1D* denPhi[NLAYERS]; - TH1D* numPhi[NLAYERS]; - TH1D* numPhiGood[NLAYERS]; - TH1D* numPhiFake[NLAYERS]; - - TH1D* denEta[NLAYERS]; - TH1D* numEta[NLAYERS]; - TH1D* numEtaGood[NLAYERS]; - TH1D* numEtaFake[NLAYERS]; + std::unique_ptr denPt[NLAYERS]; + std::unique_ptr numPt[NLAYERS]; + std::unique_ptr numPtGood[NLAYERS]; + std::unique_ptr numPtFake[NLAYERS]; + + std::unique_ptr denPhi[NLAYERS]; + std::unique_ptr numPhi[NLAYERS]; + std::unique_ptr numPhiGood[NLAYERS]; + std::unique_ptr numPhiFake[NLAYERS]; + + std::unique_ptr denEta[NLAYERS]; + std::unique_ptr numEta[NLAYERS]; + std::unique_ptr numEtaGood[NLAYERS]; + std::unique_ptr numEtaFake[NLAYERS]; + + std::unique_ptr denRow[NLAYERS]; + std::unique_ptr numRow[NLAYERS]; + std::unique_ptr numRowGood[NLAYERS]; + std::unique_ptr numRowFake[NLAYERS]; + + std::unique_ptr denCol[NLAYERS]; + std::unique_ptr numCol[NLAYERS]; + std::unique_ptr numColGood[NLAYERS]; + std::unique_ptr numColFake[NLAYERS]; + std::unique_ptr denZ[NLAYERS]; + std::unique_ptr numZ[NLAYERS]; + std::unique_ptr numZGood[NLAYERS]; + std::unique_ptr numZFake[NLAYERS]; + + std::unique_ptr numLayers; + std::unique_ptr denLayers; + std::unique_ptr 
numGoodLayers; + std::unique_ptr numFakeLayers; int nDuplicatedClusters[NLAYERS] = {0}; int nTracksSelected[NLAYERS] = {0}; // denominator fot the efficiency calculation - TH2D* diffPhivsPt[NLAYERS]; - TH1D* diffTheta[NLAYERS]; - - TH1D* thetaOriginal[NLAYERS]; - TH1D* thetaOriginalCalc[NLAYERS]; - TH1D* thetaDuplicated[NLAYERS]; - TH1D* thetaOriginalCalcWhenDuplicated[NLAYERS]; - TH1D* thetaOriginalWhenDuplicated[NLAYERS]; - std::unique_ptr IPOriginalxy[NLAYERS]; std::unique_ptr IPOriginalz[NLAYERS]; - std::unique_ptr IPOriginalifDuplicatedxy[NLAYERS]; - std::unique_ptr IPOriginalifDuplicatedz[NLAYERS]; std::unique_ptr chipRowDuplicated[NLAYERS]; std::unique_ptr chipRowOriginalIfDuplicated[NLAYERS]; - std::unique_ptr chi2track; std::unique_ptr chi2trackAccepted; + + /// checking where the duplicated not found are (histograms filled with the orifinal cluster variables) + std::unique_ptr phiFound[NLAYERS]; + std::unique_ptr rowFound[NLAYERS]; + std::unique_ptr phiNotFound[NLAYERS]; + std::unique_ptr rowNotFound[NLAYERS]; + std::unique_ptr zFound[NLAYERS]; + std::unique_ptr zNotFound[NLAYERS]; + std::unique_ptr colFoundOriginalVsDuplicated[NLAYERS]; + std::unique_ptr colFoundOriginal[NLAYERS]; + std::unique_ptr colNotFound[NLAYERS]; + std::unique_ptr radiusFound[NLAYERS]; + std::unique_ptr radiusNotFound[NLAYERS]; + std::unique_ptr m2DClusterFoundPositions; + std::unique_ptr m2DClusterNotFoundPositions; + std::unique_ptr mChipNotFound; + std::unique_ptr mChipFound; + std::unique_ptr l0_00; + std::unique_ptr l1_15; + std::unique_ptr l2_19; + std::unique_ptr chipOrigVsOverlap; + std::unique_ptr chipmap; }; void EfficiencyStudy::init(InitContext& ic) { - LOGP(info, "--------------- init"); + LOGP(info, "init"); o2::base::GRPGeomHelper::instance().setRequest(mGGCCDBRequest); @@ -320,15 +341,8 @@ void EfficiencyStudy::init(InitContext& ic) mOutFile = std::make_unique(mOutFileName.c_str(), "recreate"); - mXoriginal = std::make_unique("xoriginal", "x original ;x (cm); ", 
200, 0, 0); - mYoriginal = std::make_unique("yoriginal", "y original ;y (cm); ", 200, 0, 0); - mZoriginal = std::make_unique("zoriginal", "z original ;z (cm); ", 300, 0, 0); - mXduplicated = std::make_unique("xduplicated", "x duplicated ;x (cm); ", 200, -10, 10); - mYduplicated = std::make_unique("yduplicated", "y duplicated ;y (cm); ", 200, -10, 10); - mZduplicated = std::make_unique("zduplicated", "z duplicated ;z (cm); ", 300, -30, 30); - - mDCAxyDuplicated = std::make_unique("dcaXYDuplicated", "Distance between track and duplicated cluster ;DCA xy (cm); ", 400, -0.2, 0.2); - mDCAzDuplicated = std::make_unique("dcaZDuplicated", "Distance between track and duplicated cluster ;DCA z (cm); ", 400, -0.2, 0.2); + mDCAxyDuplicated = std::make_unique("dcaXYDuplicated", "Distance between track and duplicated cluster ;DCA xy (cm); ", 200, -0.01, 0.01); + mDCAzDuplicated = std::make_unique("dcaZDuplicated", "Distance between track and duplicated cluster ;DCA z (cm); ", 200, -0.01, 0.01); m3DClusterPositions = std::make_unique("3DClusterPositions", ";x (cm);y (cm);z (cm)", 200, -10, 10, 200, -10, 10, 400, -20, 20); m3DDuplicatedClusterPositions = std::make_unique("3DDuplicatedClusterPositions", ";x (cm);y (cm);z (cm)", 200, -10, 10, 200, -10, 10, 500, -30, 30); @@ -339,152 +353,192 @@ void EfficiencyStudy::init(InitContext& ic) mEfficiencyFakeMatch = std::make_unique("mEfficiencyFakeMatch", ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); mEfficiencyTotal = std::make_unique("mEfficiencyTotal", ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); - chi2track = std::make_unique("chi2track", "; $chi^{2}", 500, 0, 100); - chi2trackAccepted = std::make_unique("chi2trackAccepted", "; $chi^{2}", 500, 0, 100); + chi2trackAccepted = std::make_unique("chi2trackAccepted", "; $#chi^{2}", 500, 0, 100); + + m2DClusterFoundPositions = std::make_unique("m2DClusterFoundPositions", ";x (cm);y (cm)", 250, -5, 5, 250, -5, 5); + m2DClusterNotFoundPositions = 
std::make_unique("m2DClusterNotFoundPositions", ";x (cm);y (cm)", 250, -5, 5, 250, -5, 5); + mChipNotFound = std::make_unique("mChipNotFound", ";chipID", 432, 0, 432); + mChipFound = std::make_unique("mChipFound", ";chipID", 432, 0, 432); + l0_00 = std::make_unique("l0_00", ";col; row", 2304, -0.5, 9215.5, 128, -0.5, 511.5); + l1_15 = std::make_unique("l1_15", ";col; row", 2304, -0.5, 9215.5, 512, -0.5, 511.5); + l2_19 = std::make_unique("l2_19", ";col; row", 2304, -0.5, 9215.5, 512, -0.5, 511.5); + chipOrigVsOverlap = std::make_unique("chipOrigVsOverlap", ";chipID Overlap;chipID Original", 9, 0, 9, 9, 0, 9); + chipmap = std::make_unique("chipmap", ";Column;Row", 1024, 0, 1023, 512, -0.5, 511.5); + + numLayers = std::make_unique("numLayers", "numLayers; ; Efficiency", 3, -0.5, 2.5); + numGoodLayers = std::make_unique("numGoodLayers", "numGoodLayers; ; Efficiency", 3, -0.5, 2.5); + numFakeLayers = std::make_unique("numFakeLayers", "numFakeLayers; ; Efficiency", 3, -0.5, 2.5); + denLayers = std::make_unique("denLayers", "denLayers; ; Efficiency", 3, -0.5, 2.5); for (int i = 0; i < NLAYERS; i++) { chipRowDuplicated[i] = std::make_unique(Form("chipPosDuplicated_L%d", i), Form("L%d; row", i), 512, -0.5, 511.5); chipRowOriginalIfDuplicated[i] = std::make_unique(Form("chipPosOriginalIfDuplicated%d", i), Form("L%d; row", i), 512, -0.5, 511.5); - DCAxyData[i] = std::make_unique(Form("dcaXYData_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 4000, -2, 2); - DCAzData[i] = std::make_unique(Form("dcaZData_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 4000, -2, 2); + DCAxyData[i] = std::make_unique(Form("dcaXYData_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 4000, -0.2, 0.2); + DCAzData[i] = std::make_unique(Form("dcaZData_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 4000, -0.2, 0.2); DCAxyRejected[i] = std::make_unique(Form("DCAxyRejected%d", i), "Distance between track and 
original cluster (rejected) ;DCA xy (cm); ", 30000, -30, 30); DCAzRejected[i] = std::make_unique(Form("DCAzRejected%d", i), "Distance between track and original cluster (rejected) ;DCA z (cm); ", 30000, -30, 30); - DistanceClustersX[i] = std::make_unique(Form("distanceClustersX_L%d", i), ";Distance x (cm); ", 100, 0, 1); - DistanceClustersY[i] = std::make_unique(Form("distanceClustersY_L%d", i), ";Distance y (cm); ", 100, 0, 1); - DistanceClustersZ[i] = std::make_unique(Form("distanceClustersZ_L%d", i), ";Distance z (cm); ", 100, 0, 1); - DistanceClustersXAftercuts[i] = std::make_unique(Form("distanceClustersXAftercuts_L%d", i), ";Distance x (cm); ", 100, 0, 1); - DistanceClustersYAftercuts[i] = std::make_unique(Form("distanceClustersYAftercuts_L%d", i), ";Distance y (cm); ", 100, 0, 1); - DistanceClustersZAftercuts[i] = std::make_unique(Form("distanceClustersZAftercuts_L%d", i), ";Distance z (cm); ", 100, 0, 1); + mDCAxyOriginal[i] = std::make_unique(Form("dcaXYOriginal_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 200, -0.01, 0.01); + mDCAzOriginal[i] = std::make_unique(Form("dcaZOriginal_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 200, -0.01, 0.01); - mDistanceClustersX[i] = std::make_unique(Form("distanceClustersX_L%d", i), ";Distance x (cm); ", 100, 0, 1); - mDistanceClustersY[i] = std::make_unique(Form("distanceClustersY_L%d", i), ";Distance y (cm); ", 100, 0, 1); - mDistanceClustersZ[i] = std::make_unique(Form("distanceClustersZ_L%d", i), ";Distance z (cm); ", 100, 0, 1); - mDistanceClusters[i] = std::make_unique(Form("distanceClusters_L%d", i), ";Distance (cm); ", 100, 0, 1); - - mDCAxyOriginal[i] = std::make_unique(Form("dcaXYOriginal_L%d", i), "Distance between track and original cluster ;DCA xy (cm); ", 400, -0.2, 0.2); - mDCAzOriginal[i] = std::make_unique(Form("dcaZOriginal_L%d", i), "Distance between track and original cluster ;DCA z (cm); ", 400, -0.2, 0.2); - - mPhiOriginal[i] = 
std::make_unique(Form("phiOriginal_L%d", i), ";phi (deg); ", 1440, -180, 180); - mPhiTrackOriginal[i] = std::make_unique(Form("phiTrackOriginal_L%d", i), ";phi Track (deg); ", 1440, 0, 360); - mEtaOriginal[i] = std::make_unique(Form("etaOriginal_L%d", i), ";eta (deg); ", 100, -2, 2); + mPhiOriginal[i] = std::make_unique(Form("phiOriginal_L%d", i), ";phi (rad); ", 90, -3.2, 3.2); + mEtaOriginal[i] = std::make_unique(Form("etaOriginal_L%d", i), ";eta (rad); ", 100, -2, 2); mPtOriginal[i] = std::make_unique(Form("ptOriginal_L%d", i), ";pt (GeV/c); ", 100, 0, 10); - mZvsPhiDUplicated[i] = std::make_unique(Form("zvsphiDuplicated_L%d", i), ";z (cm);phi (deg)", 400, -20, 20, 1440, -180, 180); + mZvsPhiDUplicated[i] = std::make_unique(Form("zvsphiDuplicated_L%d", i), ";z (cm);phi (rad)", 400, -20, 20, 90, -3.2, 3.2); - mPtDuplicated[i] = new TH1D(Form("ptDuplicated_L%d", i), ";pt (GeV/c); ", nbPt, 0, 7.5); // xbins); - mEtaDuplicated[i] = new TH1D(Form("etaDuplicated_L%d", i), ";eta; ", 40, -2, 2); - mPhiDuplicated[i] = new TH1D(Form("phiDuplicated_L%d", i), ";phi (deg); ", 1440, -180, 180); - mPhiTrackDuplicated[i] = new TH1D(Form("phiTrackDuplicated_L%d", i), ";phi Track (deg); ", 1440, 0, 360); - mPhiOriginalIfDuplicated[i] = new TH1D(Form("phiOriginalIfDuplicated_L%d", i), ";phi (deg); ", 1440, -180, 180); - mPhiTrackDuplicatedvsphiDuplicated[i] = new TH2D(Form("phiTrackDuplicatedvsphiDuplicated_L%d", i), ";phi track (deg);phi oridinal if duplicated (deg); ", 1440, 0, 360, 1440, -180, 180); - mPhiTrackoriginalvsphioriginal[i] = new TH2D(Form("phiTrackoriginalvsphioriginal_L%d", i), ";phi track (deg);phi original (deg); ", 1440, 0, 360, 1440, -180, 180); - mDCAxyDuplicated_layer[i] = std::make_unique(Form("dcaXYDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA xy (cm); ", 400, -0.2, 0.2); - mDCAzDuplicated_layer[i] = std::make_unique(Form("dcaZDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA z (cm); ", 400, 
-0.2, 0.2); + mPtDuplicated[i] = std::make_unique(Form("ptDuplicated_L%d", i), ";pt (GeV/c); ", nbPt, 0, 7.5); // xbins); + mEtaDuplicated[i] = std::make_unique(Form("etaDuplicated_L%d", i), ";eta; ", 40, -2, 2); + mPhiDuplicated[i] = std::make_unique(Form("phiDuplicated_L%d", i), ";phi (rad); ", 90, -3.2, 3.2); + mPhiOriginalIfDuplicated[i] = std::make_unique(Form("phiOriginalIfDuplicated_L%d", i), ";phi (rad); ", 90, -3.2, 3.2); + mDCAxyDuplicated_layer[i] = std::make_unique(Form("dcaXYDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA xy (cm); ", 100, -0.01, 0.01); + mDCAzDuplicated_layer[i] = std::make_unique(Form("dcaZDuplicated_layer_L%d", i), "Distance between track and duplicated cluster ;DCA z (cm); ", 100, -0.01, 0.01); mEfficiencyGoodMatch_layer[i] = std::make_unique(Form("mEfficiencyGoodMatch_layer_L%d", i), ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); mEfficiencyFakeMatch_layer[i] = std::make_unique(Form("mEfficiencyFakeMatch_layer_L%d", i), ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); mEfficiencyTotal_layer[i] = std::make_unique(Form("mEfficiencyTotal_layer_L%d", i), ";#sigma(DCA) cut;Efficiency;", 20, 0.5, 20.5); - mEfficiencyGoodMatchPt_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); - mEfficiencyFakeMatchPt_layer[i] = new TH2D(Form("mEfficiencyFakeMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); + mEfficiencyGoodMatchPt_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); + mEfficiencyFakeMatchPt_layer[i] = std::make_unique(Form("mEfficiencyFakeMatchPt_layer_L%d", i), ";#it{p}_{T} (GeV/c);#sigma(DCA) cut;Efficiency;", nbPt, 0, 7.5, /* xbins*/ 20, 0.5, 20.5); - mEfficiencyGoodMatchEta_layer[i] = new 
TH2D(Form("mEfficiencyGoodMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); - mEfficiencyFakeMatchEta_layer[i] = new TH2D(Form("mEfficiencyFakeMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); + mEfficiencyGoodMatchEta_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); + mEfficiencyFakeMatchEta_layer[i] = std::make_unique(Form("mEfficiencyFakeMatchEta_layer_L%d", i), ";#eta;#sigma(DCA) cut;Efficiency;", 40, -2, 2, 20, 0.5, 20.5); - mEfficiencyGoodMatchPhi_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 1440, -180, 180, 20, 0.5, 20.5); - mEfficiencyGoodMatchPhiTrack_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPhiTrack_layer_L%d", i), ";#phi track;#sigma(DCA) cut;Efficiency;", 1440, 0, 360, 20, 0.5, 20.5); - mEfficiencyGoodMatchPhiOriginal_layer[i] = new TH2D(Form("mEfficiencyGoodMatchPhiOriginal_layer_L%d", i), ";#phi Original;#sigma(DCA) cut;Efficiency;", 1440, -180, 180, 20, 0.5, 20.5); - mEfficiencyFakeMatchPhi_layer[i] = new TH2D(Form("mEfficiencyFakeMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 1440, -180, 180, 20, 0.5, 20.5); - mEfficiencyFakeMatchPhiTrack_layer[i] = new TH2D(Form("mEfficiencyFakeMatchPhiTrack_layer_L%d", i), ";#phi Track;#sigma(DCA) cut;Efficiency;", 1440, 0, 360, 20, 0.5, 20.5); + mEfficiencyGoodMatchPhi_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 90, -3.2, 3.2, 20, 0.5, 20.5); + mEfficiencyGoodMatchPhiOriginal_layer[i] = std::make_unique(Form("mEfficiencyGoodMatchPhiOriginal_layer_L%d", i), ";#phi Original;#sigma(DCA) cut;Efficiency;", 90, -3.2, 3.2, 20, 0.5, 20.5); + mEfficiencyFakeMatchPhi_layer[i] = std::make_unique(Form("mEfficiencyFakeMatchPhi_layer_L%d", i), ";#phi;#sigma(DCA) cut;Efficiency;", 90, -3.2, 3.2, 20, 0.5, 20.5); - 
mPt_EtaDupl[i] = new TH2D(Form("mPt_EtaDupl_L%d", i), ";#it{p}_{T} (GeV/c);#eta; ", 100, 0, 10, 100, -2, 2); + mPt_EtaDupl[i] = std::make_unique(Form("mPt_EtaDupl_L%d", i), ";#it{p}_{T} (GeV/c);#eta; ", 100, 0, 10, 100, -2, 2); - mDuplicatedPt[i] = new TH1D(Form("mDuplicatedPt_log_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/); + mDuplicatedPt[i] = std::make_unique(Form("mDuplicatedPt_log_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/); mDuplicatedPt[i]->Sumw2(); - mNGoodMatchesPt[i] = new TH1D(Form("mNGoodMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/); + mNGoodMatchesPt[i] = std::make_unique(Form("mNGoodMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/); mNGoodMatchesPt[i]->Sumw2(); - mNFakeMatchesPt[i] = new TH1D(Form("mNFakeMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of fake matches L%d", i), nbPt, 0, 7.5 /* xbins*/); + mNFakeMatchesPt[i] = std::make_unique(Form("mNFakeMatchesPt_L%d", i), Form("; #it{p}_{T} (GeV/c); Number of fake matches L%d", i), nbPt, 0, 7.5 /* xbins*/); mNFakeMatchesPt[i]->Sumw2(); - mDuplicatedRow[i] = new TH1D(Form("mDuplicatedRow_L%d", i), Form("; Row; Number of duplciated clusters L%d", i), 512, -0.5, 511.5); + mDuplicatedRow[i] = std::make_unique(Form("mDuplicatedRow_L%d", i), Form("; Row; Number of duplciated clusters L%d", i), 128, -0.5, 511.5); mDuplicatedRow[i]->Sumw2(); - mNGoodMatchesRow[i] = new TH1D(Form("mNGoodMatchesRow_L%d", i), Form("; Row; Number of good matches L%d", i), 512, -0.5, 511.5); + mNGoodMatchesRow[i] = std::make_unique(Form("mNGoodMatchesRow_L%d", i), Form("; Row; Number of good matches L%d", i), 128, -0.5, 511.5); mNGoodMatchesRow[i]->Sumw2(); - mNFakeMatchesRow[i] = new TH1D(Form("mNFakeMatchesRow_L%d", i), Form(";Row; Number of fake matches L%d", i), 512, -0.5, 511.5); + 
mNFakeMatchesRow[i] = std::make_unique(Form("mNFakeMatchesRow_L%d", i), Form(";Row; Number of fake matches L%d", i), 128, -0.5, 511.5); mNFakeMatchesRow[i]->Sumw2(); - mDuplicatedPtEta[i] = new TH2D(Form("mDuplicatedPtEta_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); + mDuplicatedCol[i] = std::make_unique(Form("mDuplicatedCol_L%d", i), Form("; Col; Number of duplciated clusters L%d", i), 128, -0.5, 1023.5); + mDuplicatedCol[i]->Sumw2(); + mNGoodMatchesCol[i] = std::make_unique(Form("mNGoodMatchesCol_L%d", i), Form("; Col; Number of good matches L%d", i), 128, -0.5, 1023.5); + mNGoodMatchesCol[i]->Sumw2(); + mNFakeMatchesCol[i] = std::make_unique(Form("mNFakeMatchesCol_L%d", i), Form(";Col; Number of fake matches L%d", i), 128, -0.5, 1023.5); + mNFakeMatchesCol[i]->Sumw2(); + + mDuplicatedZ[i] = std::make_unique(Form("mDuplicatedZ_L%d", i), Form("; Z (cm); Number of duplciated clusters L%d", i), 100, -15, 15); + mDuplicatedZ[i]->Sumw2(); + mNGoodMatchesZ[i] = std::make_unique(Form("mNGoodMatchesZ_L%d", i), Form("; Z (cm); Number of good matches L%d", i), 100, -15, 15); + mNGoodMatchesZ[i]->Sumw2(); + mNFakeMatchesZ[i] = std::make_unique(Form("mNFakeMatchesZ_L%d", i), Form(";Z (cm); Number of fake matches L%d", i), 100, -15, 15); + mNFakeMatchesZ[i]->Sumw2(); + + mDuplicatedPtEta[i] = std::make_unique(Form("mDuplicatedPtEta_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); mDuplicatedPtEta[i]->Sumw2(); - mNGoodMatchesPtEta[i] = new TH2D(Form("mNGoodMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); + mNGoodMatchesPtEta[i] = std::make_unique(Form("mNGoodMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); mNGoodMatchesPtEta[i]->Sumw2(); - mNFakeMatchesPtEta[i] = new 
TH2D(Form("mNFakeMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); + mNFakeMatchesPtEta[i] = std::make_unique(Form("mNFakeMatchesPtEta_L%d", i), Form("; #it{p}_{T} (GeV/c);#eta; Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 40, -2, 2); mNFakeMatchesPtEta[i]->Sumw2(); - mDuplicatedPtPhi[i] = new TH2D(Form("mDuplicatedPtPhi_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (deg); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 1440, -180, 180); + mDuplicatedPtPhi[i] = std::make_unique(Form("mDuplicatedPtPhi_log_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (rad); Number of duplciated clusters L%d", i), nbPt, 0, 7.5 /* xbins*/, 90, -3.2, 3.2); mDuplicatedPtPhi[i]->Sumw2(); - mNGoodMatchesPtPhi[i] = new TH2D(Form("mNGoodMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (deg); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 1440, -180, 180); + mNGoodMatchesPtPhi[i] = std::make_unique(Form("mNGoodMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (rad); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 90, -3.2, 3.2); mNGoodMatchesPtPhi[i]->Sumw2(); - mNFakeMatchesPtPhi[i] = new TH2D(Form("mNFakeMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (deg); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 1440, -180, 180); + mNFakeMatchesPtPhi[i] = std::make_unique(Form("mNFakeMatchesPtPhi_L%d", i), Form("; #it{p}_{T} (GeV/c);#phi (rad); Number of good matches L%d", i), nbPt, 0, 7.5 /* xbins*/, 90, -3.2, 3.2); mNFakeMatchesPtPhi[i]->Sumw2(); - mDuplicatedEtaPhi[i] = new TH2D(Form("mDuplicatedEtaPhi_L%d", i), Form("; #eta;#phi (deg); Number of duplciated clusters L%d", i), 40, -2, 2, 1440, -180, 180); + mDuplicatedEtaPhi[i] = std::make_unique(Form("mDuplicatedEtaPhi_L%d", i), Form("; #eta;#phi (rad); Number of duplciated clusters L%d", i), 40, -2, 2, 90, -3.2, 3.2); mDuplicatedEtaPhi[i]->Sumw2(); - mNGoodMatchesEtaPhi[i] = new 
TH2D(Form("mNGoodMatchesEtaPhi_L%d", i), Form("; #eta;#phi (deg); Number of good matches L%d", i), 40, -2, 2, 1440, -180, 180); + mNGoodMatchesEtaPhi[i] = std::make_unique(Form("mNGoodMatchesEtaPhi_L%d", i), Form("; #eta;#phi (rad); Number of good matches L%d", i), 40, -2, 2, 90, -3.2, 3.2); mNGoodMatchesEtaPhi[i]->Sumw2(); - mNFakeMatchesEtaPhi[i] = new TH2D(Form("mNFakeMatchesEtaPhi_L%d", i), Form("; #eta;#phi (deg); Number of good matches L%d", i), 40, -2, 2, 1440, -180, 180); + mNFakeMatchesEtaPhi[i] = std::make_unique(Form("mNFakeMatchesEtaPhi_L%d", i), Form("; #eta;#phi (rad); Number of good matches L%d", i), 40, -2, 2, 90, -3.2, 3.2); mNFakeMatchesEtaPhi[i]->Sumw2(); mDuplicatedEtaAllPt[i] = std::make_unique(Form("mDuplicatedEtaAllPt_L%d", i), Form("; #eta; Number of duplicated clusters L%d", i), 40, -2, 2); mNGoodMatchesEtaAllPt[i] = std::make_unique(Form("mNGoodMatchesEtaAllPt_L%d", i), Form("; #eta; Number of good matches L%d", i), 40, -2, 2); mNFakeMatchesEtaAllPt[i] = std::make_unique(Form("mNFakeMatchesEtaAllPt_L%d", i), Form("; #eta; Number of fake matches L%d", i), 40, -2, 2); - mDuplicatedPhiAllPt[i] = std::make_unique(Form("mDuplicatedPhiAllPt_L%d", i), Form("; #phi (deg); Number of duplicated clusters L%d", i), 1440, -180, 180); - mNGoodMatchesPhiAllPt[i] = std::make_unique(Form("mNGoodMatchesPhiAllPt_L%d", i), Form("; #phi (deg); Number of good matches L%d", i), 1440, -180, 180); - mNFakeMatchesPhiAllPt[i] = std::make_unique(Form("mNFakeMatchesPhiAllPt_L%d", i), Form("; #phi (deg); Number of fake matches L%d", i), 1440, -180, 180); - - mnGoodMatchesPt_layer[i] = new TH2D(Form("mnGoodMatchesPt_layer_L%d", i), ";pt; nGoodMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); - mnFakeMatchesPt_layer[i] = new TH2D(Form("mnFakeMatchesPt_layer_L%d", i), ";pt; nFakeMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); - mnGoodMatchesEta_layer[i] = new TH2D(Form("mnGoodMatchesEta_layer_L%d", i), ";#eta; nGoodMatches", 40, -2, 2, 20, 0.5, 20.5); - 
mnFakeMatchesEta_layer[i] = new TH2D(Form("mnFakeMatchesEta_layer_L%d", i), ";#eta; nFakeMatches", 40, -2, 2, 20, 0.5, 20.5); - mnGoodMatchesPhi_layer[i] = new TH2D(Form("mnGoodMatchesPhi_layer_L%d", i), ";#Phi; nGoodMatches", 1440, -180, 180, 20, 0.5, 20.5); - mnGoodMatchesPhiTrack_layer[i] = new TH2D(Form("mnGoodMatchesPhiTrack_layer_L%d", i), ";#Phi track; nGoodMatches", 1440, 0, 360, 20, 0.5, 20.5); - mnGoodMatchesPhiOriginal_layer[i] = new TH2D(Form("mnGoodMatchesPhiOriginal_layer_L%d", i), ";#Phi of the original Cluster; nGoodMatches", 1440, -180, 180, 20, 0.5, 20.5); - mnFakeMatchesPhi_layer[i] = new TH2D(Form("mnFakeMatchesPhi_layer_L%d", i), ";#Phi; nFakeMatches", 1440, -180, 180, 20, 0.5, 20.5); - mnFakeMatchesPhiTrack_layer[i] = new TH2D(Form("mnFakeMatchesPhiTrack_layer_L%d", i), ";#Phi track; nFakeMatches", 1440, 0, 360, 20, 0.5, 20.5); - - denPt[i] = new TH1D(Form("denPt_L%d", i), Form("denPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); - numPt[i] = new TH1D(Form("numPt_L%d", i), Form("numPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); - numPtGood[i] = new TH1D(Form("numPtGood_L%d", i), Form("numPtGood_L%d", i), nbPt, 0, 7.5 /* xbins*/); - numPtFake[i] = new TH1D(Form("numPtFake_L%d", i), Form("numPtFake_L%d", i), nbPt, 0, 7.5 /* xbins*/); - - denPhi[i] = new TH1D(Form("denPhi_L%d", i), Form("denPhi_L%d", i), 1440, -180, 180); - numPhi[i] = new TH1D(Form("numPhi_L%d", i), Form("numPhi_L%d", i), 1440, -180, 180); - numPhiGood[i] = new TH1D(Form("numPhiGood_L%d", i), Form("numPhiGood_L%d", i), 1440, -180, 180); - numPhiFake[i] = new TH1D(Form("numPhiFake_L%d", i), Form("numPhiFake_L%d", i), 1440, -180, 180); - - denEta[i] = new TH1D(Form("denEta_L%d", i), Form("denEta_L%d", i), 200, -2, 2); - numEta[i] = new TH1D(Form("numEta_L%d", i), Form("numEta_L%d", i), 200, -2, 2); - numEtaGood[i] = new TH1D(Form("numEtaGood_L%d", i), Form("numEtaGood_L%d", i), 200, -2, 2); - numEtaFake[i] = new TH1D(Form("numEtaFake_L%d", i), Form("numEtaFake_L%d", i), 200, -2, 2); - - 
diffPhivsPt[i] = new TH2D(Form("diffPhivsPt_L%d", i), Form("diffPhivsPt_L%d", i), nbPt, 0, 7.5 /* xbins*/, 50, 0, 5); + mDuplicatedPhiAllPt[i] = std::make_unique(Form("mDuplicatedPhiAllPt_L%d", i), Form("; #phi (rad); Number of duplicated clusters L%d", i), 90, -3.2, 3.2); + mNGoodMatchesPhiAllPt[i] = std::make_unique(Form("mNGoodMatchesPhiAllPt_L%d", i), Form("; #phi (rad); Number of good matches L%d", i), 90, -3.2, 3.2); + mNFakeMatchesPhiAllPt[i] = std::make_unique(Form("mNFakeMatchesPhiAllPt_L%d", i), Form("; #phi (rad); Number of fake matches L%d", i), 90, -3.2, 3.2); + + mnGoodMatchesPt_layer[i] = std::make_unique(Form("mnGoodMatchesPt_layer_L%d", i), ";pt; nGoodMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); + mnFakeMatchesPt_layer[i] = std::make_unique(Form("mnFakeMatchesPt_layer_L%d", i), ";pt; nFakeMatches", nbPt, 0, 7.5 /* xbins*/, 20, 0.5, 20.5); + mnGoodMatchesEta_layer[i] = std::make_unique(Form("mnGoodMatchesEta_layer_L%d", i), ";#eta; nGoodMatches", 40, -2, 2, 20, 0.5, 20.5); + mnFakeMatchesEta_layer[i] = std::make_unique(Form("mnFakeMatchesEta_layer_L%d", i), ";#eta; nFakeMatches", 40, -2, 2, 20, 0.5, 20.5); + mnGoodMatchesPhi_layer[i] = std::make_unique(Form("mnGoodMatchesPhi_layer_L%d", i), ";#Phi; nGoodMatches", 90, -3.2, 3.2, 20, 0.5, 20.5); + mnGoodMatchesPhiOriginal_layer[i] = std::make_unique(Form("mnGoodMatchesPhiOriginal_layer_L%d", i), ";#Phi of the original Cluster; nGoodMatches", 90, -3.2, 3.2, 20, 0.5, 20.5); + mnFakeMatchesPhi_layer[i] = std::make_unique(Form("mnFakeMatchesPhi_layer_L%d", i), ";#Phi; nFakeMatches", 90, -3.2, 3.2, 20, 0.5, 20.5); + + denPt[i] = std::make_unique(Form("denPt_L%d", i), Form("denPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); + numPt[i] = std::make_unique(Form("numPt_L%d", i), Form("numPt_L%d", i), nbPt, 0, 7.5 /* xbins*/); + numPtGood[i] = std::make_unique(Form("numPtGood_L%d", i), Form("numPtGood_L%d", i), nbPt, 0, 7.5 /* xbins*/); + numPtFake[i] = std::make_unique(Form("numPtFake_L%d", i), 
Form("numPtFake_L%d", i), nbPt, 0, 7.5 /* xbins*/); + + denPhi[i] = std::make_unique(Form("denPhi_L%d", i), Form("denPhi_L%d", i), 90, -3.2, 3.2); + numPhi[i] = std::make_unique(Form("numPhi_L%d", i), Form("numPhi_L%d", i), 90, -3.2, 3.2); + numPhiGood[i] = std::make_unique(Form("numPhiGood_L%d", i), Form("numPhiGood_L%d", i), 90, -3.2, 3.2); + numPhiFake[i] = std::make_unique(Form("numPhiFake_L%d", i), Form("numPhiFake_L%d", i), 90, -3.2, 3.2); + + denEta[i] = std::make_unique(Form("denEta_L%d", i), Form("denEta_L%d", i), 200, -2, 2); + numEta[i] = std::make_unique(Form("numEta_L%d", i), Form("numEta_L%d", i), 200, -2, 2); + numEtaGood[i] = std::make_unique(Form("numEtaGood_L%d", i), Form("numEtaGood_L%d", i), 200, -2, 2); + numEtaFake[i] = std::make_unique(Form("numEtaFake_L%d", i), Form("numEtaFake_L%d", i), 200, -2, 2); + + denRow[i] = std::make_unique(Form("denRow_L%d", i), Form("denRow_L%d", i), 128, -0.5, 511.5); + numRow[i] = std::make_unique(Form("numRow_L%d", i), Form("numRow_L%d", i), 128, -0.5, 511.5); + numRowGood[i] = std::make_unique(Form("numRowGood_L%d", i), Form("numRowGood_L%d", i), 128, -0.5, 511.5); + numRowFake[i] = std::make_unique(Form("numRowFake_L%d", i), Form("numRowFake_L%d", i), 128, -0.5, 511.5); + + denCol[i] = std::make_unique(Form("denCol_L%d", i), Form("denCol_L%d", i), 128, -0.5, 1023.5); + numCol[i] = std::make_unique(Form("numCol_L%d", i), Form("numCol_L%d", i), 128, -0.5, 1023.5); + numColGood[i] = std::make_unique(Form("numColGood_L%d", i), Form("numColGood_L%d", i), 128, -0.5, 1023.5); + numColFake[i] = std::make_unique(Form("numColFake_L%d", i), Form("numColFake_L%d", i), 128, -0.5, 1023.5); + + denZ[i] = std::make_unique(Form("denZ_L%d", i), Form("denZ_L%d", i), 100, -15, 15); + numZ[i] = std::make_unique(Form("numZ_L%d", i), Form("numZ_L%d", i), 100, -15, 15); + numZGood[i] = std::make_unique(Form("numZGood_L%d", i), Form("numZGood_L%d", i), 100, -15, 15); + numZFake[i] = std::make_unique(Form("numZFake_L%d", i), 
Form("numZFake_L%d", i), 100, -15, 15); + + mDenColEta[i] = std::make_unique(Form("mDenColEta_L%d", i), Form("mDenColEta_L%d", i), 128, -0.5, 1023.5, 50, -1, 1); + mNumColEta[i] = std::make_unique(Form("mNumColEta_L%d", i), Form("mNumColEta_L%d", i), 128, -0.5, 1023.5, 50, -1, 1); + + mDenRowPhi[i] = std::make_unique(Form("mDenRowPhi_L%d", i), Form("mDenRowPhi_L%d", i), 128, -0.5, 511.5, 90, -3.2, 3.2); + mNumRowPhi[i] = std::make_unique(Form("mNumRowPhi_L%d", i), Form("mNumRowPhi_L%d", i), 128, -0.5, 511.5, 90, -3.2, 3.2); + + mDenRowCol[i] = std::make_unique(Form("mDenRowCol_L%d", i), Form("mDenRowCol_L%d", i), 128, -0.5, 511.5, 128, -0.5, 1023.5); + mNumRowCol[i] = std::make_unique(Form("mNumRowCol_L%d", i), Form("mNumRowCol_L%d", i), 128, -0.5, 511.5, 128, -0.5, 1023.5); IPOriginalxy[i] = std::make_unique(Form("IPOriginalxy_L%d", i), Form("IPOriginalxy_L%d", i), 500, -0.002, 0.002); IPOriginalz[i] = std::make_unique(Form("IPOriginalz_L%d", i), Form("IPOriginalz_L%d", i), 200, -10, 10); - IPOriginalifDuplicatedxy[i] = std::make_unique(Form("IPOriginalifDuplicatedxy_L%d", i), Form("IPOriginalifDuplicatedxy_L%d", i), 1000, -0.005, 0.005); - IPOriginalifDuplicatedz[i] = std::make_unique(Form("IPOriginalifDuplicatedz_L%d", i), Form("IPOriginalifDuplicatedz_L%d", i), 200, -10, 10); + + phiFound[i] = std::make_unique(Form("phiFound_L%d", i), Form("phiFound_L%d", i), 190, -3.2, 3.2); + rowFound[i] = std::make_unique(Form("rowFound_L%d", i), Form("rowFound_L%d", i), 128, -0.5, 511.5); + phiNotFound[i] = std::make_unique(Form("phiNotFound_L%d", i), Form("phiNotFound_L%d", i), 90, -3.2, 3.2); + rowNotFound[i] = std::make_unique(Form("rowNotFound_L%d", i), Form("rowNotFound_L%d", i), 128, -0.5, 511.5); + zFound[i] = std::make_unique(Form("zFound_L%d", i), Form("zFound_L%d", i), 100, -15, 15); + zNotFound[i] = std::make_unique(Form("zNotFound%d", i), Form("zNotFound%d", i), 100, -15, 15); + colFoundOriginalVsDuplicated[i] = 
std::make_unique(Form("colFoundOriginalVsDuplicated_L%d", i), Form("colFoundOriginalVsDuplicated_L%d; Col Original cluster; Col Overlap cluster", i), 9216, -0.5, 9215.5, 9216, -0.5, 9215.5); + colFoundOriginal[i] = std::make_unique(Form("colFoundOriginal_L%d", i), Form("colFoundOriginal_L%d; Col Original cluster;", i), 9216, -0.5, 9215.5); + colNotFound[i] = std::make_unique(Form("colNotFound_L%d", i), Form("colNotFound_L%d", i), 9216, -0.5, 9215.5); + radiusFound[i] = std::make_unique(Form("radiusFound_L%d", i), Form("radiusFound_L%d", i), 80, 0, 6); + radiusNotFound[i] = std::make_unique(Form("radiusNotFound_L%d", i), Form("radiusNotFound_L%d", i), 80, 0, 4); for (int j = 0; j < 3; j++) { mDuplicatedEta[i][j] = std::make_unique(Form("mDuplicatedEta_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #eta; Number of duplicated clusters L%d", mrangesPt[j][0], mrangesPt[j][1], i), 40, -2, 2); mNGoodMatchesEta[i][j] = std::make_unique(Form("mNGoodMatchesEta_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #eta; Number of good matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 40, -2, 2); mNFakeMatchesEta[i][j] = std::make_unique(Form("mNFakeMatchesEta_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #eta; Number of fake matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 40, -2, 2); - mDuplicatedPhi[i][j] = std::make_unique(Form("mDuplicatedPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of duplicated clusters L%d", mrangesPt[j][0], mrangesPt[j][1], i), 1440, -180, 180); - mNGoodMatchesPhi[i][j] = std::make_unique(Form("mNGoodMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of good matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 1440, -180, 180); - mNFakeMatchesPhi[i][j] = std::make_unique(Form("mNFakeMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of fake matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 1440, -180, 180); + mDuplicatedPhi[i][j] = 
std::make_unique(Form("mDuplicatedPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of duplicated clusters L%d", mrangesPt[j][0], mrangesPt[j][1], i), 90, -3.2, 3.2); + mNGoodMatchesPhi[i][j] = std::make_unique(Form("mNGoodMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of good matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 90, -3.2, 3.2); + mNFakeMatchesPhi[i][j] = std::make_unique(Form("mNFakeMatchesPhi_L%d_pt%d", i, j), Form("%f < #it{p}_{T} < %f GeV/c; #phi; Number of fake matches L%d", mrangesPt[j][0], mrangesPt[j][1], i), 90, -3.2, 3.2); } } gStyle->SetPalette(55); @@ -508,7 +562,6 @@ void EfficiencyStudy::initialiseRun(o2::globaltracking::RecoContainer& recoData) mTracksMCLabels = recoData.getITSTracksMCLabels(); mClustersMCLCont = recoData.getITSClustersMCLabels(); } - mITSClustersArray.clear(); mTracksROFRecords = recoData.getITSTracksROFRecords(); mTracks = recoData.getITSTracks(); @@ -574,7 +627,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) continue; } - float ip[2]; + float ip[2]; // IP from 0,0,0 and the track should be the deplacement of the primary vertex track.getImpactParams(0, 0, 0, 0, ip); // if (abs(ip[0])>0.001 ) continue; ///pv not in (0,0,0) @@ -587,14 +640,12 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) auto pt = trackParCov.getPt(); auto eta = trackParCov.getEta(); - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } + // if (pt < mPtCuts[0] || pt > mPtCuts[1]) { + // continue; + // } + // if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { + // continue; + // } float phioriginal = 0; float phiduplicated = 0; @@ -617,8 +668,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; 
o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phioriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - mPhiTrackoriginalvsphioriginal[layer]->Fill(phiTrack, phioriginal); + phioriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; mPhiOriginal[layer]->Fill(phioriginal); mPtOriginal[layer]->Fill(pt); @@ -645,7 +695,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; // phiduplicated = std::atan2(clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.x()) * 180 / M_PI + 180; - phiduplicated = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phiduplicated = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; auto labsClus = mClustersMCLCont->getLabels(iClus); // ideally I can have more than one label per cluster for (auto labC : labsClus) { @@ -686,8 +736,6 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) mEtaDuplicated[layerClus]->Fill(eta); mPhiDuplicated[layerClus]->Fill(phiduplicated); mZvsPhiDUplicated[layerClus]->Fill(clusDuplicatedPointGlob.Z(), phiduplicated); - mPhiTrackDuplicated[layerClus]->Fill(phiTrack); - mPhiTrackDuplicatedvsphiDuplicated[layerClus]->Fill(phiTrack, phioriginal); mPhiOriginalIfDuplicated[layerClus]->Fill(phioriginal); } @@ -700,6 +748,8 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) } UShort_t rowDuplicated = clusDuplicated.getRow(); mDuplicatedRow[layerDuplicated]->Fill(rowOriginal); + mDuplicatedCol[layerDuplicated]->Fill(clusOriginal.getCol()); + mDuplicatedZ[layerDuplicated]->Fill(clusOriginalPointGlob.Z()); mDuplicatedPt[layerDuplicated]->Fill(pt); mDuplicatedPtEta[layerDuplicated]->Fill(pt, eta); 
mDuplicatedPtPhi[layerDuplicated]->Fill(pt, phiduplicated); @@ -713,12 +763,6 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) m3DClusterPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.z()); m2DClusterDuplicatedPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y()); - /// compute the distance between original and dubplicated cluster - mDistanceClustersX[layerClus]->Fill(abs(clusOriginalPointGlob.x() - clusDuplicatedPointGlob.x())); - mDistanceClustersY[layerClus]->Fill(abs(clusOriginalPointGlob.y() - clusDuplicatedPointGlob.y())); - mDistanceClustersZ[layerClus]->Fill(abs(clusOriginalPointGlob.z() - clusDuplicatedPointGlob.z())); - mDistanceClusters[layerClus]->Fill(std::hypot(clusOriginalPointGlob.x() - clusDuplicatedPointGlob.x(), clusOriginalPointGlob.y() - clusDuplicatedPointGlob.y(), clusOriginalPointGlob.z() - clusDuplicatedPointGlob.z())); - /// Compute the DCA between the cluster location and the track /// first propagate to the original cluster @@ -815,7 +859,6 @@ void EfficiencyStudy::countDuplicatedAfterCuts() rofNEntriesClus = mClustersROFRecords[iROF].getNEntries(); for (unsigned int iTrack = rofIndexTrack; iTrack < rofIndexTrack + rofNEntriesTrack; iTrack++) { // loop on tracks per ROF - // std::cout<<"Track number: "< mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { + // applying the cuts on the track - only eta + + if (eta < mEtaCuts[0] || eta >= mEtaCuts[1]) { continue; } @@ -860,38 +900,22 @@ void EfficiencyStudy::countDuplicatedAfterCuts() o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; + phiOriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; - /// applying the 
cuts on the phi of the original cluster - bool keepTrack = false; /// wether or not a cluster is found in an eligible track in the corresponding layer - - if (layerOriginal == 0) { - for (int i = 0; i < 10; i++) { - if ((phiOriginal >= mPhiCutsL0[i][0] && phiOriginal <= mPhiCutsL0[i][1])) { - possibleduplicated[0]++; - keepTrack = true; - } - } + if (abs(clusOriginalPointGlob.y()) < 0.5) { ///// excluding gap between bottom and top barrels + continue; } - if (layerOriginal == 1) { - for (int i = 0; i < 12; i++) { - if ((phiOriginal >= mPhiCutsL1[i][0] && phiOriginal <= mPhiCutsL1[i][1])) { - possibleduplicated[1]++; - keepTrack = true; - } - } + + if (abs(clusOriginalPointGlob.z()) >= 10) { /// excluding external z + continue; } - if (layerOriginal == 2) { - for (int i = 0; i < 17; i++) { - if ((phiOriginal >= mPhiCutsL2[i][0] && phiOriginal <= mPhiCutsL2[i][1])) { - possibleduplicated[2]++; - keepTrack = true; - } - } + + if (clusOriginal.getRow() < 2 || (clusOriginal.getRow() > 15 && clusOriginal.getRow() < 496) || clusOriginal.getRow() > 509) { //// cutting on the row + continue; } - if (!keepTrack) { - continue; /// if the track (cluster) is not eligible for any layer, go to the next one + if (clusOriginal.getCol() < 160 || clusOriginal.getCol() > 870) { /// excluding the gap between two chips in the same stave (comment to obtain the plot efficiency col vs eta) + continue; } for (auto& labT : labsTrack) { // for each valid label iterate over ALL the clusters in the ROF to see if there are duplicates @@ -912,7 +936,7 @@ void EfficiencyStudy::countDuplicatedAfterCuts() o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phi = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; auto labsClus = 
mClustersMCLCont->getLabels(iClus); // ideally I can have more than one label per cluster for (auto labC : labsClus) { @@ -940,7 +964,7 @@ void EfficiencyStudy::countDuplicatedAfterCuts() } duplicated[layer]++; - std::cout << "Taken L" << layer << " # " << duplicated[layer] << " : pt, eta, phi = " << pt << " , " << eta << " , " << phiOriginal << " Label: " << std::endl; + std::cout << "Taken L" << layer << " # " << duplicated[layer] << " : eta, phi = " << eta << " , " << phiOriginal << " Label: " << std::endl; labC.print(); } } @@ -1035,15 +1059,6 @@ void EfficiencyStudy::studyDCAcutsMC() float ip[2]; track.getImpactParams(0, 0, 0, 0, ip); - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } - - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - float phi = -999.; float phiOriginal = -999.; int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track @@ -1077,7 +1092,7 @@ void EfficiencyStudy::studyDCAcutsMC() o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; + phiOriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; for (auto& labT : labsOriginal) { // for each valid label iterate over ALL the clusters in the ROF to see if there are duplicates if (labT != tracklab) { @@ -1114,7 +1129,7 @@ void EfficiencyStudy::studyDCAcutsMC() o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phi = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; /// Compute 
the DCA between the duplicated cluster location and the track trackParCov.rotate(mGeometry->getSensorRefAlpha(clusDuplicated.getSensorID())); @@ -1150,7 +1165,6 @@ void EfficiencyStudy::studyDCAcutsMC() mnGoodMatchesPt_layer[layerDuplicated]->Fill(pt, i); mnGoodMatchesEta_layer[layerDuplicated]->Fill(eta, i); mnGoodMatchesPhi_layer[layerDuplicated]->Fill(phi, i); - mnGoodMatchesPhiTrack_layer[layerDuplicated]->Fill(phiTrack, i); mnGoodMatchesPhiOriginal_layer[layerDuplicated]->Fill(phiOriginal, i); } else { @@ -1159,7 +1173,6 @@ void EfficiencyStudy::studyDCAcutsMC() mnFakeMatchesPt_layer[layerDuplicated]->Fill(pt, i); mnFakeMatchesEta_layer[layerDuplicated]->Fill(eta, i); mnFakeMatchesPhi_layer[layerDuplicated]->Fill(phi, i); - mnFakeMatchesPhiTrack_layer[layerDuplicated]->Fill(phiTrack, i); } } else if (mVerboseOutput) { LOGP(info, "Check DCA failed"); @@ -1211,13 +1224,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyGoodMatchPhiOriginal_layer[l]->SetBinContent(iphi + 1, i + 1, mnGoodMatchesPhiOriginal_layer[l]->GetBinContent(iphi + 1, i + 1) / mPhiOriginalIfDuplicated[l]->GetBinContent(iphi + 1)); } } - - for (int iphi = 0; iphi < mPhiTrackDuplicated[l]->GetNbinsX(); iphi++) { - if (mPhiTrackDuplicated[l]->GetBinContent(iphi + 1) != 0) { - mEfficiencyGoodMatchPhiTrack_layer[l]->SetBinContent(iphi + 1, i + 1, mnGoodMatchesPhiTrack_layer[l]->GetBinContent(iphi + 1, i + 1) / mPhiTrackDuplicated[l]->GetBinContent(iphi + 1)); - } - mEfficiencyFakeMatchPhiTrack_layer[l]->SetBinContent(iphi + 1, i + 1, mnFakeMatchesPhiTrack_layer[l]->GetBinContent(iphi + 1, i + 1) / mPhiTrackDuplicated[l]->GetBinContent(iphi + 1)); - } } } for (int i = 0; i < NLAYERS; i++) { @@ -1243,8 +1249,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyGoodMatchEta_layer[l]->Write(); mEfficiencyGoodMatchPhi_layer[l]->GetZaxis()->SetRangeUser(0, 1); mEfficiencyGoodMatchPhi_layer[l]->Write(); - mEfficiencyGoodMatchPhiTrack_layer[l]->GetZaxis()->SetRangeUser(0, 1); - 
mEfficiencyGoodMatchPhiTrack_layer[l]->Write(); mEfficiencyGoodMatchPhiOriginal_layer[l]->GetZaxis()->SetRangeUser(0, 1); mEfficiencyGoodMatchPhiOriginal_layer[l]->Write(); mEfficiencyFakeMatchPt_layer[l]->GetZaxis()->SetRangeUser(0, 1); @@ -1253,8 +1257,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyFakeMatchEta_layer[l]->Write(); mEfficiencyFakeMatchPhi_layer[l]->GetZaxis()->SetRangeUser(0, 1); mEfficiencyFakeMatchPhi_layer[l]->Write(); - mEfficiencyFakeMatchPhiTrack_layer[l]->GetZaxis()->SetRangeUser(0, 1); - mEfficiencyFakeMatchPhiTrack_layer[l]->Write(); } mOutFile->mkdir("Efficiency/"); @@ -1289,7 +1291,6 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyTotal->Draw("same P l E1_NOSTAT PLC PMC"); leg->Draw("same"); c.Write(); - c.SaveAs("prova.png"); TCanvas cc[NLAYERS]; for (int l = 0; l < NLAYERS; l++) { @@ -1312,12 +1313,12 @@ void EfficiencyStudy::studyDCAcutsMC() mEfficiencyTotal_layer[l]->Draw("same P l E1_NOSTAT"); leg->Draw("same"); cc[l].Write(); - cc[l].SaveAs(Form("provaLayer%d.png", l)); } } void EfficiencyStudy::studyClusterSelectionMC() { + //// to be used only with MC // study to find a good selection method for the duplicated cluster, to be used for non-MC data // iterate over tracks an associated clusters, and find the closer cluster that is not the original one applying cuts on staveID and chipID // fix the DCA < 10 sigma, then compute the efficiency for each bin of pt, eta and phi and also in the rows @@ -1392,14 +1393,6 @@ void EfficiencyStudy::studyClusterSelectionMC() auto pt = trackParCov.getPt(); auto eta = trackParCov.getEta(); - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } - - // auto phi = trackParCov.getPhi()*180/M_PI; float phi = -999.; float phiOriginal = -999.; float phiDuplicated = -999.; @@ -1410,7 +1403,6 @@ void EfficiencyStudy::studyClusterSelectionMC() tracklab.print(); } for (int iclTrack = firstClus; iclTrack < firstClus + ncl; 
iclTrack++) { // loop on clusters associated to the track to extract layer, stave and chip to restrict the possible matches to be searched with the DCA cut - // LOGP(info, "New cluster"); auto& clusOriginal = mClusters[mInputITSidxs[iclTrack]]; auto layerOriginal = mGeometry->getLayer(clusOriginal.getSensorID()); if (layerOriginal >= NLAYERS) { @@ -1426,7 +1418,7 @@ void EfficiencyStudy::studyClusterSelectionMC() o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - auto phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; + auto phiOriginal = clusOriginalPointGlob.phi(); // * 180 / M_PI; auto labsOriginal = mClustersMCLCont->getLabels(mInputITSidxs[iclTrack]); // get labels of the cluster associated to the track (original) auto staveOriginal = mGeometry->getStave(clusOriginal.getSensorID()); @@ -1464,7 +1456,7 @@ void EfficiencyStudy::studyClusterSelectionMC() o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - auto phiDuplicated = clusDuplicatedPointGlob.phi() * 180 / M_PI; + auto phiDuplicated = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; /// Compute the DCA between the duplicated cluster location and the track trackParCov.rotate(mGeometry->getSensorRefAlpha(clusDuplicated.getSensorID())); @@ -1473,7 +1465,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } // Imposing that the distance between the original cluster and the duplicated one is less than x sigma - if (!(abs(meanDCAxyDuplicated[layerDuplicated] - clusDuplicatedDCA[0]) < 8 * sigmaDCAxyDuplicated[layerDuplicated] && abs(meanDCAzDuplicated[layerDuplicated] - clusDuplicatedDCA[1]) 
< 8 * sigmaDCAzDuplicated[layerDuplicated])) { + if (!(clusDuplicatedDCA[0] > mDCACutsXY[layerDuplicated][0] && clusDuplicatedDCA[0] < mDCACutsXY[layerDuplicated][1] && clusDuplicatedDCA[1] > mDCACutsZ[layerDuplicated][0] && clusDuplicatedDCA[1] < mDCACutsZ[layerDuplicated][1])) { continue; } @@ -1503,12 +1495,11 @@ void EfficiencyStudy::studyClusterSelectionMC() for (auto lab : std::get<2>(clusID_rDCA_label)) { if (lab == tracklab) { isGood = true; - diffPhivsPt[layerOriginal]->Fill(pt, abs(phi - phiOriginal)); - IPOriginalifDuplicatedxy[layerOriginal]->Fill(ip[0]); - IPOriginalifDuplicatedz[layerOriginal]->Fill(ip[1]); mNGoodMatchesPt[layerOriginal]->Fill(pt); mNGoodMatchesRow[layerOriginal]->Fill(row); + mNGoodMatchesCol[layerOriginal]->Fill(clusOriginal.getCol()); + mNGoodMatchesZ[layerOriginal]->Fill(clusOriginalPointGlob.Z()); mNGoodMatchesPtEta[layerOriginal]->Fill(pt, eta); mNGoodMatchesPtPhi[layerOriginal]->Fill(pt, phi); mNGoodMatchesEtaPhi[layerOriginal]->Fill(eta, phi); @@ -1529,6 +1520,8 @@ void EfficiencyStudy::studyClusterSelectionMC() mNFakeMatchesPt[layerOriginal]->Fill(pt); mNFakeMatchesRow[layerOriginal]->Fill(row); + mNFakeMatchesCol[layerOriginal]->Fill(clusOriginal.getCol()); + mNFakeMatchesZ[layerOriginal]->Fill(clusOriginalPointGlob.Z()); mNFakeMatchesPtEta[layerOriginal]->Fill(pt, eta); mNFakeMatchesPtPhi[layerOriginal]->Fill(pt, phi); mNFakeMatchesEtaPhi[layerOriginal]->Fill(eta, phi); @@ -1549,33 +1542,37 @@ void EfficiencyStudy::studyClusterSelectionMC() mOutFile->mkdir("EfficiencyCuts/"); mOutFile->cd("EfficiencyCuts/"); - std::cout << "------Calculatin efficiency..." 
<< std::endl; - TH1D* axpt = new TH1D("axpt", "", 1, 0.05, 7.5); - TH1D* axRow = new TH1D("axRow", "", 1, -0.5, 511.5); - TH2D* axptetaGood = new TH2D("axptetaGood", "", 1, 0.05, 7.5, 1, -2, 2); - TH2D* axptetaFake = new TH2D("axptetaFake", "", 1, 0.05, 7.5, 1, -2, 2); - TH2D* axptphiGood = new TH2D("axptphiGood", "", 1, 0.05, 7.5, 1, -180, 180); - TH2D* axptphiFake = new TH2D("axptphiFake", "", 1, 0.05, 7.5, 1, -180, 180); - TH2D* axetaphiGood = new TH2D("axetaphiGood", "", 1, -2, 2, 1, -180, 180); - TH2D* axetaphiFake = new TH2D("axetaphiFake", "", 1, -2, 2, 1, -180, 180); - TH1D* axetaAllPt = new TH1D("axetaAllPt", "", 1, -2, 2); - TH1D* axeta[NLAYERS]; - TH1D* axphi[NLAYERS]; + std::cout << "Calculating efficiency..." << std::endl; + std::unique_ptr axpt = std::make_unique("axpt", "", 1, 0.05, 7.5); + std::unique_ptr axRow = std::make_unique("axRow", "", 1, -0.5, 511.5); + std::unique_ptr axCol = std::make_unique("axRow", "", 1, -0.5, 1023.5); + std::unique_ptr axZ = std::make_unique("axZ", "", 1, -15, 15); + std::unique_ptr axptetaGood = std::make_unique("axptetaGood", "", 1, 0.05, 7.5, 1, -2, 2); + std::unique_ptr axptetaFake = std::make_unique("axptetaFake", "", 1, 0.05, 7.5, 1, -2, 2); + std::unique_ptr axptphiGood = std::make_unique("axptphiGood", "", 1, 0.05, 7.5, 1, -3.2, 3.2); + std::unique_ptr axptphiFake = std::make_unique("axptphiFake", "", 1, 0.05, 7.5, 1, -3.2, 3.2); + std::unique_ptr axetaphiGood = std::make_unique("axetaphiGood", "", 1, -2, 2, 1, -3.2, 3.2); + std::unique_ptr axetaphiFake = std::make_unique("axetaphiFake", "", 1, -2, 2, 1, -3.2, 3.2); + std::unique_ptr axetaAllPt = std::make_unique("axetaAllPt", "", 1, -2, 2); + std::unique_ptr axeta[NLAYERS]; + std::unique_ptr axphi[NLAYERS]; for (int ipt = 0; ipt < 3; ipt++) { - axeta[ipt] = new TH1D(Form("axeta%d", ipt), Form("axeta%d", ipt), 1, -2, 2); - axphi[ipt] = new TH1D(Form("axphi%d", ipt), Form("axphi%d", ipt), 1, -180, 180); + axeta[ipt] = std::make_unique(Form("axeta%d", ipt), 
Form("axeta%d", ipt), 1, -2, 2); + axphi[ipt] = std::make_unique(Form("axphi%d", ipt), Form("axphi%d", ipt), 1, -3.2, 3.2); } - TH1D* axphiAllPt = new TH1D("axphi", "", 1, -180, 180); - - TCanvas* effPt[NLAYERS]; - TCanvas* effRow[NLAYERS]; - TCanvas* effPtEta[NLAYERS][2]; - TCanvas* effPtPhi[NLAYERS][2]; - TCanvas* effEtaPhi[NLAYERS][2]; - TCanvas* effEtaAllPt[NLAYERS]; - TCanvas* effEta[NLAYERS][3]; - TCanvas* effPhiAllPt[NLAYERS]; - TCanvas* effPhi[NLAYERS][3]; + std::unique_ptr axphiAllPt = std::make_unique("axphi", "", 1, -3.2, 3.2); + + std::unique_ptr effPt[NLAYERS]; + std::unique_ptr effRow[NLAYERS]; + std::unique_ptr effCol[NLAYERS]; + std::unique_ptr effZ[NLAYERS]; + std::unique_ptr effPtEta[NLAYERS][2]; + std::unique_ptr effPtPhi[NLAYERS][2]; + std::unique_ptr effEtaPhi[NLAYERS][2]; + std::unique_ptr effEtaAllPt[NLAYERS]; + std::unique_ptr effEta[NLAYERS][3]; + std::unique_ptr effPhiAllPt[NLAYERS]; + std::unique_ptr effPhi[NLAYERS][3]; ///////////////// plotting results for (int l = 0; l < 3; l++) { @@ -1583,12 +1580,8 @@ void EfficiencyStudy::studyClusterSelectionMC() std::cout << "Pt L" << l << "\n\n"; } - diffPhivsPt[l]->Write(); - IPOriginalifDuplicatedxy[l]->Write(); - IPOriginalifDuplicatedz[l]->Write(); - // Pt - effPt[l] = new TCanvas(Form("effPt_L%d", l)); + effPt[l] = std::make_unique(Form("effPt_L%d", l)); mEffPtGood[l] = std::make_unique(*mNGoodMatchesPt[l], *mDuplicatedPt[l]); stileEfficiencyGraph(mEffPtGood[l], Form("mEffPtGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); @@ -1616,7 +1609,7 @@ void EfficiencyStudy::studyClusterSelectionMC() effPt[l]->Write(); // PtEtaGood - effPtEta[l][0] = new TCanvas(Form("effPtEtaGood_L%d", l)); + effPtEta[l][0] = std::make_unique(Form("effPtEtaGood_L%d", l)); mEffPtEtaGood[l] = std::make_unique(*mNGoodMatchesPtEta[l], *mDuplicatedPtEta[l]); stileEfficiencyGraph(mEffPtEtaGood[l], Form("mEffPtEtaGood_L%d", l), Form("L%d;#it{p}_{T} 
(GeV/#it{c});#eta;Efficiency", l), true); @@ -1647,7 +1640,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } // Row - effRow[l] = new TCanvas(Form("effRow_L%d", l)); + effRow[l] = std::make_unique(Form("effRow_L%d", l)); for (int ibin = 1; ibin <= mNGoodMatchesRow[l]->GetNbinsX(); ibin++) { std::cout << "--- Good Row: Npass = " << mNGoodMatchesRow[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedRow[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; @@ -1667,7 +1660,7 @@ void EfficiencyStudy::studyClusterSelectionMC() axRow->SetTitle(Form("L%d;Row;Efficiency", l)); axRow->GetYaxis()->SetRangeUser(-0.1, 1.1); - axRow->GetXaxis()->SetRangeUser(0.05, 7.5); + axRow->GetXaxis()->SetRangeUser(0, 512); axRow->Draw(); mEffRowGood[l]->Draw("same p"); mEffRowFake[l]->Draw("same p"); @@ -1678,8 +1671,72 @@ void EfficiencyStudy::studyClusterSelectionMC() legRow->Draw("same"); effRow[l]->Write(); + // Col + effCol[l] = std::make_unique(Form("effCol_L%d", l)); + + for (int ibin = 1; ibin <= mNGoodMatchesCol[l]->GetNbinsX(); ibin++) { + std::cout << "--- Good Col: Npass = " << mNGoodMatchesCol[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedCol[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + } + + mEffColGood[l] = std::make_unique(*mNGoodMatchesCol[l], *mDuplicatedCol[l]); + stileEfficiencyGraph(mEffColGood[l], Form("mEffColGood_L%d", l), Form("L%d;Col;Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + + for (int ibin = 1; ibin <= mNFakeMatchesCol[l]->GetNbinsX(); ibin++) { + if (mNFakeMatchesCol[l]->GetBinContent(ibin) > mDuplicatedCol[l]->GetBinContent(ibin)) { + std::cout << "--- Col: Npass = " << mNFakeMatchesCol[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedCol[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + mNFakeMatchesCol[l]->SetBinContent(ibin, mDuplicatedCol[l]->GetBinContent(ibin)); + } + } + mEffColFake[l] = std::make_unique(*mNFakeMatchesCol[l], *mDuplicatedCol[l]); + 
stileEfficiencyGraph(mEffColFake[l], Form("mEffColFake_L%d", l), Form("L%d;Col;Efficiency", l), false, kFullDiamond, 1, kRed + 1, kRed + 1); + + axCol->SetTitle(Form("L%d;Col;Efficiency", l)); + axCol->GetYaxis()->SetRangeUser(-0.1, 1.1); + axCol->GetXaxis()->SetRangeUser(0, 1024); + axCol->Draw(); + mEffColGood[l]->Draw("same p"); + mEffColFake[l]->Draw("same p"); + + auto legCol = std::make_unique(0.70, 0.15, 0.89, 0.35); + legCol->AddEntry(mEffColGood[l].get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legCol->AddEntry(mEffColFake[l].get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legCol->Draw("same"); + effCol[l]->Write(); + + // Z + effZ[l] = std::make_unique(Form("effZ_L%d", l)); + + for (int ibin = 1; ibin <= mNGoodMatchesZ[l]->GetNbinsX(); ibin++) { + std::cout << "--- Good Z: Npass = " << mNGoodMatchesZ[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedZ[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + } + + mEffZGood[l] = std::make_unique(*mNGoodMatchesZ[l], *mDuplicatedZ[l]); + stileEfficiencyGraph(mEffZGood[l], Form("mEffZGood_L%d", l), Form("L%d;Z;Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + + for (int ibin = 1; ibin <= mNFakeMatchesZ[l]->GetNbinsX(); ibin++) { + if (mNFakeMatchesZ[l]->GetBinContent(ibin) > mDuplicatedZ[l]->GetBinContent(ibin)) { + std::cout << "--- Z: Npass = " << mNFakeMatchesZ[l]->GetBinContent(ibin) << ", Nall = " << mDuplicatedZ[l]->GetBinContent(ibin) << " for ibin = " << ibin << std::endl; + mNFakeMatchesZ[l]->SetBinContent(ibin, mDuplicatedZ[l]->GetBinContent(ibin)); + } + } + mEffZFake[l] = std::make_unique(*mNFakeMatchesZ[l], *mDuplicatedZ[l]); + stileEfficiencyGraph(mEffZFake[l], Form("mEffZFake_L%d", l), Form("L%d;Z;Efficiency", l), false, kFullDiamond, 1, kRed + 1, kRed + 1); + + axZ->SetTitle(Form("L%d;Z;Efficiency", l)); + axZ->GetYaxis()->SetRangeUser(-0.1, 1.1); + axZ->GetXaxis()->SetRangeUser(0, 512); + axZ->Draw(); + 
mEffZGood[l]->Draw("same p"); + mEffZFake[l]->Draw("same p"); + + auto legZ = std::make_unique(0.70, 0.15, 0.89, 0.35); + legZ->AddEntry(mEffZGood[l].get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legZ->AddEntry(mEffZFake[l].get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legZ->Draw("same"); + effZ[l]->Write(); + // PtEtaGood - effPtEta[l][0] = new TCanvas(Form("effPtEtaGood_L%d", l)); + effPtEta[l][0] = std::make_unique(Form("effPtEtaGood_L%d", l)); mEffPtEtaGood[l] = std::make_unique(*mNGoodMatchesPtEta[l], *mDuplicatedPtEta[l]); stileEfficiencyGraph(mEffPtEtaGood[l], Form("mEffPtEtaGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#eta;Efficiency", l), true); @@ -1710,7 +1767,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } // PtEtaFake - effPtEta[l][1] = new TCanvas(Form("effPtEtaFake_L%d", l)); + effPtEta[l][1] = std::make_unique(Form("effPtEtaFake_L%d", l)); mEffPtEtaFake[l] = std::make_unique(*mNFakeMatchesPtEta[l], *mDuplicatedPtEta[l]); stileEfficiencyGraph(mEffPtEtaFake[l], Form("mEffPtEtaFake_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#eta;Efficiency", l), true); @@ -1724,14 +1781,14 @@ void EfficiencyStudy::studyClusterSelectionMC() effPtEta[l][1]->Write(); // PtPhiGood - effPtPhi[l][0] = new TCanvas(Form("effPtPhiGood_L%d", l)); + effPtPhi[l][0] = std::make_unique(Form("effPtPhiGood_L%d", l)); mEffPtPhiGood[l] = std::make_unique(*mNGoodMatchesPtPhi[l], *mDuplicatedPtPhi[l]); - stileEfficiencyGraph(mEffPtPhiGood[l], Form("mEffPtPhiGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l), true); + stileEfficiencyGraph(mEffPtPhiGood[l], Form("mEffPtPhiGood_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l), true); - axptphiGood->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l)); + axptphiGood->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l)); axptphiGood->GetZaxis()->SetRangeUser(-0.1, 1.1); - 
axptphiGood->GetYaxis()->SetRangeUser(-180, 180); + axptphiGood->GetYaxis()->SetRangeUser(-3.2, 3.2); axptphiGood->GetXaxis()->SetRangeUser(0.05, 7.5); axptphiGood->Draw(); mEffPtPhiGood[l]->Draw("same colz"); @@ -1750,13 +1807,13 @@ void EfficiencyStudy::studyClusterSelectionMC() } // PtPhiFake - effPtPhi[l][1] = new TCanvas(Form("effPtPhiFake_L%d", l)); + effPtPhi[l][1] = std::make_unique(Form("effPtPhiFake_L%d", l)); mEffPtPhiFake[l] = std::make_unique(*mNFakeMatchesPtPhi[l], *mDuplicatedPtPhi[l]); - stileEfficiencyGraph(mEffPtPhiFake[l], Form("mEffPtPhiFake_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l), true); - axptphiFake->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (deg);Efficiency", l)); + stileEfficiencyGraph(mEffPtPhiFake[l], Form("mEffPtPhiFake_L%d", l), Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l), true); + axptphiFake->SetTitle(Form("L%d;#it{p}_{T} (GeV/#it{c});#phi (rad);Efficiency", l)); axptphiFake->GetZaxis()->SetRangeUser(-0.1, 1.1); - axptphiFake->GetYaxis()->SetRangeUser(-180, 180); + axptphiFake->GetYaxis()->SetRangeUser(-3.2, 3.2); axptphiFake->GetXaxis()->SetRangeUser(0.05, 7.5); axptphiFake->Draw(); mEffPtPhiFake[l]->Draw("same colz"); @@ -1764,14 +1821,14 @@ void EfficiencyStudy::studyClusterSelectionMC() effPtPhi[l][1]->Write(); // EtaPhiGood - effEtaPhi[l][0] = new TCanvas(Form("effEtaPhiGood_L%d", l)); + effEtaPhi[l][0] = std::make_unique(Form("effEtaPhiGood_L%d", l)); mEffEtaPhiGood[l] = std::make_unique(*mNGoodMatchesEtaPhi[l], *mDuplicatedEtaPhi[l]); - stileEfficiencyGraph(mEffEtaPhiGood[l], Form("mEffEtaPhiGood_L%d", l), Form("L%d;#eta;#phi (deg);Efficiency", l), true); + stileEfficiencyGraph(mEffEtaPhiGood[l], Form("mEffEtaPhiGood_L%d", l), Form("L%d;#eta;#phi (rad);Efficiency", l), true); - axetaphiGood->SetTitle(Form("L%d;#eta;#phi (deg);Efficiency", l)); + axetaphiGood->SetTitle(Form("L%d;#eta;#phi (rad);Efficiency", l)); axetaphiGood->GetZaxis()->SetRangeUser(-0.1, 1.1); - 
axetaphiGood->GetYaxis()->SetRangeUser(-180, 180); + axetaphiGood->GetYaxis()->SetRangeUser(-3.2, 3.2); axetaphiGood->GetXaxis()->SetRangeUser(-2, 2); axetaphiGood->Draw(); mEffEtaPhiGood[l]->Draw("same colz"); @@ -1790,13 +1847,13 @@ void EfficiencyStudy::studyClusterSelectionMC() } // EtaPhiFake - effEtaPhi[l][1] = new TCanvas(Form("effEtaPhiFake_L%d", l)); + effEtaPhi[l][1] = std::make_unique(Form("effEtaPhiFake_L%d", l)); mEffEtaPhiFake[l] = std::make_unique(*mNFakeMatchesEtaPhi[l], *mDuplicatedEtaPhi[l]); - stileEfficiencyGraph(mEffEtaPhiFake[l], Form("mEffEtaPhiFake_L%d", l), Form("L%d;#eta;#phi (deg);Efficiency", l), true); - axetaphiFake->SetTitle(Form("L%d;#eta;#phi (deg);Efficiency", l)); + stileEfficiencyGraph(mEffEtaPhiFake[l], Form("mEffEtaPhiFake_L%d", l), Form("L%d;#eta;#phi (rad);Efficiency", l), true); + axetaphiFake->SetTitle(Form("L%d;#eta;#phi (rad);Efficiency", l)); axetaphiFake->GetZaxis()->SetRangeUser(-0.1, 1.1); - axetaphiFake->GetYaxis()->SetRangeUser(-180, 180); + axetaphiFake->GetYaxis()->SetRangeUser(-3.2, 3.2); axetaphiFake->GetXaxis()->SetRangeUser(-2, 2); axetaphiFake->Draw(); mEffEtaPhiFake[l]->Draw("same colz"); @@ -1808,7 +1865,7 @@ void EfficiencyStudy::studyClusterSelectionMC() std::cout << "Eta L" << l << "\n\n"; } - effEtaAllPt[l] = new TCanvas(Form("effEtaAllPt_L%d", l)); + effEtaAllPt[l] = std::make_unique(Form("effEtaAllPt_L%d", l)); mEffEtaGoodAllPt[l] = std::make_unique(*mNGoodMatchesEtaAllPt[l], *mDuplicatedEtaAllPt[l]); stileEfficiencyGraph(mEffEtaGoodAllPt[l], Form("mEffEtaGoodAllPt_L%d", l), Form("L%d;#eta;Efficiency", l), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); @@ -1840,7 +1897,7 @@ void EfficiencyStudy::studyClusterSelectionMC() /// eta and phi in different pt ranges for (int ipt = 0; ipt < 3; ipt++) { // eta - effEta[l][ipt] = new TCanvas(Form("effEta_L%d_pt%d", l, ipt)); + effEta[l][ipt] = std::make_unique(Form("effEta_L%d_pt%d", l, ipt)); mEffEtaGood[l][ipt] = std::make_unique(*mNGoodMatchesEta[l][ipt], 
*mDuplicatedEta[l][ipt]); stileEfficiencyGraph(mEffEtaGood[l][ipt], Form("mEffEtaGood_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#eta;Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); @@ -1871,7 +1928,7 @@ void EfficiencyStudy::studyClusterSelectionMC() effEta[l][ipt]->Write(); // phi - effPhi[l][ipt] = new TCanvas(Form("effPhi_L%d_pt%d", l, ipt)); + effPhi[l][ipt] = std::make_unique(Form("effPhi_L%d_pt%d", l, ipt)); for (int ibin = 1; ibin <= mNGoodMatchesPhi[l][ipt]->GetNbinsX(); ibin++) { if (mNGoodMatchesPhi[l][ipt]->GetBinContent(ibin) > mDuplicatedPhi[l][ipt]->GetBinContent(ibin)) { @@ -1883,7 +1940,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } mEffPhiGood[l][ipt] = std::make_unique(*mNGoodMatchesPhi[l][ipt], *mDuplicatedPhi[l][ipt]); - stileEfficiencyGraph(mEffPhiGood[l][ipt], Form("mEffPhiGood_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (deg);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + stileEfficiencyGraph(mEffPhiGood[l][ipt], Form("mEffPhiGood_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (rad);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kGreen + 3, kGreen + 3); for (int ibin = 1; ibin <= mNFakeMatchesPhi[l][ipt]->GetNbinsX(); ibin++) { if (mNFakeMatchesPhi[l][ipt]->GetBinContent(ibin) > mDuplicatedPhi[l][ipt]->GetBinContent(ibin)) { @@ -1895,9 +1952,9 @@ void EfficiencyStudy::studyClusterSelectionMC() } mEffPhiFake[l][ipt] = std::make_unique(*mNFakeMatchesPhi[l][ipt], *mDuplicatedPhi[l][ipt]); - stileEfficiencyGraph(mEffPhiFake[l][ipt], Form("mEffPhiFake_L%d_pt%d", l, ipt), Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (deg);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kRed + 1, kRed + 1); + stileEfficiencyGraph(mEffPhiFake[l][ipt], Form("mEffPhiFake_L%d_pt%d", l, ipt), 
Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (rad);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1]), false, kFullDiamond, 1, kRed + 1, kRed + 1); - axphi[ipt]->SetTitle(Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (deg);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1])); + axphi[ipt]->SetTitle(Form("L%d %.1f #leq #it{p}_{T} < %.1f GeV/#it{c};#phi (rad);Efficiency", l, mrangesPt[ipt][0], mrangesPt[ipt][1])); axphi[ipt]->GetYaxis()->SetRangeUser(-0.1, 1.1); axphi[ipt]->Draw(); @@ -1916,7 +1973,7 @@ void EfficiencyStudy::studyClusterSelectionMC() std::cout << "Phi L" << l << "\n\n"; } - effPhiAllPt[l] = new TCanvas(Form("effPhiAllPt_L%d", l)); + effPhiAllPt[l] = std::make_unique(Form("effPhiAllPt_L%d", l)); for (int ibin = 1; ibin <= mNGoodMatchesPhiAllPt[l]->GetNbinsX(); ibin++) { if (mNGoodMatchesPhiAllPt[l]->GetBinContent(ibin) > mDuplicatedPhiAllPt[l]->GetBinContent(ibin)) { @@ -1953,85 +2010,170 @@ void EfficiencyStudy::studyClusterSelectionMC() legPhi->Draw("same"); effPhiAllPt[l]->Write(); } -} - -void EfficiencyStudy::saveDataInfo() -{ - // save histograms for data (phi, eta, pt,...) 
- LOGP(info, "--------------- saveDataInfo"); - - unsigned int rofIndexTrack = 0; - unsigned int rofNEntriesTrack = 0; - unsigned int rofIndexClus = 0; - unsigned int rofNEntriesClus = 0; - unsigned int totClus = 0; - - for (unsigned int iROF = 0; iROF < mTracksROFRecords.size(); iROF++) { // loop on ROFRecords array - rofIndexTrack = mTracksROFRecords[iROF].getFirstEntry(); - rofNEntriesTrack = mTracksROFRecords[iROF].getNEntries(); - - rofIndexClus = mClustersROFRecords[iROF].getFirstEntry(); - rofNEntriesClus = mClustersROFRecords[iROF].getNEntries(); - - for (unsigned int iTrack = rofIndexTrack; iTrack < rofIndexTrack + rofNEntriesTrack; iTrack++) { // loop on tracks per ROF - auto track = mTracks[iTrack]; - o2::track::TrackParCov trackParCov = mTracks[iTrack]; - int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track - int ncl = track.getNumberOfClusters(); // get the number of clusters of the track - - if (ncl < 7) { - continue; - } - float ip[2]; - track.getImpactParams(0, 0, 0, 0, ip); - - auto pt = trackParCov.getPt(); - auto eta = trackParCov.getEta(); - - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - - // if (pt < mPtCuts[0] || pt > mPtCuts[1]) continue; - // if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) continue; - float phioriginal = 0; - float phiduplicated = 0; - - for (int iclTrack = firstClus; iclTrack < firstClus + ncl; iclTrack++) { // loop on clusters associated to the track - auto& clusOriginal = mClusters[mInputITSidxs[iclTrack]]; - auto clusOriginalPoint = mITSClustersArray[mInputITSidxs[iclTrack]]; // cluster spacepoint in the tracking system - auto staveOriginal = mGeometry->getStave(clusOriginal.getSensorID()); - auto chipOriginal = mGeometry->getChipIdInStave(clusOriginal.getSensorID()); - - auto layer = mGeometry->getLayer(clusOriginal.getSensorID()); - if (layer >= NLAYERS) { - continue; // checking only selected layers - } - - o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), 
clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; - o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - - phioriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - - mPhiOriginal[layer]->Fill(phioriginal); - mPhiTrackOriginal[layer]->Fill(phiTrack); - mPtOriginal[layer]->Fill(pt); - mEtaOriginal[layer]->Fill(eta); - m3DClusterPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y(), clusOriginalPointGlob.z()); - m2DClusterOriginalPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); - } // end loop on clusters - totClus += ncl; - } // end loop on tracks per ROF - } // end loop on ROFRecords array - LOGP(info, "Total number of clusters: {} ", totClus); + /// all Row + std::unique_ptr effRowAll = std::make_unique("effRowAll"); + auto numRowGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesRow[0]->Clone("numRowGoodAll")); + numRowGoodAll->Add(mNGoodMatchesRow[1].get()); + numRowGoodAll->Add(mNGoodMatchesRow[2].get()); + numRowGoodAll->Write(); + auto numRowFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesRow[0]->Clone("numRowFakeAll")); + numRowFakeAll->Add(mNFakeMatchesRow[1].get()); + numRowFakeAll->Add(mNFakeMatchesRow[2].get()); + numRowFakeAll->Write(); + auto denRowAll = std::unique_ptr((TH1D*)mDuplicatedRow[0]->Clone("denRowAll")); + denRowAll->Add(mDuplicatedRow[1].get()); + denRowAll->Add(mDuplicatedRow[2].get()); + denRowAll->Write(); + + std::unique_ptr mEffRowGoodAll = std::make_unique(*numRowGoodAll, *denRowAll); + stileEfficiencyGraph(mEffRowGoodAll, "mEffRowGoodAll", "L0 + L1 + L2;Row;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffRowFakeAll = std::make_unique(*numRowFakeAll, *denRowAll); + stileEfficiencyGraph(mEffRowFakeAll, "mEffRowFakeAll", "L0 + L1 + L2;Row;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axRow->SetTitle("L0 + L1 + L2;Row;Efficiency"); + axRow->GetYaxis()->SetRangeUser(-0.1, 1.1); + 
axRow->Draw(); + mEffRowGoodAll->Draw("same p"); + mEffRowFakeAll->Draw("same p"); + + auto legRow = std::make_unique(0.70, 0.15, 0.89, 0.35); + legRow->AddEntry(mEffRowGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legRow->AddEntry(mEffRowFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legRow->Draw("same"); + effRowAll->Write(); + + /// all Col + std::unique_ptr effColAll = std::make_unique("effColAll"); + auto numColGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesCol[0]->Clone("numColGoodAll")); + numColGoodAll->Add(mNGoodMatchesCol[1].get()); + numColGoodAll->Add(mNGoodMatchesCol[2].get()); + numColGoodAll->Write(); + auto numColFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesCol[0]->Clone("numColFakeAll")); + numColFakeAll->Add(mNFakeMatchesCol[1].get()); + numColFakeAll->Add(mNFakeMatchesCol[2].get()); + numColFakeAll->Write(); + auto denColAll = std::unique_ptr((TH1D*)mDuplicatedCol[0]->Clone("denColAll")); + denColAll->Add(mDuplicatedCol[1].get()); + denColAll->Add(mDuplicatedCol[2].get()); + denColAll->Write(); + + std::unique_ptr mEffColGoodAll = std::make_unique(*numColGoodAll, *denColAll); + stileEfficiencyGraph(mEffColGoodAll, "mEffColGoodAll", "L0 + L1 + L2;Column;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffColFakeAll = std::make_unique(*numColFakeAll, *denColAll); + stileEfficiencyGraph(mEffColFakeAll, "mEffColFakeAll", "L0 + L1 + L2;Column;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axCol->SetTitle("L0 + L1 + L2;Col;Efficiency"); + axCol->GetYaxis()->SetRangeUser(-0.1, 1.1); + axCol->Draw(); + mEffColGoodAll->Draw("same p"); + mEffColFakeAll->Draw("same p"); + + auto legCol = std::make_unique(0.70, 0.15, 0.89, 0.35); + legCol->AddEntry(mEffColGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legCol->AddEntry(mEffColFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legCol->Draw("same"); + 
effColAll->Write(); + + /// all Z + std::unique_ptr effZAll = std::make_unique("effZAll"); + auto numZGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesZ[0]->Clone("numZGoodAll")); + numZGoodAll->Add(mNGoodMatchesZ[1].get()); + numZGoodAll->Add(mNGoodMatchesZ[2].get()); + numZGoodAll->Write(); + auto numZFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesZ[0]->Clone("numZFakeAll")); + numZFakeAll->Add(mNFakeMatchesZ[1].get()); + numZFakeAll->Add(mNFakeMatchesZ[2].get()); + numZFakeAll->Write(); + auto denZAll = std::unique_ptr((TH1D*)mDuplicatedZ[0]->Clone("denZAll")); + denZAll->Add(mDuplicatedZ[1].get()); + denZAll->Add(mDuplicatedZ[2].get()); + denZAll->Write(); + + std::unique_ptr mEffZGoodAll = std::make_unique(*numZGoodAll, *denZAll); + stileEfficiencyGraph(mEffZGoodAll, "mEffZGoodAll", "L0 + L1 + L2;Z;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffZFakeAll = std::make_unique(*numZFakeAll, *denZAll); + stileEfficiencyGraph(mEffZFakeAll, "mEffZFakeAll", "L0 + L1 + L2;Z;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axZ->SetTitle("L0 + L1 + L2;Z;Efficiency"); + axZ->GetYaxis()->SetRangeUser(-0.1, 1.1); + axZ->Draw(); + mEffZGoodAll->Draw("same p"); + mEffZFakeAll->Draw("same p"); + + auto legZ = std::make_unique(0.70, 0.15, 0.89, 0.35); + legZ->AddEntry(mEffZGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legZ->AddEntry(mEffZFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legZ->Draw("same"); + effZAll->Write(); + + /// all Eta + std::unique_ptr effEtaAll = std::make_unique("effEtaAll"); + auto numEtaGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesEtaAllPt[0]->Clone("numEtaGoodAll")); + numEtaGoodAll->Add(mNGoodMatchesEtaAllPt[1].get()); + numEtaGoodAll->Add(mNGoodMatchesEtaAllPt[2].get()); + numEtaGoodAll->Write(); + auto numEtaFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesEtaAllPt[0]->Clone("numEtaFakeAll")); + numEtaFakeAll->Add(mNFakeMatchesEtaAllPt[1].get()); + 
numEtaFakeAll->Add(mNFakeMatchesEtaAllPt[2].get()); + numEtaFakeAll->Write(); + auto denEtaAll = std::unique_ptr((TH1D*)mDuplicatedEtaAllPt[0]->Clone("denEtaAll")); + denEtaAll->Add(mDuplicatedEtaAllPt[1].get()); + denEtaAll->Add(mDuplicatedEtaAllPt[2].get()); + denEtaAll->Write(); + + std::unique_ptr mEffEtaGoodAll = std::make_unique(*numEtaGoodAll, *denEtaAll); + stileEfficiencyGraph(mEffEtaGoodAll, "mEffEtaGoodAll", "L0 + L1 + L2;#Eta;Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffEtaFakeAll = std::make_unique(*numEtaFakeAll, *denEtaAll); + stileEfficiencyGraph(mEffEtaFakeAll, "mEffEtaFakeAll", "L0 + L1 + L2;#Eta;Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axetaAllPt->SetTitle("L0 + L1 + L2;Eta;Efficiency"); + axetaAllPt->GetYaxis()->SetRangeUser(-0.1, 1.1); + axetaAllPt->Draw(); + mEffEtaGoodAll->Draw("same p"); + mEffEtaFakeAll->Draw("same p"); + + auto legEta = std::make_unique(0.70, 0.15, 0.89, 0.35); + legEta->AddEntry(mEffEtaGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legEta->AddEntry(mEffEtaFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legEta->Draw("same"); + effEtaAll->Write(); + + /// all Phi + std::unique_ptr effPhiAll = std::make_unique("effPhiAll"); + auto numPhiGoodAll = std::unique_ptr((TH1D*)mNGoodMatchesPhiAllPt[0]->Clone("numPhiGoodAll")); + numPhiGoodAll->Add(mNGoodMatchesPhiAllPt[1].get()); + numPhiGoodAll->Add(mNGoodMatchesPhiAllPt[2].get()); + numPhiGoodAll->Write(); + auto numPhiFakeAll = std::unique_ptr((TH1D*)mNFakeMatchesPhiAllPt[0]->Clone("numPhiFakeAll")); + numPhiFakeAll->Add(mNFakeMatchesPhiAllPt[1].get()); + numPhiFakeAll->Add(mNFakeMatchesPhiAllPt[2].get()); + numPhiFakeAll->Write(); + auto denPhiAll = std::unique_ptr((TH1D*)mDuplicatedPhiAllPt[0]->Clone("denPhiAll")); + denPhiAll->Add(mDuplicatedPhiAllPt[1].get()); + denPhiAll->Add(mDuplicatedPhiAllPt[2].get()); + denPhiAll->Write(); + + std::unique_ptr mEffPhiGoodAll 
= std::make_unique(*numPhiGoodAll, *denPhiAll); + stileEfficiencyGraph(mEffPhiGoodAll, "mEffPhiGoodAll", "L0 + L1 + L2;#Phi (rad);Efficiency", false, kFullDiamond, 1, kGreen + 3, kGreen + 3); + std::unique_ptr mEffPhiFakeAll = std::make_unique(*numPhiFakeAll, *denPhiAll); + stileEfficiencyGraph(mEffPhiFakeAll, "mEffPhiFakeAll", "L0 + L1 + L2;#Phi (rad);Efficiency", false, kFullDiamond, 1, kRed + 1, kRed + 1); + axphiAllPt->SetTitle("L0 + L1 + L2;Phi;Efficiency"); + axphiAllPt->GetYaxis()->SetRangeUser(-0.1, 1.1); + axphiAllPt->Draw(); + mEffPhiGoodAll->Draw("same p"); + mEffPhiFakeAll->Draw("same p"); + + auto legPhi = std::make_unique(0.70, 0.15, 0.89, 0.35); + legPhi->AddEntry(mEffPhiGoodAll.get(), "#frac{# good matches}{# tot duplicated clusters}", "pl"); + legPhi->AddEntry(mEffPhiFakeAll.get(), "#frac{# fake matches}{# tot duplicated clusters}", "pl"); + legPhi->Draw("same"); + effPhiAll->Write(); } void EfficiencyStudy::getEfficiency(bool isMC) { // Extract the efficiency for the IB, exploiting the staves overlaps and the duplicated clusters for the tracks passing through the overlaps - // The denominator for the efficiency calculation will be the number of tracks per layer fulfilling some cuts (DCA, phi, eta, pt) + // The denominator for the efficiency calculation will be the number of tracks per layer fulfilling some cuts (eta, z, row, col) // The numerator will be the number of duplicated clusters for the tracks passing through the overlaps - LOGP(info, "--------------- getEfficiency"); + LOGP(info, "getEfficiency()"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; @@ -2041,8 +2183,6 @@ void EfficiencyStudy::getEfficiency(bool isMC) unsigned int rofNEntriesTrack = 0; unsigned int rofIndexClus = 0; unsigned int rofNEntriesClus = 0; - int nLabels = 0; - unsigned int totClus = 0; int nbPt = 75; double xbins[nbPt + 1], ptcutl = 0.05, ptcuth = 7.5; @@ 
-2054,7 +2194,6 @@ void EfficiencyStudy::getEfficiency(bool isMC) int totNClusters; int nDuplClusters; - // denominator fot the efficiency calculation for (unsigned int iROF = 0; iROF < mTracksROFRecords.size(); iROF++) { // loop on ROFRecords array rofIndexTrack = mTracksROFRecords[iROF].getFirstEntry(); @@ -2068,7 +2207,7 @@ void EfficiencyStudy::getEfficiency(bool isMC) auto track = mTracks[iTrack]; o2::track::TrackParCov trackParCov = mTracks[iTrack]; - auto pt = trackParCov.getPt(); + auto pt = trackParCov.getPt(); // Always 0.6 GeV/c for B = 0 T auto eta = trackParCov.getEta(); float phi = -999.; float phiOriginal = -999.; @@ -2078,24 +2217,17 @@ void EfficiencyStudy::getEfficiency(bool isMC) float ip[2]; track.getImpactParams(0, 0, 0, 0, ip); - float phiTrack = trackParCov.getPhi() * 180 / M_PI; + // float phiTrack = trackParCov.getPhi(); // * 180 / M_PI; - // applying the cuts on the track - only pt and eta, and chi2 cuts since for phi(cluster) the layer is needed - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { + // applying the cuts on the track - only eta + if (eta < mEtaCuts[0] || eta >= mEtaCuts[1]) { continue; } - if (chi2 > mChi2cut) { - continue; - } - - /// the cut on phi, since it is layer-dependent, can be applied only after finding the cluster and then the layer int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track int ncl = track.getNumberOfClusters(); // get the number of clusters of the track + //// keeping only 7 clusters track to reduce fakes if (ncl < 7) { continue; } @@ -2109,7 +2241,7 @@ void EfficiencyStudy::getEfficiency(bool isMC) } if (mVerboseOutput && isMC) { - LOGP(info, "--------- track Label: "); + LOGP(info, "track Label: "); tracklab.print(); } @@ -2119,358 +2251,73 @@ void EfficiencyStudy::getEfficiency(bool isMC) auto layerOriginal = mGeometry->getLayer(clusOriginal.getSensorID()); UShort_t rowOriginal = clusOriginal.getRow(); + UShort_t 
colOriginal = clusOriginal.getCol(); + /// filling some chip maps + if (clusOriginal.getChipID() >= 0 && clusOriginal.getChipID() <= 8) { + l0_00->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusOriginal.getRow()); + } + if (clusOriginal.getChipID() >= 252 && clusOriginal.getChipID() <= 260) { + l1_15->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusOriginal.getRow()); + } + if (clusOriginal.getChipID() >= 423 && clusOriginal.getChipID() <= 431) { + l2_19->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusOriginal.getRow()); + } + + //// only IB if (layerOriginal >= NLAYERS) { continue; } + chipmap->Fill(clusOriginal.getCol(), clusOriginal.getRow()); + IPOriginalxy[layerOriginal]->Fill(ip[0]); IPOriginalz[layerOriginal]->Fill(ip[1]); + ///// cluster point and conversion from track local coordinates to global coordinates o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - // phiOriginal = std::(clusOriginalPointGlob.y(), clusOriginalPointGlob.x()) * 180 / M_PI + 180; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - - mXoriginal->Fill(clusOriginalPointGlob.x()); - mYoriginal->Fill(clusOriginalPointGlob.y()); - mZoriginal->Fill(clusOriginalPointGlob.z()); - - // std::cout<<" Layer: "<Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); - m3DClusterPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y(), clusOriginalPointGlob.z()); - - /// applying the cuts on the phi of the original cluster - bool keepTrack = false; /// wether or not a cluster is found in an eligible track in the corresponding layer - if (layerOriginal == 0) { - - for (int i = 0; i < 10; i++) { - if ((phiOriginal >= mPhiCutsL0[i][0] && phiOriginal <= mPhiCutsL0[i][1])) { - keepTrack = true; - } 
- } - } - if (layerOriginal == 1) { - for (int i = 0; i < 12; i++) { - if ((phiOriginal >= mPhiCutsL1[i][0] && phiOriginal <= mPhiCutsL1[i][1])) { - keepTrack = true; - } - } - } - if (layerOriginal == 2) { - for (int i = 0; i < 17; i++) { - if ((phiOriginal >= mPhiCutsL2[i][0] && phiOriginal <= mPhiCutsL2[i][1])) { - keepTrack = true; - } - } - } - - ///////////////////////////////////// - if (!(keepTrack)) { - continue; /// if the track (cluster) is not eligible for any layer, go to the next one - } else { /// fill the den and go ahead - chi2trackAccepted->Fill(chi2); - denPt[layerOriginal]->Fill(pt); - denPhi[layerOriginal]->Fill(phiOriginal); - denEta[layerOriginal]->Fill(eta); - nTracksSelected[layerOriginal]++; + if (abs(clusOriginalPointGlob.y()) < 0.5) { ///// excluding gap between bottom and top barrels + continue; } - /// if the cuts up to here are passed, then search for the duplicated cluster, otherwise go to the next cluster - gsl::span labsOriginal = {}; - if (isMC) { - labsOriginal = mClustersMCLCont->getLabels(mInputITSidxs[iclTrack]); // get labels of the cluster associated to the track (original) + if (abs(clusOriginalPointGlob.z()) >= 10) { /// excluding external z + continue; } - auto staveOriginal = mGeometry->getStave(clusOriginal.getSensorID()); - auto chipOriginal = mGeometry->getChipIdInStave(clusOriginal.getSensorID()); - - std::tuple> clusID_rDCA_label = {0, 999., gsl::span()}; // inizializing tuple with dummy values (if data, ignore the third value) - - bool adjacentFound = 0; - float phiDuplicated = -999.; - float ptDuplicated = -999.; - float etaDuplicated = -999.; - float clusZ = -999.; - /// for each original cluster iterate over all the possible duplicated clusters to see first wether increment or not the denominator (if a track has a possible duplicated cluster in the selected phi region) - /// then if the phi is within the cuts, select the "adjacent" clusters (stave +-1, chip =,+-1) and calculate the DCA with the track. 
Then choose the closest one. - // std::cout<<"Loop on clusters 2"< clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; - o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; - - //// applying constraints: the cluster should be on the same layer, should be on an adjacent stave and on the same or adjacent chip position - if (clusDuplicated.getSensorID() == clusOriginal.getSensorID()) { - continue; - } - auto layerDuplicated = mGeometry->getLayer(clusDuplicated.getSensorID()); - if (layerDuplicated != layerOriginal) { - continue; - } - auto staveDuplicated = mGeometry->getStave(clusDuplicated.getSensorID()); - if (abs(staveDuplicated - staveOriginal) != 1) { - continue; - } - auto chipDuplicated = mGeometry->getChipIdInStave(clusDuplicated.getSensorID()); - if (abs(chipDuplicated - chipOriginal) > 1) { - continue; - } - - gsl::span labsDuplicated = {}; - if (isMC) { - labsDuplicated = mClustersMCLCont->getLabels(iClus); - } - - /// if the cheks are passed, then calculate the DCA - /// Compute the DCA between the duplicated cluster location and the track - trackParCov.rotate(mGeometry->getSensorRefAlpha(clusDuplicated.getSensorID())); - if (!propagator->propagateToDCA(clusDuplicatedPointGlob, trackParCov, b, 2.f, matCorr, &clusDuplicatedDCA)) { // check if the propagation fails - continue; - } - - DCAxyData[layerDuplicated]->Fill(clusDuplicatedDCA[0]); - DCAzData[layerDuplicated]->Fill(clusDuplicatedDCA[1]); - // std::cout<<"DCA: "<Fill(abs(clusDuplicatedPointGlob.x() - clusOriginalPointGlob.x())); - DistanceClustersY[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZ[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); - - // Imposing that the distance between the duplicated cluster and the 
track is less than x sigma - if (!(clusDuplicatedDCA[0] > mDCACutsXY[layerDuplicated][0] && clusDuplicatedDCA[0] < mDCACutsXY[layerDuplicated][1] && clusDuplicatedDCA[1] > mDCACutsZ[layerDuplicated][0] && clusDuplicatedDCA[1] < mDCACutsZ[layerDuplicated][1])) { - DCAxyRejected[layerDuplicated]->Fill(clusDuplicatedDCA[0]); - DCAzRejected[layerDuplicated]->Fill(clusDuplicatedDCA[1]); - continue; - } - - m2DClusterDuplicatedPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y()); - m3DDuplicatedClusterPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.z()); - - mXduplicated->Fill(clusDuplicatedPointGlob.x()); - mYduplicated->Fill(clusDuplicatedPointGlob.y()); - mZduplicated->Fill(clusDuplicatedPointGlob.z()); - - IPOriginalifDuplicatedxy[layerOriginal]->Fill(ip[0]); - IPOriginalifDuplicatedz[layerOriginal]->Fill(ip[1]); - - DistanceClustersXAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.x() - clusOriginalPointGlob.x())); - DistanceClustersYAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); - - if (mVerboseOutput) { - LOGP(info, "Propagation ok"); - } - double rDCA = std::hypot(clusDuplicatedDCA[0], clusDuplicatedDCA[1]); - - // taking the closest cluster within x sigma - if (rDCA < std::get<1>(clusID_rDCA_label)) { // updating the closest cluster - if (isMC) { - clusID_rDCA_label = {iClus, rDCA, labsDuplicated}; - } else { - clusID_rDCA_label = {iClus, rDCA, gsl::span()}; - } - phiDuplicated = phiOriginal; - ptDuplicated = pt; - etaDuplicated = eta; - clusZ = clusOriginalPointGlob.z(); - } - adjacentFound = 1; - } // end loop on all the clusters in the rof -> at this point we have the information on the closest cluster (if there is one) - - // here clusID_rDCA_label is updated with the closest cluster to the track other than the 
original one - - if (!adjacentFound) { + if (rowOriginal < 2 || (rowOriginal > 15 && rowOriginal < 496) || rowOriginal > 509) { //// cutting on the row continue; } - nDuplClusters++; - nDuplicatedClusters[layerOriginal]++; - numPt[layerOriginal]->Fill(ptDuplicated); - numPhi[layerOriginal]->Fill(phiDuplicated); - numEta[layerOriginal]->Fill(etaDuplicated); - mZvsPhiDUplicated[layerOriginal]->Fill(clusZ, phiDuplicated); - // checking if it is a good or fake match looking at the labels (only if isMC) - if (isMC) { - bool isGood = false; - for (auto lab : std::get<2>(clusID_rDCA_label)) { - if (lab == tracklab) { - isGood = true; - numPtGood[layerOriginal]->Fill(ptDuplicated); - numPhiGood[layerOriginal]->Fill(phiDuplicated); - numEtaGood[layerOriginal]->Fill(etaDuplicated); - continue; - } - } - if (!isGood) { - numPtFake[layerOriginal]->Fill(ptDuplicated); - numPhiFake[layerOriginal]->Fill(phiDuplicated); - numEtaFake[layerOriginal]->Fill(etaDuplicated); + if (mUseMC) { //// excluding known bad chips in MC which are not bad in data --- to be checked based on the anchored run + if (std::find(mExcludedChipMC.begin(), mExcludedChipMC.end(), clusOriginal.getChipID()) != mExcludedChipMC.end()) { + continue; } } - } // end loop on clusters associated to the track - totNClusters += NLAYERS; - } // end loop on tracks per ROF - } // end loop on ROFRecords array - std::cout << " Num of duplicated clusters L0: " << nDuplicatedClusters[0] << " N tracks selected: " << nTracksSelected[0] << std::endl; - std::cout << " Num of duplicated clusters L1: " << nDuplicatedClusters[1] << " N tracks selected: " << nTracksSelected[1] << std::endl; - std::cout << " Num of duplicated clusters L2: " << nDuplicatedClusters[2] << " N tracks selected: " << nTracksSelected[2] << std::endl; - - std::cout << " --------- N total clusters: " << totNClusters << std::endl; - std::cout << " --------- N duplicated clusters: " << nDuplClusters << std::endl; -} - -void 
EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) -{ - // Extract the efficiency for the IB, exploiting the staves overlaps and the duplicated clusters for the tracks passing through the overlaps - // The denominator for the efficiency calculation will be the number of tracks per layer fulfilling some cuts (DCA, phi, eta, pt) - // The numerator will be the number of duplicated clusters for the tracks passing through the overlaps - // additionally, print/save info (to be used in MC) - - LOGP(info, "--------------- getEfficiency"); - - o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; - auto propagator = o2::base::Propagator::Instance(); - - unsigned int rofIndexTrack = 0; - unsigned int rofNEntriesTrack = 0; - unsigned int rofIndexClus = 0; - unsigned int rofNEntriesClus = 0; - int nLabels = 0; - unsigned int totClus = 0; - - int nbPt = 75; - double xbins[nbPt + 1], ptcutl = 0.05, ptcuth = 7.5; - double a = std::log(ptcuth / ptcutl) / nbPt; - for (int i = 0; i <= nbPt; i++) { - xbins[i] = ptcutl * std::exp(i * a); - } - - int totNClusters; - int nDuplClusters; - - // denominator fot the efficiency calculation - for (unsigned int iROF = 0; iROF < mTracksROFRecords.size(); iROF++) { // loop on ROFRecords array - - rofIndexTrack = mTracksROFRecords[iROF].getFirstEntry(); - rofNEntriesTrack = mTracksROFRecords[iROF].getNEntries(); - - rofIndexClus = mClustersROFRecords[iROF].getFirstEntry(); - rofNEntriesClus = mClustersROFRecords[iROF].getNEntries(); - - ////// imposing cuts on the tracks = collecting tracks for the denominator - for (unsigned int iTrack = rofIndexTrack; iTrack < rofIndexTrack + rofNEntriesTrack; iTrack++) { // loop on tracks per ROF - auto track = mTracks[iTrack]; - o2::track::TrackParCov trackParCov = mTracks[iTrack]; - - auto pt = trackParCov.getPt(); - auto eta = trackParCov.getEta(); - float phi = -999.; - float phiOriginal = -999.; - - 
float chi2 = track.getChi2(); - - chi2track->Fill(chi2); - - float phiTrack = trackParCov.getPhi() * 180 / M_PI; - - // applying the cuts on the track - only pt and eta cuts since for phi(cluster) the layer is needed - if (pt < mPtCuts[0] || pt > mPtCuts[1]) { - continue; - } - if (eta < mEtaCuts[0] || eta > mEtaCuts[1]) { - continue; - } - if (chi2 > mChi2cut) { - continue; - } - /// the cut on phi, since it is layer-dependent, can be applied only after finding the cluster and then the layer - - int firstClus = track.getFirstClusterEntry(); // get the first cluster of the track - int ncl = track.getNumberOfClusters(); // get the number of clusters of the track - - if (ncl < 7) { - continue; - } - - o2::MCCompLabel tracklab; - if (isMC) { - tracklab = mTracksMCLabels[iTrack]; - if (tracklab.isFake()) { + if (clusOriginal.getCol() < 160 || clusOriginal.getCol() > 870) { /// excluding the gap between two chips in the same stave (comment to obtain the plot efficiency col vs eta) continue; } - } - - if (mVerboseOutput && isMC) { - LOGP(info, "--------- track Label: "); - tracklab.print(); - } - - for (int iclTrack = firstClus; iclTrack < firstClus + ncl; iclTrack++) { // loop on clusters associated to the track to extract layer, stave and chip to restrict the possible matches to be searched with the DCA cut - auto& clusOriginal = mClusters[mInputITSidxs[iclTrack]]; - auto clusOriginalPoint = mITSClustersArray[mInputITSidxs[iclTrack]]; - auto layerOriginal = mGeometry->getLayer(clusOriginal.getSensorID()); - - UShort_t rowOriginal = clusOriginal.getRow(); - - if (layerOriginal >= NLAYERS) { - continue; - } - - o2::math_utils::Point3D clusOriginalPointTrack = {clusOriginalPoint.getX(), clusOriginalPoint.getY(), clusOriginalPoint.getZ()}; - o2::math_utils::Point3D clusOriginalPointGlob = mGeometry->getMatrixT2G(clusOriginal.getSensorID()) * clusOriginalPointTrack; - phiOriginal = clusOriginalPointGlob.phi() * 180 / M_PI; - - mXoriginal->Fill(clusOriginalPointGlob.x()); - 
mYoriginal->Fill(clusOriginalPointGlob.y()); - mZoriginal->Fill(clusOriginalPointGlob.z()); + /// if the track passes the cuts, fill the den and go ahead m2DClusterOriginalPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); m3DClusterPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y(), clusOriginalPointGlob.z()); + chi2trackAccepted->Fill(chi2); + denPt[layerOriginal]->Fill(pt); + denPhi[layerOriginal]->Fill(phiOriginal); + denEta[layerOriginal]->Fill(eta); + denRow[layerOriginal]->Fill(rowOriginal); + denCol[layerOriginal]->Fill(clusOriginal.getCol()); + denZ[layerOriginal]->Fill(clusOriginalPointGlob.z()); + nTracksSelected[layerOriginal]++; + mDenColEta[layerOriginal]->Fill(clusOriginal.getCol(), eta); + mDenRowPhi[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginalPointGlob.z()); + mDenRowCol[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginal.getCol()); + denLayers->Fill(layerOriginal); - /// applying the cuts on the phi of the original cluster - bool keepTrack = false; /// wether or not a cluster is found in an eligible track in the corresponding layer - - if (layerOriginal == 0) { - for (int i = 0; i < 10; i++) { - if ((phiOriginal >= mPhiCutsL0[i][0] && phiOriginal <= mPhiCutsL0[i][1])) { - keepTrack = true; - } - } - } - if (layerOriginal == 1) { - for (int i = 0; i < 12; i++) { - if ((phiOriginal >= mPhiCutsL1[i][0] && phiOriginal <= mPhiCutsL1[i][1])) { - keepTrack = true; - } - } - } - if (layerOriginal == 2) { - for (int i = 0; i < 17; i++) { - if ((phiOriginal >= mPhiCutsL2[i][0] && phiOriginal <= mPhiCutsL2[i][1])) { - keepTrack = true; - } - } - } - if (!(keepTrack)) { - continue; /// if the track (cluster) is not eligible for any layer, go to the next one - } else { /// fill the den and go ahead - chi2trackAccepted->Fill(chi2); - denPt[layerOriginal]->Fill(pt); - denPhi[layerOriginal]->Fill(phiOriginal); - denEta[layerOriginal]->Fill(eta); - nTracksSelected[layerOriginal]++; - } + /// if the cuts up to 
here are passed, then search for the duplicated cluster, otherwise go to the next cluster gsl::span labsOriginal = {}; if (isMC) { labsOriginal = mClustersMCLCont->getLabels(mInputITSidxs[iclTrack]); // get labels of the cluster associated to the track (original) @@ -2487,19 +2334,16 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) float etaDuplicated = -999.; float clusZ = -999.; - o2::MCCompLabel labelCandidateDuplicated; - bool duplExists = false; + o2::itsmft::CompClusterExt clusDuplicatedSelected = o2::itsmft::CompClusterExt(); - /// for each original cluster iterate over all the possible duplicated clusters to see first wether increment or not the denominator (if a track has a possible duplicated cluster in the selected phi region) - /// then if the phi is within the cuts, select the "adjacent" clusters (stave +-1, chip =,+-1) and calculate the DCA with the track. Then choose the closest one. + /// for each original cluster iterate over all the possible duplicated clusters to select the "adjacent" clusters (stave +-1, chip =,+-1) and calculate the DCA with the track. Then choose the closest one. 
for (unsigned int iClus = rofIndexClus; iClus < rofIndexClus + rofNEntriesClus; iClus++) { // iteration over ALL the clusters in the ROF auto clusDuplicated = mClusters[iClus]; - auto clusDuplicatedPoint = mITSClustersArray[iClus]; o2::math_utils::Point3D clusDuplicatedPointTrack = {clusDuplicatedPoint.getX(), clusDuplicatedPoint.getY(), clusDuplicatedPoint.getZ()}; o2::math_utils::Point3D clusDuplicatedPointGlob = mGeometry->getMatrixT2G(clusDuplicated.getSensorID()) * clusDuplicatedPointTrack; - phi = clusDuplicatedPointGlob.phi() * 180 / M_PI; + phi = clusDuplicatedPointGlob.phi(); // * 180 / M_PI; //// applying constraints: the cluster should be on the same layer, should be on an adjacent stave and on the same or adjacent chip position if (clusDuplicated.getSensorID() == clusOriginal.getSensorID()) { @@ -2509,12 +2353,6 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) if (layerDuplicated != layerOriginal) { continue; } - labelCandidateDuplicated = mClustersMCLCont->getLabels(iClus)[0]; - if (labelCandidateDuplicated == tracklab) { - duplExists = true; - std::cout << "Duplicated should exist with label: " << labelCandidateDuplicated.asString() << " , phi = " << phi << " and be: "; - clusDuplicated.print(); - } auto staveDuplicated = mGeometry->getStave(clusDuplicated.getSensorID()); if (abs(staveDuplicated - staveOriginal) != 1) { continue; @@ -2524,8 +2362,6 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) continue; } - std::cout << "checks passed" << std::endl; - gsl::span labsDuplicated = {}; if (isMC) { labsDuplicated = mClustersMCLCont->getLabels(iClus); @@ -2538,13 +2374,8 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) continue; } - std::cout << "dca calculated: " << clusDuplicatedDCA[0] << " " << clusDuplicatedDCA[1] << std::endl; - DCAxyData[layerDuplicated]->Fill(clusDuplicatedDCA[0]); DCAzData[layerDuplicated]->Fill(clusDuplicatedDCA[1]); - DistanceClustersX[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.x() 
- clusOriginalPointGlob.x())); - DistanceClustersY[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZ[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); // Imposing that the distance between the duplicated cluster and the track is less than x sigma if (!(clusDuplicatedDCA[0] > mDCACutsXY[layerDuplicated][0] && clusDuplicatedDCA[0] < mDCACutsXY[layerDuplicated][1] && clusDuplicatedDCA[1] > mDCACutsZ[layerDuplicated][0] && clusDuplicatedDCA[1] < mDCACutsZ[layerDuplicated][1])) { @@ -2552,15 +2383,9 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) DCAzRejected[layerDuplicated]->Fill(clusDuplicatedDCA[1]); continue; } + m2DClusterDuplicatedPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y()); m3DDuplicatedClusterPositions->Fill(clusDuplicatedPointGlob.x(), clusDuplicatedPointGlob.y(), clusDuplicatedPointGlob.z()); - mXduplicated->Fill(clusDuplicatedPointGlob.x()); - mYduplicated->Fill(clusDuplicatedPointGlob.y()); - mZduplicated->Fill(clusDuplicatedPointGlob.z()); - - DistanceClustersXAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.x() - clusOriginalPointGlob.x())); - DistanceClustersYAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.y() - clusOriginalPointGlob.y())); - DistanceClustersZAftercuts[layerDuplicated]->Fill(abs(clusDuplicatedPointGlob.z() - clusOriginalPointGlob.z())); if (mVerboseOutput) { LOGP(info, "Propagation ok"); @@ -2578,32 +2403,46 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) ptDuplicated = pt; etaDuplicated = eta; clusZ = clusOriginalPointGlob.z(); + clusDuplicatedSelected = clusDuplicated; } adjacentFound = 1; - std::cout << "Duplicated found with label: " << labsDuplicated[0] << " and phi: " << phiDuplicated << std::endl; - clusDuplicated.print(); - std::cout << "-----" << std::endl; } // end loop on all the clusters in the rof -> at this point we have the information on the closest 
cluster (if there is one) // here clusID_rDCA_label is updated with the closest cluster to the track other than the original one - // checking if it is a good or fake match looking at the labels (only if isMC) + if (!adjacentFound) { - if (duplExists) { - std::cout << "No duplicated found but should exist" << std::endl; - std::cout << "DCA cuts were: xy-> " << mDCACutsXY[layerOriginal][0] << " to " << mDCACutsXY[layerOriginal][1] << " and z-> " << mDCACutsZ[layerOriginal][0] << " to " << mDCACutsZ[layerOriginal][1] << "\n-----" << std::endl; - } else { - std::cout << "No duplicated found and does not exist" << std::endl; - } + radiusNotFound[layerOriginal]->Fill(sqrt(clusOriginalPointGlob.x() * clusOriginalPointGlob.x() + clusOriginalPointGlob.y() * clusOriginalPointGlob.y())); + colNotFound[layerOriginal]->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9))); + rowNotFound[layerOriginal]->Fill(rowOriginal); + zNotFound[layerOriginal]->Fill(clusOriginalPointGlob.z()); + phiNotFound[layerOriginal]->Fill(phiOriginal); continue; } - std::cout << "-----" << std::endl; + + chipOrigVsOverlap->Fill(clusOriginal.getChipID() % 9, clusDuplicatedSelected.getChipID() % 9); + mChipFound->Fill(clusOriginal.getChipID()); + zFound[layerOriginal]->Fill(clusOriginalPointGlob.z()); + radiusFound[layerOriginal]->Fill(sqrt(clusOriginalPointGlob.x() * clusOriginalPointGlob.x() + clusOriginalPointGlob.y() * clusOriginalPointGlob.y())); + colFoundOriginalVsDuplicated[layerOriginal]->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9)), clusDuplicatedSelected.getCol() + (1024 * (clusDuplicatedSelected.getChipID() % 9))); + colFoundOriginal[layerOriginal]->Fill(clusOriginal.getCol() + (1024 * (clusOriginal.getChipID() % 9))); + m2DClusterFoundPositions->Fill(clusOriginalPointGlob.x(), clusOriginalPointGlob.y()); + phiFound[layerOriginal]->Fill(phiOriginal); + rowFound[layerOriginal]->Fill(rowOriginal); nDuplClusters++; nDuplicatedClusters[layerOriginal]++; - 
numPt[layerOriginal]->Fill(ptDuplicated); + numPt[layerOriginal]->Fill(pt); numPhi[layerOriginal]->Fill(phiDuplicated); numEta[layerOriginal]->Fill(etaDuplicated); + numRow[layerOriginal]->Fill(rowOriginal); + numCol[layerOriginal]->Fill(clusOriginal.getCol()); + numZ[layerOriginal]->Fill(clusOriginalPointGlob.z()); mZvsPhiDUplicated[layerOriginal]->Fill(clusZ, phiDuplicated); + mNumColEta[layerOriginal]->Fill(clusOriginal.getCol(), eta); + mNumRowPhi[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginalPointGlob.z()); + mNumRowCol[layerOriginal]->Fill(clusOriginal.getRow(), clusOriginal.getCol()); + numLayers->Fill(layerOriginal); + // checking if it is a good or fake match looking at the labels (only if isMC) if (isMC) { bool isGood = false; for (auto lab : std::get<2>(clusID_rDCA_label)) { @@ -2612,6 +2451,10 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) numPtGood[layerOriginal]->Fill(ptDuplicated); numPhiGood[layerOriginal]->Fill(phiDuplicated); numEtaGood[layerOriginal]->Fill(etaDuplicated); + numRowGood[layerOriginal]->Fill(rowOriginal); + numColGood[layerOriginal]->Fill(clusOriginal.getCol()); + numZGood[layerOriginal]->Fill(clusOriginalPointGlob.z()); + numGoodLayers->Fill(layerOriginal); continue; } } @@ -2619,6 +2462,10 @@ void EfficiencyStudy::getEfficiencyAndTrackInfo(bool isMC) numPtFake[layerOriginal]->Fill(ptDuplicated); numPhiFake[layerOriginal]->Fill(phiDuplicated); numEtaFake[layerOriginal]->Fill(etaDuplicated); + numRowFake[layerOriginal]->Fill(rowOriginal); + numColFake[layerOriginal]->Fill(clusOriginal.getCol()); + numZFake[layerOriginal]->Fill(clusOriginalPointGlob.z()); + numFakeLayers->Fill(layerOriginal); } } } // end loop on clusters associated to the track @@ -2642,16 +2489,14 @@ void EfficiencyStudy::process(o2::globaltracking::RecoContainer& recoData) if (mUseMC) { // getDCAClusterTrackMC(); - // studyDCAcutsMC(); + studyDCAcutsMC(); // studyClusterSelectionMC(); - // getEfficiencyAndTrackInfo(mUseMC); // 
countDuplicatedAfterCuts(); - } else if (!mUseMC) { - // saveDataInfo(); + getEfficiency(mUseMC); + } else { + getEfficiency(mUseMC); } - getEfficiency(mUseMC); - LOGP(info, "** Found in {} rofs:\n\t- {} clusters\n\t", mClustersROFRecords.size(), mClusters.size()); @@ -2681,21 +2526,13 @@ void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) mOutFile->mkdir("EfficiencyFinal/"); mOutFile->mkdir("DCAFinal/"); + mOutFile->mkdir("NotFoundChecks/"); - mOutFile->mkdir("DistanceClusters/"); mOutFile->mkdir("DCA/"); mOutFile->mkdir("Pt_Eta_Phi/"); if (mUseMC) { - mOutFile->cd("DistanceClusters"); - for (int i = 0; i < NLAYERS; i++) { - mDistanceClustersX[i]->Write(); - mDistanceClustersY[i]->Write(); - mDistanceClustersZ[i]->Write(); - mDistanceClusters[i]->Write(); - } - mOutFile->cd("DCA"); mDCAxyDuplicated->Write(); mDCAzDuplicated->Write(); @@ -2709,24 +2546,19 @@ void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) mOutFile->cd("Pt_Eta_Phi/"); for (int i = 0; i < NLAYERS; i++) { - mPhiOriginal[i]->Write(); - mPhiTrackOriginal[i]->Write(); mDuplicatedPhiAllPt[i]->Write(); - mPtOriginal[i]->Write(); mPtDuplicated[i]->Write(); mEtaDuplicated[i]->Write(); mPhiDuplicated[i]->Write(); - mPhiTrackDuplicated[i]->Write(); - mPhiTrackDuplicatedvsphiDuplicated[i]->Write(); - mPhiTrackoriginalvsphioriginal[i]->Write(); mPhiOriginalIfDuplicated[i]->Write(); mDuplicatedPt[i]->Write(); mDuplicatedPtEta[i]->Write(); mDuplicatedPtPhi[i]->Write(); mDuplicatedEtaPhi[i]->Write(); - mEtaOriginal[i]->Write(); mDuplicatedEtaAllPt[i]->Write(); mDuplicatedRow[i]->Write(); + mDuplicatedCol[i]->Write(); + mDuplicatedZ[i]->Write(); for (int p = 0; p < 3; p++) { mDuplicatedEta[i][p]->Write(); @@ -2741,7 +2573,6 @@ void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) IPOriginalxy[i]->Write(); IPOriginalz[i]->Write(); mPhiOriginal[i]->Write(); - mPhiTrackOriginal[i]->Write(); mPtOriginal[i]->Write(); mEtaOriginal[i]->Write(); mZvsPhiDUplicated[i]->Write(); @@ -2752,75 +2583,245 @@ 
void EfficiencyStudy::endOfStream(EndOfStreamContext& ec) mOutFile->mkdir("chi2"); mOutFile->cd("chi2/"); - chi2track->Write(); chi2trackAccepted->Write(); mOutFile->cd("EfficiencyFinal/"); + TList listNum; + TList listDen; + auto numPhiAll = std::unique_ptr((TH1D*)numPhi[0]->Clone("numPhiAll")); + auto denPhiAll = std::unique_ptr((TH1D*)denPhi[0]->Clone("denPhiAll")); + + TList listNumColEta; + TList listDenColEta; + auto numColEtaAll = std::unique_ptr((TH1D*)mNumColEta[0]->Clone("numColEtaAll")); + auto denColEtaAll = std::unique_ptr((TH1D*)mDenColEta[0]->Clone("denColEtaAll")); + + TList listNumRowPhi; + TList listDenRowPhi; + auto numRowPhiAll = std::unique_ptr((TH1D*)mNumRowPhi[0]->Clone("numRowPhiAll")); + auto denRowPhiAll = std::unique_ptr((TH1D*)mDenRowPhi[0]->Clone("denRowPhiAll")); + + TList listNumRowCol; + TList listDenRowCol; + auto numRowColAll = std::unique_ptr((TH1D*)mNumRowCol[0]->Clone("numRowColAll")); + auto denRowColAll = std::unique_ptr((TH1D*)mDenRowCol[0]->Clone("denRowColAll")); + + std::unique_ptr effLayers = std::make_unique(*numLayers, *denLayers); + effLayers->SetName("effLayers"); + effLayers->SetTitle("; ;Efficiency"); + std::unique_ptr effLayersGood = std::make_unique(*numGoodLayers, *denLayers); + effLayersGood->SetName("effLayersGood"); + effLayersGood->SetTitle("; ;Efficiency Good Matches"); + std::unique_ptr effLayersFake = std::make_unique(*numFakeLayers, *denLayers); + effLayersFake->SetName("effLayersFake"); + effLayersFake->SetTitle("; ;Efficiency Fake Matches"); + effLayers->Write(); + effLayersGood->Write(); + effLayersFake->Write(); + denLayers->Write(); + numLayers->Write(); + numGoodLayers->Write(); + numFakeLayers->Write(); for (int l = 0; l < NLAYERS; l++) { - TEfficiency* effPt = new TEfficiency(*numPt[l], *denPt[l]); + std::unique_ptr effPt = std::make_unique(*numPt[l], *denPt[l]); effPt->SetName(Form("effPt_layer%d", l)); effPt->SetTitle(Form("L%d;p_{T} (GeV/c);Efficiency", l)); - TEfficiency* effPtGood = new 
TEfficiency(*numPtGood[l], *denPt[l]); + std::unique_ptr effPtGood = std::make_unique(*numPtGood[l], *denPt[l]); effPtGood->SetName(Form("effPtGood_layer%d", l)); effPtGood->SetTitle(Form("L%d;p_{T} (GeV/c);Efficiency Good Matches", l)); - TEfficiency* effPtFake = new TEfficiency(*numPtFake[l], *denPt[l]); + std::unique_ptr effPtFake = std::make_unique(*numPtFake[l], *denPt[l]); effPtFake->SetName(Form("effPtFake_layer%d", l)); effPtFake->SetTitle(Form("L%d;p_{T} (GeV/c);Efficiency Fake Matches", l)); effPt->Write(); effPtGood->Write(); effPtFake->Write(); - TEfficiency* effPhi = new TEfficiency(*numPhi[l], *denPhi[l]); + std::unique_ptr effPhi = std::make_unique(*numPhi[l], *denPhi[l]); effPhi->SetName(Form("effPhi_layer%d", l)); effPhi->SetTitle(Form("L%d;#phi;Efficiency", l)); - TEfficiency* effPhiGood = new TEfficiency(*numPhiGood[l], *denPhi[l]); + std::unique_ptr effPhiGood = std::make_unique(*numPhiGood[l], *denPhi[l]); effPhiGood->SetName(Form("effPhiGood_layer%d", l)); effPhiGood->SetTitle(Form("L%d;#phi;Efficiency Good Matches", l)); - TEfficiency* effPhiFake = new TEfficiency(*numPhiFake[l], *denPhi[l]); + std::unique_ptr effPhiFake = std::make_unique(*numPhiFake[l], *denPhi[l]); effPhiFake->SetName(Form("effPhiFake_layer%d", l)); effPhiFake->SetTitle(Form("L%d;#phi;Efficiency Fake Matches", l)); effPhi->Write(); effPhiGood->Write(); effPhiFake->Write(); + listNum.Add(numPhi[l].get()); + listDen.Add(denPhi[l].get()); - TEfficiency* effEta = new TEfficiency(*numEta[l], *denEta[l]); + std::unique_ptr effEta = std::make_unique(*numEta[l], *denEta[l]); effEta->SetName(Form("effEta_layer%d", l)); effEta->SetTitle(Form("L%d;#eta;Efficiency", l)); - TEfficiency* effEtaGood = new TEfficiency(*numEtaGood[l], *denEta[l]); + std::unique_ptr effEtaGood = std::make_unique(*numEtaGood[l], *denEta[l]); effEtaGood->SetName(Form("effEtaGood_layer%d", l)); effEtaGood->SetTitle(Form("L%d;#eta;Efficiency Good Matches", l)); - TEfficiency* effEtaFake = new 
TEfficiency(*numEtaFake[l], *denEta[l]); + std::unique_ptr effEtaFake = std::make_unique(*numEtaFake[l], *denEta[l]); effEtaFake->SetName(Form("effEtaFake_layer%d", l)); effEtaFake->SetTitle(Form("L%d;#eta;Efficiency Fake Matches", l)); effEta->Write(); effEtaGood->Write(); effEtaFake->Write(); + std::unique_ptr effRow = std::make_unique(*numRow[l], *denRow[l]); + effRow->SetName(Form("effRow_layer%d", l)); + effRow->SetTitle(Form("L%d;#Row;Efficiency", l)); + std::unique_ptr effRowGood = std::make_unique(*numRowGood[l], *denRow[l]); + effRowGood->SetName(Form("effRowGood_layer%d", l)); + effRowGood->SetTitle(Form("L%d;#Row;Efficiency Good Matches", l)); + std::unique_ptr effRowFake = std::make_unique(*numRowFake[l], *denRow[l]); + effRowFake->SetName(Form("effRowFake_layer%d", l)); + effRowFake->SetTitle(Form("L%d;#Row;Efficiency Fake Matches", l)); + effRow->Write(); + effRowGood->Write(); + effRowFake->Write(); + + std::unique_ptr effCol = std::make_unique(*numCol[l], *denCol[l]); + effCol->SetName(Form("effCol_layer%d", l)); + effCol->SetTitle(Form("L%d;#Col;Efficiency", l)); + std::unique_ptr effColGood = std::make_unique(*numColGood[l], *denCol[l]); + effColGood->SetName(Form("effColGood_layer%d", l)); + effColGood->SetTitle(Form("L%d;#Col;Efficiency Good Matches", l)); + std::unique_ptr effColFake = std::make_unique(*numColFake[l], *denCol[l]); + effColFake->SetName(Form("effColFake_layer%d", l)); + effColFake->SetTitle(Form("L%d;#Col;Efficiency Fake Matches", l)); + effCol->Write(); + effColGood->Write(); + effColFake->Write(); + + std::unique_ptr effZ = std::make_unique(*numZ[l], *denZ[l]); + effZ->SetName(Form("effZ_layer%d", l)); + effZ->SetTitle(Form("L%d;#Z (cm);Efficiency", l)); + std::unique_ptr effZGood = std::make_unique(*numZGood[l], *denZ[l]); + effZGood->SetName(Form("effZGood_layer%d", l)); + effZGood->SetTitle(Form("L%d;#Z (cm);Efficiency Good Matches", l)); + std::unique_ptr effZFake = std::make_unique(*numZFake[l], *denZ[l]); + 
effZFake->SetName(Form("effZFake_layer%d", l)); + effZFake->SetTitle(Form("L%d;#Z (cm);Efficiency Fake Matches", l)); + effZ->Write(); + effZGood->Write(); + effZFake->Write(); + + std::unique_ptr effColEta = std::make_unique(*mNumColEta[l], *mDenColEta[l]); + effColEta->SetName(Form("effColEta_layer%d", l)); + effColEta->SetTitle(Form("L%d;Column;#eta", l)); + effColEta->Write(); + + listNumColEta.Add(mNumColEta[l].get()); + listDenColEta.Add(mDenColEta[l].get()); + + std::unique_ptr effRowPhi = std::make_unique(*mNumRowPhi[l], *mDenRowPhi[l]); + effRowPhi->SetName(Form("effRowPhi_layer%d", l)); + effRowPhi->SetTitle(Form("L%d;Column;#eta", l)); + effRowPhi->Write(); + + listNumRowPhi.Add(mNumRowPhi[l].get()); + listDenRowPhi.Add(mDenRowPhi[l].get()); + + std::unique_ptr effRowCol = std::make_unique(*mNumRowCol[l], *mDenRowCol[l]); + effRowCol->SetName(Form("effRowCol_layer%d", l)); + effRowCol->SetTitle(Form("L%d;Column;#eta", l)); + effRowCol->Write(); + + listNumRowCol.Add(mNumRowCol[l].get()); + listDenRowCol.Add(mDenRowCol[l].get()); + + mNumRowCol[l]->Write(); + mDenRowCol[l]->Write(); + mNumRowPhi[l]->Write(); + mDenRowPhi[l]->Write(); + mNumColEta[l]->Write(); + mDenColEta[l]->Write(); numPhi[l]->Write(); denPhi[l]->Write(); numPt[l]->Write(); denPt[l]->Write(); numEta[l]->Write(); denEta[l]->Write(); + numRow[l]->Write(); + denRow[l]->Write(); + numCol[l]->Write(); + denCol[l]->Write(); + numZ[l]->Write(); + denZ[l]->Write(); } + numPhiAll->Merge(&listNum); + denPhiAll->Merge(&listDen); + + numColEtaAll->Merge(&listNumColEta); + denColEtaAll->Merge(&listDenColEta); + + numRowPhiAll->Merge(&listNumRowPhi); + denRowPhiAll->Merge(&listDenRowPhi); + + numRowColAll->Merge(&listNumRowCol); + denRowColAll->Merge(&listDenRowCol); + + std::unique_ptr effPhiAll = std::make_unique(*numPhiAll, *denPhiAll); + effPhiAll->SetName("effPhi_AllLayers"); + effPhiAll->SetTitle("L0 + L1 + L2;#phi;Efficiency"); + effPhiAll->Write(); + numPhiAll->Write(); + denPhiAll->Write(); 
+ + std::unique_ptr effColEtaAll = std::make_unique(*numColEtaAll, *denColEtaAll); + effColEtaAll->SetName("effColEta_AllLayers"); + effColEtaAll->SetTitle("L0 + L1 + L2;Column;#eta"); + effColEtaAll->Write(); + numColEtaAll->Write(); + denColEtaAll->Write(); + + std::unique_ptr effRowPhiAll = std::make_unique(*numRowPhiAll, *denRowPhiAll); + effRowPhiAll->SetName("effRowPhi_AllLayers"); + effRowPhiAll->SetTitle("L0 + L1 + L2;Column;#eta"); + effRowPhiAll->Write(); + numRowPhiAll->Write(); + denRowPhiAll->Write(); + + std::unique_ptr effRowColAll = std::make_unique(*numRowColAll, *denRowColAll); + effRowColAll->SetName("effRowCol_AllLayers"); + effRowColAll->SetTitle("L0 + L1 + L2;Column;#eta"); + effRowColAll->Write(); + numRowColAll->Write(); + denRowColAll->Write(); mOutFile->cd("DCAFinal/"); for (int l = 0; l < NLAYERS; l++) { DCAxyData[l]->Write(); DCAzData[l]->Write(); - DistanceClustersX[l]->Write(); - DistanceClustersY[l]->Write(); - DistanceClustersZ[l]->Write(); - DistanceClustersXAftercuts[l]->Write(); - DistanceClustersYAftercuts[l]->Write(); - DistanceClustersZAftercuts[l]->Write(); DCAxyRejected[l]->Write(); DCAzRejected[l]->Write(); } + mOutFile->cd("NotFoundChecks/"); + + for (int l = 0; l < NLAYERS; l++) { + phiFound[l]->Write(); + phiNotFound[l]->Write(); + rowFound[l]->Write(); + rowNotFound[l]->Write(); + zFound[l]->Write(); + zNotFound[l]->Write(); + radiusFound[l]->Write(); + radiusNotFound[l]->Write(); + colFoundOriginalVsDuplicated[l]->Write(); + colFoundOriginal[l]->Write(); + colNotFound[l]->Write(); + } + mChipFound->Write(); + mChipNotFound->Write(); + m2DClusterFoundPositions->Write(); + l0_00->Write(); + l1_15->Write(); + l2_19->Write(); + chipOrigVsOverlap->Write(); + chipmap->SetContour(100); + chipmap->Write(); + mOutFile->Close(); } From d9d78bcededce73fe8ef2e452d81dff9f8c797d2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 09:28:41 +0100 Subject: [PATCH 0301/1914] GPU CMake: Cleanup and do not use Clang x86 
denormals flag for device compilation --- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 4 +- GPU/GPUTracking/CMakeLists.txt | 5 +- dependencies/FindO2GPU.cmake | 55 +++++++++++----------- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 48f292a198b9c..381c9c050ca09 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -23,9 +23,9 @@ endif() set(CL_SRC ${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) -set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021) +set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - set(OCL_FLAGS ${OCL_FLAGS} -cl-denorms-are-zero -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) + set(OCL_FLAGS ${OCL_FLAGS} -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() set(OCL_FLAGS ${OCL_FLAGS} ${GPUCA_OCL_NO_FAST_MATH_FLAGS}) endif() diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index ad7dd9c210cd1..39218e9f94527 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -17,11 +17,12 @@ set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2 ${GPUCA_CLANG_FTZ}") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") endif() elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - 
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math ${GPUCA_CLANG_FTZ}") + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() +set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}") include(cmake/helpers.cmake) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 56b53e1be8879..d796d0b7145a8 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -85,17 +85,20 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") - set(GPUCA_CLANG_FTZ "") + set(GPUCA_CXX_DENORMALS_FLAGS "") else() - set(GPUCA_CLANG_FTZ "-mdaz-ftz") + set(GPUCA_CXX_DENORMALS_FLAGS "-mdaz-ftz") endif() -set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off ${GPUCA_CLANG_FTZ}") -set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--ftz=true --prec-div=true --prec-sqrt=true --fmad false") -set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt -cl-denorms-are-zero) +set(GPUCA_CUDA_DENORMALS_FLAGS "--ftz=true") +set(GPUCA_OCL_DENORMALS_FLAGS "-cl-denorms-are-zero") +set(GPUCA_HIP_DENORMALS_FLAGS "-fgpu-flush-denormals-to-zero") +set(GPUCA_CXX_NO_FAST_MATH_FLAGS "-fno-fast-math -ffp-contract=off") +set(GPUCA_CUDA_NO_FAST_MATH_FLAGS "--prec-div=true --prec-sqrt=true --fmad false") +set(GPUCA_OCL_NO_FAST_MATH_FLAGS -cl-fp32-correctly-rounded-divide-sqrt ) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_WHOLEO2}) add_definitions(-DGPUCA_DETERMINISTIC_MODE) - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") - set(CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} 
"${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + string(APPEND CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") endif() @@ -143,7 +146,8 @@ if(ENABLE_CUDA) endif() endif() if(CMAKE_CUDA_COMPILER) - set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes") + set(CMAKE_CUDA_FLAGS "-Xcompiler \"${O2_GPU_CMAKE_CXX_FLAGS_NOSTD}\" ${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr --extended-lambda -Xcompiler -Wno-attributes ${GPUCA_CUDA_DENORMALS_FLAGS}") + set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \"${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) string(APPEND CMAKE_CUDA_FLAGS " -Xptxas -v") endif() @@ -151,19 +155,18 @@ if(ENABLE_CUDA) if (NOT ENABLE_CUDA STREQUAL "AUTO") string(APPEND CMAKE_CUDA_FLAGS " --allow-unsupported-compiler") endif() - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "-Xcompiler \"${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}\" ${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -lineinfo -Xptxas -O0 -Xcompiler -O0") + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -lineinfo -Xptxas -O0") else() - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -Xptxas -O4 -Xcompiler -O4") + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -Xptxas -O4 -Xcompiler -O4") endif() if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") + string(APPEND 
CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -use_fast_math --ftz=true")# + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math --ftz=true") endif() if(CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call") + string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call") endif() if(GPUCA_CUDA_GCCBIN) list(FILTER CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES EXCLUDE REGEX "^/usr/lib.*/gcc/") # Workaround, since CMake adds old GCC lib paths implicitly if we request that gcc for CUDA @@ -285,27 +288,25 @@ if(ENABLE_HIP) set(HIP_ENABLED ON) set_target_properties(roc::rocthrust PROPERTIES IMPORTED_GLOBAL TRUE) message(STATUS "HIP Found (${hip_HIPCC_EXECUTABLE} version ${hip_VERSION})") - set(O2_HIP_CMAKE_CXX_FLAGS "-fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed") + set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS}") + set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") + string(APPEND CMAKE_HIP_FLAGS " -fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed ") if(hip_VERSION VERSION_GREATER_EQUAL "6.0" AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.2") - string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # 
TODO: Cleanup + string(APPEND CMAKE_HIP_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # TODO: Cleanup endif() if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) - string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -Rpass-analysis=kernel-resource-usage") + string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage") + endif() + if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) + string(APPEND CMAKE_HIP_FLAGS " -ffast-math") + endif() + if(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") + string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -O3") endif() string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics if(HIP_AMDGPUTARGET) set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection endif() - if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - string(APPEND O2_HIP_CMAKE_CXX_FLAGS " -fgpu-flush-denormals-to-zero -ffast-math") - endif() - set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${O2_HIP_CMAKE_CXX_FLAGS}") - set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") - if(CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0 -ggdb") - else() - set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3") - endif() else() set(HIP_ENABLED OFF) endif() From d1fcd960548af1fbc975638e8b08abb44482d2dd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 09:29:05 +0100 Subject: [PATCH 0302/1914] GPU HIP CMake: Remove deprecated option --- dependencies/FindO2GPU.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index d796d0b7145a8..bbbb420354fae 
100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -291,9 +291,6 @@ if(ENABLE_HIP) set(CMAKE_HIP_FLAGS "${O2_GPU_CMAKE_CXX_FLAGS_NOSTD} ${CMAKE_HIP_FLAGS} ${GPUCA_HIP_DENORMALS_FLAGS}") set(CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER}}") string(APPEND CMAKE_HIP_FLAGS " -fgpu-defer-diag -mllvm -amdgpu-enable-lower-module-lds=false -mllvm -amdgpu-function-calls=true -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-invalid-constexpr -Wno-ignored-optimization-argument -Wno-unused-private-field -Wno-pass-failed ") - if(hip_VERSION VERSION_GREATER_EQUAL "6.0" AND NOT hip_VERSION VERSION_GREATER_EQUAL "6.2") - string(APPEND CMAKE_HIP_FLAGS " -mllvm -amdgpu-legacy-sgpr-spill-lowering=true") # TODO: Cleanup - endif() if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage") endif() From 210dc61adc0b14ee8e4ae62916d442dd02ed3f50 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 27 Mar 2025 15:57:38 +0100 Subject: [PATCH 0303/1914] DPL: make input slots display in DebugGUI scrollable (#14041) --- .../Core/include/Framework/DeviceControl.h | 2 + .../src/FrameworkGUIDataRelayerUsage.cxx | 95 ++++++++++++++----- .../src/FrameworkGUIDataRelayerUsage.h | 5 +- .../src/FrameworkGUIDevicesGraph.cxx | 3 +- 4 files changed, 79 insertions(+), 26 deletions(-) diff --git a/Framework/Core/include/Framework/DeviceControl.h b/Framework/Core/include/Framework/DeviceControl.h index ce946e8e77fbf..03889c00f6cf9 100644 --- a/Framework/Core/include/Framework/DeviceControl.h +++ b/Framework/Core/include/Framework/DeviceControl.h @@ -52,6 +52,8 @@ struct DeviceControl { int logStreams = 0; /// An incremental number to identify the device state int requestedState = 0; + /// The first window in the records buffer to display in GUI + int firstWnd = 1; }; } // namespace o2::framework diff --git 
a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx index 8e683d46131ea..c39e268fa90a7 100644 --- a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx +++ b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.cxx @@ -17,8 +17,7 @@ #include "Framework/DataProcessingStates.h" #include "InspectorHelpers.h" #include "PaletteHelpers.h" -#include "Framework/Logger.h" -#include +#include "FrameworkGUIDataRelayerUsage.h" #include #include @@ -27,11 +26,11 @@ static inline ImVec2 operator-(const ImVec2& lhs, const ImVec2& rhs) { return Im namespace o2::framework::gui { - // This is to display the information in the data relayer struct HeatMapHelper { template - static void draw(const char* name, + static void draw(const char* /*name*/, + int& v, ImVec2 const& sizeHint, std::function const& getNumInputs, std::function const& getNumRecords, @@ -42,21 +41,69 @@ struct HeatMapHelper { std::function const& getColor, std::function const& describeCell) { - ImVec2 size = ImVec2(sizeHint.x, std::min(sizeHint.y, 16.f * getNumItems(0) + 2)); - ImU32 BORDER_COLOR = ImColor(200, 200, 200, 255); - ImU32 BACKGROUND_COLOR = ImColor(20, 20, 20, 255); + float padding = 1; + // add slider to scroll between the grid display windows + size_t nw = getNumRecords() / WND; + ImGui::PushItemWidth(sizeHint.x); + ImGui::SliderInt("##window", &v, 1, nw, "wnd: %d", ImGuiSliderFlags_AlwaysClamp); + ImVec2 sliderMin = ImGui::GetItemRectMin(); + constexpr float MAX_BOX_X_SIZE = 16.f; constexpr float MAX_BOX_Y_SIZE = 16.f; + + ImVec2 size = ImVec2(sizeHint.x, std::min(sizeHint.y, MAX_BOX_Y_SIZE * getNumItems(0) + 2)); + ImU32 BORDER_COLOR = ImColor(200, 200, 200, 255); + ImU32 BACKGROUND_COLOR = ImColor(20, 20, 20, 255); + ImU32 BORDER_COLOR_A = ImColor(200, 200, 200, 0); + ImU32 BACKGROUND_COLOR_A = ImColor(0, 0, 0, 0); + ImDrawList* drawList = ImGui::GetWindowDrawList(); - ImVec2 winPos = ImGui::GetCursorScreenPos() + ImVec2{0, 
7}; - auto records = getNumRecords(); - auto boxSizeX = std::min(size.x / records, MAX_BOX_X_SIZE); - auto numInputs = getNumInputs(); + ImVec2 winPos = sliderMin; + // overlay activity indicator on the slider + auto xsz = size.x / nw; + drawList->AddRectFilled( + ImVec2{0., 0.} + winPos, + ImVec2{size.x, size.y} + winPos, + BACKGROUND_COLOR_A); + drawList->AddRect( + ImVec2{0. - 1, -1} + winPos, + ImVec2{size.x + 1, size.y - 1} + winPos, + BORDER_COLOR_A); + + const static auto colorA = ImColor(ImVec4{0.945, 0.096, 0.278, 0.5}); + const static auto colorE = ImColor(ImVec4{0, 0, 0, 0}); + + drawList->PrimReserve(nw * 6, nw * 4); + for (size_t iw = 0; iw < nw; ++iw) { + ImVec2 xOffset{iw * xsz + 2 * padding, 0}; + ImVec2 xSize{xsz - 2 * padding, 0}; + ImVec2 yOffset{0, 2 * padding}; + ImVec2 ySize{0, 16 - 4 * padding}; + bool active = 0; + for (size_t ir = iw; ir < ((iw + WND > getNumRecords()) ? getNumRecords() : iw + WND); ++ir) { + for (size_t i = 0; i < getNumItems(ir); ++i) { + active = getValue(*getItem(ir, i)) > 0; + if (active) { + break; + } + } + } + drawList->PrimRect( + xOffset + yOffset + winPos, + xOffset + xSize + yOffset + ySize + winPos, + active ? colorA : colorE); + } + + // display the grid + size_t recordsWindow = v * WND; + auto boxSizeX = std::min(size.x / WND, MAX_BOX_X_SIZE); + auto numInputs = getNumInputs(); + winPos = ImGui::GetCursorScreenPos() + ImVec2{0, 7}; ImGui::InvisibleButton("sensible area", ImVec2(size.x, size.y)); if (ImGui::IsItemHovered()) { auto pos = ImGui::GetMousePos() - winPos; - auto slot = std::lround(std::trunc(pos.x / size.x * records)); + auto slot = (v - 1) * WND + std::lround(std::trunc(pos.x / size.x * WND)); auto row = std::lround(std::trunc(pos.y / size.y * numInputs)); describeCell(row, slot); } @@ -69,21 +116,21 @@ struct HeatMapHelper { ImVec2(0. 
- 1, -1) + winPos, ImVec2{size.x + 1, size.y - 1} + winPos, BORDER_COLOR); - float padding = 1; size_t totalRects = 0; - for (size_t ri = 0, re = getNumRecords(); ri < re; ri++) { + for (size_t ri = (v - 1) * WND; ri < recordsWindow; ri++) { auto record = getRecord(ri); totalRects += getNumItems(record); } drawList->PrimReserve(totalRects * 6, totalRects * 4); - for (size_t ri = 0, re = getNumRecords(); ri < re; ri++) { + for (size_t ri = (v - 1) * WND; ri < recordsWindow; ri++) { auto record = getRecord(ri); - ImVec2 xOffset{(ri * boxSizeX) + padding, 0}; + ImVec2 xOffset{((ri - (v - 1) * WND) * boxSizeX) + padding, 0}; ImVec2 xSize{boxSizeX - 2 * padding, 0}; - auto boxSizeY = std::min(size.y / getNumItems(record), MAX_BOX_Y_SIZE); - for (size_t mi = 0, me = getNumItems(record); mi < me; mi++) { + auto me = getNumItems(record); + auto boxSizeY = std::min(size.y / me, MAX_BOX_Y_SIZE); + for (size_t mi = 0; mi < me; mi++) { ImVec2 yOffSet{0, (mi * boxSizeY) + padding}; ImVec2 ySize{0, boxSizeY - 2 * padding}; @@ -98,11 +145,12 @@ struct HeatMapHelper { } }; -void displayDataRelayer(DeviceMetricsInfo const& metrics, - DeviceInfo const& info, +void displayDataRelayer(DeviceMetricsInfo const& /*metrics*/, + DeviceInfo const& /*info*/, DeviceSpec const& spec, DataProcessingStates const& states, - ImVec2 const& size) + ImVec2 const& size, + int& v) { auto getNumInputs = [&states]() -> size_t { auto& inputsView = states.statesViews[(int)ProcessingStateId::DATA_QUERIES]; @@ -146,7 +194,7 @@ void displayDataRelayer(DeviceMetricsInfo const& metrics, } char const* const beginData = strchr(buffer + view.first, ' ') + 1; // Protect against buffer overflows - if (view.size <= beginData - buffer + i - view.first) { + if ((size_t)view.size <= beginData - buffer + i - view.first) { return &error; } return (int8_t const*)beginData + i; }; @@ -184,7 +232,7 @@ void displayDataRelayer(DeviceMetricsInfo const& metrics, if ((end - input) == 0) { continue; } - if (i == row) { + if (i == 
(size_t)row) { ImGui::Text("%d %.*s (%s)", row, int(end - input), input, InspectorHelpers::getLifeTimeStr(spec.inputs[i].matcher.lifetime).c_str()); break; } @@ -226,6 +274,7 @@ void displayDataRelayer(DeviceMetricsInfo const& metrics, if (getNumRecords()) { HeatMapHelper::draw("DataRelayer", + v, size, getNumInputs, getNumRecords, diff --git a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h index 8c4941474d8a7..8bea06829f0dc 100644 --- a/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h +++ b/Framework/GUISupport/src/FrameworkGUIDataRelayerUsage.h @@ -9,6 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include "Framework/DeviceSpec.h" class ImVec2; namespace o2::framework @@ -19,9 +20,9 @@ class DataProcessingStates; namespace gui { - +static constexpr int WND = 16; /// View of the DataRelayer metrics for a given DeviceInfo -void displayDataRelayer(DeviceMetricsInfo const& metrics, DeviceInfo const& info, DeviceSpec const& spec, DataProcessingStates const&, ImVec2 const& size); +void displayDataRelayer(DeviceMetricsInfo const& metrics, DeviceInfo const& info, DeviceSpec const& spec, DataProcessingStates const&, ImVec2 const& size, int& v); } // namespace gui } // namespace o2::framework diff --git a/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx b/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx index 89126cf303a66..1c4ddd7e6aabf 100644 --- a/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx +++ b/Framework/GUISupport/src/FrameworkGUIDevicesGraph.cxx @@ -713,7 +713,8 @@ void showTopologyNodeGraph(WorkspaceGUIState& state, default: break; } - gui::displayDataRelayer(metricsInfos[node->ID], infos[node->ID], specs[node->ID], allStates[node->ID], ImVec2(140., 90.)); + + gui::displayDataRelayer(metricsInfos[node->ID], infos[node->ID], specs[node->ID], allStates[node->ID], ImVec2(200., 160.), 
controls[node->ID].firstWnd); ImGui::EndGroup(); // Save the size of what we have emitted and whether any of the widgets are being used From 1c4e4a6be81b8b9762db927ebfdf5c636cda23ed Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 27 Mar 2025 16:51:56 +0100 Subject: [PATCH 0304/1914] DPL: allow searching for plugins in executables as well (#14118) --- Framework/Core/include/Framework/PluginManager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/PluginManager.h b/Framework/Core/include/Framework/PluginManager.h index d6b16f01ad713..96281942e667d 100644 --- a/Framework/Core/include/Framework/PluginManager.h +++ b/Framework/Core/include/Framework/PluginManager.h @@ -87,7 +87,7 @@ struct PluginManager { #else auto libraryName = fmt::format("lib{}.so", loadablePlugin.library); #endif - auto ret = uv_dlopen(libraryName.c_str(), &handle); + auto ret = uv_dlopen(loadablePlugin.library.empty() ? nullptr : libraryName.c_str(), &handle); if (ret != 0) { LOGP(error, "Could not load library {}", loadablePlugin.library); LOG(error) << uv_dlerror(&handle); From 243279af0b17985227f1f3d0aa80794c1549122b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 11:12:37 +0100 Subject: [PATCH 0305/1914] GPU Standalone: unify duplicate denormal code in standalone benchmark --- .../Standalone/Benchmark/standalone.cxx | 22 ---------- GPU/GPUTracking/utils/qmaths_helpers.h | 41 +++++++++++-------- 2 files changed, 25 insertions(+), 38 deletions(-) diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index e9cb7c5179c59..2e3e4725bd6aa 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -53,9 +53,6 @@ #include "GPUTPCGMMergedTrack.h" #include "GPUSettings.h" #include -#if not(defined(__ARM_NEON) or defined(__aarch64__)) // ARM doesn't have SSE 
-#include -#endif #include "GPUO2DataTypes.h" #include "GPUChainITS.h" @@ -85,23 +82,6 @@ std::atomic nIteration, nIterationEnd; std::vector ioPtrEvents; std::vector ioMemEvents; -void SetCPUAndOSSettings() -{ -#if not(defined(__ARM_NEON) or defined(__aarch64__)) // ARM doesn't have SSE -#ifdef FE_DFL_DISABLE_SSE_DENORMS_ENV // Flush and load denormals to zero in any case - fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV); -#else -#ifndef _MM_FLUSH_ZERO_ON -#define _MM_FLUSH_ZERO_ON 0x8000 -#endif -#ifndef _MM_DENORMALS_ZERO_ON -#define _MM_DENORMALS_ZERO_ON 0x0040 -#endif - _mm_setcsr(_mm_getcsr() | (_MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON)); -#endif -#endif // ARM -} - int32_t ReadConfiguration(int argc, char** argv) { int32_t qcRet = qConfigParse(argc, (const char**)argv); @@ -740,8 +720,6 @@ int32_t main(int argc, char** argv) { std::unique_ptr recUnique, recUniqueAsync, recUniquePipeline; - SetCPUAndOSSettings(); - if (ReadConfiguration(argc, argv)) { return 1; } diff --git a/GPU/GPUTracking/utils/qmaths_helpers.h b/GPU/GPUTracking/utils/qmaths_helpers.h index 9c5f704180aaa..5eb3ce4fb2483 100644 --- a/GPU/GPUTracking/utils/qmaths_helpers.h +++ b/GPU/GPUTracking/utils/qmaths_helpers.h @@ -15,24 +15,33 @@ #ifndef QMATH_HELPERS_H #define QMATH_HELPERS_H -#if defined __has_include -#if __has_include() && __has_include() -#include -#include -#if defined(_MM_FLUSH_ZERO_OFF) && defined(_MM_DENORMALS_ZERO_ON) +#if !(defined(__ARM_NEON) || defined(__aarch64__)) && __has_include() // clang-format off + #include + #if __has_include() + #include + #endif +#elif __has_include() + #include +#endif + static void disable_denormals() { - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#if !(defined(__ARM_NEON) || defined(__aarch64__)) && __has_include() // clang-format off + #if defined(_MM_FLUSH_ZERO_OFF) && defined(_MM_DENORMALS_ZERO_ON) + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + 
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); + #else + #ifndef _MM_FLUSH_ZERO_ON + #define _MM_FLUSH_ZERO_ON 0x8000 + #endif + #ifndef _MM_DENORMALS_ZERO_ON + #define _MM_DENORMALS_ZERO_ON 0x0040 + #endif + _mm_setcsr(_mm_getcsr() | (_MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON)); + #endif +#elif __has_include() && defined(FE_DFL_DISABLE_SSE_DENORMS_ENV) + fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV); +#endif // clang-format on } -#define XMM_HAS_DENORMAL_DEACTIVATE -#endif -#endif -#endif -#ifdef XMM_HAS_DENORMAL_DEACTIVATE -#undef XMM_HAS_DENORMAL_DEACTIVATE -#else -static void disable_denormals() {} -#endif #endif From 6d54cfc154ca9370a30dc5f212e9e40fd972fde1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 26 Mar 2025 22:18:27 +0100 Subject: [PATCH 0306/1914] GPU: Make launch bounds of GPUDefParamters available as C++ struct at runtime --- GPU/Common/GPUCommonDef.h | 2 +- GPU/GPUTracking/Base/GPUConstantMem.h | 4 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 22 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 7 +- .../Base/GPUReconstructionKernelMacros.h | 27 +- .../Base/GPUReconstructionKernels.h | 12 +- .../Base/GPUReconstructionProcessing.cxx | 24 ++ .../Base/GPUReconstructionProcessing.h | 9 +- .../Base/cuda/GPUReconstructionCUDA.cu | 17 +- .../Base/cuda/GPUReconstructionCUDA.h | 2 - .../Base/cuda/GPUReconstructionCUDAKernels.cu | 5 +- .../Base/opencl/GPUReconstructionOCL.cxx | 4 + .../Base/opencl/GPUReconstructionOCL.h | 2 - .../opencl/GPUReconstructionOCLKernels.cxx | 4 +- GPU/GPUTracking/CMakeLists.txt | 12 +- GPU/GPUTracking/Definitions/GPUDef.h | 2 +- .../Definitions/GPUDefConstantsAndSettings.h | 4 +- .../Definitions/GPUDefParameters.template.h | 27 ++ ...Parameters.h => GPUDefParametersDefault.h} | 332 ++++++++++++++++-- .../GPUDefParametersLoad.template.inc | 56 +++ GPU/GPUTracking/Standalone/CMakeLists.txt | 7 +- .../cmake/GPUNoFastMathKernels.template.h | 5 + GPU/GPUTracking/kernels.cmake | 4 +- dependencies/FindO2GPU.cmake | 1 - 24 files 
changed, 488 insertions(+), 103 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParameters.template.h rename GPU/GPUTracking/Definitions/{GPUDefGPUParameters.h => GPUDefParametersDefault.h} (69%) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index 78da104a0c029..d7e99f53d4ce8 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -58,7 +58,7 @@ #if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY)) #define GPUCA_NO_CONSTANT_MEMORY -#elif defined(__CUDACC__) || defined(__HIPCC__) +#elif (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(GPUCA_GPUCODE_HOSTONLY) #define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM #endif diff --git a/GPU/GPUTracking/Base/GPUConstantMem.h b/GPU/GPUTracking/Base/GPUConstantMem.h index e0b06f0a3ea55..532c270431d99 100644 --- a/GPU/GPUTracking/Base/GPUConstantMem.h +++ b/GPU/GPUTracking/Base/GPUConstantMem.h @@ -95,7 +95,7 @@ union GPUConstantMemCopyable { static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1); #endif } // namespace o2::gpu -#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) +#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol #endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM namespace o2::gpu @@ -104,7 +104,7 @@ namespace o2::gpu // Must be placed here, to avoid circular header dependency GPUdi() GPUconstantref() const GPUConstantMem* GPUProcessor::GetConstantMem() const { -#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY) +#if 
defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) return &GPUCA_CONSMEM; #else return mConstantMem; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index a8a83fdbd9203..d5404618c32b1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -16,7 +16,7 @@ #include "GPUReconstructionIncludes.h" #include "GPUReconstructionThreading.h" #include "GPUChain.h" - +#include "GPUDefParameters.h" #include "GPUTPCClusterData.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" @@ -120,15 +120,27 @@ void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs -krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend() +template +gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) { - return krnlProperties{1, 1}; + if (gpu == -1) { + gpu = IsGPU(); + } + const auto num = GetKernelNum(); + const auto* p = gpu ? mParDevice : mParCPU; + gpu_reconstruction_kernels::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]}; + if (ret.nThreads == 0) { + ret.nThreads = gpu ? 
mThreadCount : 1u; + } + if (ret.minBlocks == 0) { + ret.minBlocks = 1; + } + return ret; } #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ template void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ - template krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend(); + template krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index fd999ec2304e1..099fed5afacf0 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -38,8 +38,6 @@ class GPUReconstructionCPUBackend : public GPUReconstructionProcessing void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); template void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args); - template - gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); }; class GPUReconstructionCPU : public GPUReconstructionKernels @@ -55,10 +53,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels void runKernel(krnlSetup&& setup, Args&&... 
args); template - const gpu_reconstruction_kernels::krnlProperties getKernelProperties() - { - return getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument()); - } + gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu = -1); virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 0b1a501ebc094..b3f6c6ec817fd 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -38,21 +38,18 @@ #ifndef GPUCA_KRNL_CUSTOM #define GPUCA_KRNL_CUSTOM(...) #endif -#define GPUCA_KRNL_REG_EXTRREG(...) GPUCA_M_STRIP(__VA_ARGS__) -#define GPUCA_KRNL_CUSTOM_EXTRREG(MODE, ...) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) -#define GPUCA_KRNL_NONE_EXTRREG(MODE, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_REG(MODE, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, MODE))(num) GPUCA_ATTRRES_XREG (MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_CUSTOM(MODE, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, MODE))(args) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_NONE(MODE, none, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_(MODE, ...) -#define GPUCA_ATTRRES_XNONE(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_XCUSTOM(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES_XREG(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) -#define GPUCA_ATTRRES(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__) +#define GPUCA_ATTRRES_REG(reg, num, ...) 
GPUCA_M_EXPAND(GPUCA_KRNL_REG)(num) GPUCA_ATTRRES_XREG (__VA_ARGS__) +#define GPUCA_ATTRRES_CUSTOM(custom, args, ...) GPUCA_M_EXPAND(GPUCA_KRNL_CUSTOM)(args) GPUCA_ATTRRES_XCUSTOM(__VA_ARGS__) +#define GPUCA_ATTRRES_NONE(none, ...) GPUCA_ATTRRES_XNONE(__VA_ARGS__) +#define GPUCA_ATTRRES_(...) +#define GPUCA_ATTRRES_XNONE(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) +#define GPUCA_ATTRRES_XCUSTOM(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) +#define GPUCA_ATTRRES_XREG(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) +#define GPUCA_ATTRRES(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__) // GPU Kernel entry point #define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) \ - GPUg() void GPUCA_ATTRRES(, GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) + GPUg() void GPUCA_ATTRRES(GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) #ifdef GPUCA_KRNL_DEFONLY #define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__); @@ -79,12 +76,6 @@ } \ }; -#define GPUCA_KRNL_PROP(x_class, x_attributes) \ - template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend() { \ - gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \ - return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ - } - #endif // GPUCA_GPUCODE #define GPUCA_KRNL_LB(x_class, x_attributes, ...) 
GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index b8f3e3746c743..f3d52da8b5613 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -95,14 +95,10 @@ class GPUReconstructionKernels : public T template using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - virtual void runKernelImpl(const krnlSetupArgs& args) \ - { \ - T::template runKernelBackend(args); \ - } \ - virtual gpu_reconstruction_kernels::krnlProperties getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument) \ - { \ - return T::template getKernelPropertiesBackend(); \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ + virtual void runKernelImpl(const krnlSetupArgs& args) \ + { \ + T::template runKernelBackend(args); \ } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index bae95ac8d3f38..58df7f01823dc 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -12,11 +12,35 @@ /// \file GPUReconstructionProcessing.cxx /// \author David Rohr +#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS +#include "GPUDefParametersDefault.h" +#include "GPUDefParametersLoad.inc" + #include "GPUReconstructionProcessing.h" #include "GPUReconstructionThreading.h" using namespace o2::gpu; +GPUReconstructionProcessing::GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) +{ + if (mMaster == nullptr) { + mParCPU = new GPUDefParameters(o2::gpu::internal::GPUDefParametersLoad()); + mParDevice = new GPUDefParameters(); + } 
else { + GPUReconstructionProcessing* master = dynamic_cast(mMaster); + mParCPU = master->mParCPU; + mParDevice = master->mParDevice; + } +} + +GPUReconstructionProcessing::~GPUReconstructionProcessing() +{ + if (mMaster == nullptr) { + delete mParCPU; + delete mParDevice; + } +} + int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) { int32_t nThreads = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index b0466efceac24..f539c91b90a6e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -25,6 +25,8 @@ namespace o2::gpu { +struct GPUDefParameters; + namespace gpu_reconstruction_kernels { struct deviceEvent { @@ -63,7 +65,7 @@ class threadContext class GPUReconstructionProcessing : public GPUReconstruction { public: - ~GPUReconstructionProcessing() override = default; + ~GPUReconstructionProcessing() override; // Threading int32_t getNKernelHostThreads(bool splitCores); @@ -101,7 +103,7 @@ class GPUReconstructionProcessing : public GPUReconstruction }; protected: - GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {} + GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg); using deviceEvent = gpu_reconstruction_kernels::deviceEvent; static const std::vector mKernelNames; @@ -132,6 +134,9 @@ class GPUReconstructionProcessing : public GPUReconstruction template HighResTimer& getTimer(const char* name, int32_t num = -1); + GPUDefParameters* mParCPU = nullptr; + GPUDefParameters* mParDevice = nullptr; + private: uint32_t getNextTimerId(); timerMeta* getTimerById(uint32_t id, bool increment = true); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index f87d5c8189cdc..8790d7718f517 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -13,8 +13,13 @@ /// \author David Rohr #define GPUCA_GPUCODE_HOSTONLY -#include "GPUReconstructionCUDAIncludesHost.h" +#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS +#include "GPUReconstructionCUDADef.h" +#include "GPUDefParametersDefault.h" +#include "GPUDefParametersLoad.inc" + +#include "GPUReconstructionCUDAIncludesHost.h" #include #include "GPUReconstructionCUDA.h" @@ -51,11 +56,14 @@ GPUReconstructionCUDABackend::GPUReconstructionCUDABackend(const GPUSettingsDevi { if (mMaster == nullptr) { mInternals = new GPUReconstructionCUDAInternals; + *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } + mDeviceBackendSettings.deviceType = DeviceType::CUDA; } GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() { + Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA if (mMaster == nullptr) { delete mInternals; } @@ -69,7 +77,6 @@ int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, con GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) { - mDeviceBackendSettings.deviceType = DeviceType::CUDA; #ifndef __HIPCC__ // CUDA mRtcSrcExtension = ".cu"; mRtcBinExtension = ".fatbin"; @@ -78,11 +85,7 @@ GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg mRtcBinExtension = ".o"; #endif } - -GPUReconstructionCUDA::~GPUReconstructionCUDA() -{ - Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA -} +GPUReconstructionCUDA::~GPUReconstructionCUDA() {} GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionCUDA(cfg); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index a98b14a873ca0..ac5920f769f25 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -44,8 +44,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase void runKernelBackend(const krnlSetupArgs& args); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); - template - gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); void getRTCKernelCalls(std::vector& kernels); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 4b3f8a767226c..2596d0e19ec48 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -83,16 +83,13 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_PROP(x_class, x_attributes) \ GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types) \ template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e724f0f2cbfcd..7310b8b6041a9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -12,7 +12,10 @@ /// \file GPUReconstructionOCL.cxx /// \author David Rohr +#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS #include "GPUReconstructionOCLIncludesHost.h" +#include "GPUDefParametersDefault.h" +#include "GPUDefParametersLoad.inc" 
#include @@ -36,6 +39,7 @@ GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDevice { if (mMaster == nullptr) { mInternals = new GPUReconstructionOCLInternals; + *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } mDeviceBackendSettings.deviceType = DeviceType::OCL; } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 29951cd43f167..16ef9b5e87fe8 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -58,8 +58,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t AddKernel(); template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); - template - gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend(); GPUReconstructionOCLInternals* mInternals; float mOclVersion; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index fff69038c056f..f71336ac35e0e 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -91,8 +91,6 @@ int32_t GPUReconstructionOCLBackend::AddKernels() return 0; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_PROP(x_class, x_attributes) \ - template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 39218e9f94527..33715909e810c 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -129,7 +129,7 @@ set(HDRS_INSTALL 
DataTypes/GPUTriggerOutputs.h Debug/GPUROOTDump.h Definitions/GPUDefConstantsAndSettings.h - Definitions/GPUDefGPUParameters.h + Definitions/GPUDefParametersDefault.h Definitions/GPUDef.h Definitions/GPUDefMacros.h Definitions/GPULogging.h @@ -247,10 +247,18 @@ file(GENERATE OUTPUT include_gpu_onthefly/GPUNoFastMathKernels.h INPUT cmake/GPUNoFastMathKernels.template.h ) +file(GENERATE + OUTPUT include_gpu_onthefly/GPUDefParameters.h + INPUT Definitions/GPUDefParameters.template.h +) +file(GENERATE + OUTPUT include_gpu_onthefly/GPUDefParametersLoad.inc + INPUT Definitions/GPUDefParametersLoad.template.inc +) if(NOT ALIGPU_BUILD_TYPE STREQUAL "O2") include_directories(${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) endif() -set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h) +set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h) include(kernels.cmake) # Optional sources depending on optional dependencies diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index f01e3e6d38332..404f35f971c94 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include "GPUDefConstantsAndSettings.h" -#include "GPUDefGPUParameters.h" +#include "GPUDefParametersDefault.h" #include "GPUCommonRtypes.h" // Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects) diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index f18390629f2bc..2d7aca8d71b92 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -13,7 +13,7 @@ /// \author David Rohr // This files contains compile-time 
constants affecting the GPU algorithms / reconstruction results. -// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefGPUParameters.h +// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters.h #ifndef GPUDEFCONSTANTSANDSETTINGS_H #define GPUDEFCONSTANTSANDSETTINGS_H @@ -66,7 +66,7 @@ #endif #endif -//#define GPUCA_MERGER_BY_MC_LABEL // Use MC labels for TPC track merging - for performance studies +//#define GPUCA_MERGER_BY_MC_LABEL // Use MC labels for TPC track merging - for performance studies // TODO: Cleanup unneeded options //#define GPUCA_FULL_CLUSTERDATA // Store all cluster information in the cluster data, also those not needed for tracking. //#define GPUCA_TPC_RAW_PROPAGATE_PAD_ROW_TIME // Propagate Pad, Row, Time cluster information to GM //#define GPUCA_GM_USE_FULL_FIELD // Use offline magnetic field during GMPropagator prolongation diff --git a/GPU/GPUTracking/Definitions/GPUDefParameters.template.h b/GPU/GPUTracking/Definitions/GPUDefParameters.template.h new file mode 100644 index 0000000000000..731cb76b89193 --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParameters.template.h @@ -0,0 +1,27 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUDefParameters.h +/// \author David Rohr + +#ifndef GPUDEFPARAMETERS_H +#define GPUDEFPARAMETERS_H + +namespace o2::gpu +{ +struct GPUDefParameters { // clang-format off + int32_t par_LB_maxThreads[$>] = {}; + int32_t par_LB_minBlocks[$>] = {}; + int32_t par_LB_forceBlocks[$>] = {}; +}; // clang-format on +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h similarity index 69% rename from GPU/GPUTracking/Definitions/GPUDefGPUParameters.h rename to GPU/GPUTracking/Definitions/GPUDefParametersDefault.h index 910907368e891..7051fff2f177d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefGPUParameters.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUDefGPUParameters.h +/// \file GPUDefParametersDefault.h /// \author David Rohr // This files contains compile-time constants affecting the GPU performance. @@ -17,14 +17,10 @@ // This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc. 
// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h -#ifndef GPUDEFGPUPARAMETERS_H -#define GPUDEFGPUPARAMETERS_H +#ifndef GPUDEFPARAMETERSDEFAULT_H +#define GPUDEFPARAMETERSDEFAULT_H // clang-format off -#ifndef GPUDEF_H -#error Please include GPUDef.h -#endif - #include "GPUCommonDef.h" #include "GPUDefMacros.h" @@ -282,7 +278,7 @@ #endif // GPUCA_GPUCODE #ifdef GPUCA_GPUCODE - // Default settings, if not already set for selected GPU type + // Default settings for GPU, if not already set for selected GPU type #ifndef GPUCA_THREAD_COUNT #define GPUCA_THREAD_COUNT 256 #endif @@ -334,10 +330,10 @@ #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 256 #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 256 #endif - #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 256 #endif #ifndef GPUCA_LB_GPUTPCCFDecodeZS @@ -487,10 +483,15 @@ #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256 #endif + #ifndef GPUCA_LB_GPUMemClean16 + #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT, 1 + #endif + #ifndef GPUCA_LB_GPUitoa + #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1 + #endif #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) #else - // The following defaults are needed to compile the host code - #define GPUCA_GET_THREAD_COUNT(...) 1 + #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. #endif #define GPUCA_GET_WARP_COUNT(...) 
(GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) @@ -523,33 +524,33 @@ #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER #if defined(__CUDACC__) || defined(__HIPCC__) -#define GPUCA_SPECIALIZE_THRUST_SORTS + #define GPUCA_SPECIALIZE_THRUST_SORTS #endif #ifndef GPUCA_NEIGHBORSFINDER_REGS -#define GPUCA_NEIGHBORSFINDER_REGS NONE, 0 + #define GPUCA_NEIGHBORSFINDER_REGS NONE, 0 #endif #ifdef GPUCA_GPUCODE #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 #endif #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 #endif #ifndef GPUCA_ALTERNATE_BORDER_SORT - #define GPUCA_ALTERNATE_BORDER_SORT 0 + #define GPUCA_ALTERNATE_BORDER_SORT 0 #endif #ifndef GPUCA_SORT_BEFORE_FIT - #define GPUCA_SORT_BEFORE_FIT 0 + #define GPUCA_SORT_BEFORE_FIT 0 #endif #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 #endif #ifndef GPUCA_COMP_GATHER_KERNEL - #define GPUCA_COMP_GATHER_KERNEL 0 + #define GPUCA_COMP_GATHER_KERNEL 0 #endif #ifndef GPUCA_COMP_GATHER_MODE - #define GPUCA_COMP_GATHER_MODE 2 + #define GPUCA_COMP_GATHER_MODE 2 #endif #else #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 @@ -562,20 +563,20 @@ #define GPUCA_COMP_GATHER_MODE 0 #endif #ifndef GPUCA_DEDX_STORAGE_TYPE -#define GPUCA_DEDX_STORAGE_TYPE float + #define GPUCA_DEDX_STORAGE_TYPE float #endif #ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float #endif #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) #define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) #ifndef GPUCA_WARP_SIZE -#ifdef 
GPUCA_GPUCODE -#define GPUCA_WARP_SIZE 32 -#else -#define GPUCA_WARP_SIZE 1 -#endif + #ifdef GPUCA_GPUCODE + #define GPUCA_WARP_SIZE 32 + #else + #define GPUCA_WARP_SIZE 1 + #endif #endif #define GPUCA_MAX_THREADS 1024 @@ -602,10 +603,10 @@ // #define GPUCA_KERNEL_DEBUGGER_OUTPUT -// Some assertions to make sure out parameters are not invalid -static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); -static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); -#ifdef GPUCA_GPUCODE +// Some assertions to make sure the parameters are not invalid +#if defined(GPUCA_GPUCODE) + static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); + static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); #endif @@ -621,5 +622,270 @@ static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid #define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) #define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT +#ifdef GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS + // Invalid default values, must not be used, but needed for now to make the GPUDefParametersLoad() happy // TOCO: cleanup + #ifndef GPUCA_LB_GPUTPCCreateTrackingData + #define GPUCA_LB_GPUTPCCreateTrackingData 0 + #endif + #ifndef GPUCA_LB_GPUTPCTrackletConstructor + #define GPUCA_LB_GPUTPCTrackletConstructor 0 + #endif + #ifndef GPUCA_LB_GPUTPCTrackletSelector + #define GPUCA_LB_GPUTPCTrackletSelector 0 + #endif + #ifndef GPUCA_LB_GPUTPCNeighboursFinder + #define GPUCA_LB_GPUTPCNeighboursFinder 0 + #endif + #ifndef GPUCA_LB_GPUTPCNeighboursCleaner + #define GPUCA_LB_GPUTPCNeighboursCleaner 0 + #endif + #ifndef 
GPUCA_LB_GPUTPCExtrapolationTracking + #define GPUCA_LB_GPUTPCExtrapolationTracking 0 + #endif + #ifndef GPUCA_LB_GPUTRDTrackerKernels_gpuVersion + #define GPUCA_LB_GPUTRDTrackerKernels_gpuVersion 0 + #endif + #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fill + #define GPUCA_LB_GPUTPCCreateOccupancyMap_fill 0 + #endif + #ifndef GPUCA_LB_GPUTPCCreateOccupancyMap_fold + #define GPUCA_LB_GPUTPCCreateOccupancyMap_fold 0 + #endif + #ifndef GPUCA_LB_GPUTRDTrackerKernels_o2Version + #define GPUCA_LB_GPUTRDTrackerKernels_o2Version 0 + #endif + #ifndef GPUCA_LB_GPUTPCConvertKernel + #define GPUCA_LB_GPUTPCConvertKernel 0 + #endif + #ifndef GPUCA_LB_GPUTPCCompressionKernels_step0attached + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 0 + #endif + #ifndef GPUCA_LB_GPUTPCCompressionKernels_step1unattached + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step0attached + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionKernels_step1unattached + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow + #define GPUCA_LB_GPUTPCDecompressionUtilKernels_sortPerSectorRow 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters + #define GPUCA_LB_GPUTPCDecompressionUtilKernels_countFilteredClusters 0 + #endif + #ifndef GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters + #define GPUCA_LB_GPUTPCDecompressionUtilKernels_storeFilteredClusters 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDecodeZS + #define GPUCA_LB_GPUTPCCFDecodeZS 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDecodeZSLink + #define GPUCA_LB_GPUTPCCFDecodeZSLink 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDecodeZSDenseLink + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFGather + #define GPUCA_LB_GPUTPCCFGather 0 + #endif + #ifndef GPUCA_LB_COMPRESSION_GATHER + 
#define GPUCA_LB_COMPRESSION_GATHER 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerTrackFit + #define GPUCA_LB_GPUTPCGMMergerTrackFit 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFollowLoopers + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerSectorRefit + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerUnpackResetIds + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerUnpackGlobal + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step0 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step1 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step2 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step3 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerResolve_step4 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerClearLinks + #define GPUCA_LB_GPUTPCGMMergerClearLinks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeCE + #define GPUCA_LB_GPUTPCGMMergerMergeCE 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerCollect + #define GPUCA_LB_GPUTPCGMMergerCollect 0 + #endif + #ifndef 
GPUCA_LB_GPUTPCGMMergerSortTracksPrepare + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step0 + #define GPUCA_LB_GPUTPCGMMergerFinalize_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step1 + #define GPUCA_LB_GPUTPCGMMergerFinalize_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerFinalize_step2 + #define GPUCA_LB_GPUTPCGMMergerFinalize_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 + #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step0 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 + #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 + #define GPUCA_LB_GPUTPCGMMergerMergeLoopers_step2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_prepare + #define GPUCA_LB_GPUTPCGMO2Output_prepare 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_output + #define GPUCA_LB_GPUTPCGMO2Output_output 0 + #endif + #ifndef GPUCA_LB_GPUITSFitterKernels + #define GPUCA_LB_GPUITSFitterKernels 0 + #endif + #ifndef GPUCA_LB_GPUTPCStartHitsFinder + #define GPUCA_LB_GPUTPCStartHitsFinder 0 + #endif + #ifndef GPUCA_LB_GPUTPCStartHitsSorter + #define GPUCA_LB_GPUTPCStartHitsSorter 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFCheckPadBaseline + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 0 + #endif + #ifndef 
GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFPeakFinder + #define GPUCA_LB_GPUTPCCFPeakFinder 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFNoiseSuppression + #define GPUCA_LB_GPUTPCCFNoiseSuppression 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFDeconvolution + #define GPUCA_LB_GPUTPCCFDeconvolution 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFClusterizer + #define GPUCA_LB_GPUTPCCFClusterizer 0 + #endif + #ifndef GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels 0 + #endif + #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU + #define GPUCA_LB_GPUTrackingRefitKernel_mode0asGPU 0 + #endif + #ifndef GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov + #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 0 + #endif + #ifndef GPUCA_LB_GPUMemClean16 + #define GPUCA_LB_GPUMemClean16 0 + #endif + #ifndef GPUCA_LB_GPUitoa + #define GPUCA_LB_GPUitoa 0 + #endif + #ifndef GPUCA_LB_GPUTPCExtrapolationTrackingCopyNumbers + #define GPUCA_LB_GPUTPCExtrapolationTrackingCopyNumbers 0 + #endif + #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_hitData + #define GPUCA_LB_GPUTPCSectorDebugSortKernels_hitData 0 + #endif + #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_startHits + #define GPUCA_LB_GPUTPCSectorDebugSortKernels_startHits 0 + #endif + #ifndef GPUCA_LB_GPUTPCSectorDebugSortKernels_sectorTracks + #define GPUCA_LB_GPUTPCSectorDebugSortKernels_sectorTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_clearIds + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_clearIds 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_sectorTracks + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_sectorTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks1 + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks2 + #define 
GPUCA_LB_GPUTPCGlobalDebugSortKernels_extrapolatedTracks2 0 + #endif + #ifndef GPUCA_LB_GPUTPCGlobalDebugSortKernels_borderTracks + #define GPUCA_LB_GPUTPCGlobalDebugSortKernels_borderTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerUnpackSaveNumber + #define GPUCA_LB_GPUTPCGMMergerUnpackSaveNumber 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_step1 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step1 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerMergeBorders_variant + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_variant 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerSortTracks + #define GPUCA_LB_GPUTPCGMMergerSortTracks 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMMergerSortTracksQPt + #define GPUCA_LB_GPUTPCGMMergerSortTracksQPt 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_sort + #define GPUCA_LB_GPUTPCGMO2Output_sort 0 + #endif + #ifndef GPUCA_LB_GPUTPCGMO2Output_mc + #define GPUCA_LB_GPUTPCGMO2Output_mc 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFMCLabelFlattener_setRowOffsets + #define GPUCA_LB_GPUTPCCFMCLabelFlattener_setRowOffsets 0 + #endif + #ifndef GPUCA_LB_GPUTPCCFMCLabelFlattener_flatten + #define GPUCA_LB_GPUTPCCFMCLabelFlattener_flatten 0 + #endif +#endif // GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS + // clang-format on -#endif +#endif // GPUDEFPARAMETERSDEFAULT_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc new file mode 100644 index 0000000000000..953750b6f925b --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -0,0 +1,56 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDefParametersLoad.inc +/// \author David Rohr + +#include "GPUDefParameters.h" +#include "GPUDefMacros.h" +#include +#include + +namespace o2::gpu::internal +{ + +static GPUDefParameters GPUDefParametersLoad() +{ + return GPUDefParameters{ + // clang-format off + {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_LB_>,APPEND,)>,$>}, + {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0))>,$>}, + {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>} + // clang-format on + }; +} + +#define GPUCA_EXPORT_KERNEL(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define " GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << par.par_LB_minBlocks[i]; \ + } \ + if (par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ + i++; + +static std::string GPUDefParametersExport(const GPUDefParameters& par) +{ + std::stringstream o; // clang-format off + int32_t i = 0; + $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL(>,APPEND,)>, + > + return o.str(); // clang-format on +} + +} // namespace o2::gpu::internal diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index ed4fc5c9f7e2d..dfc8e8db3bc7a 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -62,8 +62,11 @@ if (GPUCA_BUILD_DEBUG_SANITIZE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -shared-libasan") endif() endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings 
-Wno-vla-cxx-extension") -set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -rdynamic -Wl,--no-undefined") +string(APPEND CMAKE_CXX_FLAGS " -Wno-error -Wall -Wextra -Wshadow -Wno-unused-function -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-unknown-pragmas -Wno-write-strings") +string(APPEND CMAKE_SHARED_LINKER_FLAGS " -rdynamic -Wl,--no-undefined") +if(CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") + string(APPEND CMAKE_CXX_FLAGS " -Wno-vla-cxx-extension") +endif() # Find mandatory packages find_package(TBB REQUIRED) diff --git a/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h index dac93277d5ec9..499672bf00b50 100644 --- a/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h +++ b/GPU/GPUTracking/cmake/GPUNoFastMathKernels.template.h @@ -12,6 +12,9 @@ /// \file GPUNoFastMathKernels.h /// \author David Rohr +#ifndef GPUNOFASTMATHKERNELS_H +#define GPUNOFASTMATHKERNELS_H + #include #include @@ -21,3 +24,5 @@ namespace o2::gpu::internal static const std::unordered_set noFastMathKernels = {$>,APPEND,">,PREPEND,">,$ >}; // clang-format on } // namespace o2::gpu::internal + +#endif diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index ad348a84264f0..6d8b49116accc 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -37,8 +37,8 @@ o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRAC o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" "NO_REG, (GPUCA_THREAD_COUNT, 1)" int32_t* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr "uint64_t" size) +o2_gpu_add_kernel("GPUitoa" 
"GPUGeneralKernels" NO int32_t* ptr "uint64_t" size) o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO int32_t n) o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index bbbb420354fae..95db55041184f 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -82,7 +82,6 @@ elseif(NOT GPUCA_DETERMINISTIC_MODE MATCHES "^[0-9]+$") message(FATAL_ERROR "Invalid setting ${GPUCA_DETERMINISTIC_MODE} for GPUCA_DETERMINISTIC_MODE") endif() set(GPUCA_DETERMINISTIC_MODE ${GPUCA_DETERMINISTIC_MODE_MAP_${GPUCA_DETERMINISTIC_MODE}}) - message(STATUS "Set to ${GPUCA_DETERMINISTIC_MODE}") endif() if (CMAKE_SYSTEM_NAME MATCHES Darwin OR NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") set(GPUCA_CXX_DENORMALS_FLAGS "") From a0e63ef302252475223531d74e7871dd942c951d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 15:52:06 +0100 Subject: [PATCH 0307/1914] GPU TPC: Drop unused TrackletConstructor_allSectors kernel --- .../Definitions/GPUDefParametersDefault.h | 2 - .../Global/GPUChainTrackingSectorTracker.cxx | 2 +- .../GPUTPCTrackletConstructor.cxx | 75 +------------------ .../SectorTracker/GPUTPCTrackletConstructor.h | 11 +-- GPU/GPUTracking/kernels.cmake | 3 +- 5 files changed, 4 insertions(+), 89 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h index 7051fff2f177d..1193731acd9cf 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h @@ -515,8 +515,6 @@ #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN #define 
GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN -#define GPUCA_LB_GPUTPCTrackletConstructor_singleSector GPUCA_LB_GPUTPCTrackletConstructor -#define GPUCA_LB_GPUTPCTrackletConstructor_allSectors GPUCA_LB_GPUTPCTrackletConstructor #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index e161f74a31032..3e7447892307a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -105,7 +105,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); // Initialize Startup Constants - processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; + processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; processorsShadow()->tpcTrackers[iSector].SetGPUTextureBase(mRec->DeviceMemoryBase()); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 5a7df0ba8b874..71df683eee1dc 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -476,7 +476,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::DoTracklet(GPUconstantref() GPUTPCT } template <> -GPUdii() void 
GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) +GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker) { if (get_local_id(0) == 0) { sMem.mNStartHits = *tracker.NStartHits(); @@ -491,79 +491,6 @@ GPUdii() void GPUTPCTrackletConstructor::Thread -GPUdii() void GPUTPCTrackletConstructor::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& sMem, processorType& GPUrestrict() tracker0) -{ - GPUconstantref() GPUTPCTracker* GPUrestrict() pTracker = &tracker0; -#ifdef GPUCA_GPUCODE - int32_t mySector = get_group_id(0) % GPUCA_NSECTORS; - int32_t currentSector = -1; - - if (get_local_id(0) == 0) { - sMem.mNextStartHitFirstRun = 1; - } - GPUCA_UNROLL(, U()) - for (uint32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { - GPUconstantref() GPUTPCTracker& GPUrestrict() tracker = pTracker[mySector]; - - GPUTPCThreadMemory rMem; - - while ((rMem.mISH = FetchTracklet(tracker, sMem)) != -2) { - if (rMem.mISH >= 0 && get_local_id(0) < GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor)) { - rMem.mISH += get_local_id(0); - } else { - rMem.mISH = -1; - } - - if (mySector != currentSector) { - if (get_local_id(0) == 0) { - sMem.mNStartHits = *tracker.NStartHits(); - } - CA_SHARED_CACHE(&sMem.mRows[0], tracker.TrackingDataRows(), GPUCA_ROW_COUNT * sizeof(GPUTPCRow)); - GPUbarrier(); - currentSector = mySector; - } - - if (rMem.mISH >= 0 && rMem.mISH < sMem.mNStartHits) { - rMem.mGo = true; - DoTracklet(tracker, sMem, rMem); - } - } - if (++mySector >= GPUCA_NSECTORS) { - mySector = 0; - } - } -#else - for (int32_t iSector = 0; iSector < GPUCA_NSECTORS; iSector++) { - Thread(nBlocks, nThreads, iBlock, iThread, sMem, pTracker[iSector]); - } -#endif -} - -#ifdef 
GPUCA_GPUCODE - -GPUd() int32_t GPUTPCTrackletConstructor::FetchTracklet(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUSharedMemory& sMem) -{ - const uint32_t nStartHit = *tracker.NStartHits(); - GPUbarrier(); - if (get_local_id(0) == 0) { - int32_t firstStartHit = -2; - if (sMem.mNextStartHitFirstRun == 1) { - firstStartHit = (get_group_id(0) - tracker.ISector()) / GPUCA_NSECTORS * GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor); - sMem.mNextStartHitFirstRun = 0; - } else { - if (tracker.GPUParameters()->nextStartHit < nStartHit) { - firstStartHit = CAMath::AtomicAdd(&tracker.GPUParameters()->nextStartHit, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletConstructor)); - } - } - sMem.mNextStartHitFirst = firstStartHit < (int32_t)nStartHit ? firstStartHit : -2; - } - GPUbarrier(); - return (sMem.mNextStartHitFirst); -} - -#endif // GPUCA_GPUCODE - template <> // FIXME: GPUgeneric() needed to make the clang spirv output link correctly GPUd() int32_t GPUTPCTrackletConstructor::GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& GPUrestrict() tracker, GPUsharedref() GPUTPCExtrapolationTracking::GPUSharedMemory& sMem, GPUTPCTrackParam& GPUrestrict() tParam, int32_t row, int32_t increment, int32_t iTracklet, calink* rowHits) { diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index 0f8314ee0fad4..af87d0276f1c7 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -28,14 +28,9 @@ namespace o2::gpu */ class GPUTPCTracker; -class GPUTPCTrackletConstructor +class GPUTPCTrackletConstructor : public GPUKernelTemplate { public: - enum K { - singleSector = 0, - allSectors = 1 - }; - class GPUTPCThreadMemory { friend class GPUTPCTrackletConstructor; //! 
friend class @@ -89,10 +84,6 @@ class GPUTPCTrackletConstructor GPUd() static void DoTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() GPUTPCTrackletConstructor::GPUSharedMemory& sMem, GPUTPCThreadMemory& rMem); -#ifdef GPUCA_GPUCODE - GPUd() static int32_t FetchTracklet(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() GPUSharedMemory& sMem); -#endif // GPUCA_GPUCODE - template GPUd() static int32_t GPUTPCTrackletConstructorExtrapolationTracking(GPUconstantref() GPUTPCTracker& tracker, GPUsharedref() T& sMem, GPUTPCTrackParam& tParam, int32_t startrow, int32_t increment, int32_t iTracklet, calink* rowHits); diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 6d8b49116accc..994f10a516b10 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -34,8 +34,7 @@ o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRAC o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, singleSector" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletConstructor, allSectors" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletConstructor" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr "uint64_t" size) o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO int32_t* ptr "uint64_t" size) From a5caa277d89ede13c8a8161438a9c1eda1cb14a2 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 17:00:24 +0100 Subject: [PATCH 0308/1914] GPU: Count kernel number in Cmake, and pass to MACROS, to simplify the preprocessor logic --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 2 +- .../Base/GPUReconstructionProcessing.cxx | 44 ++++++------------- .../Base/GPUReconstructionProcessing.h | 2 +- 
.../Base/cuda/GPUReconstructionCUDAKernels.cu | 6 +-- .../opencl/GPUReconstructionOCLKernels.cxx | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 4 +- 7 files changed, 24 insertions(+), 38 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index d5404618c32b1..d714c6833d18d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -138,7 +138,7 @@ gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProper return ret; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ template void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ template krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 099fed5afacf0..48d6ddf17959a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -72,7 +72,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ { \ if (cpuFallback) { \ diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index 58df7f01823dc..95a47dec946e6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -143,38 +143,22 @@ std::unique_ptr GPUReconstructionProc gpu_reconstruction_kernels::threadContext::threadContext() = default; gpu_reconstruction_kernels::threadContext::~threadContext() = default; -template -uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t k) -{ - static int32_t num = k; - if (num < 0) { - throw 
std::runtime_error("Internal Error - Kernel Number not Set"); - } - return num; -} - -namespace o2::gpu::internal -{ -static std::vector initKernelNames() -{ - std::vector retVal; -#define GPUCA_KRNL(x_class, ...) \ - GPUReconstructionProcessing::GetKernelNum(retVal.size()); \ - retVal.emplace_back(GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class))); +const std::vector GPUReconstructionProcessing::mKernelNames = { +#define GPUCA_KRNL(x_class, ...) GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)), #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL - return retVal; -} -} // namespace o2::gpu::internal - -const std::vector GPUReconstructionProcessing::mKernelNames = o2::gpu::internal::initKernelNames(); - -#define GPUCA_KRNL(x_class, ...) \ - template uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t); \ - template <> \ - const char* GPUReconstructionProcessing::GetKernelName() \ - { \ - return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ +}; + +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ + template <> \ + uint32_t GPUReconstructionProcessing::GetKernelNum() \ + { \ + return x_num; \ + } \ + template <> \ + const char* GPUReconstructionProcessing::GetKernelName() \ + { \ + return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \ } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index f539c91b90a6e..2428027118c0a 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -80,7 +80,7 @@ class GPUReconstructionProcessing : public GPUReconstruction static const char* GetKernelName(); const std::string& GetKernelName(int32_t i) const { return mKernelNames[i]; } template - static uint32_t GetKernelNum(int32_t k = -1); + static uint32_t GetKernelNum(); // Public queries for timers auto& getRecoStepTimer(RecoStep step) { return 
mTimersRecoSteps[getRecoStepNum(step)]; } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 2596d0e19ec48..ac79dd7576e48 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -83,14 +83,14 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types) \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ template void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args); #ifndef __HIPCC__ // CUDA version diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index f71336ac35e0e..cca634fba65fc 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -91,6 +91,6 @@ int32_t GPUReconstructionOCLBackend::AddKernels() return 0; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) 
template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 99699cc72e940..3c1ad9658566b 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -64,7 +64,9 @@ function(o2_gpu_add_kernel kernel_name kernel_files) endif() set(TMP_PRE "") set(TMP_POST "") - set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_extra}), (${OPT1}), (${OPT2}), (${OPT3}))\n") + get_property(LIST_KERNELS TARGET O2_GPU_KERNELS PROPERTY O2_GPU_KERNELS) + list(LENGTH LIST_KERNELS KERNEL_COUNT) + set(TMP_KERNEL "GPUCA_KRNL${TMP_BOUNDS}((${kernel_name}), (${kernel_extra}), (${OPT1}), (${OPT2}), (${OPT3}), ${KERNEL_COUNT})\n") separate_arguments(kernel_files NATIVE_COMMAND ${kernel_files}) list(GET kernel_files 0 TMP_KERNEL_CLASS_FILE) if (TMP_KERNEL_CLASS_FILE STREQUAL "=") From 90f376389dafe933649987a4cdeaf3e03dcc01bf Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 27 Mar 2025 17:02:08 +0100 Subject: [PATCH 0309/1914] GPU: Rewrite virtual kernel call to a single virtual function, should enable further simplifications in the future --- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 6 ++++-- GPU/GPUTracking/Base/GPUReconstructionKernels.h | 16 +++++++++++----- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 48d6ddf17959a..163b00c804d7f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -75,10 +75,12 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ { \ + krnlSetupArgs args(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward)); \ + const uint32_t num = GetKernelNum(); \ 
if (cpuFallback) { \ - GPUReconstructionCPU::runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ + GPUReconstructionCPU::runKernelImpl(num, &args); \ } else { \ - runKernelImpl(krnlSetupArgs(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \ + runKernelImpl(num, &args); \ } \ } #include "GPUReconstructionKernelList.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h index f3d52da8b5613..7f500d471de1f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernels.h @@ -95,13 +95,19 @@ class GPUReconstructionKernels : public T template using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \ - virtual void runKernelImpl(const krnlSetupArgs& args) \ - { \ - T::template runKernelBackend(args); \ - } + virtual void runKernelImpl(const int num, const void* args) + { + switch (num) { // clang-format off +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ + case x_num: { \ + const auto& args2 = *(const krnlSetupArgs*)args; \ + T::template runKernelBackend(args2); \ + break; \ + } #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL + } // clang-format on + } }; } // namespace o2::gpu From 3f679a66429822f285a152c16ee9959556bdba84 Mon Sep 17 00:00:00 2001 From: Andreas Molander Date: Thu, 20 Mar 2025 10:43:35 +0200 Subject: [PATCH 0310/1914] FIT: Geometry alignment macros - Make misalignment macros usable for creating exact misalignments - Add macro for reading misalignments from CCDB - Add a couple of plots to hit reading macros (needed to verify misalignments) - Minor change to FV0 geometry to provide symbolic names of alignable volumes --- Detectors/FIT/FT0/macros/FT0Misaligner.C | 44 ++++++++++------ .../FIT/FV0/base/include/FV0Base/Geometry.h | 12 +++++
Detectors/FIT/FV0/macros/FV0Misaligner.C | 46 ++++++++++++----- Detectors/FIT/FV0/simulation/src/Detector.cxx | 19 +++---- Detectors/FIT/macros/CMakeLists.txt | 4 ++ Detectors/FIT/macros/readAlignParam.C | 51 +++++++++++++++++++ Detectors/FIT/macros/readFT0hits.C | 29 +++++++++-- Detectors/FIT/macros/readFV0hits.C | 37 ++++++++++++-- 8 files changed, 196 insertions(+), 46 deletions(-) create mode 100644 Detectors/FIT/macros/readAlignParam.C diff --git a/Detectors/FIT/FT0/macros/FT0Misaligner.C b/Detectors/FIT/FT0/macros/FT0Misaligner.C index 7585411066934..9621d1a079bc9 100644 --- a/Detectors/FIT/FT0/macros/FT0Misaligner.C +++ b/Detectors/FIT/FT0/macros/FT0Misaligner.C @@ -1,15 +1,30 @@ +// Copyright 2021-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file FT0Misaligner.C +/// \brief ROOT macro for creating an FT0 geometry alignment object. 
Based on ITSMisaligner.C +/// +/// \author Andreas Molander andreas.molander@cern.ch, Alla Maevskaya + #if !defined(__CLING__) || defined(__ROOTCLING__) -//#define ENABLE_UPGRADES + +#include "CCDB/CcdbApi.h" #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" #include "DetectorsCommonDataFormats/AlignParam.h" -#include "DetectorsBase/GeometryManager.h" -#include "CCDB/CcdbApi.h" -#include "FT0Base/Geometry.h" -#include + #include #include #include + #endif using AlgPar = std::array; @@ -23,19 +38,15 @@ void FT0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" const std::string& fileName = "FT0Alignment.root") { std::vector params; - o2::base::GeometryManager::loadGeometry("", false); - // auto geom = o2::ft0::Geometry::Instance(); AlgPar pars; bool glo = true; o2::detectors::DetID detFT0("FT0"); - // FT0 detector - //set A side std::string symNameA = "FT0A"; pars = generateMisalignment(xA, yA, zA, psiA, thetaA, phiA); params.emplace_back(symNameA.c_str(), -1, pars[0], pars[1], pars[2], pars[3], pars[4], pars[5], glo); - //set C side + std::string symNameC = "FT0C"; pars = generateMisalignment(xC, yC, zC, psiC, thetaC, phiC); params.emplace_back(symNameC.c_str(), -1, pars[0], pars[1], pars[2], pars[3], pars[4], pars[5], glo); @@ -57,14 +68,15 @@ void FT0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" algFile.Close(); } } + AlgPar generateMisalignment(double x, double y, double z, double psi, double theta, double phi) { AlgPar pars; - pars[0] = gRandom->Gaus(0, x); - pars[1] = gRandom->Gaus(0, y); - pars[2] = gRandom->Gaus(0, z); - pars[3] = gRandom->Gaus(0, psi); - pars[4] = gRandom->Gaus(0, theta); - pars[5] = gRandom->Gaus(0, phi); + pars[0] = x; + pars[1] = y; + pars[2] = z; + pars[3] = psi; + pars[4] = theta; + pars[5] = phi; return std::move(pars); } diff --git a/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h b/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h 
index 3b50be7441ec2..ec87c07c57c45 100644 --- a/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h +++ b/Detectors/FIT/FV0/base/include/FV0Base/Geometry.h @@ -133,6 +133,16 @@ class Geometry return o2::base::GeometryManager::getPNEntry(getDetID(), index); } + static std::string getDetectorRightSymName() + { + return sDetectorRightName + "_0"; + } + + static std::string getDetectorLeftSymName() + { + return sDetectorLeftName + "_1"; + } + /// Get the density of the PMTs. static constexpr float getPmtDensity() { @@ -143,6 +153,8 @@ class Geometry explicit Geometry(EGeoType initType); inline static const std::string sDetectorName = "FV0"; + inline static const std::string sDetectorRightName = sDetectorName + "RIGHT"; + inline static const std::string sDetectorLeftName = sDetectorName + "LEFT"; // General geometry constants static constexpr float sEpsilon = 0.01; ///< Used to make one spatial dimension infinitesimally larger than other diff --git a/Detectors/FIT/FV0/macros/FV0Misaligner.C b/Detectors/FIT/FV0/macros/FV0Misaligner.C index 500bdaf565965..88f7a0b82b8b3 100644 --- a/Detectors/FIT/FV0/macros/FV0Misaligner.C +++ b/Detectors/FIT/FV0/macros/FV0Misaligner.C @@ -1,13 +1,32 @@ +// Copyright 2021-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file FV0Misaligner.C +/// \brief ROOT macro for creating an FV0 geometry alignment object. The alignment object will align both +/// detector halves in the same way. 
Based on ITSMisaligner.C +/// +/// \author Andreas Molander andreas.molander@cern.ch, Alla Maevskaya + #if !defined(__CLING__) || defined(__ROOTCLING__) + +#include "CCDB/CcdbApi.h" #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" #include "DetectorsCommonDataFormats/AlignParam.h" -#include "DetectorsBase/GeometryManager.h" -#include "CCDB/CcdbApi.h" -#include +#include "FV0Base/Geometry.h" + #include #include #include + #endif using AlgPar = std::array; @@ -20,16 +39,14 @@ void FV0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" const std::string& fileName = "FV0Alignment.root") { std::vector params; - o2::base::GeometryManager::loadGeometry("", false); AlgPar pars; bool glo = true; o2::detectors::DetID detFV0("FV0"); - // FV0 detector - for (int ihalf = 1; ihalf < 3; ihalf++) { - std::string symName = Form("FV0half_%i", ihalf); - pars = generateMisalignment(x, y, z, psi, theta, phi); + pars = generateMisalignment(x, y, z, psi, theta, phi); + + for (auto& symName : {o2::fv0::Geometry::getDetectorRightSymName(), o2::fv0::Geometry::getDetectorLeftSymName()}) { params.emplace_back(symName.c_str(), -1, pars[0], pars[1], pars[2], pars[3], pars[4], pars[5], glo); } @@ -50,14 +67,15 @@ void FV0Misaligner(const std::string& ccdbHost = "http://ccdb-test.cern.ch:8080" algFile.Close(); } } + AlgPar generateMisalignment(double x, double y, double z, double psi, double theta, double phi) { AlgPar pars; - pars[0] = gRandom->Gaus(0, x); - pars[1] = gRandom->Gaus(0, y); - pars[2] = gRandom->Gaus(0, z); - pars[3] = gRandom->Gaus(0, psi); - pars[4] = gRandom->Gaus(0, theta); - pars[5] = gRandom->Gaus(0, phi); + pars[0] = x; + pars[1] = y; + pars[2] = z; + pars[3] = psi; + pars[4] = theta; + pars[5] = phi; return std::move(pars); } diff --git a/Detectors/FIT/FV0/simulation/src/Detector.cxx b/Detectors/FIT/FV0/simulation/src/Detector.cxx index 8cf1f5530e93d..07eb9053bf3b8 100644 --- 
a/Detectors/FIT/FV0/simulation/src/Detector.cxx +++ b/Detectors/FIT/FV0/simulation/src/Detector.cxx @@ -280,6 +280,7 @@ void Detector::ConstructGeometry() // mGeometry->enableComponent(Geometry::eAluminiumContainer, false); mGeometry->buildGeometry(); } + void Detector::addAlignableVolumes() const { // @@ -292,19 +293,19 @@ void Detector::addAlignableVolumes() const LOG(info) << "FV0: Add alignable volumes"; if (!gGeoManager) { - LOG(fatal) << "TGeoManager doesn't exist !"; + LOG(fatal) << "TGeoManager doesn't exist!"; return; } - TString volPath, symName; - for (auto& half : {"RIGHT_0", "LEFT_1"}) { - volPath = Form("/cave_1/barrel_1/FV0_1/FV0%s", half); - symName = Form("FV0%s", half); - LOG(info) << "FV0: Add alignable volume: " << symName << ": " << volPath; - if (!gGeoManager->SetAlignableEntry(symName.Data(), volPath.Data())) { - LOG(fatal) << "FV0: Unable to set alignable entry! " << symName << ": " << volPath; + auto addAlignabelVolume = [](const std::string& volPath, const std::string& symName) -> void { + LOG(info) << "FV0: Add alignable volume: " << symName << " <-> " << volPath; + if (!gGeoManager->SetAlignableEntry(symName.c_str(), volPath.c_str())) { + LOG(fatal) << "FV0: Unable to set alignable entry! 
" << symName << " <-> " << volPath; } - } + }; + + addAlignabelVolume("/cave_1/barrel_1/FV0_1/FV0RIGHT_0", Geometry::getDetectorRightSymName()); + addAlignabelVolume("/cave_1/barrel_1/FV0_1/FV0LEFT_1", Geometry::getDetectorLeftSymName()); } o2::fv0::Hit* Detector::addHit(Int_t trackId, Int_t cellId, diff --git a/Detectors/FIT/macros/CMakeLists.txt b/Detectors/FIT/macros/CMakeLists.txt index e7debb4184325..a6bf1799a5dde 100644 --- a/Detectors/FIT/macros/CMakeLists.txt +++ b/Detectors/FIT/macros/CMakeLists.txt @@ -45,5 +45,9 @@ o2_add_test_root_macro(compareRecPoints.C O2::DataFormatsFIT LABELS fit) +o2_add_test_root_macro(readAlignParam.C + PUBLIC_LINK_LIBRARIES O2::CCDB + LABELS fit) + o2_data_file(COPY readFITDCSdata.C DESTINATION Detectors/FIT/macros/) o2_data_file(COPY readFITDeadChannelMap.C DESTINATION Detectors/FIT/macros/) \ No newline at end of file diff --git a/Detectors/FIT/macros/readAlignParam.C b/Detectors/FIT/macros/readAlignParam.C new file mode 100644 index 0000000000000..c438e7a0c86a5 --- /dev/null +++ b/Detectors/FIT/macros/readAlignParam.C @@ -0,0 +1,51 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file readAlignParam.C +/// \brief ROOT macro for reading geometry alignment parameters +/// +/// \author Andreas Molander + +#if !defined(__CLING__) || defined(__ROOTCLING__) + +#include "CCDB/BasicCCDBManager.h" +#include "DetectorsCommonDataFormats/AlignParam.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DetectorsCommonDataFormats/DetectorNameConf.h" + +#include +#include + +#endif + +int readAlignParam(const std::string& detectorName = "FT0", + long timestamp = -1, + const std::string& ccdbUrl = "https://alice-ccdb.cern.ch") +{ + o2::ccdb::BasicCCDBManager& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); + ccdbManager.setURL(ccdbUrl); + ccdbManager.setTimestamp(timestamp); + + const o2::detectors::DetID detID(detectorName.c_str()); + const std::string alignmentPath = o2::base::DetectorNameConf::getAlignmentPath(detID); + const auto alignments = ccdbManager.get>(alignmentPath); + + if (!alignments) { + std::cerr << "No alignment parameters found at " << alignmentPath << std::endl; + return 1; + } + + for (auto alignment : *alignments) { + alignment.print(); + } + + return 0; +} \ No newline at end of file diff --git a/Detectors/FIT/macros/readFT0hits.C b/Detectors/FIT/macros/readFT0hits.C index 14d25fa4a99a8..fafcaac570311 100644 --- a/Detectors/FIT/macros/readFT0hits.C +++ b/Detectors/FIT/macros/readFT0hits.C @@ -1,13 +1,29 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ #if !defined(__CLING__) || defined(__ROOTCLING__) + +#include "DataFormatsFIT/Triggers.h" #include "DataFormatsFT0/Digit.h" #include "DataFormatsFT0/HitType.h" #include "SimulationDataFormat/MCEventHeader.h" #include +#include #include #include #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" +#endif + void readFT0hits() { @@ -24,6 +40,8 @@ void readFT0hits() TH2F* hPel = new TH2F("hPelDig", "N p.e. ", 220, 0, 220, 500, 0, 10000); TH2F* hXYA = new TH2F("hXYA", "X vs Y A side", 400, -20, 20, 400, -20, 20); TH2F* hXYC = new TH2F("hXYC", "X vs Y C side", 400, -20, 20, 400, -20, 20); + TH1F* hZA = new TH1F("hZA", "Z A side", 200, 330, 340); + TH1F* hZC = new TH1F("hZC", "Z C side", 200, -90, -80); gDirectory = cwd; @@ -59,10 +77,13 @@ void readFT0hits() hTimeHitA->Fill(detID, hit_time[detID] - 11.04); hTimeHitC->Fill(detID, hit_time[detID] - 2.91); countE[detID]++; - if (detID < 96) + if (detID < 96) { hXYA->Fill(hit.GetX(), hit.GetY()); - if (detID > 95) + hZA->Fill(hit.GetZ()); + } else { hXYC->Fill(hit.GetX(), hit.GetY()); + hZC->Fill(hit.GetZ()); + } } for (int ii = 0; ii < 220; ii++) { if (countE[ii] > 100) { @@ -82,6 +103,6 @@ void readFT0hits() hMultHit->Write(); hXYA->Write(); hXYC->Write(); - + hZA->Write(); + hZC->Write(); } // end of macro -#endif diff --git a/Detectors/FIT/macros/readFV0hits.C b/Detectors/FIT/macros/readFV0hits.C index 5b0dfa8428dc7..933138fb1434b 100644 --- a/Detectors/FIT/macros/readFV0hits.C +++ b/Detectors/FIT/macros/readFV0hits.C @@ -1,3 +1,14 @@ +// Copyright 2019-2025 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + #if !defined(__CLING__) || defined(__ROOTCLING__) #include @@ -22,6 +33,8 @@ #include "DetectorsCommonDataFormats/DetectorNameConf.h" #include "DetectorsCommonDataFormats/DetID.h" +#endif + void AdjustStatBox(TH1* h, float x1ndc, float x2ndc, float y1ndc, float y2ndc) { gPad->Update(); @@ -54,6 +67,9 @@ void InitHistoNames(std::vector& vhName, std::vector& vPdg) vhName.push_back("hElossDet"); vhName.push_back("hEtotVsR"); vhName.push_back("hEtotVsEloss"); + vhName.push_back("hXY"); + vhName.push_back("hXYzoom"); + vhName.push_back("hZ"); for (UInt_t ipdg = 0; ipdg < vPdg.size(); ipdg++) { std::stringstream ss; @@ -63,7 +79,7 @@ void InitHistoNames(std::vector& vhName, std::vector& vPdg) } } -void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) +void readFV0hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) { using namespace o2::detectors; std::string simFName(o2::base::DetectorNameConf::getHitsFileName(DetID::FV0, simPrefix)); @@ -85,6 +101,9 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) TH2F* hElossDet = new TH2F(vHistoNames.at(8).c_str(), "", nEl, 0, el1, nCells, 0, nCells); TH2F* hEtotVsR = new TH2F(vHistoNames.at(9).c_str(), "", 30000, 0, 300, 80, 0, 80); TH2F* hEtotVsEloss = new TH2F(vHistoNames.at(10).c_str(), "", 30000, 0, 300, nEl, 0, el1); + TH2F* hXY = new TH2F(vHistoNames.at(11).c_str(), "", 200, -100, 100, 200, -100, 100); + TH2F* hXYzoom = new TH2F(vHistoNames.at(12).c_str(), "", 200, -20, 20, 200, -20, 20); + TH1F* hZ = new TH1F(vHistoNames.at(13).c_str(), "", 200, 315, 325); // Setup histo properties hElossDet->SetXTitle("Energy loss [MeV]"); @@ -96,6 +115,14 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) hEtotVsEloss->SetXTitle("Total energy at entrance [MeV]"); 
hEtotVsEloss->SetYTitle("Energy loss [MeV]"); hEtotVsEloss->SetZTitle("Counts"); + hXY->SetXTitle("X [cm]"); + hXY->SetYTitle("Y [cm]"); + hXY->SetZTitle("Counts"); + hXYzoom->SetXTitle("X [cm]"); + hXYzoom->SetYTitle("Y [cm]"); + hXYzoom->SetZTitle("Counts"); + hZ->SetXTitle("Hit Z-coordinate [cm]"); + hZ->SetYTitle("Counts"); for (UInt_t ih = 0; ih < vhElossVsDistance.size(); ih++) { TH2F* h = vhElossVsDistance.at(ih); std::stringstream ss; @@ -124,6 +151,9 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) vh.push_back(hEtotVsEloss); vh.insert(vh.end(), vhElossVsDistance.begin(), vhElossVsDistance.end()); vh.insert(vh.end(), vhElossVsEtot.begin(), vhElossVsEtot.end()); + vh.push_back(hXY); + vh.push_back(hXYzoom); + vh.push_back(hZ); for (UInt_t ih = 0; ih < vh.size(); ih++) { vh[ih]->SetDirectory(0); vh[ih]->GetXaxis()->SetTitleSize(fontsize); @@ -177,6 +207,9 @@ void readFV0Hits(std::string simPrefix = "o2sim", UInt_t rebin = 1) vhElossVsDistance.at(vhElossVsDistance.size() - 1)->Fill(hit->GetEnergyLoss() * 1e3, distance); vhElossVsEtot.at(vhElossVsEtot.size() - 1)->Fill(hit->GetEnergyLoss() * 1e3, hit->GetTotalEnergyAtEntrance() * 1e3); } + hXY->Fill(hit->GetX(), hit->GetY()); + hXYzoom->Fill(hit->GetX(), hit->GetY()); + hZ->Fill(hit->GetZ()); } } @@ -323,5 +356,3 @@ int compareFV0Hits(std::string simFName1 = "fv0hit-rawhistos.root", std::string } return 0; } - -#endif From 5a2ecfc89359b70fdc20c4bd05d0cf62c22a889b Mon Sep 17 00:00:00 2001 From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Fri, 28 Mar 2025 09:25:38 +0100 Subject: [PATCH 0311/1914] Update tests in Clean PR action (#14119) --- .github/workflows/clean-test.yml | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/.github/workflows/clean-test.yml b/.github/workflows/clean-test.yml index cbc524910c33e..0f15301d4eed9 100644 --- a/.github/workflows/clean-test.yml +++ b/.github/workflows/clean-test.yml @@ -19,10 +19,6 @@ name: Clean PR 
checks # Warning: the check_* keys are magic and must consist of the string # "check_" followed by the applicable check name exactly. The # "description" field is only the human-readable label for the input. - 'check_build/O2/o2': - description: build/O2/o2 - type: boolean - default: true 'check_build/AliceO2/O2/o2/macOS': description: build/AliceO2/O2/o2/macOS type: boolean @@ -31,14 +27,10 @@ name: Clean PR checks description: build/AliceO2/O2/o2/macOS-arm type: boolean default: true - 'check_build/O2/fullCI': + 'check_build/O2/fullCI_slc9': description: build/O2/fullCI type: boolean default: true - 'check_build/O2/o2-cs8': - description: build/O2/o2-cs8 - type: boolean - default: true 'check_build/O2/o2-dataflow-cs8': description: build/O2/o2-dataflow-cs8 type: boolean From 1e69f5ebf9c1100d52752410592a9198345bc0a7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 28 Mar 2025 12:45:28 +0100 Subject: [PATCH 0312/1914] GPU TPC: Fix Bz=0 threshold --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 288a24dee5d99..f03964d35ff82 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1740,7 +1740,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.DzDs() = p2.DzDs(); p1.QPt() = p2.QPt(); mergedTrack.SetAlpha(p2.Alpha()); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) { + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } From ebf86708921b595c8c7c190e0a6bb0acb058d05e Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Fri, 28 Mar 2025 12:08:28 +0100 Subject: [PATCH 0313/1914] Generator example for quick HepMC extraction from Pythia8 --- .../Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro 
| 44 +++++++++++++++++++ .../Pythia8_HepMC_Wrapper/README.md | 16 +++++++ run/SimExamples/Pythia8_HepMC_Wrapper/run.sh | 19 ++++++++ run/SimExamples/README.md | 1 + 4 files changed, 80 insertions(+) create mode 100644 run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro create mode 100644 run/SimExamples/Pythia8_HepMC_Wrapper/README.md create mode 100755 run/SimExamples/Pythia8_HepMC_Wrapper/run.sh diff --git a/run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro b/run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro new file mode 100644 index 0000000000000..9e60bc0a5d851 --- /dev/null +++ b/run/SimExamples/Pythia8_HepMC_Wrapper/Pythia8HepMC3.macro @@ -0,0 +1,44 @@ +/// \author Marco Giacalone - March 2025 + +// A simple wrapper and demonstrator around Pythia8 for extracting HepMC3 files. + +#include "Pythia8/Pythia.h" +#include "Pythia8Plugins/HepMC3.h" + +using namespace o2::eventgen; + +class HepMC3_Pythia8Wrapper : public GeneratorPythia8 +{ + public: + HepMC3_Pythia8Wrapper(std::string filename = "pythia8.hepmc") : GeneratorPythia8(), mFileName(filename) + { + // HepMC conversion object. + mToHepMC = std::make_unique(); + mToHepMC->setNewFile((filename == "" ? 
"pythia.hepmc" : filename)); + }; + ~HepMC3_Pythia8Wrapper() = default; + + bool importParticles() override + { + // events are written after the importParticles step + // since some filtering is happening there + auto ret = GeneratorPythia8::importParticles(); + if (ret) { + LOG(info) << "Writing event to HepMC3 format"; + mToHepMC->writeNextEvent(mPythia); + } + return ret; + }; + + private: + std::string mFileName = "pythia8.hepmc"; + std::unique_ptr mToHepMC; +}; + +FairGenerator* + hepmc_pythia8(std::string filename = "pythia8.hepmc") +{ + std::cout << "HepMC3_Pythia8Wrapper initialising with filename: " << filename << std::endl; + auto py8 = new HepMC3_Pythia8Wrapper(filename); + return py8; +} diff --git a/run/SimExamples/Pythia8_HepMC_Wrapper/README.md b/run/SimExamples/Pythia8_HepMC_Wrapper/README.md new file mode 100644 index 0000000000000..a334b7b3ef81e --- /dev/null +++ b/run/SimExamples/Pythia8_HepMC_Wrapper/README.md @@ -0,0 +1,16 @@ + + +This example demonstrates how we can extend GeneratorPythia8 in a user-defined macro (or external generator), +to achieve additional HepMC3 export of generated Pythia8 events. + +The example provides a small utility for poeple in need to obtain HepMC files from Pythia8. +Note that many other methods to achieve this are possible (See original Pythia8 example). + +The example provides: + +- The external generator implementation `Pythia8HepMC3.C` +- a `run.sh` script demonstrating it's usage and a check feeding back the generated hepmc into the simulation + + diff --git a/run/SimExamples/Pythia8_HepMC_Wrapper/run.sh b/run/SimExamples/Pythia8_HepMC_Wrapper/run.sh new file mode 100755 index 0000000000000..16ff80f76b1d4 --- /dev/null +++ b/run/SimExamples/Pythia8_HepMC_Wrapper/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# +# Script doing Pythia8 event generation and writing these events into HepMC3 files +# (next to generating the usual MCTrack kinematics output). 
+# +# The script also performs a second event generation based on the generated HepMC3 files. +# In principle it should yield identical kinematics files. +# + +NEVENTS=1000 +SEED=11 + +o2-sim -j 1 -g external --configKeyValues 'GeneratorExternal.fileName=Pythia8HepMC3.macro;GeneratorExternal.funcName=hepmc_pythia8("skimmed.hepmc");GeneratorPythia8.config=${O2_ROOT}/share/Generators/egconfig/pythia8_inel.cfg' --seed ${SEED} --noGeant -o pythia8_skimmed -n ${NEVENTS} +o2-sim -j 1 -g external --configKeyValues 'GeneratorExternal.fileName=Pythia8HepMC3.macro;GeneratorExternal.funcName=hepmc_pythia8("unskimmed.hepmc");GeneratorPythia8.config=${O2_ROOT}/share/Generators/egconfig/pythia8_inel.cfg;GeneratorPythia8.includePartonEvent=true' --seed ${SEED} --noGeant -o pythia8_unskimmed -n ${NEVENTS} + +# propagate generated hepmc file; it should produce the same kinematics as the original Pythia8 +o2-sim -j 1 -g hepmc --configKeyValues="GeneratorFileOrCmd.fileNames=skimmed.hepmc" --vertexMode kNoVertex --noGeant -o fromhepmc_skimmed -n ${NEVENTS} --seed ${SEED} +o2-sim -j 1 -g hepmc --configKeyValues="GeneratorFileOrCmd.fileNames=unskimmed.hepmc" --vertexMode kNoVertex --noGeant -o fromhepmc_unskimmed -n ${NEVENTS} --seed ${SEED} diff --git a/run/SimExamples/README.md b/run/SimExamples/README.md index 725d60c4854ca..3a54625acf413 100644 --- a/run/SimExamples/README.md +++ b/run/SimExamples/README.md @@ -6,6 +6,7 @@ n+1 (alpide manual) + toa = (sdel_i * mStep) + 1; break; } } @@ -1146,24 +1140,23 @@ std::vector ITSThresholdCalibrator::calculatePulseParams(const short int& countTot++; } - if (rt_maxdel > rt_mindel && rt_maxdel > 0 && rt_mindel > 0) { - sumRt += rt_maxdel - rt_mindel + mStrobeWindow; - sumSqRt += (rt_maxdel - rt_mindel + mStrobeWindow) * (rt_maxdel - rt_mindel + mStrobeWindow); - countRt++; + if (toa > 0) { + sumToA += toa + float(mStrobeWindow) / 2.; + sumSqToA += (toa + float(mStrobeWindow) / 2.) 
* (toa + float(mStrobeWindow) / 2.); + countToA++; } - rt_mindel = -1; - rt_maxdel = -1; + toa = -1.; tot_maxdel = -1; tot_mindel = -1; } // end loop over col_i } // end loop over chip rows - std::vector output; // {avgRt, rmsRt, avgTot, rmsTot} + std::vector output; // {avgToA, rmsToA, avgTot, rmsTot} // Avg Rt - output.push_back(!countRt ? 0. : (float)sumRt / (float)countRt); + output.push_back(!countToA ? 0. : (float)sumToA / (float)countToA); // Rms Rt - output.push_back(!countRt ? 0. : (std::sqrt((float)sumSqRt / (float)countRt - output[0] * output[0])) * 25.); + output.push_back(!countToA ? 0. : (std::sqrt((float)sumSqToA / (float)countToA - output[0] * output[0])) * 25.); output[0] *= 25.; // Avg ToT output.push_back(!countTot ? 0. : (float)sumTot / (float)countTot); @@ -1728,8 +1721,8 @@ void ITSThresholdCalibrator::addDatabaseEntry( o2::dcs::addConfigItem(this->mTuning, "ChipDbID", std::to_string(confDBid)); o2::dcs::addConfigItem(this->mTuning, "Tot", std::to_string(data[2])); // time over threshold o2::dcs::addConfigItem(this->mTuning, "TotRms", std::to_string(data[3])); // time over threshold rms - o2::dcs::addConfigItem(this->mTuning, "Rt", std::to_string(data[0])); // rise time - o2::dcs::addConfigItem(this->mTuning, "RtRms", std::to_string(data[1])); // rise time rms + o2::dcs::addConfigItem(this->mTuning, "ToA", std::to_string(data[0])); // rise time + o2::dcs::addConfigItem(this->mTuning, "ToARms", std::to_string(data[1])); // rise time rms } //- Pulse shape 2D: avgToT, rmsToT, MTC, rmsMTC, avgMTCD, rmsMTCD, avgMPL, rmsMPL, avgMPLC, rmsMPLC From 24c97f247de4de1b811157c401bd2e9ef80f155b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 9 Apr 2025 09:23:07 +0200 Subject: [PATCH 0358/1914] DPL: fix setting of run number (#14152) This is actually a workaround. 
The real issue is that timer at the moment are completely outside of the data streaming and therefore do not have access to the DataTakingService, where the proper calculation for the run number happens and it's cached. OK for now. In the future we should make sure that the LifetimeHelpers::enumerate gets a "Streaming" context, not the global one. --- Framework/Core/src/LifetimeHelpers.cxx | 2 +- Framework/Core/test/test_SimpleTimer.cxx | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Framework/Core/src/LifetimeHelpers.cxx b/Framework/Core/src/LifetimeHelpers.cxx index 4621738ec154e..21aa29c1f10e9 100644 --- a/Framework/Core/src/LifetimeHelpers.cxx +++ b/Framework/Core/src/LifetimeHelpers.cxx @@ -423,7 +423,7 @@ ExpirationHandler::Handler LifetimeHelpers::enumerate(ConcreteDataMatcher const& dh.payloadSerializationMethod = gSerializationMethodNone; dh.tfCounter = timestamp; try { - dh.runNumber = atoi(services.get().runNumber.c_str()); + dh.runNumber = strtoull(services.get().device()->fConfig->GetProperty("runNumber", "0").c_str(), nullptr, 10); } catch (...) { dh.runNumber = 0; } diff --git a/Framework/Core/test/test_SimpleTimer.cxx b/Framework/Core/test/test_SimpleTimer.cxx index df935eb6eb2a0..b2b5a4558f961 100644 --- a/Framework/Core/test/test_SimpleTimer.cxx +++ b/Framework/Core/test/test_SimpleTimer.cxx @@ -38,7 +38,10 @@ std::vector defineDataProcessing(ConfigContext const&) InputSpec{"atimer", "TST", "TIMER", 0, Lifetime::Timer}}, {}, AlgorithmSpec{ - adaptStateless([](ControlService& control) { + adaptStateless([](ControlService& control, InputRecord& inputs) { + DataRef ref = inputs.get("atimer"); + auto* header = o2::header::get(ref.header); + LOG(info) << "Run number: " << header->runNumber; // This is invoked autonomously by the timer. 
control.readyToQuit(QuitRequest::Me); })}}, From 26402846cfd9e7dfb2bfe3f7ff09345ce794aa5e Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 9 Apr 2025 21:45:14 +0200 Subject: [PATCH 0359/1914] DPL: set run number also on EoS (#14158) --- .../Core/src/ExternalFairMQDeviceProxy.cxx | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx index 449beb0cb8c0b..e67e484f7faf5 100644 --- a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx +++ b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx @@ -397,6 +397,11 @@ void injectMissingData(fair::mq::Device& device, fair::mq::Parts& parts, std::ve } std::string missing = ""; bool showAlarm = false; + uint32_t runNumber = 0; + try { + runNumber = strtoul(device.fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) { + } for (auto mi : unmatchedDescriptions) { auto& spec = routes[mi].matcher; missing += " " + DataSpecUtils::describe(spec); @@ -412,6 +417,7 @@ void injectMissingData(fair::mq::Device& device, fair::mq::Parts& parts, std::ve dh.dataDescription = concrete.description; dh.subSpecification = *subSpec; dh.payloadSize = 0; + dh.runNumber = runNumber; dh.splitPayloadParts = 0; dh.splitPayloadIndex = 0; dh.payloadSerializationMethod = header::gSerializationMethodNone; @@ -504,7 +510,8 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL LOG(error) << "unexpected nullptr found. 
Skipping message pair."; continue; } - const auto dh = o2::header::get(parts.At(msgidx)->GetData()); + auto* header = parts.At(msgidx)->GetData(); + const auto dh = o2::header::get(header); if (!dh) { LOG(error) << "data on input " << msgidx << " does not follow the O2 data model, DataHeader missing"; if (msgidx > 0) { @@ -512,7 +519,7 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL } continue; } - auto dph = o2::header::get(parts.At(msgidx)->GetData()); + auto dph = o2::header::get(header); if (!dph) { LOG(error) << "data on input " << msgidx << " does not follow the O2 data model, DataProcessingHeader missing"; continue; @@ -527,7 +534,7 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL timingInfo.runNumber = dh->runNumber; timingInfo.tfCounter = dh->tfCounter; LOG(debug) << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); - if (dh->runNumber == 0 || dh->tfCounter == 0 || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { + if (dh->runNumber == 0 || (dh->tfCounter == 0 && o2::header::get(header) == nullptr) || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { LOG(error) << "INVALID runNumber / tfCounter: runNumber " << dh->runNumber << ", tfCounter " << dh->tfCounter << ", FMQ runNumber " << fmqRunNumber << " for msgidx " << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); @@ -623,6 +630,11 @@ InjectorFunction incrementalConverter(OutputSpec const& spec, o2::header::Serial auto timesliceId = std::make_shared(startTime); return [timesliceId, spec, step, method](TimingInfo&, ServiceRegistryRef const& services, fair::mq::Parts& parts, 
ChannelRetriever channelRetriever, size_t newTimesliceId, bool&) { auto* device = services.get().device(); + uint32_t runNumber = 0; + try { + runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) { + } // We iterate on all the parts and we send them two by two, // adding the appropriate O2 header. for (int i = 0; i < parts.Size(); ++i) { @@ -635,6 +647,7 @@ InjectorFunction incrementalConverter(OutputSpec const& spec, o2::header::Serial dh.dataDescription = matcher.description; dh.subSpecification = matcher.subSpec; dh.payloadSize = parts.At(i)->GetSize(); + dh.runNumber = runNumber; DataProcessingHeader dph{newTimesliceId, 0}; if (*timesliceId != newTimesliceId) { @@ -977,11 +990,18 @@ DataProcessorSpec specifyFairMQDeviceOutputProxy(char const* name, if (channelName != outputChannelName) { continue; } + + uint32_t runNumber = 0; + try { + runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) { + } DataHeader dh; dh.dataOrigin = "DPL"; dh.dataDescription = "EOS"; dh.subSpecification = 0; dh.payloadSize = 0; + dh.runNumber = runNumber; dh.payloadSerializationMethod = o2::header::gSerializationMethodNone; dh.tfCounter = 0; dh.firstTForbit = 0; @@ -1091,12 +1111,18 @@ DataProcessorSpec specifyFairMQDeviceMultiOutputProxy(char const* name, if (!checkChannel(channelName)) { continue; } + uint32_t runNumber = 0; + try { + runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) 
{ + } DataHeader dh; dh.dataOrigin = "DPL"; dh.dataDescription = "EOS"; dh.subSpecification = 0; dh.payloadSize = 0; dh.payloadSerializationMethod = o2::header::gSerializationMethodNone; + dh.runNumber = runNumber; dh.tfCounter = 0; dh.firstTForbit = 0; SourceInfoHeader sih; From fcdf98dec8cf81dbaa3e982e4b4a528d4b059762 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Thu, 10 Apr 2025 08:52:42 +0200 Subject: [PATCH 0360/1914] ITS3: Fix matrix generation in helper class (#13895) * ITS: change layer name to debug severity Signed-off-by: Felix Schlepper * ITS: ITS3 matrix generation fix + demoting&removal of logging Signed-off-by: Felix Schlepper * ITS3: Propagate rename to ITS helper class Signed-off-by: Felix Schlepper * ITS3: make ITSRESPONSE cached var Allows reusing this variable later on. Signed-off-by: Felix Schlepper * ITS: digiparams make print func virtual Signed-off-by: Felix Schlepper --------- Signed-off-by: Felix Schlepper --- .../ITSMFT/ITS/base/src/GeometryTGeo.cxx | 100 ++++++++---------- .../ITSMFT/ITS/simulation/src/Detector.cxx | 6 +- .../data/AlpideResponseData/CMakeLists.txt | 4 +- .../include/ITSMFTSimulation/DigiParams.h | 20 ++-- 4 files changed, 58 insertions(+), 72 deletions(-) diff --git a/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx b/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx index b52fd8f58320f..89b4d63729543 100644 --- a/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx +++ b/Detectors/ITSMFT/ITS/base/src/GeometryTGeo.cxx @@ -24,8 +24,6 @@ #ifdef ENABLE_UPGRADES #include "ITS3Base/SpecsV2.h" -#include "ITS3Base/SegmentationSuperAlpide.h" -using SuperSegmentation = o2::its3::SegmentationSuperAlpide; #endif #include // for TGeoBBox @@ -420,33 +418,20 @@ TGeoHMatrix* GeometryTGeo::extractMatrixSensor(int index) const static int chipInGlo{0}; // account for the difference between physical sensitive layer (where charge collection is simulated) and effective sensor thicknesses + // in the ITS3 case this accounted by specialized 
functions double delta = Segmentation::SensorLayerThickness - Segmentation::SensorLayerThicknessEff; -#ifdef ENABLE_UPGRADES - if (mIsLayerITS3[getLayer(index)]) { - delta = its3::SegmentationSuperAlpide::mSensorLayerThickness - its3::SegmentationSuperAlpide::mSensorLayerThicknessEff; - } -#endif - static TGeoTranslation tra(0., 0.5 * delta, 0.); - +#ifdef ENABLE_UPGRADES // only apply for non ITS3 OB layers + if (!mIsLayerITS3[getLayer(index)]) { + matTmp *= tra; + } +#else matTmp *= tra; +#endif return &matTmp; } -//__________________________________________________________________________ -const o2::math_utils::Transform3D GeometryTGeo::getT2LMatrixITS3(int isn, float alpha) -{ - // create for sensor isn the TGeo matrix for Tracking to Local frame transformations - static TGeoHMatrix t2l; - t2l.Clear(); - t2l.RotateZ(alpha * RadToDeg()); // rotate in direction of normal to the tangent to the cylinder - const TGeoHMatrix& matL2G = getMatrixL2G(isn); - const auto& matL2Gi = matL2G.Inverse(); - t2l.MultiplyLeft(&matL2Gi); - return Mat3D(t2l); -} - //__________________________________________________________________________ void GeometryTGeo::Build(int loadTrans) { @@ -492,23 +477,6 @@ void GeometryTGeo::Build(int loadTrans) mLastChipIndex[i] = numberOfChips - 1; } - LOGP(debug, "Summary of extracted Geometry:"); - LOGP(debug, " There are {} Layers and {} HalfBarrels", mNumberOfLayers, mNumberOfHalfBarrels); - for (int i = 0; i < mNumberOfLayers; i++) { - LOGP(debug, " Layer {}: {:*^30}", i, "START"); - LOGP(debug, " - mNumberOfStaves={}", mNumberOfStaves[i]); - LOGP(debug, " - mNumberOfChipsPerStave={}", mNumberOfChipsPerStave[i]); - LOGP(debug, " - mNumberOfHalfStaves={}", mNumberOfHalfStaves[i]); - LOGP(debug, " - mNumberOfChipsPerHalfStave={}", mNumberOfChipsPerHalfStave[i]); - LOGP(debug, " - mNumberOfModules={}", mNumberOfModules[i]); - LOGP(debug, " - mNumberOfChipsPerModules={}", mNumberOfChipsPerModule[i]); - LOGP(debug, " - mNumberOfChipsPerLayer={}", 
mNumberOfChipsPerLayer[i]); - LOGP(debug, " - mNumberOfChipsPerHalfBarrel={}", mNumberOfChipsPerHalfBarrel[i]); - LOGP(debug, " - mLastChipIndex={}", mLastChipIndex[i]); - LOGP(debug, " Layer {}: {:*^30}", i, "END"); - } - LOGP(debug, "In total there {} chips registered", numberOfChips); - #ifdef ENABLE_UPGRADES if (std::any_of(mIsLayerITS3.cbegin(), mIsLayerITS3.cend(), [](auto b) { return b; })) { LOGP(info, "Found active IT3 layers -> Renaming Detector ITS to IT3"); @@ -880,34 +848,39 @@ void GeometryTGeo::extractSensorXAlpha(int isn, float& x, float& alp) const TGeoHMatrix* matL2G = extractMatrixSensor(isn); double locA[3] = {-100., 0., 0.}, locB[3] = {100., 0., 0.}, gloA[3], gloB[3]; - int iLayer = getLayer(isn); + double xp{0}, yp{0}; #ifdef ENABLE_UPGRADES - if (mIsLayerITS3[iLayer]) { - // We need to calcualte the line tangent at the mid-point in the geometry + if (int iLayer = getLayer(isn); mIsLayerITS3[iLayer]) { + // For a TGeoTubeSeg the local coordinate system is defined at the origin + // of the circle of the side, since in our implementation we rotated the geometry a bit const auto radius = o2::its3::constants::radii[iLayer]; const auto phi1 = o2::its3::constants::tile::width / radius; const auto phi2 = o2::its3::constants::pixelarray::width / radius + phi1; const auto phi3 = (phi2 - phi1) / 2.; // mid-point in phi - const auto x = radius * std::cos(phi3); - const auto y = radius * std::sin(phi3); - // For the tangent we make the parametric line equation y = m * x - c - const auto m = x / y; - const auto c = y - m * x; - // Now we can given any x calulate points along this line, we pick points far away, - // the calculation of the normal should work then below. 
- locA[1] = m * locA[0] + c; - locB[1] = m * locB[0] + c; - } -#endif - + locA[0] = radius * std::cos(phi3); + locA[1] = radius * std::sin(phi3); + matL2G->LocalToMaster(locA, gloA); + xp = gloA[0]; + yp = gloA[1]; + } else { + matL2G->LocalToMaster(locA, gloA); + matL2G->LocalToMaster(locB, gloB); + double dx = gloB[0] - gloA[0], dy = gloB[1] - gloA[1]; + double t = (gloB[0] * dx + gloB[1] * dy) / (dx * dx + dy * dy); + xp = gloB[0] - dx * t; + yp = gloB[1] - dy * t; + } +#else // just ITS2 part matL2G->LocalToMaster(locA, gloA); matL2G->LocalToMaster(locB, gloB); double dx = gloB[0] - gloA[0], dy = gloB[1] - gloA[1]; double t = (gloB[0] * dx + gloB[1] * dy) / (dx * dx + dy * dy); - double xp = gloB[0] - dx * t, yp = gloB[1] - dy * t; - x = Sqrt(xp * xp + yp * yp); - alp = ATan2(yp, xp); + xp = gloB[0] - dx * t; + yp = gloB[1] - dy * t; +#endif + x = std::hypot(xp, yp); + alp = std::atan2(yp, xp); o2::math_utils::bringTo02Pi(alp); } @@ -926,6 +899,19 @@ TGeoHMatrix& GeometryTGeo::createT2LMatrix(int isn) return t2l; } +//__________________________________________________________________________ +const o2::math_utils::Transform3D GeometryTGeo::getT2LMatrixITS3(int isn, float alpha) +{ + // create for sensor isn the TGeo matrix for Tracking to Local frame transformations with correction for effective thickness + static TGeoHMatrix t2l; + t2l.Clear(); + t2l.RotateZ(alpha * RadToDeg()); // rotate in direction of normal to the tangent to the cylinder + const TGeoHMatrix& matL2G = getMatrixL2G(isn); + const auto& matL2Gi = matL2G.Inverse(); + t2l.MultiplyLeft(&matL2Gi); + return Mat3D(t2l); +} + //__________________________________________________________________________ int GeometryTGeo::extractVolumeCopy(const char* name, const char* prefix) const { diff --git a/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx b/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx index bf2e997794ee4..2304a9102092a 100644 --- a/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx +++ 
b/Detectors/ITSMFT/ITS/simulation/src/Detector.cxx @@ -190,7 +190,7 @@ Detector::Detector(Bool_t active, TString name) } else { mLayerName[j].Form("%s%d", GeometryTGeo::getITSSensorPattern(), j); // See V3Layer } - LOGP(info, "{}: mLayerName={}", j, mLayerName[j].Data()); + LOGP(debug, "{}: mLayerName={}", j, mLayerName[j].Data()); } if (mNumberLayers > 0) { // if not, we'll Fatal-ize in CreateGeometry @@ -723,8 +723,8 @@ void Detector::defineLayer(Int_t nlay, Double_t phi0, Double_t r, Int_t nstav, I // Return: // none. - LOG(info) << "L# " << nlay << " Phi:" << phi0 << " R:" << r << " Nst:" << nstav << " Nunit:" << nunit - << " Lthick:" << lthick << " Dthick:" << dthick << " DetID:" << dettypeID << " B:" << buildLevel; + LOG(debug) << "L# " << nlay << " Phi:" << phi0 << " R:" << r << " Nst:" << nstav << " Nunit:" << nunit + << " Lthick:" << lthick << " Dthick:" << dthick << " DetID:" << dettypeID << " B:" << buildLevel; if (nlay >= mNumberLayers || nlay < 0) { LOG(error) << "Wrong layer number " << nlay; diff --git a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt index 381e4f4b54c01..d1f3e756394b1 100644 --- a/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt +++ b/Detectors/ITSMFT/common/data/AlpideResponseData/CMakeLists.txt @@ -20,10 +20,10 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${CMAKE_SOURCE_DI if(ITSRESPONSE) message(STATUS "ITSRESPONSE option provided, setting ITSRESPONSE_DIR from it: " ${ITSRESPONSE}) - set(ITSRESPONSE_DIR ${ITSRESPONSE}) + set(ITSRESPONSE_DIR ${ITSRESPONSE} CACHE PATH "ITSResponse directory") else() message(STATUS "ITSRESPONSE option not provided, setting ITSRESPONSE_DIR from environment ITSRESPONSE_ROOT: " $ENV{ITSRESPONSE_ROOT}) - set(ITSRESPONSE_DIR $ENV{ITSRESPONSE_ROOT}) + set(ITSRESPONSE_DIR $ENV{ITSRESPONSE_ROOT} CACHE PATH "ITSResponse directory") endif() add_custom_command(TARGET 
O2exe-alpide-response-generator POST_BUILD diff --git a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h index 7772c47237ae8..b27739c26bc4d 100644 --- a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h +++ b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/DigiParams.h @@ -96,18 +96,18 @@ class DigiParams const SignalShape& getSignalShape() const { return mSignalShape; } SignalShape& getSignalShape() { return (SignalShape&)mSignalShape; } - void print() const; + virtual void print() const; private: static constexpr double infTime = 1e99; - bool mIsContinuous = false; ///< flag for continuous simulation - float mNoisePerPixel = 1.e-8; ///< ALPIDE Noise per chip - int mROFrameLengthInBC = 0; ///< ROF length in BC for continuos mode - float mROFrameLength = 0; ///< length of RO frame in ns - float mStrobeDelay = 0.; ///< strobe start (in ns) wrt ROF start - float mStrobeLength = 0; ///< length of the strobe in ns (sig. over threshold checked in this window only) - double mTimeOffset = -2 * infTime; ///< time offset (in seconds!) to calculate ROFrame from hit time - int mROFrameBiasInBC = 0; ///< misalignment of the ROF start in BC + bool mIsContinuous = false; ///< flag for continuous simulation + float mNoisePerPixel = 1.e-8; ///< ALPIDE Noise per chip + int mROFrameLengthInBC = 0; ///< ROF length in BC for continuos mode + float mROFrameLength = 0; ///< length of RO frame in ns + float mStrobeDelay = 0.; ///< strobe start (in ns) wrt ROF start + float mStrobeLength = 0; ///< length of the strobe in ns (sig. over threshold checked in this window only) + double mTimeOffset = -2 * infTime; ///< time offset (in seconds!) 
to calculate ROFrame from hit time + int mROFrameBiasInBC = 0; ///< misalignment of the ROF start in BC int mChargeThreshold = 150; ///< charge threshold in Nelectrons int mMinChargeToAccount = 15; ///< minimum charge contribution to account int mNSimSteps = 7; ///< number of steps in response simulation @@ -125,7 +125,7 @@ class DigiParams float mROFrameLengthInv = 0; ///< inverse length of RO frame in ns float mNSimStepsInv = 0; ///< its inverse - ClassDefNV(DigiParams, 2); + ClassDef(DigiParams, 2); }; } // namespace itsmft } // namespace o2 From 04b2596a380cba6b4a5aae332ebace594231e21b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 09:13:35 +0200 Subject: [PATCH 0361/1914] DPL: do not compute GUI metrics if we are in online mode (#14163) --- Framework/Core/src/CommonServices.cxx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index e13f1cb2094b7..9d30dac8c4de8 100644 --- a/Framework/Core/src/CommonServices.cxx +++ b/Framework/Core/src/CommonServices.cxx @@ -848,7 +848,9 @@ auto flushMetrics(ServiceRegistryRef registry, DataProcessingStats& stats) -> vo } monitoring.send(std::move(metric)); }); - relayer.sendContextState(); + if (DefaultsHelpers::onlineDeploymentMode() == false) { + relayer.sendContextState(); + } monitoring.flushBuffer(); O2_SIGNPOST_END(monitoring_service, sid, "flush", "done flushing metrics"); }; From 0a831b2817e686e14d9b5129f6fd0fc45ea035db Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 14:28:28 +0200 Subject: [PATCH 0362/1914] DPL: extend DataRefUtils::match to support multiple headers (#14160) --- .../Core/include/Framework/DataRefUtils.h | 23 ++++++++++++++----- .../include/Framework/InputRecordWalker.h | 5 ++-- .../include/DPLUtils/DPLRawPageSequencer.h | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git 
a/Framework/Core/include/Framework/DataRefUtils.h b/Framework/Core/include/Framework/DataRefUtils.h index 4c1bd0ed7ed10..d50699badc63b 100644 --- a/Framework/Core/include/Framework/DataRefUtils.h +++ b/Framework/Core/include/Framework/DataRefUtils.h @@ -11,6 +11,7 @@ #ifndef O2_FRAMEWORK_DATAREFUTILS_H_ #define O2_FRAMEWORK_DATAREFUTILS_H_ +#include "Framework/DataDescriptorMatcher.h" #include "Framework/DataRef.h" #include "Framework/RootSerializationSupport.h" #include "Framework/SerializationMethods.h" @@ -33,6 +34,9 @@ class ConfigurableParam; namespace o2::framework { +template +concept DataHeaderLike = requires(H& dh) {dh.dataOrigin; dh.dataDescription; dh.subSpecification; }; + // FIXME: Should enforce the fact that DataRefs are read only... struct DataRefUtils { @@ -52,7 +56,7 @@ struct DataRefUtils { if ((payloadSize % sizeof(T)) != 0) { throw runtime_error("Cannot extract POD from message as size do not match"); } - //FIXME: provide a const collection + // FIXME: provide a const collection return gsl::span(reinterpret_cast(const_cast(ref.payload)), payloadSize / sizeof(T)); } else if constexpr (has_root_dictionary::value == true && is_messageable::value == false) { @@ -220,17 +224,24 @@ struct DataRefUtils { return ref.spec != nullptr && ref.spec->binding == binding; } - /// check if the O2 message referred by DataRef matches a particular - /// input spec. The DataHeader is retrieved from the header message and matched - /// against @ref spec parameter. - static bool match(DataRef const& ref, InputSpec const& spec) + template + static bool matchHeader(DataRef const& ref, InputSpec const& spec) { - auto dh = DataRefUtils::getHeader(ref); + auto const* dh = o2::header::get(ref.header); if (dh == nullptr) { return false; } return DataSpecUtils::match(spec, dh->dataOrigin, dh->dataDescription, dh->subSpecification); } + + /// check if the O2 message referred by DataRef matches a particular + /// input spec. 
The DataHeader is retrieved from the header message and matched + /// against @ref spec parameter. + template + static bool match(DataRef const& ref, InputSpec const& spec) + { + return (DataRefUtils::matchHeader(ref, spec) || ... || matchHeader(ref, spec)); + } }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/InputRecordWalker.h b/Framework/Core/include/Framework/InputRecordWalker.h index a67a7dfb04820..4d36a1f17bc82 100644 --- a/Framework/Core/include/Framework/InputRecordWalker.h +++ b/Framework/Core/include/Framework/InputRecordWalker.h @@ -12,11 +12,11 @@ #define FRAMEWORK_INPUTRECORDWALKER_H /// @file InputRecordWalker.h -/// @author Matthias Richter /// @since 2020-03-25 /// @brief A helper class to iteratate over all parts of all input routes #include "Framework/InputRecord.h" +#include "Framework/DataRefUtils.h" namespace o2::framework { @@ -49,6 +49,7 @@ namespace o2::framework /// for (auto const& ref : InputRecordWalker(inputs, filter)) { /// // do something with the data /// } +template class InputRecordWalker { public: @@ -131,7 +132,7 @@ class InputRecordWalker if (mFilterSpecs.size() > 0) { bool isSelected = false; for (auto const& spec : mFilterSpecs) { - if ((isSelected = DataRefUtils::match(*mCurrent, spec)) == true) { + if ((isSelected = DataRefUtils::match(*mCurrent, spec)) == true) { break; } } diff --git a/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h b/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h index 785dc9e04bd45..2fb8374e45c12 100644 --- a/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h +++ b/Framework/Utils/include/DPLUtils/DPLRawPageSequencer.h @@ -191,7 +191,7 @@ class DPLRawPageSequencer } private: - InputRecordWalker mInput; + InputRecordWalker<> mInput; template void forwardInternal(Predicate pred, Inserter inserter, const char* data, size_t size, const o2::header::DataHeader* dh) From b78b50c251ad7dca06d844c4c4860954b08b113b Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: 
Thu, 10 Apr 2025 15:28:11 +0200 Subject: [PATCH 0363/1914] ctpdev: orbitreset and orbitsox via zmq II (#14147) * dev: finishing orbitsox and orbitreset via zmq * clang * fix * fix * dev: removed infologger and using LOG(important) * clang * fix --- .../include/CTPWorkflowScalers/RunManager.h | 1 + .../CTP/workflowScalers/src/RunManager.cxx | 39 ++++++++++++++++++- .../workflowScalers/src/ctpCCDBManager.cxx | 5 ++- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h index e0b204e6c4ce5..72fb9c2056367 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h @@ -18,6 +18,7 @@ #include "DataFormatsCTP/Configuration.h" #include "BookkeepingApi/BkpClientFactory.h" #include "BookkeepingApi/BkpClient.h" + using namespace o2::bkp::api; namespace o2 { diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx b/Detectors/CTP/workflowScalers/src/RunManager.cxx index ac3eda60094e9..5d0b906e28088 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -18,6 +18,7 @@ #include #include "CommonUtils/StringUtils.h" #include + using namespace o2::ctp; /// /// Active run to keep cfg and saclers of active runs @@ -232,10 +233,44 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message return 0; } if (topic.find("soxorbit") != std::string::npos) { - return 0; + std::vector tokens = o2::utils::Str::tokenize(message, ' '); + int ret = 0; + if (tokens.size() == 3) { + long timestamp = std::stol(tokens[0]); + uint32_t runnumber = std::stoul(tokens[1]); + uint32_t orbit = std::stoul(tokens[2]); + ret = saveSoxOrbit(runnumber, orbit, timestamp); + std::string logmessage; + if (ret) { + logmessage = "Failed to update CCDB with SOX orbit."; + } else { 
+ logmessage = "CCDB updated with SOX orbit."; + } + LOG(important) << logmessage << " run:" << runnumber << " sox orbit:" << orbit << " ts:" << timestamp; + } else { + LOG(error) << "Topic soxorbit dize !=3: " << message << " token size:" << tokens.size(); + ret = 1; + } + return ret; } if (topic.find("orbitreset") != std::string::npos) { - return 0; + std::vector tokens = o2::utils::Str::tokenize(message, ' '); + int ret = 0; + if (tokens.size() == 1) { + long timestamp = std::stol(tokens[0]); + ret = saveOrbitReset(timestamp); + std::string logmessage; + if (ret) { + logmessage = "Failed to update CCDB with orbitreset. "; + } else { + logmessage = "CCDB updated with orbitreset. "; + } + LOG(important) << logmessage << timestamp; + } else { + LOG(error) << "Topic orbit reset != 2: " << message << " token size:" << tokens.size(); + ret = 1; + } + return ret; } static int nerror = 0; if (topic.find("sox") != std::string::npos) { diff --git a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx index 0d81b896b3e91..cbe8fe5dd675f 100644 --- a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx +++ b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx @@ -122,7 +122,7 @@ int ctpCCDBManager::saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long tim vect.push_back(timestamp); vect.push_back((uint64_t)runNumber); vect.push_back((uint64_t)soxOrbit); - long tmin = timestamp; + long tmin = timestamp / 1000; long tmax = tmin + 381928219; o2::ccdb::CcdbApi api; map metadata; // can be empty @@ -149,9 +149,10 @@ int ctpCCDBManager::saveOrbitReset(long timeStamp) if (timeStamp == 0) { auto now = std::chrono::system_clock::now(); timeStamp = std::chrono::duration_cast(now.time_since_epoch()).count(); + LOG(warn) << "Received timestamp = 0 , using current time:" << timeStamp; } vect.push_back(timeStamp); - long tmin = timeStamp; + long tmin = timeStamp / 1000; long tmax = tmin + 381928219; o2::ccdb::CcdbApi api; map 
metadata; // can be empty From b17041de14862c743f6a883c8cc66d7743295916 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 9 Apr 2025 16:05:59 +0200 Subject: [PATCH 0364/1914] Possibility to request TPC occupancy map w/o askig for clusters --- .../DataFormatsGlobalTracking/RecoContainer.h | 2 ++ .../GlobalTracking/src/RecoContainer.cxx | 25 ++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h b/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h index d128467168c92..31d531ef19265 100644 --- a/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h +++ b/DataFormats/Detectors/GlobalTracking/include/DataFormatsGlobalTracking/RecoContainer.h @@ -225,6 +225,7 @@ struct DataRequest { void requestITSClusters(bool mc); void requestMFTClusters(bool mc); void requestTPCClusters(bool mc); + void requestTPCOccMap(); void requestTPCTriggers(); void requestTOFClusters(bool mc); void requestTRDTracklets(bool mc); @@ -377,6 +378,7 @@ struct RecoContainer { void addITSClusters(o2::framework::ProcessingContext& pc, bool mc); void addMFTClusters(o2::framework::ProcessingContext& pc, bool mc); void addTPCClusters(o2::framework::ProcessingContext& pc, bool mc, bool shmap, bool occmap); + void addTPCOccMap(o2::framework::ProcessingContext& pc); void addTPCTriggers(o2::framework::ProcessingContext& pc); void addTOFClusters(o2::framework::ProcessingContext& pc, bool mc); void addHMPClusters(o2::framework::ProcessingContext& pc, bool mc); diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index c26de2bfda896..39cc05d8a69e7 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -123,7 +123,7 @@ void DataRequest::requestTPCTracks(bool mc) 
addInput({"trackTPCClRefs", "TPC", "CLUSREFS", 0, Lifetime::Timeframe}); if (requestMap.find("clusTPC") != requestMap.end()) { addInput({"clusTPCshmap", "TPC", "CLSHAREDMAP", 0, Lifetime::Timeframe}); - addInput({"clusTPCoccmap", "TPC", "TPCOCCUPANCYMAP", 0, Lifetime::Timeframe}); + requestTPCOccMap(); } if (mc) { addInput({"trackTPCMCTR", "TPC", "TRACKSMCLBL", 0, Lifetime::Timeframe}); @@ -267,6 +267,12 @@ void DataRequest::requestMFTClusters(bool mc) requestMap["clusMFT"] = mc; } +void DataRequest::requestTPCOccMap() +{ + addInput({"clusTPCoccmap", "TPC", "TPCOCCUPANCYMAP", 0, Lifetime::Timeframe}); + requestMap["TPCOcc"] = false; +} + void DataRequest::requestTPCClusters(bool mc) { addInput({"clusTPC", ConcreteDataTypeMatcher{"TPC", "CLUSTERNATIVE"}, Lifetime::Timeframe}); @@ -275,7 +281,7 @@ void DataRequest::requestTPCClusters(bool mc) } if (requestMap.find("trackTPC") != requestMap.end()) { addInput({"clusTPCshmap", "TPC", "CLSHAREDMAP", 0, Lifetime::Timeframe}); - addInput({"clusTPCoccmap", "TPC", "TPCOCCUPANCYMAP", 0, Lifetime::Timeframe}); + requestTPCOccMap(); } if (mc) { addInput({"clusTPCMC", ConcreteDataTypeMatcher{"TPC", "CLNATIVEMCLBL"}, Lifetime::Timeframe}); @@ -704,10 +710,17 @@ void RecoContainer::collectData(ProcessingContext& pc, const DataRequest& reques addMFTClusters(pc, req->second); } + req = reqMap.find("TPCOcc"); + bool TPCOccDone = false; + if (req != reqMap.end()) { + TPCOccDone = true; + addTPCOccMap(pc); + } + req = reqMap.find("clusTPC"); if (req != reqMap.end()) { auto tracksON = reqMap.find("trackTPC") != reqMap.end(); - addTPCClusters(pc, req->second, tracksON, tracksON); + addTPCClusters(pc, req->second, tracksON, tracksON && (!TPCOccDone)); } req = reqMap.find("trigTPC"); @@ -1100,6 +1113,12 @@ void RecoContainer::addMFTClusters(ProcessingContext& pc, bool mc) } } +//__________________________________________________________ +void RecoContainer::addTPCOccMap(ProcessingContext& pc) +{ + occupancyMapTPC = 
pc.inputs().get>("clusTPCoccmap"); +} + //__________________________________________________________ void RecoContainer::addTPCClusters(ProcessingContext& pc, bool mc, bool shmap, bool occmap) { From 276c3223609bbc50a8609a4131157a84f89c7e98 Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 9 Apr 2025 16:07:10 +0200 Subject: [PATCH 0365/1914] ITSTPC QC does not need TPC clusters, just occ. --- Detectors/GLOQC/src/MatchITSTPCQC.cxx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Detectors/GLOQC/src/MatchITSTPCQC.cxx b/Detectors/GLOQC/src/MatchITSTPCQC.cxx index 86de9cd9c056e..e1832056f072c 100644 --- a/Detectors/GLOQC/src/MatchITSTPCQC.cxx +++ b/Detectors/GLOQC/src/MatchITSTPCQC.cxx @@ -470,7 +470,7 @@ void MatchITSTPCQC::initDataRequest() if (mDoK0QC) { mDataRequest->requestPrimaryVertices(mUseMC); mDataRequest->requestSecondaryVertices(mUseMC); - mDataRequest->requestTPCClusters(false); + mDataRequest->requestTPCOccMap(); } } @@ -478,7 +478,6 @@ void MatchITSTPCQC::initDataRequest() void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) { - // Getting the B field mBz = o2::base::Propagator::Instance()->getNominalBz(); @@ -1058,7 +1057,6 @@ void MatchITSTPCQC::run(o2::framework::ProcessingContext& ctx) } else { mTBinClOcc.resize(1); } - auto v0IDs = mRecoCont.getV0sIdx(); auto nv0 = v0IDs.size(); if (nv0 > mRecoCont.getV0s().size()) { From 4f4b5458db0b1b5f1fcbf8e7c6111ab300d6b370 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 20:44:31 +0200 Subject: [PATCH 0366/1914] DPL: Do not compute metrics if the GUI is not available (#14170) --- Framework/Core/src/CommonServices.cxx | 4 +--- Framework/Core/src/DataRelayer.cxx | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index 9d30dac8c4de8..e13f1cb2094b7 100644 --- a/Framework/Core/src/CommonServices.cxx +++ 
b/Framework/Core/src/CommonServices.cxx @@ -848,9 +848,7 @@ auto flushMetrics(ServiceRegistryRef registry, DataProcessingStats& stats) -> vo } monitoring.send(std::move(metric)); }); - if (DefaultsHelpers::onlineDeploymentMode() == false) { - relayer.sendContextState(); - } + relayer.sendContextState(); monitoring.flushBuffer(); O2_SIGNPOST_END(monitoring_service, sid, "flush", "done flushing metrics"); }; diff --git a/Framework/Core/src/DataRelayer.cxx b/Framework/Core/src/DataRelayer.cxx index f30866dc0aa1b..c6333350d6da7 100644 --- a/Framework/Core/src/DataRelayer.cxx +++ b/Framework/Core/src/DataRelayer.cxx @@ -1034,6 +1034,9 @@ uint64_t DataRelayer::getCreationTimeForSlot(TimesliceSlot slot) void DataRelayer::sendContextState() { + if (!mContext.get().driverHasGUI) { + return; + } std::scoped_lock lock(mMutex); auto& states = mContext.get(); for (size_t ci = 0; ci < mTimesliceIndex.size(); ++ci) { From 8688938bd04ddeb7227b43ff136d39f463b60e37 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 20:46:13 +0200 Subject: [PATCH 0367/1914] DataSampling: make sure the original DataHeader is available in the sampled data (#14164) --- .../include/DataSampling/DataSamplingHeader.h | 13 ++++-- .../include/DataSampling/Dispatcher.h | 4 +- .../DataSampling/src/DataSamplingHeader.cxx | 13 +++--- Utilities/DataSampling/src/Dispatcher.cxx | 7 ++-- .../test/test_DataSamplingHeader.cxx | 42 +++++++++++-------- 5 files changed, 47 insertions(+), 32 deletions(-) diff --git a/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h b/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h index adc2c97759f52..0cc96e2125dec 100644 --- a/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h +++ b/Utilities/DataSampling/include/DataSampling/DataSamplingHeader.h @@ -41,8 +41,15 @@ struct DataSamplingHeader : public header::BaseHeader { uint32_t totalEvaluatedMessages = 0; DeviceIDType deviceID = ""; - 
DataSamplingHeader(); - DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID); + /// Presampled description for the data. Copied from the original DataHeader. + header::DataDescription dataDescription; + /// Presampled origin for the data. Copied from the original DataHeader. + header::DataOrigin dataOrigin; + /// Presampled subSpecification for the data. + header::DataHeader::SubSpecificationType subSpecification; + + DataSamplingHeader() = delete; + DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID, header::DataHeader const& original); DataSamplingHeader(const DataSamplingHeader&) = default; DataSamplingHeader& operator=(const DataSamplingHeader&) = default; @@ -51,4 +58,4 @@ struct DataSamplingHeader : public header::BaseHeader { } // namespace o2::utilities -#endif //ALICEO2_DATASAMPLINGHEADER_H +#endif // ALICEO2_DATASAMPLINGHEADER_H diff --git a/Utilities/DataSampling/include/DataSampling/Dispatcher.h b/Utilities/DataSampling/include/DataSampling/Dispatcher.h index d92876f4c6125..1d34269f87536 100644 --- a/Utilities/DataSampling/include/DataSampling/Dispatcher.h +++ b/Utilities/DataSampling/include/DataSampling/Dispatcher.h @@ -64,7 +64,7 @@ class Dispatcher : public framework::Task framework::Options getOptions(); private: - DataSamplingHeader prepareDataSamplingHeader(const DataSamplingPolicy& policy); + DataSamplingHeader prepareDataSamplingHeader(const DataSamplingPolicy& policy, header::DataHeader const& original); header::Stack extractAdditionalHeaders(const char* inputHeaderStack) const; void reportStats(monitoring::Monitoring& monitoring) const; void send(framework::DataAllocator& dataAllocator, const framework::DataRef& inputData, const framework::Output& output) const; @@ -78,4 +78,4 @@ class Dispatcher : public framework::Task } // namespace o2::utilities -#endif //ALICEO2_DISPATCHER_H 
+#endif // ALICEO2_DISPATCHER_H diff --git a/Utilities/DataSampling/src/DataSamplingHeader.cxx b/Utilities/DataSampling/src/DataSamplingHeader.cxx index 392e37a5d9117..204d4aa2b56a6 100644 --- a/Utilities/DataSampling/src/DataSamplingHeader.cxx +++ b/Utilities/DataSampling/src/DataSamplingHeader.cxx @@ -19,16 +19,15 @@ namespace o2::utilities { -DataSamplingHeader::DataSamplingHeader() : BaseHeader(sizeof(DataSamplingHeader), sHeaderType, sSerializationMethod, sVersion) -{ -} - -DataSamplingHeader::DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID) +DataSamplingHeader::DataSamplingHeader(uint64_t _sampleTimeUs, uint32_t _totalAcceptedMessages, uint32_t _totalEvaluatedMessages, DeviceIDType _deviceID, header::DataHeader const& _original) : BaseHeader(sizeof(DataSamplingHeader), sHeaderType, sSerializationMethod, sVersion), sampleTimeUs(_sampleTimeUs), totalAcceptedMessages(_totalAcceptedMessages), totalEvaluatedMessages(_totalEvaluatedMessages), - deviceID(_deviceID) + deviceID(_deviceID), + dataDescription(_original.dataDescription), + dataOrigin(_original.dataOrigin), + subSpecification(_original.subSpecification) { } @@ -42,4 +41,4 @@ const uint32_t o2::utilities::DataSamplingHeader::sVersion = 1; const o2::header::HeaderType o2::utilities::DataSamplingHeader::sHeaderType = header::String2("DataSamp"); const o2::header::SerializationMethod o2::utilities::DataSamplingHeader::sSerializationMethod = o2::header::gSerializationMethodNone; -} // namespace o2::utilities \ No newline at end of file +} // namespace o2::utilities diff --git a/Utilities/DataSampling/src/Dispatcher.cxx b/Utilities/DataSampling/src/Dispatcher.cxx index 28ff4d5568da9..38ad15f5fd752 100644 --- a/Utilities/DataSampling/src/Dispatcher.cxx +++ b/Utilities/DataSampling/src/Dispatcher.cxx @@ -99,7 +99,7 @@ void Dispatcher::run(ProcessingContext& ctx) // a "TST/RAWDATA/*" output. 
if (auto route = policy->match(inputMatcher); route != nullptr && policy->decide(firstPart)) { auto routeAsConcreteDataType = DataSpecUtils::asConcreteDataTypeMatcher(*route); - auto dsheader = prepareDataSamplingHeader(*policy); + auto dsheader = prepareDataSamplingHeader(*policy, *firstInputHeader); for (const auto& part : inputIt) { if (part.header != nullptr) { // We copy every header which is not DataHeader or DataProcessingHeader, @@ -144,7 +144,7 @@ void Dispatcher::reportStats(Monitoring& monitoring) const monitoring.send(Metric{dispatcherTotalAcceptedMessages, "Dispatcher_messages_passed", Verbosity::Prod}.addTag(tags::Key::Subsystem, tags::Value::DataSampling)); } -DataSamplingHeader Dispatcher::prepareDataSamplingHeader(const DataSamplingPolicy& policy) +DataSamplingHeader Dispatcher::prepareDataSamplingHeader(const DataSamplingPolicy& policy, header::DataHeader const& original) { uint64_t sampleTime = static_cast(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count()); @@ -152,7 +152,8 @@ DataSamplingHeader Dispatcher::prepareDataSamplingHeader(const DataSamplingPolic sampleTime, policy.getTotalAcceptedMessages(), policy.getTotalEvaluatedMessages(), - mDeviceID}; + mDeviceID, + original}; } header::Stack Dispatcher::extractAdditionalHeaders(const char* inputHeaderStack) const diff --git a/Utilities/DataSampling/test/test_DataSamplingHeader.cxx b/Utilities/DataSampling/test/test_DataSamplingHeader.cxx index 48ab5ba953eec..377e9d855467d 100644 --- a/Utilities/DataSampling/test/test_DataSamplingHeader.cxx +++ b/Utilities/DataSampling/test/test_DataSamplingHeader.cxx @@ -21,57 +21,62 @@ using namespace o2::utilities; using namespace o2::header; -BOOST_AUTO_TEST_CASE(DataSamplingHeaderDefault) -{ - DataSamplingHeader header; - - BOOST_CHECK_EQUAL(header.sampleTimeUs, 0); - BOOST_CHECK_EQUAL(header.totalAcceptedMessages, 0); - BOOST_CHECK_EQUAL(header.totalEvaluatedMessages, 0); - BOOST_CHECK_EQUAL(strcmp(header.deviceID.str, 
""), 0); -} - BOOST_AUTO_TEST_CASE(DataSamplingHeaderInit) { - DataSamplingHeader header{123, 456, 789, "abc"}; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader header{123, 456, 789, "abc", original}; BOOST_CHECK_EQUAL(header.sampleTimeUs, 123); BOOST_CHECK_EQUAL(header.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(header.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(header.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(header.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(header.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(header.subSpecification, 1); } BOOST_AUTO_TEST_CASE(DataSamplingHeaderCopy) { - DataSamplingHeader header{123, 456, 789, "abc"}; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader header{123, 456, 789, "abc", original}; DataSamplingHeader copy(header); BOOST_CHECK_EQUAL(copy.sampleTimeUs, 123); BOOST_CHECK_EQUAL(copy.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(copy.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(copy.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(copy.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(copy.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(copy.subSpecification, 1); } BOOST_AUTO_TEST_CASE(DataSamplingHeaderAssignement) { - DataSamplingHeader first{123, 456, 789, "abc"}; - DataSamplingHeader second; - second = first; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader first{123, 456, 789, "abc", original}; + DataSamplingHeader second = first; BOOST_CHECK_EQUAL(first.sampleTimeUs, 123); BOOST_CHECK_EQUAL(first.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(first.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(first.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(first.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(first.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(first.subSpecification, 1); BOOST_CHECK_EQUAL(second.sampleTimeUs, 123); 
BOOST_CHECK_EQUAL(second.totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(second.totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(second.deviceID.str, "abc"), 0); + BOOST_CHECK_EQUAL(strcmp(second.dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(second.dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(second.subSpecification, 1); } BOOST_AUTO_TEST_CASE(DataSamplingHeaderOnStack) { - DataSamplingHeader header{123, 456, 789, "abc"}; + o2::header::DataHeader original("A", "TST", 1); + DataSamplingHeader header{123, 456, 789, "abc", original}; Stack headerStack{header}; const auto* dsHeaderFromStack = get(headerStack.data()); @@ -81,4 +86,7 @@ BOOST_AUTO_TEST_CASE(DataSamplingHeaderOnStack) BOOST_CHECK_EQUAL(dsHeaderFromStack->totalAcceptedMessages, 456); BOOST_CHECK_EQUAL(dsHeaderFromStack->totalEvaluatedMessages, 789); BOOST_CHECK_EQUAL(strcmp(dsHeaderFromStack->deviceID.str, "abc"), 0); -} \ No newline at end of file + BOOST_CHECK_EQUAL(strcmp(dsHeaderFromStack->dataOrigin.str, "TST"), 0); + BOOST_CHECK_EQUAL(strcmp(dsHeaderFromStack->dataDescription.str, "A"), 0); + BOOST_CHECK_EQUAL(dsHeaderFromStack->subSpecification, 1); +} From a4e08418b60179e006690b1c056f5d3e553b934a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 10 Apr 2025 22:40:19 +0200 Subject: [PATCH 0368/1914] DPL: exclude tfCounter == 0 messages for EOS (#14169) * DPL: exclude tfCounter == 0 messages for EOS --- Framework/Core/src/ExternalFairMQDeviceProxy.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx index e67e484f7faf5..cfc445725a92d 100644 --- a/Framework/Core/src/ExternalFairMQDeviceProxy.cxx +++ b/Framework/Core/src/ExternalFairMQDeviceProxy.cxx @@ -534,7 +534,7 @@ InjectorFunction dplModelAdaptor(std::vector const& filterSpecs, DPL timingInfo.runNumber = dh->runNumber; timingInfo.tfCounter = dh->tfCounter; LOG(debug) 
<< msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); - if (dh->runNumber == 0 || (dh->tfCounter == 0 && o2::header::get(header) == nullptr) || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { + if (dh->runNumber == 0 || (dh->tfCounter == 0 && dh->dataDescription.as() != "EOS") || (fmqRunNumber > 0 && fmqRunNumber != dh->runNumber)) { LOG(error) << "INVALID runNumber / tfCounter: runNumber " << dh->runNumber << ", tfCounter " << dh->tfCounter << ", FMQ runNumber " << fmqRunNumber << " for msgidx " << msgidx << ": " << DataSpecUtils::describe(OutputSpec{dh->dataOrigin, dh->dataDescription, dh->subSpecification}) << " part " << dh->splitPayloadIndex << " of " << dh->splitPayloadParts << " payload " << parts.At(msgidx + 1)->GetSize(); From 4655f501f3a603da52150084145b31c225f5b4e5 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 14:31:11 +0200 Subject: [PATCH 0369/1914] TPC cluster/digits helper can walk over sampled data --- DataFormats/Detectors/TPC/CMakeLists.txt | 1 + .../Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DataFormats/Detectors/TPC/CMakeLists.txt b/DataFormats/Detectors/TPC/CMakeLists.txt index b8b93c308e85d..81b1d5efad59a 100644 --- a/DataFormats/Detectors/TPC/CMakeLists.txt +++ b/DataFormats/Detectors/TPC/CMakeLists.txt @@ -34,6 +34,7 @@ o2_add_library( O2::ReconstructionDataFormats O2::CommonDataFormat O2::Headers + O2::DataSampling O2::Algorithm) o2_target_root_dictionary( diff --git a/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h b/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h index 30b40ed70b9c7..f4a318bc30101 100644 --- a/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h +++ 
b/DataFormats/Detectors/TPC/include/DataFormatsTPC/WorkflowHelper.h @@ -21,6 +21,7 @@ #include "Framework/DataRefUtils.h" #include #include "Framework/InputRecordWalker.h" +#include "DataSampling/DataSamplingHeader.h" #include "DataFormatsTPC/TrackTPC.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" @@ -80,7 +81,7 @@ static auto getWorkflowTPCInput(o2::framework::ProcessingContext& pc, int verbos if (do_digits) { std::fill(inputDigitsMCIndex.begin(), inputDigitsMCIndex.end(), -1); } - for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { + for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { auto const* sectorHeader = o2::framework::DataRefUtils::getHeader(ref); if (sectorHeader == nullptr) { // FIXME: think about error policy @@ -127,7 +128,7 @@ static auto getWorkflowTPCInput(o2::framework::ProcessingContext& pc, int verbos {"check", o2::framework::ConcreteDataTypeMatcher{o2::header::gDataOriginTPC, "CLUSTERNATIVE"}, o2::framework::Lifetime::Timeframe}, }; unsigned long recvMask = 0; - for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { + for (auto const& ref : o2::framework::InputRecordWalker(pc.inputs(), filter)) { auto const* sectorHeader = o2::framework::DataRefUtils::getHeader(ref); if (sectorHeader == nullptr) { throw std::runtime_error("sector header missing on header stack"); From 356d6990cd1fdc13e28dbbb71ad0a4777cd1dd8b Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 21:54:25 +0200 Subject: [PATCH 0370/1914] Add in-place replaceAll method to StrUtils --- Common/Utils/include/CommonUtils/StringUtils.h | 3 +++ Common/Utils/src/StringUtils.cxx | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/Common/Utils/include/CommonUtils/StringUtils.h b/Common/Utils/include/CommonUtils/StringUtils.h index 7a2edbf3b2f53..c68e441d5b1c4 100644 --- a/Common/Utils/include/CommonUtils/StringUtils.h +++ 
b/Common/Utils/include/CommonUtils/StringUtils.h @@ -146,6 +146,9 @@ struct Str { return s.str(); } + // replace all occurencies of from by to, return count + static int replaceAll(std::string& s, const std::string& from, const std::string& to); + // generate random string of given length, suitable for file names static std::string getRandomString(int length); diff --git a/Common/Utils/src/StringUtils.cxx b/Common/Utils/src/StringUtils.cxx index 03bf68df5a41c..4c0dd30ae6211 100644 --- a/Common/Utils/src/StringUtils.cxx +++ b/Common/Utils/src/StringUtils.cxx @@ -34,6 +34,19 @@ std::vector Str::tokenize(const std::string& src, char delim, bool return tokens; } +// replace all occurencies of from by to, return count +int Str::replaceAll(std::string& s, const std::string& from, const std::string& to) +{ + int count = 0; + size_t pos = 0; + while ((pos = s.find(from, pos)) != std::string::npos) { + s.replace(pos, from.length(), to); + pos += to.length(); // Handles case where 'to' is a substring of 'from' + count++; + } + return count; +} + // generate random string of given lenght, suitable for file names std::string Str::getRandomString(int lenght) { From b75d6433d3513c547588a9f93d0c64d536607311 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 17:08:30 +0200 Subject: [PATCH 0371/1914] o2-ecs-grp-create creates CTP/Config if --original-run passed at SOR Will clone the CTP/Config/Config of (replayed) original-run with the credentials of the new synthetic run --- Detectors/GRP/workflows/CMakeLists.txt | 1 + .../GRP/workflows/src/create-grp-ecs.cxx | 64 +++++++++++++++++-- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/Detectors/GRP/workflows/CMakeLists.txt b/Detectors/GRP/workflows/CMakeLists.txt index ea56cf8270335..1097855a5d579 100644 --- a/Detectors/GRP/workflows/CMakeLists.txt +++ b/Detectors/GRP/workflows/CMakeLists.txt @@ -45,6 +45,7 @@ o2_add_executable(grp-create SOURCES src/create-grp-ecs.cxx PUBLIC_LINK_LIBRARIES 
O2::DetectorsCommonDataFormats O2::DataFormatsParameters + O2::DataFormatsCTP O2::CommonUtils O2::CCDB Boost::program_options) diff --git a/Detectors/GRP/workflows/src/create-grp-ecs.cxx b/Detectors/GRP/workflows/src/create-grp-ecs.cxx index 95bfb878cee9d..873133e0dd46b 100644 --- a/Detectors/GRP/workflows/src/create-grp-ecs.cxx +++ b/Detectors/GRP/workflows/src/create-grp-ecs.cxx @@ -15,8 +15,10 @@ #include #include #include "DataFormatsParameters/GRPECSObject.h" +#include "DataFormatsCTP/Configuration.h" #include "DetectorsCommonDataFormats/DetID.h" #include "CCDB/CcdbApi.h" +#include "CCDB/BasicCCDBManager.h" #include "CommonUtils/NameConf.h" #include "CommonUtils/StringUtils.h" @@ -31,6 +33,7 @@ enum CCDBRefreshMode { NONE, int createGRPECSObject(const std::string& dataPeriod, int run, + int runOrig, // in case of replay int runTypeI, int nHBPerTF, const std::string& _detsReadout, @@ -44,13 +47,14 @@ int createGRPECSObject(const std::string& dataPeriod, long marginAtSOR, long marginAtEOR, const std::string& ccdbServer = "", + std::string ccdbServerInp = "", const std::string& metaDataStr = "", CCDBRefreshMode refresh = CCDBRefreshMode::NONE) { int retValGLO = 0; int retValRCT = 0; int retValGLOmd = 0; - + int retValCTP = 0; // substitute TRG by CTP std::regex regCTP(R"((^\s*|,\s*)(TRG)(\s*,|\s*$))"); std::string detsReadout{std::regex_replace(_detsReadout, regCTP, "$1CTP$3")}; @@ -78,6 +82,8 @@ int createGRPECSObject(const std::string& dataPeriod, tendVal = tend + marginAtEOR; } GRPECSObject grpecs; + o2::ctp::CTPConfiguration* ctpConfig = nullptr; + o2::ctp::CTPConfiguration ctpConfigNew; grpecs.setTimeStart(tstart); grpecs.setTimeEnd(tend); grpecs.setTimeStartCTP(tstartCTP); @@ -119,10 +125,32 @@ int createGRPECSObject(const std::string& dataPeriod, } }; + if (ccdbServerInp.empty()) { + ccdbServerInp = ccdbServer; + } + if (runOrig > 0 && runOrig != run && tend <= tstart && !ccdbServerInp.empty()) { // create CTP config + try { + auto& bcm = 
o2::ccdb::BasicCCDBManager::instance(); + bcm.setURL(ccdbServerInp); + bcm.setFatalWhenNull(false); + ctpConfig = bcm.getForRun("CTP/Config/Config", runOrig); + if (!ctpConfig) { + throw std::runtime_error(fmt::format("Failed to access CTP/Config/Config for original run {}", runOrig)); + } + std::string cfstr = ctpConfig->getConfigString(), srun{fmt::format("run {}", run)}, srunOrig{fmt::format("run {}", runOrig)}; + o2::utils::Str::replaceAll(cfstr, srunOrig, srun); + ctpConfigNew.loadConfigurationRun3(cfstr); + ctpConfigNew.setRunNumber(run); + } catch (std::exception e) { + LOGP(error, "Failed to create CTP/Config/Config from the original run {}, reason: {}", runOrig, e.what()); + } + } + toKeyValPairs(metaDataStr); if (!ccdbServer.empty()) { CcdbApi api; + const std::string objPath{"GLO/Config/GRPECS"}; api.init(ccdbServer); metadata["responsible"] = "ECS"; @@ -181,13 +209,33 @@ int createGRPECSObject(const std::string& dataPeriod, } } } + + if (ctpConfig && ctpConfigNew.getRunNumber() == run) { // create CTP config + std::map metadataCTP; + metadataCTP["runNumber"] = fmt::format("{}", run); + metadataCTP["comment"] = fmt::format("cloned from run {}", runOrig); + retValCTP = api.storeAsTFileAny(&ctpConfigNew, "CTP/Config/Config", metadataCTP, tstart, tendVal); + if (retValCTP == 0) { + LOGP(info, "Uploaded to {}/{} with validity {}:{} for SOR:{}/EOR:{}, cloned from run {}", ccdbServer, "CTP/Config/Config", tstart, tendVal, tstart, tend, runOrig); + } else { + LOGP(alarm, "Upload to {}/{} with validity {}:{} for SOR:{}/EOR:{} (cloned from run {}) FAILED, returned with code {}", ccdbServer, "CTP/Config/Config", tstart, tendVal, tstart, tend, runOrig, retValCTP); + } + } } else { // write a local file auto fname = o2::base::NameConf::getGRPECSFileName(); TFile grpF(fname.c_str(), "recreate"); grpF.WriteObjectAny(&grpecs, grpecs.Class(), o2::base::NameConf::CCDBOBJECT.data()); - LOG(info) << "Stored to local file " << fname; + grpF.Close(); + LOGP(info, "Stored 
GRPECS to local file {}", fname); + if (ctpConfig && ctpConfigNew.getRunNumber() == run) { + std::string ctnpfname = fmt::format("CTPConfig_{}_from_{}.root", run, runOrig); + TFile ctpF(ctnpfname.c_str(), "recreate"); + ctpF.WriteObjectAny(&ctpConfigNew, ctpConfigNew.Class(), o2::base::NameConf::CCDBOBJECT.data()); + ctpF.Close(); + LOGP(info, "Stored CTPConfig to local file {}", ctnpfname); + } } - // + if (refresh != CCDBRefreshMode::NONE && !ccdbServer.empty()) { auto cmd = fmt::format("curl -I -i -s \"{}{}latest/%5Cw%7B3%7D/.*/`date +%s000`/?prepare={}\"", ccdbServer, ccdbServer.back() == '/' ? "" : "/", refresh == CCDBRefreshMode::SYNC ? "sync" : "true"); auto t0 = std::chrono::high_resolution_clock::now(); @@ -195,7 +243,7 @@ int createGRPECSObject(const std::string& dataPeriod, auto t1 = std::chrono::high_resolution_clock::now(); LOGP(info, "Executed [{}] -> {} in {:.3f} s", cmd, res, std::chrono::duration_cast(t1 - t0).count() / 1000.f); } - if (retValGLO != 0 || retValRCT != 0 || retValGLOmd != 0) { + if (retValGLO != 0 || retValRCT != 0 || retValGLOmd != 0 || retValCTP != 0) { return 4; } return 0; @@ -229,10 +277,12 @@ int main(int argc, char** argv) add_option("start-time-ctp", bpo::value()->default_value(0), "run start CTP time in ms, same as ECS if not set or 0"); add_option("end-time-ctp", bpo::value()->default_value(0), "run end CTP time in ms, same as ECS if not set or 0"); add_option("ccdb-server", bpo::value()->default_value("http://alice-ccdb.cern.ch"), "CCDB server for upload, local file if empty"); + add_option("ccdb-server-input", bpo::value()->default_value(""), "CCDB server for inputs (if needed, e.g. 
CTPConfig), dy default ccdb-server is used"); add_option("meta-data,m", bpo::value()->default_value("")->implicit_value(""), "metadata as key1=value1;key2=value2;.."); add_option("refresh", bpo::value()->default_value("")->implicit_value("async"), R"(refresh server cache after upload: "none" (or ""), "async" (non-blocking) and "sync" (blocking))"); add_option("marginSOR", bpo::value()->default_value(4 * o2::ccdb::CcdbObjectInfo::DAY), "validity at SOR"); add_option("marginEOR", bpo::value()->default_value(10 * o2::ccdb::CcdbObjectInfo::MINUTE), "validity margin to add after EOR"); + add_option("original-run,o", bpo::value()->default_value(0), "if >0, use as the source run to create CTP/Config/Config object"); opt_all.add(opt_general).add(opt_hidden); bpo::store(bpo::command_line_parser(argc, argv).options(opt_all).positional(opt_pos).run(), vm); @@ -253,13 +303,13 @@ int main(int argc, char** argv) } if (vm.count("run") == 0) { std::cerr << "ERROR: " - << "obligator run number is missing" << std::endl; + << "obligatory run number is missing" << std::endl; std::cerr << opt_general << std::endl; exit(3); } if (vm.count("period") == 0) { std::cerr << "ERROR: " - << "obligator data taking period name is missing" << std::endl; + << "obligatory data taking period name is missing" << std::endl; std::cerr << opt_general << std::endl; exit(3); } @@ -278,6 +328,7 @@ int main(int argc, char** argv) int retVal = createGRPECSObject( vm["period"].as(), vm["run"].as(), + vm["original-run"].as(), vm["run-type"].as(), vm["hbf-per-tf"].as(), vm["detectors"].as(), @@ -291,6 +342,7 @@ int main(int argc, char** argv) vm["marginSOR"].as(), vm["marginEOR"].as(), vm["ccdb-server"].as(), + vm["ccdb-server-input"].as(), vm["meta-data"].as(), refresh); From 9e322a95f4f1b2e81c54d71a360a3ef8ca159f0d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 11 Apr 2025 13:38:56 +0200 Subject: [PATCH 0372/1914] DPL Analysis: improve handling of tables with sources (#14172) --- 
.../Core/include/Framework/AnalysisHelpers.h | 57 +++++++++++++++---- .../Core/include/Framework/AnalysisTask.h | 51 +---------------- Framework/Core/src/WorkflowHelpers.cxx | 18 +++--- 3 files changed, 57 insertions(+), 69 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 985f80cd548bc..55d2490dff1bc 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -13,7 +13,6 @@ #include "Framework/ASoA.h" #include "Framework/DataAllocator.h" -#include "Framework/ExpressionHelpers.h" #include "Framework/IndexBuilderHelpers.h" #include "Framework/InputSpec.h" #include "Framework/Output.h" @@ -28,6 +27,48 @@ #include namespace o2::soa { +template +constexpr auto tableRef2ConfigParamSpec() +{ + return o2::framework::ConfigParamSpec{ + std::string{"input:"} + o2::aod::label(), + framework::VariantType::String, + aod::sourceSpec(), + {"\"\""}}; +} + +namespace +{ +template +inline constexpr auto getSources() +{ + return [] refs>() { + return [](std::index_sequence) { + return std::vector{soa::tableRef2ConfigParamSpec()...}; + }(std::make_index_sequence()); + }.template operator()(); +} + +template +constexpr auto getInputMetadata() -> std::vector +{ + std::vector inputMetadata; + auto inputSources = getSources(); + std::sort(inputSources.begin(), inputSources.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name < b.name; }); + auto last = std::unique(inputSources.begin(), inputSources.end(), [](framework::ConfigParamSpec const& a, framework::ConfigParamSpec const& b) { return a.name == b.name; }); + inputSources.erase(last, inputSources.end()); + inputMetadata.insert(inputMetadata.end(), inputSources.begin(), inputSources.end()); + return inputMetadata; +} + +template + requires(!soa::with_sources) +constexpr auto getInputMetadata() -> std::vector +{ + return {}; +} +} // 
namespace + template constexpr auto tableRef2InputSpec() { @@ -35,7 +76,9 @@ constexpr auto tableRef2InputSpec() o2::aod::label(), o2::aod::origin(), o2::aod::description(o2::aod::signature()), - R.version}; + R.version, + framework::Lifetime::Timeframe, + getInputMetadata>::metadata>()}; } template @@ -64,16 +107,6 @@ constexpr auto tableRef2OutputRef() o2::aod::label(), R.version}; } - -template -constexpr auto tableRef2ConfigParamSpec() -{ - return o2::framework::ConfigParamSpec{ - std::string{"input:"} + o2::aod::label(), - framework::VariantType::String, - aod::sourceSpec(), - {"\"\""}}; -} } // namespace o2::soa namespace o2::framework diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index bd1a1cfd88954..c7f3da1948c62 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -65,46 +65,6 @@ concept is_enumeration = is_enumeration_v>; // the contents of an AnalysisTask... 
namespace { struct AnalysisDataProcessorBuilder { - template - static ConfigParamSpec getSpec() - { - if constexpr (soa::has_metadata>) { - return ConfigParamSpec{std::string{"input:"} + aod::MetadataTrait::metadata::tableLabel(), VariantType::String, aod::MetadataTrait::metadata::sourceSpec(), {"\"\""}}; - } else { - using O1 = framework::pack_element_t<0, typename T::originals>; - return ConfigParamSpec{std::string{"input:"} + aod::MetadataTrait::metadata::tableLabel(), VariantType::String, aod::MetadataTrait::metadata::sourceSpec(), {"\"\""}}; - } - } - - template - static ConfigParamSpec getSpec() - { - return soa::tableRef2ConfigParamSpec(); - } - - template - static inline auto getSources() - { - return [] refs>() { - return [](std::index_sequence) { - return std::vector{soa::tableRef2ConfigParamSpec()...}; - }(std::make_index_sequence()); - }.template operator()(); - } - - template - - static auto getInputMetadata() - { - std::vector inputMetadata; - auto inputSources = getSources(); - std::sort(inputSources.begin(), inputSources.end(), [](ConfigParamSpec const& a, ConfigParamSpec const& b) { return a.name < b.name; }); - auto last = std::unique(inputSources.begin(), inputSources.end(), [](ConfigParamSpec const& a, ConfigParamSpec const& b) { return a.name == b.name; }); - inputSources.erase(last, inputSources.end()); - inputMetadata.insert(inputMetadata.end(), inputSources.begin(), inputSources.end()); - return inputMetadata; - } - template static void addGroupingCandidates(std::vector& bk, std::vector& bku) { @@ -130,14 +90,9 @@ struct AnalysisDataProcessorBuilder { template static void addOriginalRef(const char* name, bool value, std::vector& inputs) { - using metadata = typename aod::MetadataTrait>::metadata; - std::vector inputMetadata; - inputMetadata.emplace_back(ConfigParamSpec{std::string{"control:"} + name, VariantType::Bool, value, {"\"\""}}); - if constexpr (soa::with_sources) { - auto inputSources = getInputMetadata(); - 
inputMetadata.insert(inputMetadata.end(), inputSources.begin(), inputSources.end()); - } - DataSpecUtils::updateInputList(inputs, InputSpec{o2::aod::label(), o2::aod::origin(), aod::description(o2::aod::signature()), R.version, Lifetime::Timeframe, inputMetadata}); + auto spec = soa::tableRef2InputSpec(); + spec.metadata.emplace_back(ConfigParamSpec{std::string{"control:"} + name, VariantType::Bool, value, {"\"\""}}); + DataSpecUtils::updateInputList(inputs, std::move(spec)); } /// helpers to append expression information for a single argument diff --git a/Framework/Core/src/WorkflowHelpers.cxx b/Framework/Core/src/WorkflowHelpers.cxx index b18b559fe99fb..652e863f98394 100644 --- a/Framework/Core/src/WorkflowHelpers.cxx +++ b/Framework/Core/src/WorkflowHelpers.cxx @@ -385,6 +385,15 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext auto outputSpecLessThan = [](OutputSpec const& lhs, OutputSpec const& rhs) { return DataSpecUtils::describe(lhs) < DataSpecUtils::describe(rhs); }; std::sort(ac.requestedDYNs.begin(), ac.requestedDYNs.end(), inputSpecLessThan); std::sort(ac.providedDYNs.begin(), ac.providedDYNs.end(), outputSpecLessThan); + + DataProcessorSpec indexBuilder{ + "internal-dpl-aod-index-builder", + {}, + {}, + readers::AODReaderHelpers::indexBuilderCallback(ac.requestedIDXs), + {}}; + AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, indexBuilder); + for (auto& input : ac.requestedDYNs) { if (std::none_of(ac.providedDYNs.begin(), ac.providedDYNs.end(), [&input](auto const& x) { return DataSpecUtils::match(input, x); })) { ac.spawnerInputs.emplace_back(input); @@ -397,15 +406,6 @@ void WorkflowHelpers::injectServiceDevices(WorkflowSpec& workflow, ConfigContext {}, readers::AODReaderHelpers::aodSpawnerCallback(ac.spawnerInputs), {}}; - - DataProcessorSpec indexBuilder{ - "internal-dpl-aod-index-builder", - {}, - {}, - 
readers::AODReaderHelpers::indexBuilderCallback(ac.requestedIDXs), - {}}; - - AnalysisSupportHelpers::addMissingOutputsToBuilder(ac.requestedIDXs, ac.requestedAODs, ac.requestedDYNs, indexBuilder); AnalysisSupportHelpers::addMissingOutputsToSpawner({}, ac.spawnerInputs, ac.requestedAODs, aodSpawner); AnalysisSupportHelpers::addMissingOutputsToReader(ac.providedAODs, ac.requestedAODs, aodReader); From 206d9ab0ef4c905694237d9cb36a5d966e284aaf Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 12 Apr 2025 08:13:44 +0200 Subject: [PATCH 0373/1914] ITS3: fix tracking initialisation (#14154) --- .../ITS3/reconstruction/src/TrackingInterface.cxx | 11 +++++++++++ Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx | 1 - 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx index 10c6b9265a8bb..afb276e956e76 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/TrackingInterface.cxx @@ -14,6 +14,7 @@ #include "ITSBase/GeometryTGeo.h" #include "ITSMFTBase/DPLAlpideParam.h" #include "DetectorsBase/GRPGeomHelper.h" +#include "Framework/DeviceSpec.h" namespace o2::its3 { @@ -31,7 +32,17 @@ void ITS3TrackingInterface::updateTimeDependentParams(framework::ProcessingConte } auto geom = its::GeometryTGeo::Instance(); geom->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::T2GRot, o2::math_utils::TransformType::T2G)); + initialise(); getConfiguration(pc); + if (pc.services().get().inputTimesliceId == 0) { // print settings only for the 1st pipeline + o2::its::VertexerParamConfig::Instance().printKeyValues(); + o2::its::TrackerParamConfig::Instance().printKeyValues(); + const auto& trParams = getTracker()->getParameters(); + for (size_t it = 0; it < trParams.size(); it++) { + const auto& par = trParams[it]; + 
LOGP(info, "recoIter#{} : {}", it, par.asString()); + } + } } } diff --git a/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx index 90f94e625d6ea..5b710a3d11fef 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/TrackerSpec.cxx @@ -67,7 +67,6 @@ void TrackerDPL::init(InitContext& ic) mITS3TrackingInterface.setTraitsFromProvider(mChainITS->GetITSVertexerTraits(), mChainITS->GetITSTrackerTraits(), mChainITS->GetITSTimeframe()); - mITS3TrackingInterface.initialise(); } void TrackerDPL::stop() From 4be2de6e8316d05db24c5439b56ec6f67ce89e88 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 12 Apr 2025 08:14:35 +0200 Subject: [PATCH 0374/1914] ITS3: add metal layer + fix macros (#13894) --- Detectors/Upgrades/ITS3/CMakeLists.txt | 6 +- Detectors/Upgrades/ITS3/README.md | 105 +++++++- .../ITS3/alignment/src/MisalignmentHits.cxx | 1 - Detectors/Upgrades/ITS3/base/CMakeLists.txt | 12 +- .../ITS3/base/include/ITS3Base/ITS3Params.h | 3 +- ...tionSuperAlpide.h => SegmentationMosaix.h} | 133 ++++----- .../ITS3/base/include/ITS3Base/SpecsV2.h | 150 +++++++---- .../Upgrades/ITS3/base/src/ITS3BaseLinkDef.h | 1 - .../ITS3/base/src/SegmentationSuperAlpide.cxx | 20 -- Detectors/Upgrades/ITS3/data/CMakeLists.txt | 25 ++ .../Upgrades/ITS3/macros/test/CMakeLists.txt | 4 +- .../ITS3/macros/test/CheckClusterSize.C | 24 +- .../ITS3/macros/test/CheckClustersITS3.C | 66 +++-- .../ITS3/macros/test/CheckDigitsDensity.C | 16 +- .../ITS3/macros/test/CheckDigitsITS3.C | 21 +- .../Upgrades/ITS3/macros/test/CheckHits.C | 2 - ...erAlpideSegment.C => CheckMosaixSegment.C} | 74 +++-- ...gmentTrans.C => CheckMosaixSegmentTrans.C} | 42 ++- .../ITS3/macros/test/CheckTileNumbering.C | 6 +- .../macros/test/CompareClustersAndDigits.C | 39 +-- .../ITS3/macros/test/CreateDictionariesITS3.C | 115 +++++--- .../ITS3/macros/test/TestSensorGeometry.C | 19 +- .../BuildTopologyDictionary.h | 48 
++-- .../include/ITS3Reconstruction/Clusterer.h | 10 +- .../include/ITS3Reconstruction/IOUtils.h | 23 +- .../include/ITS3Reconstruction/LookUp.h | 20 +- .../ITS3Reconstruction/TopologyDictionary.h | 124 +++++---- .../src/BuildTopologyDictionary.cxx | 254 +++++++++++------- .../ITS3/reconstruction/src/Clusterer.cxx | 21 +- .../ITS3/reconstruction/src/IOUtils.cxx | 3 - .../src/ITS3ReconstructionLinkDef.h | 1 + .../ITS3/reconstruction/src/LookUp.cxx | 21 +- .../reconstruction/src/TopologyDictionary.cxx | 176 ++++++++---- .../Upgrades/ITS3/simulation/CMakeLists.txt | 4 +- .../DescriptorInnerBarrelITS3.h | 6 +- .../include/ITS3Simulation/DigiParams.h | 45 ++++ .../include/ITS3Simulation/Digitizer.h | 31 ++- .../include/ITS3Simulation/ITS3Layer.h | 25 +- .../src/DescriptorInnerBarrelITS3.cxx | 4 +- .../ITS3/simulation/src/DigiParams.cxx | 40 +++ .../ITS3/simulation/src/Digitizer.cxx | 111 +++++--- .../ITS3/simulation/src/ITS3Layer.cxx | 54 ++-- .../simulation/src/ITS3SimulationLinkDef.h | 1 + 43 files changed, 1197 insertions(+), 709 deletions(-) rename Detectors/Upgrades/ITS3/base/include/ITS3Base/{SegmentationSuperAlpide.h => SegmentationMosaix.h} (54%) delete mode 100644 Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx create mode 100644 Detectors/Upgrades/ITS3/data/CMakeLists.txt rename Detectors/Upgrades/ITS3/macros/test/{CheckSuperAlpideSegment.C => CheckMosaixSegment.C} (78%) rename Detectors/Upgrades/ITS3/macros/test/{CheckSuperAlpideSegmentTrans.C => CheckMosaixSegmentTrans.C} (85%) create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h create mode 100644 Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx diff --git a/Detectors/Upgrades/ITS3/CMakeLists.txt b/Detectors/Upgrades/ITS3/CMakeLists.txt index 6965061571da6..73ad4b9d53e37 100644 --- a/Detectors/Upgrades/ITS3/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/CMakeLists.txt @@ -9,11 +9,13 @@ # granted to it by virtue of its status as an Intergovernmental 
Organization # or submit itself to any jurisdiction. -#add_compile_options(-O0 -g -fPIC) +#add_compile_options(-O0 -g -fPIC -fsanitize=address) +#add_link_options(-fsanitize=address) -add_subdirectory(macros) +add_subdirectory(data) add_subdirectory(simulation) add_subdirectory(alignment) add_subdirectory(base) add_subdirectory(workflow) add_subdirectory(reconstruction) +add_subdirectory(macros) diff --git a/Detectors/Upgrades/ITS3/README.md b/Detectors/Upgrades/ITS3/README.md index 6d3b0d8d821fb..afcea6a5c3e17 100644 --- a/Detectors/Upgrades/ITS3/README.md +++ b/Detectors/Upgrades/ITS3/README.md @@ -35,7 +35,7 @@ export ALICEO2_CCDB_LOCALCACHE=${PWD}/ccdb Simulate diamond -``` bash +```bash # append to o2-sim --configKeyValues="Diamond.width[2]=6.;"" ``` @@ -86,13 +86,27 @@ TODO ```bash # Create Full Geometry -o2-sim -g pythia8pp -j10 --detectorList ALICE2.1 --run 303901 -n0 +o2-sim --detectorList ALICE2.1 --run 303901 -n0 cp o2sim_geometry.root ${ALICEO2_CCDB_LOCALCACHE}/GLO/Config/Geometry/snapshot.root o2-create-aligned-geometry-workflow -b --configKeyValues "HBFUtils.startTime=1547978230000" --condition-remap="file://${ALICEO2_CCDB_LOCALCACHE}=GLO/Config/Geometry" cp o2sim_geometry-aligned.root ${ALICEO2_CCDB_LOCALCACHE}/GLO/Config/GeometryAligned/snapshot.root cp its_GeometryTGeo.root ${ALICEO2_CCDB_LOCALCACHE}/ITS/Config/Geometry/snapshot.root ``` +or copying the ideal geometry to the aligned one and: + +```cpp +{ + o2::base::GeometryManager::loadGeometry(""); + auto itsTGeo = o2::its::GeometryTGeo::Instance(); + itsTGeo->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, o2::math_utils::TransformType::L2G, o2::math_utils::TransformType::T2GRot)); + TFile outF("its_GeometryTGeo.root", "recreate"); + outF.WriteObjectAny(itsTGeo, "o2::its::GeometryTGeo", "ccdb_object"); + outF.Close(); + itsTGeo->destroy(); +} +``` + ### Regenerating the TopologyDictionary 1. 
Clusterization w/o tracking @@ -158,7 +172,7 @@ The file `hijing.C` can be found [here](https://alice.its.cern.ch/jira/browse/AO 2. (optional) Run the macro `CreateITS3StaticDeadMap.C` and/or visualize with `CheckTileNumbering.C` 3. Move the ccdb object into `${ALICEO2_CCDB_LOCALCACHE}/IT3/Calib/DeadMap`, this is not optional since there is no default object uploaded 4. Run digitizer with `ITS3Params.useDeadChannelMap=true;`, e.g.: -``` bash +```bash o2-sim-digitizer-workflow --configKeyValues="ITS3Params.useDeadChannelMap=true;" ``` @@ -168,6 +182,89 @@ o2-sim-digitizer-workflow --configKeyValues="ITS3Params.useDeadChannelMap=true;" 1. Create misalignment parameters with `CreateMisalignmentITS3.C` 2. Visualize with `ShowCoefficients.C` 3. Run digitizer -``` bash +```bash o2-sim-digitizer-workflow -b --configKeyValues="ITS3Params.applyMisalignmentHits=true;ITS3Params.misalignmentHitsParams=misparams.root" ``` + + +### Misc +#### Setup to run SIM+DIGIT+TRACKING +```bash + +#!/bin/bash + +export IGNORE_VALIDITYCHECK_OF_CCDB_LOCALCACHE=1 +export ALICEO2_CCDB_LOCALCACHE=$PWD/ccdb + +BASE_DIR="batch_" +TOTAL_DIRS=4 +SIM_CMD="o2-sim -g pythia8pp --detectorList ALICE2.1 -m IT3 --run 303901 -n2000 --field ccdb -j8" +DIGIT_CMD="o2-sim-digitizer-workflow -b --interactionRate 675000 --run --configKeyValues=\"HBFUtils.runNumber=303901;HBFUtils.nHBFPerTF=32;ITSAlpideParam.roFrameLengthInBC=198\"" +RECO_CMD="o2-its3-reco-workflow -b --run --configKeyValues=\"ITSVertexerParam.phiCut=0.5;ITSVertexerParam.clusterContributorsCut=3;ITSVertexerParam.tanLambdaCut=0.2;ITSCATrackerParam.useTrackFollower=0;ITSCATrackerParam.findShortTracks=1;HBFUtils.runNumber=303901;HBFUtils.nHBFPerTF=32;ITSAlpideParam.roFrameLengthInBC=198\" --tracking-mode async" + +for ((i = 1; i <= TOTAL_DIRS; i++)); do + DIR="${BASE_DIR}${i}" + + if [ ! -d "$DIR" ]; then + mkdir "$DIR" + fi + + if [ -f "${DIR}/sim_done" ]; then + echo "Skipping SIM ${DIR} because _done exists." 
+ continue + fi + + cd "$DIR" + + echo "Executing SIM command in ${DIR}..." + eval $SIM_CMD >sim.log + + touch sim_done + + cd .. +done + +for ((i = 1; i <= TOTAL_DIRS; i++)); do + DIR="${BASE_DIR}${i}" + + if [ ! -d "$DIR" ]; then + mkdir "$DIR" + fi + + if [ -f "${DIR}/digit_done" ]; then + echo "Skipping DIGIT ${DIR} because _done exists." + continue + fi + + cd "$DIR" + + echo "Executing DIGIT command in ${DIR}..." + eval $DIGIT_CMD >digit.log + + touch digit_done + + cd .. +done + +for ((i = 1; i <= TOTAL_DIRS; i++)); do + DIR="${BASE_DIR}${i}" + + if [ ! -d "$DIR" ]; then + mkdir "$DIR" + fi + + if [ -f "${DIR}/reco_done" ]; then + echo "Skipping RECO ${DIR} because _done exists." + continue + fi + + cd "$DIR" + + echo "Executing RECO command in ${DIR}..." + eval $RECO_CMD >reco.log + + touch reco_done + + cd .. +done +``` diff --git a/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx b/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx index fbc0b5d623dca..66ab4c8090b54 100644 --- a/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx +++ b/Detectors/Upgrades/ITS3/alignment/src/MisalignmentHits.cxx @@ -10,7 +10,6 @@ // or submit itself to any jurisdiction. #include "ITS3Align/MisalignmentHits.h" -#include "ITS3Base/SegmentationSuperAlpide.h" #include "ITS3Base/ITS3Params.h" #include "SimConfig/DigiParams.h" #include "DetectorsBase/Propagator.h" diff --git a/Detectors/Upgrades/ITS3/base/CMakeLists.txt b/Detectors/Upgrades/ITS3/base/CMakeLists.txt index 8695e2323bbab..306226e5088cf 100644 --- a/Detectors/Upgrades/ITS3/base/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/base/CMakeLists.txt @@ -9,11 +9,9 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. 
-o2_add_library(ITS3Base - SOURCES src/SegmentationSuperAlpide.cxx - src/ITS3Params.cxx - PUBLIC_LINK_LIBRARIES O2::CommonConstants O2::MathUtils O2::DetectorsBase) +o2_add_library( + ITS3Base + SOURCES src/ITS3Params.cxx + PUBLIC_LINK_LIBRARIES O2::CommonConstants O2::MathUtils O2::DetectorsBase) -o2_target_root_dictionary(ITS3Base - HEADERS include/ITS3Base/SegmentationSuperAlpide.h - include/ITS3Base/ITS3Params.h) +o2_target_root_dictionary(ITS3Base HEADERS include/ITS3Base/ITS3Params.h) diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h index c685bf0f085d6..0bd548cef953d 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/ITS3Params.h @@ -25,7 +25,8 @@ struct ITS3Params : public o2::conf::ConfigurableParamHelper { bool misalignmentHitsUseProp{false}; // Use propagtor for mis-alignment std::string globalGeoMisAlignerMacro{"${O2_ROOT}/share/macro/MisAlignGeoITS3.C"}; // Path to macro for global geometry mis-alignment // Chip studies - bool useDeadChannelMap{false}; // Query for a dead channel map to study disabling individual tiles + bool useDeadChannelMap{false}; // Query for a dead channel map to study disabling individual tiles + std::string chipResponseFunction{"APTS"}; // Chip response function one of "Alpide", "APTS" or "Mosaix" (not yet available) O2ParamDef(ITS3Params, "ITS3Params"); }; diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationSuperAlpide.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h similarity index 54% rename from Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationSuperAlpide.h rename to Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h index dbdf90574ce5d..f8d4a784120a0 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationSuperAlpide.h +++ 
b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h @@ -9,30 +9,39 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file SegmentationSuperAlpide.h -/// \brief Definition of the SegmentationSuperAlpide class +/// \file SegmentationMosaix.h +/// \brief Definition of the SegmentationMosaix class /// \author felix.schlepper@cern.ch -#ifndef ALICEO2_ITS3_SEGMENTATIONSUPERALPIDE_H_ -#define ALICEO2_ITS3_SEGMENTATIONSUPERALPIDE_H_ +#ifndef ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ +#define ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ + +#include #include "MathUtils/Cartesian.h" #include "ITS3Base/SpecsV2.h" -#include "Rtypes.h" - -#include namespace o2::its3 { /// Segmentation and response for pixels in ITS3 upgrade -class SegmentationSuperAlpide +class SegmentationMosaix { // This class defines the segmenation of the pixelArray in the tile. We define // two coordinate systems, one width x,z detector local coordianates (cm) and // the more natural row,col layout: Also all the transformation between these // two. The class provides the transformation from the tile to TGeo // coordinates. + // In fact there exist three coordinate systems and one is transient. + // 1. The curved coordinate system. The chip's local coordinate system is + // defined with its center at the mid-point of the tube. + // 2. The flat coordinate system. This is the tube segment projected onto a flat + // surface. In the projection we implicitly assume that the inner and outer + // stretch does not depend on the radius. + // Additionally, there is a difference between the flat geometrical center + // and the physical center defined by the metal layer. + // 3. The detector coordinate system. Defined by the row and column segmentation + // defined at the upper edge in the flat coord. 
// row,col=0 // | @@ -53,25 +62,32 @@ class SegmentationSuperAlpide // | | | // x----------------------x public: - virtual ~SegmentationSuperAlpide() = default; - SegmentationSuperAlpide(const SegmentationSuperAlpide&) = default; - SegmentationSuperAlpide(SegmentationSuperAlpide&&) = delete; - SegmentationSuperAlpide& operator=(const SegmentationSuperAlpide&) = delete; - SegmentationSuperAlpide& operator=(SegmentationSuperAlpide&&) = delete; - constexpr SegmentationSuperAlpide(int layer) : mLayer{layer} {} - - static constexpr int mNCols{constants::pixelarray::nCols}; - static constexpr int mNRows{constants::pixelarray::nRows}; - static constexpr int nPixels{mNCols * mNRows}; - static constexpr float mLength{constants::pixelarray::length}; - static constexpr float mWidth{constants::pixelarray::width}; - static constexpr float mPitchCol{constants::pixelarray::length / static_cast(mNCols)}; - static constexpr float mPitchRow{constants::pixelarray::width / static_cast(mNRows)}; - static constexpr float mSensorLayerThickness{constants::thickness}; - static constexpr float mSensorLayerThicknessEff{constants::effThickness}; - static constexpr std::array mRadii{constants::radii}; - - /// Transformation from the curved surface to a flat surface + constexpr SegmentationMosaix(int layer) : mRadius(static_cast(constants::radiiMiddle[layer])) {} + constexpr ~SegmentationMosaix() = default; + constexpr SegmentationMosaix(const SegmentationMosaix&) = default; + constexpr SegmentationMosaix(SegmentationMosaix&&) = delete; + constexpr SegmentationMosaix& operator=(const SegmentationMosaix&) = default; + constexpr SegmentationMosaix& operator=(SegmentationMosaix&&) = delete; + + static constexpr int NCols{constants::pixelarray::nCols}; + static constexpr int NRows{constants::pixelarray::nRows}; + static constexpr int NPixels{NCols * NRows}; + static constexpr float Length{constants::pixelarray::length}; + static constexpr float LengthH{Length / 2.f}; + static constexpr float 
Width{constants::pixelarray::width}; + static constexpr float WidthH{Width / 2.f}; + static constexpr float PitchCol{constants::pixelarray::pixels::mosaix::pitchZ}; + static constexpr float PitchRow{constants::pixelarray::pixels::mosaix::pitchX}; + static constexpr float SensorLayerThickness{constants::totalThickness}; + static constexpr float NominalYShift{constants::nominalYShift}; + + /// Transformation from the curved surface to a flat surface. + /// Additionally a shift in the flat coordinates must be applied because + /// the center of the TGeoShape when projected will be higher than the + /// physical thickness of the chip (we add an additional hull to account for + /// the copper metal interconnection which is in reality part of the chip but in our + /// simulation the silicon and metal layer are separated). Thus we shift the projected center + /// down by this difference to align the coordinate systems. /// \param xCurved Detector local curved coordinate x in cm with respect to /// the center of the sensitive volume. /// \param yCurved Detector local curved coordinate y in cm with respect to @@ -80,18 +96,20 @@ class SegmentationSuperAlpide /// the center of the sensitive volume. /// \param yFlat Detector local flat coordinate y in cm with respect to /// the center of the sensitive volume. 
- void curvedToFlat(const float xCurved, const float yCurved, float& xFlat, float& yFlat) const noexcept + constexpr void curvedToFlat(const float xCurved, const float yCurved, float& xFlat, float& yFlat) const noexcept { - // MUST align the flat surface with the curved surface with the original pixel array is on + // MUST align the flat surface with the curved surface with the original pixel array is on and account for metal + // stack float dist = std::hypot(xCurved, yCurved); - float phiReadout = constants::tile::readout::width / constants::radii[mLayer]; float phi = std::atan2(yCurved, xCurved); - xFlat = mRadii[mLayer] * (phi - phiReadout) - constants::pixelarray::width / 2.; - yFlat = dist - mRadii[mLayer]; + xFlat = (mRadius * phi) - WidthH; + // the y position is in the silicon volume however we need the chip volume (silicon+metalstack) + // this is accounted by a y shift + yFlat = dist - mRadius + NominalYShift; } /// Transformation from the flat surface to a curved surface - /// It works only if the detector is not rototraslated + /// It works only if the detector is not rototraslated. /// \param xFlat Detector local flat coordinate x in cm with respect to /// the center of the sensitive volume. /// \param yFlat Detector local flat coordinate y in cm with respect to @@ -100,13 +118,15 @@ class SegmentationSuperAlpide /// the center of the sensitive volume. /// \param yCurved Detector local curved coordinate y in cm with respect to /// the center of the sensitive volume. - void flatToCurved(float xFlat, float yFlat, float& xCurved, float& yCurved) const noexcept + constexpr void flatToCurved(float xFlat, float yFlat, float& xCurved, float& yCurved) const noexcept { - // MUST align the flat surface with the curved surface with the original pixel array is on - float dist = yFlat + mRadii[mLayer]; - float phiReadout = constants::tile::readout::width / mRadii[mLayer]; - xCurved = dist * std::cos(phiReadout + (xFlat + constants::pixelarray::width / 2.) 
/ mRadii[mLayer]); - yCurved = dist * std::sin(phiReadout + (xFlat + constants::pixelarray::width / 2.) / mRadii[mLayer]); + // MUST align the flat surface with the curved surface with the original pixel array is on and account for metal + // stack + // the y position is in the chip volume however we need the silicon volume + // this is accounted by a -y shift + float dist = yFlat - NominalYShift + mRadius; + xCurved = dist * std::cos((xFlat + WidthH) / mRadius); + yCurved = dist * std::sin((xFlat + WidthH) / mRadius); } /// Transformation from Geant detector centered local coordinates (cm) to @@ -120,7 +140,7 @@ class SegmentationSuperAlpide /// the center of the sensitive volume. /// \param int iRow Detector x cell coordinate. /// \param int iCol Detector z cell coordinate. - bool localToDetector(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept + constexpr bool localToDetector(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept { localToDetectorUnchecked(xRow, zCol, iRow, iCol); if (!isValid(iRow, iCol)) { @@ -131,11 +151,10 @@ class SegmentationSuperAlpide } // Same as localToDetector w.o. checks. - void localToDetectorUnchecked(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept + constexpr void localToDetectorUnchecked(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept { - namespace cp = constants::pixelarray; - iRow = std::floor((cp::width / 2. - xRow) / mPitchRow); - iCol = std::floor((zCol + cp::length / 2.) / mPitchCol); + iRow = static_cast(std::floor((WidthH - xRow) / PitchRow)); + iCol = static_cast(std::floor((zCol + LengthH) / PitchCol)); } /// Transformation from Detector cell coordinates to Geant detector centered @@ -148,7 +167,7 @@ class SegmentationSuperAlpide /// center of the sensitive volume. /// If iRow and or iCol is outside of the segmentation range a value of -0.5*Dx() /// or -0.5*Dz() is returned. 
- bool detectorToLocal(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + constexpr bool detectorToLocal(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept { if (!isValid(iRow, iCol)) { return false; @@ -159,11 +178,10 @@ class SegmentationSuperAlpide // Same as detectorToLocal w.o. checks. // We position ourself in the middle of the pixel. - void detectorToLocalUnchecked(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + constexpr void detectorToLocalUnchecked(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept { - namespace cp = constants::pixelarray; - xRow = -(iRow + 0.5) * mPitchRow + cp::width / 2.; - zCol = (iCol + 0.5) * mPitchCol - cp::length / 2.; + xRow = -(static_cast(iRow) + 0.5f) * PitchRow + WidthH; + zCol = (static_cast(iCol) + 0.5f) * PitchCol - LengthH; } bool detectorToLocal(int const row, int const col, math_utils::Point3D& loc) const noexcept @@ -172,7 +190,7 @@ class SegmentationSuperAlpide if (!detectorToLocal(row, col, xRow, zCol)) { return false; } - loc.SetCoordinates(xRow, 0., zCol); + loc.SetCoordinates(xRow, NominalYShift, zCol); return true; } @@ -180,28 +198,23 @@ class SegmentationSuperAlpide { float xRow{0.}, zCol{0.}; detectorToLocalUnchecked(row, col, xRow, zCol); - loc.SetCoordinates(xRow, 0., zCol); + loc.SetCoordinates(xRow, NominalYShift, zCol); } private: template - [[nodiscard]] bool isValid(T const row, T const col) const noexcept + [[nodiscard]] constexpr bool isValid(T const row, T const col) const noexcept { if constexpr (std::is_floating_point_v) { // compares in local coord. - namespace cp = constants::pixelarray; - return !static_cast(row <= -cp::width / 2. || cp::width / 2. <= row || col <= -cp::length / 2. || cp::length / 2. 
<= col); + return (-WidthH < row && row < WidthH && -LengthH < col && col < LengthH); } else { // compares in rows/cols - return !static_cast(row < 0 || row >= static_cast(mNRows) || col < 0 || col >= static_cast(mNCols)); + return !static_cast(row < 0 || row >= static_cast(NRows) || col < 0 || col >= static_cast(NCols)); } } - const int mLayer{0}; ///< chip layer - - ClassDef(SegmentationSuperAlpide, 1); + float mRadius; }; -/// Segmentation array -extern const std::array SuperSegmentations; } // namespace o2::its3 #endif diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h index d3efde58d0e0d..fedaad9182cce 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h @@ -21,114 +21,164 @@ #include +// This file defines the design specifications of the chip. +// Each TGeoShape has the following properties +// length: dimension in z-axis +// width: dimension in xy-axes +// color: for visualisation namespace o2::its3::constants { -constexpr float cm{1e+2}; // This is the default unit of TGeo so we use this as scale -constexpr float mu{1e-6 * cm}; -constexpr float mm{1e-3 * cm}; +constexpr double cm{1e+2}; // This is the default unit of TGeo so we use this as scale +constexpr double mu{1e-6 * cm}; +constexpr double mm{1e-3 * cm}; namespace pixelarray { -constexpr float width{9.197 * mm}; -constexpr float length{3.571 * mm}; +constexpr double width{9.197 * mm}; +constexpr double length{3.571 * mm}; constexpr int nCols{156}; constexpr int nRows{442}; constexpr int nPixels{nRows * nCols}; constexpr EColor color{kGreen}; -constexpr float area{width * length}; +constexpr double area{width * length}; } // namespace pixelarray namespace tile { namespace biasing { -constexpr float width{0.06 * mm}; -constexpr float length{3.571 * mm}; +constexpr double width{0.06 * mm}; +constexpr double length{3.571 * mm}; constexpr EColor 
color{kYellow}; static_assert(length == pixelarray::length); } // namespace biasing namespace powerswitches { -constexpr float width{9.257 * mm}; -constexpr float length{0.02 * mm}; -constexpr float z{pixelarray::width}; +constexpr double width{9.257 * mm}; +constexpr double length{0.02 * mm}; +constexpr double z{pixelarray::width}; constexpr EColor color{kBlue}; } // namespace powerswitches namespace readout { -constexpr float width{0.525 * mm}; -constexpr float length{3.591 * mm}; +constexpr double width{0.525 * mm}; +constexpr double length{3.591 * mm}; constexpr EColor color{kMagenta}; static_assert(length == (biasing::length + powerswitches::length)); } // namespace readout -constexpr float length{readout::length}; -constexpr float width{powerswitches::width + readout::width}; +constexpr double length{readout::length}; +constexpr double width{powerswitches::width + readout::width}; } // namespace tile namespace rsu { namespace databackbone { -constexpr float width{9.782 * mm}; -constexpr float length{0.06 * mm}; +constexpr double width{9.782 * mm}; +constexpr double length{0.06 * mm}; constexpr EColor color{kRed}; } // namespace databackbone -constexpr float width{19.564 * mm}; -constexpr float length{21.666 * mm}; +constexpr double width{19.564 * mm}; +constexpr double length{21.666 * mm}; constexpr unsigned int nTiles{12}; } // namespace rsu namespace segment { -constexpr float width{rsu::width}; +constexpr double width{rsu::width}; namespace lec { -constexpr float width{segment::width}; -constexpr float length{4.5 * mm}; +constexpr double width{segment::width}; +constexpr double length{4.5 * mm}; constexpr EColor color{kCyan}; } // namespace lec namespace rec { -constexpr float width{segment::width}; -constexpr float length{1.5 * mm}; +constexpr double width{segment::width}; +constexpr double length{1.5 * mm}; constexpr EColor color{kCyan}; } // namespace rec constexpr unsigned int nRSUs{12}; constexpr unsigned int nTilesPerSegment{nRSUs * rsu::nTiles}; 
-constexpr float length{nRSUs * rsu::length + lec::length + rec::length}; -constexpr float lengthSensitive{nRSUs * rsu::length}; +constexpr double length{(nRSUs * rsu::length) + lec::length + rec::length}; +constexpr double lengthSensitive{nRSUs * rsu::length}; } // namespace segment namespace carbonfoam { // TODO: Waiting for the further information from WP5(Corrado) -constexpr float longeronsWidth{2.0 * mm}; // what is the height of the longerons? -constexpr float longeronsLength{263 * mm}; // from blueprint -constexpr float HringLength{6.0 * mm}; // from blueprint -constexpr float edgeBetwChipAndFoam{1.0 * mm}; // from blueprint but not used cause forms are already overlapping -constexpr float gapBetwHringsLongerons{0.05 * mm}; // from blueprint -constexpr std::array nHoles{11, 11, 11}; // how many holes for each layer? -constexpr std::array radiusHoles{1.0 * mm, 1.0 * mm, 2.0 * mm}; // what is the radius of the holes for each layer? +constexpr double longeronsWidth{2.0 * mm}; // what is the height of the longerons? +constexpr double longeronsLength{263 * mm}; // from blueprint +constexpr double HringLength{6.0 * mm}; // from blueprint +constexpr double edgeBetwChipAndFoam{1.0 * mm}; // from blueprint but not used cause forms are already overlapping +constexpr double gapBetwHringsLongerons{0.05 * mm}; // from blueprint +constexpr std::array nHoles{11, 11, 11}; // how many holes for each layer? +constexpr std::array radiusHoles{1.0 * mm, 1.0 * mm, 2.0 * mm}; // what is the radius of the holes for each layer? 
constexpr EColor color{kGray}; } // namespace carbonfoam +namespace metalstack +{ +constexpr double thickness{5 * mu}; // physical thickness of the copper metal stack +constexpr double length{segment::length}; +constexpr double width{segment::width}; +constexpr EColor color{kBlack}; +} // namespace metalstack +namespace silicon +{ +constexpr double thickness{45 * mu}; // thickness of silicon +constexpr double thicknessIn{(thickness + metalstack::thickness) / 2.}; // inner silicon thickness +constexpr double thicknessOut{(thickness - metalstack::thickness) / 2.}; // outer silicon thickness +} // namespace silicon constexpr unsigned int nLayers{3}; constexpr unsigned int nTotLayers{7}; constexpr unsigned int nSensorsIB{2 * nLayers}; -constexpr float equatorialGap{1 * mm}; +constexpr double equatorialGap{1 * mm}; constexpr std::array nSegments{3, 4, 5}; -constexpr float thickness{50 * mu}; //< Physical Thickness of chip -constexpr float effThickness{66 * mu}; //< Physical thickness + metal substrate -constexpr std::array radii{19.0006 * mm, 25.228 * mm, 31.4554 * mm}; // middle radius e.g. inner radius+thickness/2. 
-constexpr std::array radiiInner{radii[0] - thickness / 2.f, radii[1] - thickness / 2.f, radii[2] - thickness / 2.f}; // inner radius -constexpr std::array radiiOuter{radii[0] + thickness / 2.f, radii[1] + thickness / 2.f, radii[2] + thickness / 2.f}; // inner radius +constexpr double totalThickness{silicon::thickness + metalstack::thickness}; // total chip thickness +constexpr std::array radii{19.0006 * mm, 25.228 * mm, 31.4554 * mm}; // nominal radius +constexpr std::array radiiInner{radii[0] - silicon::thicknessIn, radii[1] - silicon::thicknessIn, radii[2] - silicon::thicknessIn}; // inner silicon radius +constexpr std::array radiiOuter{radii[0] + silicon::thicknessOut, radii[1] + silicon::thicknessOut, radii[2] + silicon::thicknessOut}; // outer silicon radius +constexpr std::array radiiMiddle{(radiiInner[0] + radiiOuter[0]) / 2., (radiiInner[1] + radiiOuter[1]) / 2., (radiiInner[2] + radiiOuter[2]) / 2.}; // middle silicon radius +constexpr double nominalYShift{-metalstack::thickness / 2.}; // shift to position in silicion volume to the chip volume (silicon+metalstack) + +// extra information of pixels and their response functions +namespace pixelarray::pixels +{ +namespace mosaix +{ +constexpr double pitchX{width / static_cast(nRows)}; +constexpr double pitchZ{length / static_cast(nCols)}; +} // namespace mosaix +namespace apts +{ +constexpr double pitchX{15.0 * mu}; +constexpr double pitchZ{15.0 * mu}; +constexpr double responseUpperLimit{10 * mu}; +constexpr double responseYShift{responseUpperLimit - silicon::thicknessOut}; +} // namespace apts +namespace moss +{ +namespace top +{ +constexpr double pitchX{22.5 * mu}; +constexpr double pitchZ{22.5 * mu}; +} // namespace top +namespace bot +{ +constexpr double pitchX{18.0 * mu}; +constexpr double pitchZ{18.0 * mu}; +} // namespace bot +} // namespace moss +} // namespace pixelarray::pixels + namespace detID { -constexpr unsigned int mDetIDs{2 * 12 * 12 * 12}; //< 2 Hemispheres * (3,4,5=12 segments in a layer) 
* 12 RSUs in a segment * 12 Tiles in a RSU -constexpr unsigned int l0IDStart{0}; //< Start DetID layer 0 -constexpr unsigned int l0IDEnd{2 * 3 * 12 * 12 - 1}; //< End First DetID layer 0; inclusive range -constexpr unsigned int l0IDTot{2 * 3 * 12 * 12}; //< Total DetID in Layer 0 -constexpr unsigned int l1IDStart{l0IDEnd + 1}; //< Start DetID layer 1 -constexpr unsigned int l1IDEnd{l1IDStart + 2 * 4 * 12 * 12 - 1}; //< End First DetID layer 1; inclusive range -constexpr unsigned int l1IDTot{2 * 4 * 12 * 12}; //< Total DetID in Layer 1 -constexpr unsigned int l2IDStart{l1IDEnd + 1}; //< Start DetID layer 2 -constexpr unsigned int l2IDEnd{l2IDStart + 2 * 5 * 12 * 12 - 1}; //< End First DetID layer 2; inclusive range -constexpr unsigned int l2IDTot{2 * 5 * 12 * 12}; //< Total DetID in Layer 2 -constexpr unsigned int nChips{l2IDEnd + 1}; //< number of Chips (PixelArrays) in IB +constexpr unsigned int mDetIDs{2 * 12 * 12 * 12}; //< 2 Hemispheres * (3,4,5=12 segments in a layer) * 12 RSUs in a segment * 12 Tiles in a RSU +constexpr unsigned int l0IDStart{0}; //< Start DetID layer 0 +constexpr unsigned int l0IDEnd{(2 * 3 * 12 * 12) - 1}; //< End First DetID layer 0; inclusive range +constexpr unsigned int l0IDTot{2 * 3 * 12 * 12}; //< Total DetID in Layer 0 +constexpr unsigned int l1IDStart{l0IDEnd + 1}; //< Start DetID layer 1 +constexpr unsigned int l1IDEnd{l1IDStart + (2 * 4 * 12 * 12) - 1}; //< End First DetID layer 1; inclusive range +constexpr unsigned int l1IDTot{2 * 4 * 12 * 12}; //< Total DetID in Layer 1 +constexpr unsigned int l2IDStart{l1IDEnd + 1}; //< Start DetID layer 2 +constexpr unsigned int l2IDEnd{l2IDStart + (2 * 5 * 12 * 12) - 1}; //< End First DetID layer 2; inclusive range +constexpr unsigned int l2IDTot{2 * 5 * 12 * 12}; //< Total DetID in Layer 2 +constexpr unsigned int nChips{l2IDEnd + 1}; //< number of Chips (PixelArrays) in IB template inline T getDetID2Layer(T detID) diff --git a/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h 
b/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h index dc0557824e0f8..144711b052a1b 100644 --- a/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h +++ b/Detectors/Upgrades/ITS3/base/src/ITS3BaseLinkDef.h @@ -15,7 +15,6 @@ #pragma link off all classes; #pragma link off all functions; -#pragma link C++ class o2::its3::SegmentationSuperAlpide + ; #pragma link C++ class o2::its3::ITS3Params + ; #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::its3::ITS3Params> + ; diff --git a/Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx b/Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx deleted file mode 100644 index 26ca09f351bec..0000000000000 --- a/Detectors/Upgrades/ITS3/base/src/SegmentationSuperAlpide.cxx +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#include "ITS3Base/SegmentationSuperAlpide.h" - -ClassImp(o2::its3::SegmentationSuperAlpide); - -namespace o2::its3 -{ - -const std::array SuperSegmentations{0, 1, 2}; -} diff --git a/Detectors/Upgrades/ITS3/data/CMakeLists.txt b/Detectors/Upgrades/ITS3/data/CMakeLists.txt new file mode 100644 index 0000000000000..ba8b60c8aa7eb --- /dev/null +++ b/Detectors/Upgrades/ITS3/data/CMakeLists.txt @@ -0,0 +1,25 @@ +# Copyright 2019-2020 CERN and copyright holders of ALICE O2. +# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +# All rights not expressly granted are reserved. 
+# +# This software is distributed under the terms of the GNU General Public +# License v3 (GPL Version 3), copied verbatim in the file "COPYING". +# +# In applying this license CERN does not waive the privileges and immunities +# granted to it by virtue of its status as an Intergovernmental Organization +# or submit itself to any jurisdiction. + +add_custom_target( + GenerateAPTSResponse ALL + COMMAND + ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator -c APTS -i + ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ -o + ${CMAKE_CURRENT_BINARY_DIR}/ + BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root + DEPENDS GenerateAlpideResponse + COMMENT "Generating APTSResponseData.root") +install( + FILES "${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root" + DESTINATION + "${CMAKE_INSTALL_PREFIX}/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/" +) diff --git a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt index bdd0329c55ecd..39e435f0ba2e6 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt @@ -19,8 +19,8 @@ its3_add_macro(CheckHits.C) its3_add_macro(CheckDigitsDensity.C) its3_add_macro(CheckClusterSize.C) its3_add_macro(CompareClusterSize.C) -its3_add_macro(CheckSuperAlpideSegment.C) -its3_add_macro(CheckSuperAlpideSegmentTrans.C) +its3_add_macro(CheckMosaixSegment.C) +its3_add_macro(CheckMosaixSegmentTrans.C) its3_add_macro(CompareClustersAndDigits.C) its3_add_macro(CheckROFs.C) its3_add_macro(CheckTileNumbering.C) diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C b/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C index addaaf47269d2..564b20350b883 100755 --- a/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckClusterSize.C @@ -43,6 +43,7 @@ #include "SimulationDataFormat/MCCompLabel.h" #include 
"SimulationDataFormat/MCEventHeader.h" #include "SimulationDataFormat/MCTrack.h" +#include "ITS3Base/SpecsV2.h" #endif #define ENABLE_UPGRADES #include "SimulationDataFormat/MCTruthContainer.h" @@ -65,7 +66,11 @@ void checkFile(const std::unique_ptr& file); inline auto hist_map(unsigned short id) { - return std::clamp(id, static_cast(0), static_cast(6)) / 2; + int lay = o2::its3::constants::detID::getDetID2Layer(id); + if (lay == -1) { + return nLayers - 1; + } + return lay; } void CheckClusterSize(std::string clusFileName = "o2clus_its.root", @@ -133,7 +138,7 @@ void CheckClusterSize(std::string clusFileName = "o2clus_its.root", std::vector hOtherSecondaryEta; std::vector hOtherSecondaryPt; std::vector hOtherSecondaryPhi; - for (int i = 0; i < 4; ++i) { + for (int i = 0; i < nLayers; ++i) { hPrimary.emplace_back(Form("primary/L%d", i), Form("L%d Primary Cluster Size", i), maxClusterSize, 0, maxClusterSize); hPrimaryEta.emplace_back(Form("primary/EtaL%d", i), Form("L%d Primary Cluster Size vs Eta", i), maxClusterSize, 0, maxClusterSize, 100, -3.0, 3.0); hPrimaryPt.emplace_back(Form("primary/Pt%d", i), Form("L%d Primary Cluster Size vs Pt", i), maxClusterSize, 0, maxClusterSize, 100, 0.0, 10.0); @@ -238,35 +243,39 @@ void CheckClusterSize(std::string clusFileName = "o2clus_its.root", int nROFRec = (int)rofRecVec.size(); auto pattIt = patternsPtr->cbegin(); + int cInvalid{0}, cGood{0}; for (int irof = 0; irof < nROFRec; irof++) { const auto& rofRec = rofRecVec[irof]; - // rofRec.print(); + /*rofRec.print();*/ for (int icl = 0; icl < rofRec.getNEntries(); icl++) { int clEntry = rofRec.getFirstEntry() + icl; const auto& cluster = clusArr[clEntry]; - // cluster.print(); + /*cluster.print();*/ auto pattId = cluster.getPatternID(); auto id = cluster.getSensorID(); + auto ib = o2::its3::constants::detID::isDetITS3(id); int clusterSize{-1}; - if (pattId == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattId)) { + if (pattId == 
o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattId, ib)) { o2::itsmft::ClusterPattern patt(pattIt); clusterSize = patt.getNPixels(); continue; } else { - clusterSize = dict.getNpixels(pattId); + clusterSize = dict.getNpixels(pattId, ib); } const auto& label = (clusLabArr->getLabels(clEntry))[0]; if (!label.isValid() || label.getSourceID() != 0 || !label.isCorrect()) { + ++cInvalid; continue; } + ++cGood; const int trackID = label.getTrackID(); int evID = label.getEventID(); const auto& pInfo = info[evID][trackID]; - if (id > 6) { + if (!o2::its3::constants::detID::isDetITS3(id)) { hOuterBarrel.Fill(clusterSize); } @@ -332,6 +341,7 @@ void CheckClusterSize(std::string clusFileName = "o2clus_its.root", } } } + std::cout << "Good labels: " << cGood << "; invalid: " << cInvalid << '\n'; std::cout << "Done measuring cluster sizes:" << std::endl; for (int i = 0; i < nLayers; ++i) { std::cout << "* Layer " << i << ":\n"; diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C b/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C index af03ed7a9877b..006271a1ea7bd 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckClustersITS3.C @@ -25,7 +25,7 @@ #define ENABLE_UPGRADES #include "DetectorsCommonDataFormats/DetID.h" #include "ITSMFTBase/SegmentationAlpide.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" #include "ITSBase/GeometryTGeo.h" #include "DataFormatsITSMFT/CompCluster.h" @@ -50,22 +50,24 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", using namespace o2::base; using namespace o2::its; - using SuperSegmentation = o2::its3::SegmentationSuperAlpide; + using MosaixSegmentation = o2::its3::SegmentationMosaix; using Segmentation = o2::itsmft::SegmentationAlpide; using o2::itsmft::CompClusterExt; using o2::itsmft::Hit; using ROFRec = o2::itsmft::ROFRecord; using MC2ROF = 
o2::itsmft::MC2ROFRecord; using HitVec = std::vector; - using MC2HITS_map = std::unordered_map; // maps (track_ID<<16 + chip_ID) to entry in the hit vector + using MC2HITS_map = std::unordered_map; // maps (track_ID<<32 + chip_ID) to entry in the hit vector + std::array mMosaixSegmentations{0, 1, 2}; std::vector hitVecPool; std::vector mc2hitVec; - ULong_t cPattValid{0}, cPattInvalid{0}, cLabelInvalid{0}, cNoMC{0}; + ULong_t cPattValidIB{0}, cPattInvalidIB{0}, cLabelInvalidIB{0}, cNoMCIB{0}; + ULong_t cPattValidOB{0}, cPattInvalidOB{0}, cLabelInvalidOB{0}, cNoMCOB{0}; TFile fout("CheckClusters.root", "recreate"); - TNtuple nt("ntc", "cluster ntuple", "ev:lab:hlx:hlz:hgx:hgz:tx:tz:cgx:cgy:cgz:clx:cly:clz:dx:dy:dz:ex:ez:patid:rof:npx:id"); + TNtuple nt("ntc", "cluster ntuple", "ev:lab:hlx:hlz:hgx:hgz:tx:tz:cgx:cgy:cgz:clx:cly:clz:dx:dy:dz:ex:ez:patid:rof:npx:id:eta:row:col:lay"); // Geometry o2::base::GeometryManager::loadGeometry(inputGeom); @@ -102,6 +104,7 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", } else { LOG(info) << "Running without dictionary !"; } + dict.print(); // ROFrecords std::vector rofRecVec, *rofRecVecP = &rofRecVec; @@ -174,20 +177,18 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", auto isIB = o2::its3::constants::detID::isDetITS3(chipID); auto layer = o2::its3::constants::detID::getDetID2Layer(chipID); auto clusterSize{-1}; - if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID)) { + if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID, isIB)) { o2::itsmft::ClusterPattern patt(pattIt); locC = dict.getClusterCoordinates(cluster, patt, false); LOGP(debug, "I am invalid and I am on chip {}", chipID); - ++cPattInvalid; + (isIB) ? ++cPattInvalidIB : ++cPattInvalidOB; continue; } else { locC = dict.getClusterCoordinates(cluster); - errX = dict.getErrX(pattID); - errZ = dict.getErrZ(pattID); - errX *= (isIB) ? 
SuperSegmentation::mPitchRow : Segmentation::PitchRow; - errZ *= (isIB) ? SuperSegmentation::mPitchCol : Segmentation::PitchCol; - npix = dict.getNpixels(pattID); - ++cPattValid; + errX = dict.getErrX(pattID, isIB); + errZ = dict.getErrZ(pattID, isIB); + npix = dict.getNpixels(pattID, isIB); + (isIB) ? ++cPattValidIB : ++cPattValidOB; } // Transformation to the local --> global @@ -195,7 +196,7 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", const auto& lab = (clusLabArr->getLabels(clEntry))[0]; if (!lab.isValid()) { - ++cLabelInvalid; + (isIB) ? ++cLabelInvalidIB : ++cLabelInvalidOB; continue; } @@ -207,7 +208,7 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", auto hitEntry = mc2hit.find(key); if (hitEntry == mc2hit.end()) { LOG(debug) << "Failed to find MC hit entry for Tr" << trID << " chipID" << chipID; - ++cNoMC; + (isIB) ? ++cNoMCIB : ++cNoMCOB; continue; } const auto& hit = (*hitArray)[hitEntry->second]; @@ -234,25 +235,22 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", } else { // compare in local flat coordinates float xFlatEnd{0.}, yFlatEnd{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(locH.X(), locH.Y(), xFlatEnd, yFlatEnd); + mMosaixSegmentations[layer].curvedToFlat(locH.X(), locH.Y(), xFlatEnd, yFlatEnd); locH.SetXYZ(xFlatEnd, yFlatEnd, locH.Z()); float xFlatSta{0.}, yFlatSta{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(locHsta.X(), locHsta.Y(), xFlatSta, yFlatSta); + mMosaixSegmentations[layer].curvedToFlat(locHsta.X(), locHsta.Y(), xFlatSta, yFlatSta); locHsta.SetXYZ(xFlatSta, yFlatSta, locHsta.Z()); - // recalculate x/y in flat - // x0 = xFlatSta, dltx = xFlatEnd - x0; - // y0 = yFlatSta, dlty = yFlatEnd - y0; - // r = (0.5 * (SuperSegmentation::mSensorLayerThickness - SuperSegmentation::mSensorLayerThicknessEff) - y0) / dlty; - // locH.SetXYZ(x0 + r * dltx, y0 + r * dlty, z0 + r * dltz); // not really precise, but okish locH.SetXYZ(0.5f * (locH.X() 
+ locHsta.X()), 0.5f * (locH.Y() + locHsta.Y()), 0.5f * (locH.Z() + locHsta.Z())); - o2::its3::SuperSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlatSta, yFlatSta); + mMosaixSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlatSta, yFlatSta); locC.SetXYZ(xFlatSta, yFlatSta, locC.Z()); } + float theta = std::acos(gloC.Z() / gloC.R()); + float eta = -std::log(std::tan(theta / 2)); - std::array data = {(float)lab.getEventID(), (float)trID, + std::array data = {(float)lab.getEventID(), (float)trID, locH.X(), locH.Z(), gloH.X(), gloH.Z(), dltx / dlty, dltz / dlty, @@ -260,13 +258,15 @@ void CheckClustersITS3(const std::string& clusfile = "o2clus_its.root", locC.X(), locC.Y(), locC.Z(), locC.X() - locH.X(), locC.Y() - locH.Y(), locC.Z() - locH.Z(), errX, errZ, (float)pattID, - (float)rofRec.getROFrame(), (float)npix, (float)chipID}; + (float)rofRec.getROFrame(), (float)npix, (float)chipID, eta, (float)cluster.getRow(), (float)cluster.getCol(), (float)layer}; nt.Fill(data.data()); } } - LOGP(info, "There were {} valid PatternIDs and {} ({:.1f}%) invalid ones", cPattValid, cPattInvalid, ((float)cPattInvalid / (float)(cPattInvalid + cPattValid)) * 100); - LOGP(info, "There were {} invalid Labels and {} with No MC Hit information ", cLabelInvalid, cNoMC); + LOGP(info, "IB {} valid PatternIDs and {} ({:.1f}%) invalid ones", cPattValidIB, cPattInvalidIB, ((float)cPattInvalidIB / (float)(cPattInvalidIB + cPattValidIB)) * 100); + LOGP(info, "IB {} invalid Labels and {} with No MC Hit information ", cLabelInvalidIB, cNoMCIB); + LOGP(info, "OB {} valid PatternIDs and {} ({:.1f}%) invalid ones", cPattValidOB, cPattInvalidOB, ((float)cPattInvalidOB / (float)(cPattInvalidOB + cPattValidOB)) * 100); + LOGP(info, "OB {} invalid Labels and {} with No MC Hit information ", cLabelInvalidOB, cNoMCOB); auto canvCgXCgY = new TCanvas("canvCgXCgY", "", 1600, 1600); canvCgXCgY->Divide(2, 2); @@ -292,6 +292,18 @@ void CheckClustersITS3(const std::string& clusfile =
"o2clus_its.root", nt.Draw("dx:dz>>h_dx_vs_dz_OB_z(1000, -0.01, 0.01, 1000, -0.01, 0.01)", "id >= 3456 && abs(cgz) < 2", "colz"); canvdXdZ->SaveAs("it3clusters_dx_vs_dz.pdf"); + auto canvCHXZ = new TCanvas("canvCHXZ", "", 1600, 1600); + canvCHXZ->Divide(2, 2); + canvCHXZ->cd(1); + nt.Draw("(cgx-hgx)*10000:eta>>h_chx_IB(101,-1.4,1.4,101,-50,50)", "id<3456", "prof"); + canvCHXZ->cd(2); + nt.Draw("(cgx-hgx)*10000:eta>>h_chx_OB(101,-1.4,1.4,101,-50,50)", "id>=3456", "prof"); + canvCHXZ->cd(3); + nt.Draw("(cgz-hgz)*10000:eta>>h_chz_IB(101,-1.4,1.4,101,-50,50)", "id<3456", "prof"); + canvCHXZ->cd(4); + nt.Draw("(cgz-hgz)*10000:eta>>h_chz_OB(101,-1.4,1.4,101,-50,50)", "id>=3456", "prof"); + canvCHXZ->SaveAs("it3clusters_xz_eta.pdf"); + auto c1 = new TCanvas("p1", "pullX"); c1->cd(); c1->SetLogy(); diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C index 0c8d9c3bdfbec..67b75e33bc430 100755 --- a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsDensity.C @@ -37,7 +37,7 @@ #include "ITS3Base/SpecsV2.h" #include "CommonConstants/MathConstants.h" #include "DataFormatsITSMFT/Digit.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "DetectorsBase/GeometryManager.h" #include "ITSBase/GeometryTGeo.h" #include "fairlogger/Logger.h" @@ -56,7 +56,7 @@ constexpr double qedRate = qedXSection / hadXSection * interaction_rate; // Hz constexpr double qedFactor = qedRate * integration_time; // a.u.
using o2::itsmft::Digit; namespace its3 = o2::its3; -using SSAlpide = its3::SegmentationSuperAlpide; +using Mosaix = its3::SegmentationMosaix; void checkFile(const std::unique_ptr& file); @@ -64,7 +64,7 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig { gROOT->SetBatch(batch); LOGP(debug, "Checking Digit ITS3 Density"); - // Vars + std::array mMosaixSegmentations{0, 1, 2}; // Geometry o2::base::GeometryManager::loadGeometry(geomFileName); @@ -80,8 +80,8 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig digitTree->SetBranchAddress("IT3Digit", &digitArrayPtr); std::array hists; for (int i{3}; i--;) { - double rmin = its3::constants::radii[i] - its3::constants::thickness; - double rmax = its3::constants::radii[i] + its3::constants::thickness; + double rmin = its3::constants::radiiInner[i]; + double rmax = its3::constants::radiiOuter[i]; hists[i] = new TH2F(Form("h_digits_dens_L%d", i), Form("Digit Density L%d in %d Events; Z_{Glo} [cm]; R_{Glo} [cm]", i, nEvents), 100, -15, 15, 100, rmin, rmax); } @@ -103,8 +103,8 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig // goto curved coordinates float x{0.f}, y{0.f}, z{0.f}; float xFlat{0.f}, yFlat{0.f}; - its3::SuperSegmentations[layer].detectorToLocal(row, col, xFlat, z); - its3::SuperSegmentations[layer].flatToCurved(xFlat, 0., x, y); + mMosaixSegmentations[layer].detectorToLocal(row, col, xFlat, z); + mMosaixSegmentations[layer].flatToCurved(xFlat, 0., x, y); const o2::math_utils::Point3D locD(x, y, z); const auto gloD = gman->getMatrixL2G(id)(locD); // convert to global const auto R = std::hypot(gloD.X(), gloD.Y()); @@ -115,7 +115,7 @@ void CheckDigitsDensity(int nEvents = 10000, std::string digitFileName = "it3dig std::unique_ptr oFile(TFile::Open("checkDigitsDensity.root", "RECREATE")); checkFile(oFile); for (const auto& h : hists) { - h->Scale(1. 
/ (SSAlpide::mPitchCol * SSAlpide::mPitchRow * nEvents)); + h->Scale(1. / (Mosaix::PitchCol * Mosaix::PitchRow * nEvents)); h->ProjectionX()->Write(); h->Write(); } diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C index 16aa3adc8101c..1dc4a4e2d6b47 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C @@ -27,7 +27,7 @@ #define ENABLE_UPGRADES #include "ITSBase/GeometryTGeo.h" #include "DataFormatsITSMFT/Digit.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITSMFTBase/SegmentationAlpide.h" #include "ITSMFTSimulation/Hit.h" #include "MathUtils/Utils.h" @@ -51,6 +51,7 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil using o2::itsmft::Hit; using o2::itsmft::SegmentationAlpide; + std::array mMosaixSegmentations{0, 1, 2}; TFile* f = TFile::Open("CheckDigits.root", "recreate"); TNtuple* nt = new TNtuple("ntd", "digit ntuple", "id:x:y:z:rowD:colD:rowH:colH:xlH:zlH:xlcH:zlcH:dx:dz"); @@ -165,8 +166,8 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil if (isIB) { // ITS3 IB float xFlat{0.f}, yFlat{0.f}; - its3::SuperSegmentations[layer].detectorToLocal(ix, iz, xFlat, z); - its3::SuperSegmentations[layer].flatToCurved(xFlat, 0., x, y); + mMosaixSegmentations[layer].detectorToLocal(ix, iz, xFlat, z); + mMosaixSegmentations[layer].flatToCurved(xFlat, 0., x, y); } else { // ITS2 OB SegmentationAlpide::detectorToLocal(ix, iz, x, z); @@ -184,7 +185,7 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil const auto* mc2hit = &mc2hitVec[lab.getEventID()]; const auto& hitEntry = mc2hit->find(key); if (hitEntry == mc2hit->end()) { - LOGP(error, "Failed to find MC hit entry for Tr {} chipID {}", trID, chipID); + LOGP(debug, "Failed to find MC hit entry for Tr {} chipID {}", trID, chipID); 
continue; } @@ -196,18 +197,18 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil auto xyzLocE = gman->getMatrixL2G(chipID) ^ (hit.GetPos()); // inverse conversion from global to local auto xyzLocS = gman->getMatrixL2G(chipID) ^ (hit.GetPosStart()); o2::math_utils::Vector3D xyzLocM; - xyzLocM.SetCoordinates(0.5 * (xyzLocE.X() + xyzLocS.X()), 0.5 * (xyzLocE.Y() + xyzLocS.Y()), 0.5 * (xyzLocE.Z() + xyzLocS.Z())); + xyzLocM.SetCoordinates(0.5f * (xyzLocE.X() + xyzLocS.X()), 0.5f * (xyzLocE.Y() + xyzLocS.Y()), 0.5f * (xyzLocE.Z() + xyzLocS.Z())); float xlc = 0., zlc = 0.; int row = 0, col = 0; if (isIB) { float xFlat{0.}, yFlat{0.}; - its3::SuperSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), xFlat, yFlat); xyzLocM.SetCoordinates(xFlat, yFlat, xyzLocM.Z()); - its3::SuperSegmentations[layer].curvedToFlat(locD.X(), locD.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locD.X(), locD.Y(), xFlat, yFlat); locD.SetCoordinates(xFlat, yFlat, locD.Z()); - if (auto v1 = !its3::SuperSegmentations[layer].localToDetector(xyzLocM.X(), xyzLocM.Z(), row, col), - v2 = !its3::SuperSegmentations[layer].detectorToLocal(row, col, xlc, zlc); + if (auto v1 = !mMosaixSegmentations[layer].localToDetector(xyzLocM.X(), xyzLocM.Z(), row, col), + v2 = !mMosaixSegmentations[layer].detectorToLocal(row, col, xlc, zlc); v1 || v2) { continue; } @@ -223,7 +224,7 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil (isIB) ? 
++nDigitFilledIB : ++nDigitFilledOB; } // end loop on digits array - } // end loop on ROFRecords array + } // end loop on ROFRecords array auto canvXY = new TCanvas("canvXY", "", 1600, 1600); canvXY->Divide(2, 2); diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckHits.C b/Detectors/Upgrades/ITS3/macros/test/CheckHits.C index 7833b7c205f4a..00ac0a992ba39 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckHits.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckHits.C @@ -31,7 +31,6 @@ #define ENABLE_UPGRADES #include "CommonConstants/MathConstants.h" -#include "ITS3Base/SegmentationSuperAlpide.h" #include "ITS3Base/SpecsV2.h" #include "ITSMFTSimulation/Hit.h" #include "SimulationDataFormat/MCTrack.h" @@ -39,7 +38,6 @@ namespace it3c = o2::its3::constants; namespace it3d = it3c::detID; -using SSAlpide = o2::its3::SegmentationSuperAlpide; using o2::itsmft::Hit; constexpr double interaction_rate = 50e3; // Hz diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegment.C b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegment.C similarity index 78% rename from Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegment.C rename to Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegment.C index 76ac02959415d..12e1ab3a7280d 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegment.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegment.C @@ -9,9 +9,6 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file CheckTracksITS3.C -/// \brief Simple macro to check ITS3 tracks - #if !defined(__CLING__) || defined(__ROOTCLING__) #include "Rtypes.h" @@ -24,14 +21,6 @@ #include "TGLViewer.h" #include "TMath.h" -#include "TEveGeoNode.h" -#include "TEveManager.h" -#include "TEveViewer.h" -#include "TEvePointSet.h" -#include "TEveTrackPropagator.h" -#include "TEveTrack.h" -#include "TEveVSDStructs.h" - #include "TFile.h" #include "TGraph.h" #include "TH1D.h" @@ -49,40 +38,41 @@ #include "MathUtils/Cartesian.h" #include "ITS3Base/SpecsV2.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITSBase/GeometryTGeo.h" #endif using gITS = o2::its::GeometryTGeo; -void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, - bool isTestFlatToCurved = false, - bool isTestLocalToGlobal = false) +void CheckMosaixSegment(bool isTestDetectorToLocal = false, + bool isTestFlatToCurved = false, + bool isTestLocalToGlobal = false) { using namespace o2::its3; - static constexpr unsigned int mNCols{SegmentationSuperAlpide::mNCols}; - static constexpr unsigned int mNRows{SegmentationSuperAlpide::mNRows}; + static constexpr unsigned int mNCols{SegmentationMosaix::NCols}; + static constexpr unsigned int mNRows{SegmentationMosaix::NRows}; static constexpr unsigned int nPixels{mNCols * mNRows}; + std::array mMosaixSegmentations{0, 1, 2}; if (isTestDetectorToLocal || isTestFlatToCurved) { namespace cp = constants::pixelarray; - TH2I* h_raw_col = new TH2I("h_raw_col", "raws and cols sown;raw;col", mNRows, 0, mNRows, mNCols, 0, mNCols); - TH2D* h_xLocal_zLocal = new TH2D("h_xLocal_zLocal", "x and z from raws and cols;xLocal;zLocal", mNRows, -cp::length / 2, cp::length / 2, mNCols, -cp::width / 2, cp::width / 2); - TH2I* h_raw_col_translate = new TH2I("h_raw_col_translate", "raws and cols from x and z;raw;col", mNRows, 0, mNRows, mNCols, 0, mNCols); - TGraph* g_raw_xLocal = new TGraph(); - g_raw_xLocal->SetMarkerStyle(20); - 
g_raw_xLocal->SetMarkerSize(0.2); + TH2I* h_row_col = new TH2I("h_row_col", "rows and cols sown;row;col", mNRows, 0, mNRows, mNCols, 0, mNCols); + TH2D* h_xLocal_zLocal = new TH2D("h_xLocal_zLocal", "x and z from rows and cols;xLocal;zLocal", mNRows, -cp::length / 2, cp::length / 2, mNCols, -cp::width / 2, cp::width / 2); + TH2I* h_row_col_translate = new TH2I("h_row_col_translate", "rows and cols from x and z;row;col", mNRows, 0, mNRows, mNCols, 0, mNCols); + TGraph* g_row_xLocal = new TGraph(); + g_row_xLocal->SetMarkerStyle(20); + g_row_xLocal->SetMarkerSize(0.2); TGraph* g_col_zLocal = new TGraph(); g_col_zLocal->SetMarkerStyle(20); g_col_zLocal->SetMarkerSize(0.2); - TGraph* g_raw_xLocal_translate = new TGraph(); - g_raw_xLocal_translate->SetMarkerStyle(20); - g_raw_xLocal_translate->SetMarkerSize(0.2); + TGraph* g_row_xLocal_translate = new TGraph(); + g_row_xLocal_translate->SetMarkerStyle(20); + g_row_xLocal_translate->SetMarkerSize(0.2); TGraph* g_col_zLocal_translate = new TGraph(); g_col_zLocal_translate->SetMarkerStyle(20); - SegmentationSuperAlpide seg(0); + SegmentationMosaix seg(0); int nPoint = 0; for (UInt_t i = 0; i < mNRows; ++i) { for (UInt_t j = 0; j < mNCols; ++j) { @@ -92,16 +82,16 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, int col_trans = -1; seg.detectorToLocal(i, j, xLocal, zLocal); seg.localToDetector(xLocal, zLocal, row_trans, col_trans); - g_raw_xLocal->SetPoint(nPoint, i, xLocal); + g_row_xLocal->SetPoint(nPoint, i, xLocal); g_col_zLocal->SetPoint(nPoint, j, zLocal); - g_raw_xLocal_translate->SetPoint(nPoint, xLocal, row_trans); + g_row_xLocal_translate->SetPoint(nPoint, xLocal, row_trans); g_col_zLocal_translate->SetPoint(nPoint++, zLocal, col_trans); bool pattern = ((i >= 50 && i <= 100) || (i >= 250 && i <= 350)) && ((j >= 30 && j <= 70) || (j >= 100 && j <= 120)); if (pattern) { - h_raw_col->Fill(i, j); + h_row_col->Fill(i, j); h_xLocal_zLocal->Fill(xLocal, zLocal); - h_raw_col_translate->Fill(row_trans, 
col_trans); + h_row_col_translate->Fill(row_trans, col_trans); } } } @@ -110,29 +100,30 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, // gStyle->SetPalette(kCMYK); c1->Divide(3, 1); c1->cd(1); - h_raw_col->Draw("colz"); + h_row_col->Draw("colz"); c1->cd(2); h_xLocal_zLocal->Draw("colz"); c1->cd(3); - h_raw_col_translate->Draw("colz"); + h_row_col_translate->Draw("colz"); TCanvas* c2 = new TCanvas("c2", "c2", 1600, 400); c2->Divide(4, 1); c2->cd(1); - g_raw_xLocal->SetTitle("xLocal vs raw;raw;xLocal"); - g_raw_xLocal->Draw("same ap"); + g_row_xLocal->SetTitle("xLocal vs row;row;xLocal"); + g_row_xLocal->Draw("same ap"); c2->cd(2); g_col_zLocal->SetTitle("zLocal vs col;col;zLocal"); g_col_zLocal->Draw("same ap"); c2->cd(3); - g_raw_xLocal_translate->SetTitle("raw_translate vs xLocal;xLocal;raw_translate"); - g_raw_xLocal_translate->Draw("same ap"); + g_row_xLocal_translate->SetTitle("row_translate vs xLocal;xLocal;row_translate"); + g_row_xLocal_translate->Draw("same ap"); c2->cd(4); g_col_zLocal_translate->SetTitle("col_translate vs zLocal;zLocal;col_translate"); g_col_zLocal_translate->Draw("same ap"); } if (isTestLocalToGlobal) { + o2::base::GeometryManager::loadGeometry(); namespace cp = constants::pixelarray; TH2D* h_xCurved_yCurved = new TH2D("h_xCurved_yCurved", "from flat to curved;x;y", 200, -1, 4, 200, -2, 3); TH2D* h_xFlat_yFlat = new TH2D("h_xFlat_yFlat", "from curved to flat ;x;y", 200, -1, 4, 200, -2, 3); @@ -170,11 +161,11 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, float xLocal_translate = 0; float yLocal_translate = 0; - SuperSegmentations[iLayer].detectorToLocal(row, col, xLocal, zLocal); - SuperSegmentations[iLayer].flatToCurved(xLocal, 0., xCurved, yCurved); + mMosaixSegmentations[iLayer].detectorToLocal(row, col, xLocal, zLocal); + mMosaixSegmentations[iLayer].flatToCurved(xLocal, 0., xCurved, yCurved); double posLocal[3] = {xCurved, yCurved, zLocal}; double posGlobal[3] = {0, 0, 0}; - 
SuperSegmentations[iLayer].curvedToFlat(xCurved, yCurved, xLocal_translate, yLocal_translate); + mMosaixSegmentations[iLayer].curvedToFlat(xCurved, yCurved, xLocal_translate, yLocal_translate); matrix->LocalToMaster(posLocal, posGlobal); h_xCurved_yCurved->Fill(xLocal, 0); @@ -195,8 +186,7 @@ void CheckSuperAlpideSegment(bool isTestDetectorToLocal = false, TArc* arc[3]; h_xCurved_yCurved->Draw("colz"); for (int i = 0; i < 3; i++) { - arc[i] = new TArc(-0, 0, constants::radii[i] + constants::thickness / 2., -5, 40); - arc[i]->SetLineColor(kRed); + arc[i] = new TArc(-0, 0, constants::radiiOuter[i], -5, 40); arc[i]->SetFillStyle(0); } diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegmentTrans.C b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegmentTrans.C similarity index 85% rename from Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegmentTrans.C rename to Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegmentTrans.C index 64937f2ad2855..1a723bd6017bb 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckSuperAlpideSegmentTrans.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckMosaixSegmentTrans.C @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file CheckSuperAlpideSegmentTrans.C +/// \file CheckMosaixSegmentTrans.C /// \brief Simple macro to check ITS3 Alpide Trans #if !defined(__CLING__) || defined(__ROOTCLING__) @@ -26,7 +26,7 @@ #include "TStyle.h" #include "TTree.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" #endif @@ -37,10 +37,11 @@ constexpr float PI = 3.14159274101257324e+00f; constexpr float Rad2Deg = 180.f / PI; constexpr float Deg2Rad = 1. 
/ Rad2Deg; -constexpr auto nRows{SegmentationSuperAlpide::mNRows}; -constexpr auto nCols{SegmentationSuperAlpide::mNCols}; -constexpr auto fLength{SegmentationSuperAlpide::mLength}; -constexpr auto fWidth{SegmentationSuperAlpide::mWidth}; +constexpr auto nRows{SegmentationMosaix::NRows}; +constexpr auto nCols{SegmentationMosaix::NCols}; +constexpr auto fLength{SegmentationMosaix::Length}; +constexpr auto fWidth{SegmentationMosaix::Width}; +const std::array mMosaixSegmentations{0, 1, 2}; TH2* DrawReverseBins(TH2* h) { @@ -83,13 +84,13 @@ void DrawXAxisCol(TH1* h) newaxis->Draw(); } -void CheckSuperAlpideSegmentTrans() +void CheckMosaixSegmentTrans() { gStyle->SetOptStat(1111111); for (int iLayer{0}; iLayer < 3; ++iLayer) { - float r_inner = constants::radii[iLayer] - constants::thickness / 2.; - float r_outer = constants::radii[iLayer] + constants::thickness / 2.; + float r_inner = constants::radiiInner[iLayer]; + float r_outer = constants::radiiOuter[iLayer]; float phiReadout_inner = constants::tile::readout::width / r_inner * Rad2Deg; float phiReadout_outer = @@ -140,10 +141,10 @@ void CheckSuperAlpideSegmentTrans() g_arc_inner->AddPoint(x_inner, y_inner); g_arc_outer->AddPoint(x_outer, y_outer); // Test Segmentation - SuperSegmentations[iLayer].curvedToFlat(x_inner, y_inner, x_inner_flat, y_inner_flat); - SuperSegmentations[iLayer].flatToCurved(x_inner_flat, y_inner_flat, x_inner_curved, y_inner_curved); - SuperSegmentations[iLayer].curvedToFlat(x_outer, y_outer, x_outer_flat, y_outer_flat); - SuperSegmentations[iLayer].flatToCurved(x_outer_flat, y_outer_flat, x_outer_curved, y_outer_curved); + mMosaixSegmentations[iLayer].curvedToFlat(x_inner, y_inner, x_inner_flat, y_inner_flat); + mMosaixSegmentations[iLayer].flatToCurved(x_inner_flat, y_inner_flat, x_inner_curved, y_inner_curved); + mMosaixSegmentations[iLayer].curvedToFlat(x_outer, y_outer, x_outer_flat, y_outer_flat); + mMosaixSegmentations[iLayer].flatToCurved(x_outer_flat, y_outer_flat, x_outer_curved, 
y_outer_curved); g_arc_inner_flat->AddPoint(x_inner_flat, y_inner_flat); g_arc_outer_flat->AddPoint(x_outer_flat, y_outer_flat); h_f2c_res->Fill(x_inner - x_inner_curved, y_inner - y_inner_curved); @@ -201,15 +202,12 @@ void CheckSuperAlpideSegmentTrans() for (int iCol{0}; iCol < nCols; ++iCol) { float xRow{0}, zCol{0}; int iiRow{0}, iiCol{0}; - auto v1 = - SuperSegmentations[iLayer].detectorToLocal(iRow, iCol, xRow, zCol); - auto v2 = SuperSegmentations[iLayer].localToDetector(xRow, zCol, iiRow, - iiCol); - // Info("L2D", - // "iRow=%d, iCol=%d --d2l(%s)--> xRow=%f, zCol=%f --l2d(%s)--> " - // "iiRow=%d, iiCol=%d", - // iRow, iCol, v1 ? "good" : "bad", xRow, zCol, v2 ? "good" : - // "bad", iiRow, iiCol); + auto v1 = mMosaixSegmentations[iLayer].detectorToLocal(iRow, iCol, xRow, zCol); + auto v2 = mMosaixSegmentations[iLayer].localToDetector(xRow, zCol, iiRow, iiCol); + Info("L2D", + "iRow=%d, iCol=%d --d2l(%s)--> xRow=%f, zCol=%f --l2d(%s)--> " + "iiRow=%d, iiCol=%d", + iRow, iCol, v1 ? "good" : "bad", xRow, zCol, v2 ? 
"good" : "bad", iiRow, iiCol); if (!v1 || !v2) { Error("LOOP", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx Layer %d", iLayer); return; diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C b/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C index 3a01960b1859d..220b1d39ad42b 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckTileNumbering.C @@ -25,7 +25,7 @@ #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/SpecsV2.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "MathUtils/Cartesian.h" #include "MathUtils/Utils.h" #include "DataFormatsITSMFT/NoiseMap.h" @@ -102,6 +102,8 @@ void CheckTileNumbering(const std::string& inputGeom = "", const std::string& de Int_t colors[NRGBs] = {kWhite, kRed, kGray}; TColor::SetPalette(NRGBs, colors, 1.0); + std::array mMosaixSegmentations{0, 1, 2}; + const float phiOffsetL0 = std::asin(o2::its3::constants::equatorialGap / 2.f / o2::its3::constants::radii[0]); const float phiOffsetL1 = std::asin(o2::its3::constants::equatorialGap / 2.f / o2::its3::constants::radii[1]); const float phiOffsetL2 = std::asin(o2::its3::constants::equatorialGap / 2.f / o2::its3::constants::radii[2]); @@ -142,7 +144,7 @@ void CheckTileNumbering(const std::string& inputGeom = "", const std::string& de for (unsigned int iDet{0}; iDet <= o2::its3::constants::detID::l2IDEnd; ++iDet) { int sensorID = o2::its3::constants::detID::getSensorID(iDet); int layerID = o2::its3::constants::detID::getDetID2Layer(iDet); - o2::its3::SuperSegmentations[layerID].flatToCurved(xFlat, 0., x, y); + mMosaixSegmentations[layerID].flatToCurved(xFlat, 0., x, y); o2::math_utils::Point3D locC{x, y, z}; auto gloC = gman->getMatrixL2G(iDet)(locC); float phi = o2::math_utils::to02Pi(std::atan2(gloC.Y(), gloC.X())); diff --git a/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C 
index f151de72c8ac1..c124481cc6f76 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C +++ b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigits.C @@ -31,7 +31,7 @@ #include "DataFormatsITSMFT/ROFRecord.h" #include "DetectorsCommonDataFormats/DetID.h" #include "DetectorsCommonDataFormats/DetectorNameConf.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" #include "ITS3Reconstruction/TopologyDictionary.h" #include "ITSBase/GeometryTGeo.h" @@ -86,7 +86,6 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", using namespace o2::base; using o2::itsmft::Hit; - using SuperSegmentation = o2::its3::SegmentationSuperAlpide; using Segmentation = o2::itsmft::SegmentationAlpide; using o2::itsmft::CompClusterExt; using ROFRec = o2::itsmft::ROFRecord; @@ -97,6 +96,8 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", std::vector hitVecPool; std::vector mc2hitVec; + std::array mMosaixSegmentations{0, 1, 2}; + // Geometry o2::base::GeometryManager::loadGeometry(inputGeom); auto gman = o2::its::GeometryTGeo::Instance(); @@ -124,9 +125,9 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", TFile fileC(clusfile.data()); auto* clusTree = dynamic_cast(fileC.Get("o2sim")); std::vector* clusArr = nullptr; - clusTree->SetBranchAddress("IT3ClusterComp", &clusArr); + clusTree->SetBranchAddress("ITSClusterComp", &clusArr); std::vector* patternsPtr = nullptr; - auto pattBranch = clusTree->GetBranch("IT3ClusterPatt"); + auto pattBranch = clusTree->GetBranch("ITSClusterPatt"); if (pattBranch != nullptr) { pattBranch->SetAddress(&patternsPtr); } @@ -144,14 +145,14 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", // ROFrecords std::vector rofRecVec, *rofRecVecP = &rofRecVec; - clusTree->SetBranchAddress("IT3ClustersROF", &rofRecVecP); + clusTree->SetBranchAddress("ITSClustersROF", &rofRecVecP); // 
Cluster MC labels o2::dataformats::MCTruthContainer* clusLabArr = nullptr; std::vector mc2rofVec, *mc2rofVecP = &mc2rofVec; - if ((hitTree != nullptr) && (clusTree->GetBranch("IT3ClusterMCTruth") != nullptr)) { - clusTree->SetBranchAddress("IT3ClusterMCTruth", &clusLabArr); - clusTree->SetBranchAddress("IT3ClustersMC2ROF", &mc2rofVecP); + if ((hitTree != nullptr) && (clusTree->GetBranch("ITSClusterMCTruth") != nullptr)) { + clusTree->SetBranchAddress("ITSClusterMCTruth", &clusLabArr); + clusTree->SetBranchAddress("ITSClustersMC2ROF", &mc2rofVecP); } clusTree->GetEntry(0); @@ -188,7 +189,7 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", std::vector data(nChips); for (int iChip{0}; iChip < nChips; ++iChip) { auto& dat = data[iChip]; - int col{o2::its3::SegmentationSuperAlpide::mNCols}, row{o2::its3::SegmentationSuperAlpide::mNRows}; + int col{o2::its3::SegmentationMosaix::NCols}, row{o2::its3::SegmentationMosaix::NRows}; if (!o2::its3::constants::detID::isDetITS3(iChip)) { col = o2::itsmft::SegmentationAlpide::NCols; row = o2::itsmft::SegmentationAlpide::NRows; @@ -259,7 +260,7 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", const auto pattID = cluster.getPatternID(); const auto isIB = o2::its3::constants::detID::isDetITS3(chipID); const auto layer = gman->getLayer(chipID); - if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID)) { + if (pattID == o2::itsmft::CompCluster::InvalidPatternID || dict.isGroup(pattID, isIB)) { continue; } const auto& lab = (clusLabArr->getLabels(clEntry))[0]; @@ -282,9 +283,9 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", o2::math_utils::Point3D locHMiddle; if (isIB) { float xFlat{0.}, yFlat{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(locHEnd.X(), locHEnd.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locHEnd.X(), locHEnd.Y(), xFlat, yFlat); locHEnd.SetXYZ(xFlat, yFlat, locHEnd.Z()); - 
o2::its3::SuperSegmentations[layer].curvedToFlat(locHStart.X(), locHStart.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locHStart.X(), locHStart.Y(), xFlat, yFlat); locHStart.SetXYZ(xFlat, yFlat, locHStart.Z()); } locHMiddle.SetXYZ(0.5f * (locHEnd.X() + locHStart.X()), 0.5f * (locHEnd.Y() + locHStart.Y()), 0.5f * (locHEnd.Z() + locHStart.Z())); @@ -292,10 +293,10 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", int rowHS, colHS, rowHM, colHM, rowHE, colHE, colC, rowC; bool v1, v2, v3, v4; if (isIB) { - v1 = o2::its3::SuperSegmentations[layer].localToDetector(locHStart.X(), locHStart.Z(), rowHS, colHS); - v2 = o2::its3::SuperSegmentations[layer].localToDetector(locHMiddle.X(), locHMiddle.Z(), rowHM, colHM); - v3 = o2::its3::SuperSegmentations[layer].localToDetector(locHEnd.X(), locHEnd.Z(), rowHE, colHE); - v4 = o2::its3::SuperSegmentations[layer].localToDetector(locC.X(), locC.Z(), rowC, colC); + v1 = mMosaixSegmentations[layer].localToDetector(locHStart.X(), locHStart.Z(), rowHS, colHS); + v2 = mMosaixSegmentations[layer].localToDetector(locHMiddle.X(), locHMiddle.Z(), rowHM, colHM); + v3 = mMosaixSegmentations[layer].localToDetector(locHEnd.X(), locHEnd.Z(), rowHE, colHE); + v4 = mMosaixSegmentations[layer].localToDetector(locC.X(), locC.Z(), rowC, colC); } else { v1 = o2::itsmft::SegmentationAlpide::localToDetector(locHStart.X(), locHStart.Z(), rowHS, colHS); v2 = o2::itsmft::SegmentationAlpide::localToDetector(locHMiddle.X(), locHMiddle.Z(), rowHM, colHM); @@ -315,7 +316,7 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", data[chipID].cog->AddPoint(colC, rowC); constexpr float delta = 1e-2; - const auto& patt = dict.getPattern(cluster.getPatternID()); + const auto& patt = dict.getPattern(cluster.getPatternID(), isIB); auto box = new TBox( cluster.getCol() - delta - 0.5, cluster.getRow() - delta - 0.5, @@ -338,8 +339,8 @@ void CompareClustersAndDigits(std::string clusfile = "o2clus_it3.root", } 
auto& dat = data[iChip]; gFile->cd(); - /* auto path = gman->getMatrixPath(iChip); */ - TString path; // TODO wrong use above + auto path = gman->getMatrixPath(iChip); + /*TString path; // TODO wrong use above*/ const std::string cpath{path.Data() + 39, path.Data() + path.Length()}; const std::filesystem::path p{cpath}; if (oFile->mkdir(p.parent_path().c_str(), "", true) == nullptr) { diff --git a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C index d8783ba7c8fb9..cc241afb3357a 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C @@ -34,7 +34,7 @@ #include "DetectorsCommonDataFormats/DetID.h" #include "ITSBase/GeometryTGeo.h" #include "ITSMFTBase/SegmentationAlpide.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "DataFormatsITSMFT/CompCluster.h" #include "DataFormatsITSMFT/ClusterTopology.h" #include "ITS3Reconstruction/TopologyDictionary.h" @@ -60,14 +60,13 @@ void CreateDictionariesITS3(bool saveDeltas = false, std::string collContextfile = "collisioncontext.root", std::string inputGeom = "", float checkOutliers = 2., // reject outliers (MC dX or dZ exceeds row/col span by a factor above the threshold) - float minPtMC = 0.01) // account only MC hits with pT above threshold + float minPtMC = 0.1) // account only MC hits with pT above threshold { const int QEDSourceID = 99; // Clusters from this MC source correspond to QED electrons using namespace o2::base; using namespace o2::its; - using o2::its3::SegmentationSuperAlpide; using Segmentation = o2::itsmft::SegmentationAlpide; using o2::its3::BuildTopologyDictionary; using o2::itsmft::ClusterTopology; @@ -82,12 +81,14 @@ void CreateDictionariesITS3(bool saveDeltas = false, std::vector hitVecPool; std::vector mc2hitVec; o2::its3::TopologyDictionary clusDictOld; + std::array mMosaixSegmentations{0, 1, 
2}; if (!clusDictFile.empty()) { clusDictOld.readFromFile(clusDictFile); - LOGP(info, "Loaded external cluster dictionary with {} entries from {}", clusDictOld.getSize(), clusDictFile); + LOGP(info, "Loaded external cluster dictionary with {} IB/{} OBentries from {}", clusDictOld.getSize(true), clusDictOld.getSize(false), clusDictFile); } - ULong_t cOk{0}, cOutliers{0}, cFailedMC{0}; + ULong_t cOkIB{0}, cOutliersIB{0}, cFailedMCIB{0}; + ULong_t cOkOB{0}, cOutliersOB{0}, cFailedMCOB{0}; TFile* fout = nullptr; TNtuple* nt = nullptr; @@ -233,17 +234,18 @@ void CreateDictionariesITS3(bool saveDeltas = false, const auto& cluster = (*clusArr)[clEntry]; o2::itsmft::ClusterPattern pattern; + bool ib = o2::its3::constants::detID::isDetITS3(cluster.getChipID()); if (cluster.getPatternID() != CompCluster::InvalidPatternID) { - if (clusDictOld.getSize() == 0) { + if (clusDictOld.getSize(ib) == 0) { LOG(error) << "Encountered patternID = " << cluster.getPatternID() << " != " << CompCluster::InvalidPatternID; LOG(error) << "Clusters have already been generated with a dictionary which was not provided"; return; } - if (clusDictOld.isGroup(cluster.getPatternID())) { + if (clusDictOld.isGroup(cluster.getPatternID(), ib)) { pattern.acquirePattern(pattIdx); } else { - pattern = clusDictOld.getPattern(cluster.getPatternID()); + pattern = clusDictOld.getPattern(cluster.getPatternID(), ib); } } else { pattern.acquirePattern(pattIdx); @@ -270,44 +272,43 @@ void CreateDictionariesITS3(bool saveDeltas = false, o2::math_utils::Vector3D xyzLocM; xyzLocM.SetCoordinates(0.5f * (xyzLocE.X() + xyzLocS.X()), 0.5f * (xyzLocE.Y() + xyzLocS.Y()), 0.5f * (xyzLocE.Z() + xyzLocS.Z())); auto locC = o2::its3::TopologyDictionary::getClusterCoordinates(cluster, pattern, false); - bool isIB = o2::its3::constants::detID::isDetITS3(chipID); int layer = gman->getLayer(chipID); - if (isIB) { + if (ib) { float xFlat{0.}, yFlat{0.}; - o2::its3::SuperSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), 
xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(xyzLocM.X(), xyzLocM.Y(), xFlat, yFlat); xyzLocM.SetCoordinates(xFlat, yFlat, xyzLocM.Z()); - o2::its3::SuperSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlat, yFlat); + mMosaixSegmentations[layer].curvedToFlat(locC.X(), locC.Y(), xFlat, yFlat); locC.SetCoordinates(xFlat, yFlat, locC.Z()); } dX = xyzLocM.X() - locC.X(); dZ = xyzLocM.Z() - locC.Z(); - dX /= (isIB) ? o2::its3::SegmentationSuperAlpide::mPitchRow : o2::itsmft::SegmentationAlpide::PitchRow; - dZ /= (isIB) ? o2::its3::SegmentationSuperAlpide::mPitchCol : o2::itsmft::SegmentationAlpide::PitchCol; + dX /= (ib) ? o2::its3::SegmentationMosaix::PitchRow : o2::itsmft::SegmentationAlpide::PitchRow; + dZ /= (ib) ? o2::its3::SegmentationMosaix::PitchCol : o2::itsmft::SegmentationAlpide::PitchCol; if (saveDeltas) { nt->Fill(topology.getHash(), dX, dZ); } if (checkOutliers > 0.) { if (bool bX = std::abs(dX) > topology.getRowSpan() * checkOutliers, bZ = std::abs(dZ) > topology.getColumnSpan() * checkOutliers; bX || bZ) { // ignore outlier - ++cOutliers; + (ib) ? ++cOutliersIB : ++cOutliersOB; LOGP(debug, "Ignored Value dX={} > {} * {} -> {}", dX, topology.getRowSpan(), checkOutliers, bX); LOGP(debug, "Ignored Value dZ={} > {} * {} -> {}", dZ, topology.getColumnSpan(), checkOutliers, bZ); dX = dZ = BuildTopologyDictionary::IgnoreVal; } else { - ++cOk; + (ib) ? ++cOkIB : ++cOkOB; } } } } else { /* LOGP(info, " Failed to find MC hit entry for Tr: {} chipID: {}", trID, chipID); */ /* lab.print(); */ - ++cFailedMC; + (ib) ? 
++cFailedMCIB : ++cFailedMCOB; } - signalDictionary.accountTopology(topology, dX, dZ); + signalDictionary.accountTopology(topology, ib, dX, dZ); } else { - noiseDictionary.accountTopology(topology, dX, dZ); + noiseDictionary.accountTopology(topology, ib, dX, dZ); } } - completeDictionary.accountTopology(topology, dX, dZ); + completeDictionary.accountTopology(topology, ib, dX, dZ); } // clean MC cache for events which are not needed anymore @@ -323,12 +324,14 @@ void CreateDictionariesITS3(bool saveDeltas = false, } } - LOGP(info, "Clusters: {} okay (failed MCHit2Clus {}); outliers {}", cOk, cFailedMC, cOutliers); + LOGP(info, "IB Clusters: {} okay (failed MCHit2Clus {}); outliers {}", cOkIB, cFailedMCIB, cOutliersIB); + LOGP(info, "OB Clusters: {} okay (failed MCHit2Clus {}); outliers {}", cOkOB, cFailedMCOB, cOutliersOB); auto dID = o2::detectors::DetID::IT3; LOGP(info, "Complete Dictionary:"); - completeDictionary.setThreshold(probThreshold); + completeDictionary.setThreshold(probThreshold, true); + completeDictionary.setThreshold(probThreshold, false); completeDictionary.groupRareTopologies(); completeDictionary.printDictionaryBinary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "")); completeDictionary.printDictionary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "", "txt")); @@ -336,24 +339,34 @@ void CreateDictionariesITS3(bool saveDeltas = false, TFile histogramOutput("histograms.root", "recreate"); TCanvas* cComplete = new TCanvas("cComplete", "Distribution of all the topologies"); - cComplete->cd(); - cComplete->SetLogy(); - TH1F* hComplete = completeDictionary.getDictionary().getTopologyDistribution("hComplete"); - hComplete->SetDirectory(nullptr); - hComplete->Draw("hist"); - hComplete->Write(); + cComplete->Divide(2, 1); + cComplete->cd(1); + TH1F* hCompleteIB = completeDictionary.getDictionary().getTopologyDistribution("hCompleteInnerBarrel", true); + hCompleteIB->SetDirectory(nullptr); + 
hCompleteIB->Draw("hist"); + gPad->SetLogy(); + cComplete->cd(2); + TH1F* hCompleteOB = completeDictionary.getDictionary().getTopologyDistribution("hCompleteOuterBarrel", false); + hCompleteOB->SetDirectory(nullptr); + hCompleteOB->Draw("hist"); + gPad->SetLogy(); + histogramOutput.cd(); + hCompleteIB->Write(); + hCompleteOB->Write(); cComplete->Write(); if (clusLabArr) { LOGP(info, "Noise Dictionary:"); - noiseDictionary.setThreshold(0.0001); + noiseDictionary.setThreshold(0.0001, true); + noiseDictionary.setThreshold(0.0001, false); noiseDictionary.groupRareTopologies(); noiseDictionary.printDictionaryBinary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "noiseClusTopo")); noiseDictionary.printDictionary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "noiseClusTopo", "txt")); noiseDictionary.saveDictionaryRoot(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "noiseClusTopo", "root")); LOGP(info, "Signal Dictionary:"); - signalDictionary.setThreshold(0.0001); + signalDictionary.setThreshold(0.0001, true); + signalDictionary.setThreshold(0.0001, false); signalDictionary.groupRareTopologies(); signalDictionary.printDictionaryBinary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "signal")); signalDictionary.printDictionary(o2::base::DetectorNameConf::getAlpideClusterDictionaryFileName(dID, "signal", "txt")); @@ -361,26 +374,42 @@ void CreateDictionariesITS3(bool saveDeltas = false, LOGP(info, "Plotting Channels"); auto cNoise = new TCanvas("cNoise", "Distribution of noise topologies"); - cNoise->cd(); - cNoise->SetLogy(); - auto hNoise = noiseDictionary.getDictionary().getTopologyDistribution("hNoise"); - hNoise->SetDirectory(nullptr); - hNoise->Draw("hist"); + cNoise->Divide(2, 1); + cNoise->cd(1); + auto hNoiseIB = noiseDictionary.getDictionary().getTopologyDistribution("hNoiseInnerBarrel", true); + hNoiseIB->SetDirectory(nullptr); + hNoiseIB->Draw("hist"); + gPad->SetLogy(); + 
cNoise->cd(2); + auto hNoiseOB = noiseDictionary.getDictionary().getTopologyDistribution("hNoiseOuterBarrel", false); + hNoiseOB->SetDirectory(nullptr); + hNoiseOB->Draw("hist"); + gPad->SetLogy(); histogramOutput.cd(); - hNoise->Write(); + hNoiseIB->Write(); + hNoiseOB->Write(); cNoise->Write(); + auto cSignal = new TCanvas("cSignal", "cSignal"); - cSignal->cd(); + cSignal->Divide(2, 1); + cSignal->cd(1); + auto hSignalIB = signalDictionary.getDictionary().getTopologyDistribution("hSignalInnerBarrel", true); + hSignalIB->SetDirectory(nullptr); + hSignalIB->Draw("hist"); + gPad->SetLogy(); + cSignal->cd(2); cSignal->SetLogy(); - auto hSignal = signalDictionary.getDictionary().getTopologyDistribution("hSignal"); - hSignal->SetDirectory(nullptr); - hSignal->Draw("hist"); + auto hSignalOB = signalDictionary.getDictionary().getTopologyDistribution("hSignalOuterBarrel", false); + hSignalOB->SetDirectory(nullptr); + hSignalOB->Draw("hist"); + gPad->SetLogy(); histogramOutput.cd(); - hSignal->Write(); + hSignalIB->Write(); + hSignalOB->Write(); cSignal->Write(); - sw.Stop(); - sw.Print(); } + sw.Stop(); + sw.Print(); if (saveDeltas) { fout->cd(); nt->Write(); diff --git a/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C b/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C index 1a0ec73e34f31..4b54bbced2929 100644 --- a/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C +++ b/Detectors/Upgrades/ITS3/macros/test/TestSensorGeometry.C @@ -21,7 +21,7 @@ #include "TList.h" #endif -void TestSensorGeometry(bool checkFull = false) +void TestSensorGeometry(bool draw = false, bool checkFull = false) { gGeoManager = new TGeoManager("simple", "Simple geometry"); TGeoMaterial* matVacuum = new TGeoMaterial("Vacuum", 0, 0, 0); @@ -30,8 +30,7 @@ void TestSensorGeometry(bool checkFull = false) auto top = gGeoManager->MakeBox("TOP", Vacuum, 270., 270., 120.); gGeoManager->SetTopVolume(top); - o2::its3::ITS3Layer layer0{0, top, nullptr, - 
o2::its3::ITS3Layer::BuildLevel::kLayer, true}; + o2::its3::ITS3Layer layer0{2, top, nullptr, o2::its3::ITS3Layer::BuildLevel::kLayer, true}; // Print available medias TIter next{gGeoManager->GetListOfMedia()}; @@ -42,13 +41,17 @@ void TestSensorGeometry(bool checkFull = false) gGeoManager->CloseGeometry(); gGeoManager->SetVisLevel(99); + if (draw) { + gGeoManager->Draw("ogl"); + } + if (checkFull) { gGeoManager->CheckGeometryFull(); - } - gGeoManager->CheckOverlaps(0.0001); - TIter nextOverlap{gGeoManager->GetListOfOverlaps()}; - while ((obj = (TObject*)nextOverlap())) { - LOGP(info, "Overlap in {}", obj->GetName()); + gGeoManager->CheckOverlaps(0.00001); + TIter nextOverlap{gGeoManager->GetListOfOverlaps()}; + while ((obj = (TObject*)nextOverlap())) { + LOGP(info, "Overlap in {}", obj->GetName()); + } } std::unique_ptr f{TFile::Open("geo.root", "RECREATE")}; diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h index 7df603bb29fb2..662c58aeb2cd8 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/BuildTopologyDictionary.h @@ -24,31 +24,47 @@ namespace o2::its3 class BuildTopologyDictionary { + using TopoInfo = std::unordered_map; + using TopoStat = std::map; + using TopoFreq = std::vector>; + public: static constexpr float IgnoreVal = 999.; - void accountTopology(const itsmft::ClusterTopology& cluster, float dX = IgnoreVal, float dZ = IgnoreVal); - void setNCommon(unsigned int nCommon); // set number of common topologies - void setThreshold(double thr); - void setThresholdCumulative(double cumulative); // Considering the integral + void accountTopology(const itsmft::ClusterTopology& cluster, bool IB, float dX = IgnoreVal, float dZ = IgnoreVal); + void setNCommon(unsigned int nCommon, bool IB); // 
set number of common topologies + void setThreshold(double thr, bool IB); + void setThresholdCumulative(double cumulative, bool IB); // Considering the integral void groupRareTopologies(); - friend std::ostream& operator<<(std::ostream& os, const BuildTopologyDictionary& BD); void printDictionary(const std::string& fname); void printDictionaryBinary(const std::string& fname); void saveDictionaryRoot(const std::string& fname); - unsigned int getTotClusters() const { return mTotClusters; } - unsigned int getNotInGroups() const { return mNCommonTopologies; } - TopologyDictionary getDictionary() const { return mDictionary; } + [[nodiscard]] unsigned int getTotClusters(bool IB) const { return (IB) ? mTotClustersIB : mTotClustersOB; } + [[nodiscard]] unsigned int getNotInGroups(bool IB) const { return (IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB; } + [[nodiscard]] const TopologyDictionary& getDictionary() const { return mDictionary; } + + friend std::ostream& operator<<(std::ostream& os, const BuildTopologyDictionary& BD); private: - TopologyDictionary mDictionary; ///< Dictionary of topologies - std::map mTopologyMap; //! Temporary map of type - std::vector> mTopologyFrequency; //! 
, needed to define threshold - unsigned int mTotClusters{0}; - unsigned int mNCommonTopologies{0}; - double mFrequencyThreshold{0.}; - - std::unordered_map mMapInfo; + void accountTopologyImpl(const itsmft::ClusterTopology& cluster, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ntot, float sigmaX, float sigmaZ, float dX, float dZ); + void setNCommonImpl(unsigned int ncom, TopoFreq& tfreq, TopoStat& tstat, unsigned int& ncommon, unsigned int ntot); + void setThresholdImpl(double thr, TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, unsigned int ntot); + void setThresholdCumulativeImpl(double cumulative, TopoFreq& tfreq, unsigned int& ncommon, double& freqthres, unsigned int ntot); + void groupRareTopologiesImpl(TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, TopologyDictionaryData& data, unsigned int ntot); + + TopologyDictionary mDictionary; ///< Dictionary of topologies + unsigned int mTotClustersIB{0}; + unsigned int mTotClustersOB{0}; + unsigned int mNCommonTopologiesIB{0}; + unsigned int mNCommonTopologiesOB{0}; + double mFrequencyThresholdIB{0.}; + double mFrequencyThresholdOB{0.}; + TopoInfo mMapInfoIB; + TopoInfo mMapInfoOB; + TopoStat mTopologyMapIB; //! IB Temporary map of type + TopoStat mTopologyMapOB; //! OB Temporary map of type + TopoFreq mTopologyFrequencyIB; //! IB , needed to define threshold + TopoFreq mTopologyFrequencyOB; //! 
OB , needed to define threshold ClassDefNV(BuildTopologyDictionary, 3); }; diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h index 20acf07d4f547..a81db09217e9b 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/Clusterer.h @@ -207,7 +207,7 @@ class Clusterer template static void streamCluster(const std::vector& pixbuf, const std::array* lblBuff, const BBox& bbox, const its3::LookUp& pattIdConverter, - VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isHuge = false); + VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isIB, bool isHuge = false); bool isContinuousReadOut() const { return mContinuousReadout; } void setContinuousReadOut(bool v) { mContinuousReadout = v; } @@ -230,7 +230,7 @@ class Clusterer ///< load the dictionary of cluster topologies void setDictionary(const its3::TopologyDictionary* dict) { - LOGP(info, "Setting TopologyDictionary with size={}", dict->getSize()); + LOGP(info, "Setting TopologyDictionary with IB size={} & OB size={}", dict->getSize(true), dict->getSize(false)); mPattIdConverter.setDictionary(dict); // dict->print(); } @@ -274,7 +274,7 @@ class Clusterer template void Clusterer::streamCluster(const std::vector& pixbuf, const std::array* lblBuff, const Clusterer::BBox& bbox, const its3::LookUp& pattIdConverter, - VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isHuge) + VCLUS* compClusPtr, VPAT* patternsPtr, MCTruth* labelsClusPtr, int nlab, bool isIB, bool isHuge) { if (labelsClusPtr && lblBuff) { // MC labels were requested auto cnt = compClusPtr->size(); @@ -291,10 +291,10 @@ void Clusterer::streamCluster(const std::vector& pixbuf, const std::a int nbits = ir * colSpanW + ic; patt[nbits >> 3] |= (0x1 << (7 - (nbits % 
8))); } - uint16_t pattID = (isHuge || pattIdConverter.size() == 0) ? CompCluster::InvalidPatternID : pattIdConverter.findGroupID(rowSpanW, colSpanW, patt.data()); + uint16_t pattID = (isHuge || pattIdConverter.size(isIB) == 0) ? CompCluster::InvalidPatternID : pattIdConverter.findGroupID(rowSpanW, colSpanW, isIB, patt.data()); uint16_t row = bbox.rowMin, col = bbox.colMin; LOGP(debug, "PattID: findGroupID({},{},{})={}", row, col, patt[0], pattID); - if (pattID == CompCluster::InvalidPatternID || pattIdConverter.isGroup(pattID)) { + if (pattID == CompCluster::InvalidPatternID || pattIdConverter.isGroup(pattID, isIB)) { if (pattID != CompCluster::InvalidPatternID) { // For groupped topologies, the reference pixel is the COG pixel float xCOG = 0., zCOG = 0.; diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h index 2407344aa0193..b9e7fd0f6ec39 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/IOUtils.h @@ -16,14 +16,13 @@ #include "ITS3Reconstruction/TopologyDictionary.h" #include "ITStracking/TimeFrame.h" #include "ITStracking/IOUtils.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITS3Base/SpecsV2.h" namespace o2::its3::ioutils { -using SSAlpide = o2::its3::SegmentationSuperAlpide; -constexpr float DefClusErrorRow = o2::its3::SegmentationSuperAlpide::mPitchRow * 0.5; -constexpr float DefClusErrorCol = o2::its3::SegmentationSuperAlpide::mPitchCol * 0.5; +constexpr float DefClusErrorRow = o2::its3::SegmentationMosaix::PitchRow * 0.5; +constexpr float DefClusErrorCol = o2::its3::SegmentationMosaix::PitchCol * 0.5; constexpr float DefClusError2Row = DefClusErrorRow * DefClusErrorRow; constexpr float DefClusError2Col = DefClusErrorCol * DefClusErrorCol; @@ -31,13 +30,14 @@ template 
o2::math_utils::Point3D extractClusterData(const itsmft::CompClusterExt& c, iterator& iter, const its3::TopologyDictionary* dict, T& sig2y, T& sig2z) { auto pattID = c.getPatternID(); + auto ib = constants::detID::isDetITS3(c.getSensorID()); // Dummy COG errors (about half pixel size) - sig2y = (constants::detID::isDetITS3(c.getSensorID())) ? DefClusError2Row : o2::its::ioutils::DefClusError2Row; - sig2z = (constants::detID::isDetITS3(c.getSensorID())) ? DefClusError2Col : o2::its::ioutils::DefClusError2Col; + sig2y = (ib) ? DefClusError2Row : o2::its::ioutils::DefClusError2Row; + sig2z = (ib) ? DefClusError2Col : o2::its::ioutils::DefClusError2Col; if (pattID != itsmft::CompCluster::InvalidPatternID) { - sig2y = dict->getErr2X(pattID) * sig2y; // Error is given in detector coordinates - sig2z = dict->getErr2Z(pattID) * sig2z; - if (!dict->isGroup(pattID)) { + sig2y = dict->getErr2X(pattID, ib); + sig2z = dict->getErr2Z(pattID, ib); + if (!dict->isGroup(pattID, ib)) { return dict->getClusterCoordinates(c); } else { o2::itsmft::ClusterPattern patt(iter); @@ -53,13 +53,14 @@ template o2::math_utils::Point3D extractClusterData(const itsmft::CompClusterExt& c, iterator& iter, const its3::TopologyDictionary* dict, T& sig2y, T& sig2z, uint8_t& cls) { auto pattID = c.getPatternID(); + auto ib = constants::detID::isDetITS3(c.getSensorID()); auto iterC = iter; unsigned int clusterSize{999}; - if (pattID == itsmft::CompCluster::InvalidPatternID || dict->isGroup(pattID)) { + if (pattID == itsmft::CompCluster::InvalidPatternID || dict->isGroup(pattID, ib)) { o2::itsmft::ClusterPattern patt(iterC); clusterSize = patt.getNPixels(); } else { - clusterSize = dict->getNpixels(pattID); + clusterSize = dict->getNpixels(pattID, ib); } cls = static_cast(std::clamp(clusterSize, static_cast(std::numeric_limits::min()), static_cast(std::numeric_limits::max()))); return extractClusterData(c, iter, dict, sig2y, sig2z); diff --git 
a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h index 0fbecb41393ff..809a129a0debf 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/LookUp.h @@ -21,7 +21,6 @@ #ifndef ALICEO2_ITS3_LOOKUP_H #define ALICEO2_ITS3_LOOKUP_H -#include "DataFormatsITSMFT/ClusterTopology.h" #include "ITS3Reconstruction/TopologyDictionary.h" namespace o2::its3 @@ -32,20 +31,21 @@ class LookUp LookUp() = default; LookUp(std::string fileName); static int groupFinder(int nRow, int nCol); - int findGroupID(int nRow, int nCol, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const; - int getTopologiesOverThreshold() const { return mTopologiesOverThreshold; } + int findGroupID(int nRow, int nCol, bool IB, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const; + int getTopologiesOverThreshold(bool IB) const { return (IB) ? 
mTopologiesOverThresholdIB : mTopologiesOverThresholdOB; } void loadDictionary(std::string fileName); void setDictionary(const TopologyDictionary* dict); - bool isGroup(int id) const { return mDictionary.isGroup(id); } - int size() const { return mDictionary.getSize(); } - auto getPattern(int id) const { return mDictionary.getPattern(id); } - auto getDictionaty() const { return mDictionary; } + auto getDictionary() const { return mDictionary; } + bool isGroup(int id, bool IB) const { return mDictionary.isGroup(id, IB); } + int size(bool IB) const { return mDictionary.getSize(IB); } + auto getPattern(int id, bool IB) const { return mDictionary.getPattern(id, IB); } private: - TopologyDictionary mDictionary{}; - int mTopologiesOverThreshold{0}; + TopologyDictionary mDictionary; + int mTopologiesOverThresholdIB{0}; + int mTopologiesOverThresholdOB{0}; - ClassDefNV(LookUp, 2); + ClassDefNV(LookUp, 3); }; } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h index a11131ed9f61f..d5f5721170aa7 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h +++ b/Detectors/Upgrades/ITS3/reconstruction/include/ITS3Reconstruction/TopologyDictionary.h @@ -24,6 +24,18 @@ namespace o2::its3 class BuildTopologyDictionary; class LookUp; +struct TopologyDictionaryData { + static constexpr int STopoSize{(8 * 255) + 1}; + std::array mSmallTopologiesLUT{}; ///< Look-Up Table for the topologies with 1-byte linearised matrix + std::vector mVectorOfIDs; ///< Vector of topologies and groups + std::unordered_map mCommonMap; ///< Map of pair + std::unordered_map mGroupMap; ///< Map of pair + + void print() const noexcept; + + ClassDefNV(TopologyDictionaryData, 1); +}; + class TopologyDictionary { public: @@ -32,91 +44,108 @@ class TopologyDictionary /// constexpr for the definition of the groups 
of rare topologies. /// The attritbution of the group ID is stringly dependent on the following parameters: it must be a power of 2. - static constexpr int RowClassSpan = 4; ///< Row span of the classes of rare topologies - static constexpr int ColClassSpan = 4; ///< Column span of the classes of rare topologies - static constexpr int MaxNumberOfRowClasses = 1 + (itsmft::ClusterPattern::MaxRowSpan - 1) / RowClassSpan; ///< Maximum number of row classes for the groups of rare topologies - static constexpr int MaxNumberOfColClasses = 1 + (itsmft::ClusterPattern::MaxColSpan - 1) / ColClassSpan; ///< Maximum number of col classes for the groups of rare topologies - static constexpr int NumberOfRareGroups = MaxNumberOfRowClasses * MaxNumberOfColClasses; ///< Number of entries corresponding to groups of rare topologies (those whos matrix exceed the max number of bytes are empty). + static constexpr int RowClassSpan = 4; ///< Row span of the classes of rare topologies + static constexpr int ColClassSpan = 4; ///< Column span of the classes of rare topologies + static constexpr int MaxNumberOfRowClasses = 1 + ((itsmft::ClusterPattern::MaxRowSpan - 1) / RowClassSpan); ///< Maximum number of row classes for the groups of rare topologies + static constexpr int MaxNumberOfColClasses = 1 + ((itsmft::ClusterPattern::MaxColSpan - 1) / ColClassSpan); ///< Maximum number of col classes for the groups of rare topologies + static constexpr int NumberOfRareGroups = MaxNumberOfRowClasses * MaxNumberOfColClasses; ///< Number of entries corresponding to groups of rare topologies (those whos matrix exceed the max number of bytes are empty). 
+ /// Resets internal structures + void reset() noexcept; + void resetMaps(bool IB = true) noexcept; /// Prints the dictionary friend std::ostream& operator<<(std::ostream& os, const its3::TopologyDictionary& dictionary); /// Prints the dictionary in a binary file void writeBinaryFile(const std::string& outputFile); /// Reads the dictionary from a binary file - int readBinaryFile(const std::string& fileName); - - int readFromFile(const std::string& fileName); + void readBinaryFile(const std::string& fileName); + void readFromFile(const std::string& fileName); + void print() const noexcept; /// Returns the x position of the COG for the n_th element - inline float getXCOG(int n) const + [[nodiscard]] float getXCOG(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mXCOG; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mXCOG; } /// Returns the error on the x position of the COG for the n_th element - inline float getErrX(int n) const + [[nodiscard]] float getErrX(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErrX; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErrX; } /// Returns the z position of the COG for the n_th element - inline float getZCOG(int n) const + [[nodiscard]] float getZCOG(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mZCOG; + const auto& data = (IB) ? 
mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mZCOG; } /// Returns the error on the z position of the COG for the n_th element - inline float getErrZ(int n) const + [[nodiscard]] float getErrZ(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErrZ; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErrZ; } /// Returns the error^2 on the x position of the COG for the n_th element - inline float getErr2X(int n) const + [[nodiscard]] float getErr2X(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErr2X; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErr2X; } /// Returns the error^2 on the z position of the COG for the n_th element - inline float getErr2Z(int n) const + [[nodiscard]] float getErr2Z(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mErr2Z; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mErr2Z; } /// Returns the hash of the n_th element - inline unsigned long getHash(int n) const + [[nodiscard]] unsigned long getHash(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mHash; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mHash; } /// Returns the number of fired pixels of the n_th element - inline int getNpixels(int n) const + [[nodiscard]] int getNpixels(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mNpixels; + const auto& data = (IB) ? 
mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mNpixels; } /// Returns the frequency of the n_th element; - inline double getFrequency(int n) const + [[nodiscard]] double getFrequency(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mFrequency; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mFrequency; } /// Returns true if the element corresponds to a group of rare topologies - inline bool isGroup(int n) const + [[nodiscard]] bool isGroup(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mIsGroup; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mIsGroup; } /// Returns the pattern of the topology - inline const itsmft::ClusterPattern& getPattern(int n) const + [[nodiscard]] const itsmft::ClusterPattern& getPattern(int n, bool IB = true) const { - assert(n >= 0 || n < (int)mVectorOfIDs.size()); - return mVectorOfIDs[n].mPattern; + const auto& data = (IB) ? mDataIB : mDataOB; + assert(n >= 0 || n < (int)data.mVectorOfIDs.size()); + return data.mVectorOfIDs[n].mPattern; } /// Fills a hostogram with the distribution of the IDs - TH1F* getTopologyDistribution(const std::string_view hname = "h_topo_dist") const; + [[nodiscard]] TH1F* getTopologyDistribution(const std::string_view hname, bool IB = true) const; /// Returns the number of elements in the dicionary; - int getSize() const { return (int)mVectorOfIDs.size(); } + [[nodiscard]] int getSize(bool IB) const + { + return static_cast((IB) ? 
mDataIB.mVectorOfIDs.size() : mDataOB.mVectorOfIDs.size()); + } /// Returns the local position of a compact cluster /// Returns the local position of a compact cluster @@ -133,13 +162,10 @@ class TopologyDictionary friend its3::LookUp; private: - static constexpr int STopoSize{8 * 255 + 1}; - std::unordered_map mCommonMap{}; ///< Map of pair - std::unordered_map mGroupMap{}; ///< Map of pair - int mSmallTopologiesLUT[STopoSize]{}; ///< Look-Up Table for the topologies with 1-byte linearised matrix - std::vector mVectorOfIDs{}; ///< Vector of topologies and groups + TopologyDictionaryData mDataIB; + TopologyDictionaryData mDataOB; - ClassDefNV(TopologyDictionary, 3); + ClassDefNV(TopologyDictionary, 4); }; } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx index 87ad450eecd9e..f7eec52f9434a 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/BuildTopologyDictionary.cxx @@ -14,7 +14,9 @@ #include "ITS3Reconstruction/BuildTopologyDictionary.h" #include "ITS3Reconstruction/LookUp.h" #include "DataFormatsITSMFT/CompCluster.h" -#include "ITS3Base/SegmentationSuperAlpide.h" + +#include "ITSMFTBase/SegmentationAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "TFile.h" @@ -22,14 +24,25 @@ ClassImp(o2::its3::BuildTopologyDictionary); namespace o2::its3 { -void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& cluster, float dX, float dZ) +void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& cluster, bool IB, float dX, float dZ) { - mTotClusters++; + accountTopologyImpl(cluster, + ((IB) ? mMapInfoIB : mMapInfoOB), + ((IB) ? mTopologyMapIB : mTopologyMapOB), + ((IB) ? mTotClustersIB : mTotClustersOB), + ((IB) ? SegmentationMosaix::PitchRow : itsmft::SegmentationAlpide::PitchRow), + ((IB) ? 
SegmentationMosaix::PitchCol : itsmft::SegmentationAlpide::PitchCol), + dX, dZ); +} + +void BuildTopologyDictionary::accountTopologyImpl(const itsmft::ClusterTopology& cluster, TopoInfo& tinfo, TopoStat& tstat, unsigned int& tot, float sigmaX, float sigmaZ, float dX, float dZ) +{ + ++tot; bool useDf = dX < IgnoreVal / 2; // we may need to account the frequency but to not update the centroid // std::pair::iterator,bool> ret; // auto ret = mTopologyMap.insert(std::make_pair(cluster.getHash(), std::make_pair(cluster, 1))); - auto& topoStat = mTopologyMap[cluster.getHash()]; + auto& topoStat = tstat[cluster.getHash()]; topoStat.countsTotal++; if (topoStat.countsTotal == 1) { // a new topology is inserted topoStat.topology = cluster; @@ -45,14 +58,14 @@ void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& clu topInf.mZmean = dZ; topoStat.countsWithBias = 1; } else { // assign expected sigmas from the pixel X, Z sizes - topInf.mXsigma2 = 1.f / 12.f / (float)std::min(10, topInf.mSizeX); - topInf.mZsigma2 = 1.f / 12.f / (float)std::min(10, topInf.mSizeZ); + topInf.mXsigma2 = sigmaX * sigmaX / 12.f / (float)std::min(10, topInf.mSizeX); + topInf.mZsigma2 = sigmaZ * sigmaZ / (float)std::min(10, topInf.mSizeZ); } - mMapInfo.emplace(cluster.getHash(), topInf); + tinfo.emplace(cluster.getHash(), topInf); } else { if (useDf) { auto num = topoStat.countsWithBias++; - auto ind = mMapInfo.find(cluster.getHash()); + auto ind = tinfo.find(cluster.getHash()); float tmpxMean = ind->second.mXmean; float newxMean = ind->second.mXmean = ((tmpxMean)*num + dX) / (num + 1); float tmpxSigma2 = ind->second.mXsigma2; @@ -65,101 +78,135 @@ void BuildTopologyDictionary::accountTopology(const itsmft::ClusterTopology& clu } } -void BuildTopologyDictionary::setThreshold(double thr) +void BuildTopologyDictionary::setNCommon(unsigned int nCommon, bool IB) +{ + mDictionary.resetMaps(IB); + + auto& freqTopo = ((IB) ? 
mTopologyFrequencyIB : mTopologyFrequencyOB); + auto& freqThres = ((IB) ? mFrequencyThresholdIB : mFrequencyThresholdOB); + auto& comTopo = ((IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB); + auto ntot = ((IB) ? mTotClustersIB : mTotClustersOB); + + setNCommonImpl(nCommon, + freqTopo, + ((IB) ? mTopologyMapIB : mTopologyMapOB), + comTopo, + ntot); + // Recaculate also the threshold + freqThres = ((double)freqTopo[comTopo - 1].first) / ntot; +} + +void BuildTopologyDictionary::setNCommonImpl(unsigned int ncom, TopoFreq& tfreq, TopoStat& tstat, unsigned int& ncommon, unsigned int ntot) { - mTopologyFrequency.clear(); - for (auto&& p : mTopologyMap) { // p is pair - mTopologyFrequency.emplace_back(p.second.countsTotal, p.first); + if (ncom >= itsmft::CompCluster::InvalidPatternID) { + LOGP(warning, "Redefining nCommon from {} to {} to be below InvalidPatternID", ncom, itsmft::CompCluster::InvalidPatternID - 1); + ncom = itsmft::CompCluster::InvalidPatternID - 1; + } + tfreq.clear(); + for (auto&& p : tstat) { // p os pair + tfreq.emplace_back(p.second.countsTotal, p.first); } - std::sort(mTopologyFrequency.begin(), mTopologyFrequency.end(), + std::sort(tfreq.begin(), tfreq.end(), [](const std::pair& couple1, const std::pair& couple2) { return (couple1.first > couple2.first); }); - mNCommonTopologies = 0; - mDictionary.mCommonMap.clear(); - mDictionary.mGroupMap.clear(); - mFrequencyThreshold = thr; - for (auto& q : mTopologyFrequency) { - if (((double)q.first) / mTotClusters > thr) { - mNCommonTopologies++; + ncommon = ncom; +} + +void BuildTopologyDictionary::setThreshold(double thr, bool IB) +{ + mDictionary.resetMaps(IB); + setThresholdImpl(thr, + ((IB) ? mTopologyFrequencyIB : mTopologyFrequencyOB), + ((IB) ? mMapInfoIB : mMapInfoOB), + ((IB) ? mTopologyMapIB : mTopologyMapOB), + ((IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB), + ((IB) ? mFrequencyThresholdIB : mFrequencyThresholdOB), + ((IB) ? 
mTotClustersIB : mTotClustersOB)); +} + +void BuildTopologyDictionary::setThresholdImpl(double thr, TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, unsigned int ntot) +{ + setNCommonImpl(0, tfreq, tstat, ncommon, ntot); + freqthres = thr; + for (auto& q : tfreq) { + if (((double)q.first) / ntot > thr) { + ++ncommon; } else { break; } } - if (mNCommonTopologies >= itsmft::CompCluster::InvalidPatternID) { - mFrequencyThreshold = ((double)mTopologyFrequency[itsmft::CompCluster::InvalidPatternID - 1].first) / mTotClusters; - LOGP(warning, "Redefining prob. threshould from {} to {} to be below InvalidPatternID (was {})", thr, mFrequencyThreshold, mNCommonTopologies); - mNCommonTopologies = itsmft::CompCluster::InvalidPatternID - 1; + if (ncommon >= itsmft::CompCluster::InvalidPatternID) { + freqthres = ((double)tfreq[itsmft::CompCluster::InvalidPatternID - 1].first) / ntot; + LOGP(warning, "Redefining prob. threshold from {} to {} to be below InvalidPatternID (was {})", thr, freqthres, ntot); + ncommon = itsmft::CompCluster::InvalidPatternID - 1; } } -void BuildTopologyDictionary::setNCommon(unsigned int nCommon) +void BuildTopologyDictionary::setThresholdCumulative(double cumulative, bool IB) { - if (nCommon >= itsmft::CompCluster::InvalidPatternID) { - LOGP(warning, "Redefining nCommon from {} to {} to be below InvalidPatternID", nCommon, itsmft::CompCluster::InvalidPatternID - 1); - nCommon = itsmft::CompCluster::InvalidPatternID - 1; - } - mTopologyFrequency.clear(); - for (auto&& p : mTopologyMap) { // p os pair - mTopologyFrequency.emplace_back(p.second.countsTotal, p.first); + if (cumulative <= 0. || cumulative >= 1.) 
{ + cumulative = 0.99; } - std::sort(mTopologyFrequency.begin(), mTopologyFrequency.end(), - [](const std::pair& couple1, - const std::pair& couple2) { return (couple1.first > couple2.first); }); - mNCommonTopologies = nCommon; - mDictionary.mCommonMap.clear(); - mDictionary.mGroupMap.clear(); - mFrequencyThreshold = ((double)mTopologyFrequency[mNCommonTopologies - 1].first) / mTotClusters; + + auto& freqTopo = ((IB) ? mTopologyFrequencyIB : mTopologyFrequencyOB); + auto& freqThres = ((IB) ? mFrequencyThresholdIB : mFrequencyThresholdOB); + auto& statTopo = ((IB) ? mTopologyMapIB : mTopologyMapOB); + auto& comTopo = ((IB) ? mNCommonTopologiesIB : mNCommonTopologiesOB); + auto ntot = ((IB) ? mTotClustersIB : mTotClustersOB); + + mDictionary.resetMaps(IB); + setNCommonImpl(0, freqTopo, statTopo, comTopo, ntot); + setThresholdCumulativeImpl(cumulative, freqTopo, comTopo, freqThres, ntot); } -void BuildTopologyDictionary::setThresholdCumulative(double cumulative) +void BuildTopologyDictionary::setThresholdCumulativeImpl(double cumulative, TopoFreq& tfreq, unsigned int& ncommon, double& freqthres, unsigned int ntot) { - mTopologyFrequency.clear(); - if (cumulative <= 0. || cumulative >= 1.) 
{ - cumulative = 0.99; - } double totFreq = 0.; - for (auto&& p : mTopologyMap) { // p os pair - mTopologyFrequency.emplace_back(p.second.countsTotal, p.first); - } - std::sort(mTopologyFrequency.begin(), mTopologyFrequency.end(), - [](const std::pair& couple1, - const std::pair& couple2) { return (couple1.first > couple2.first); }); - mNCommonTopologies = 0; - mDictionary.mCommonMap.clear(); - mDictionary.mGroupMap.clear(); - for (auto& q : mTopologyFrequency) { - totFreq += ((double)(q.first)) / mTotClusters; + for (auto& q : tfreq) { + totFreq += ((double)(q.first)) / ntot; if (totFreq < cumulative) { - mNCommonTopologies++; - if (mNCommonTopologies >= itsmft::CompCluster::InvalidPatternID) { - totFreq -= ((double)(q.first)) / mTotClusters; - mNCommonTopologies--; + ++ncommon; + if (ncommon >= itsmft::CompCluster::InvalidPatternID) { + totFreq -= ((double)(q.first)) / ntot; + --ncommon; LOGP(warning, "Redefining cumulative threshould from {} to {} to be below InvalidPatternID)", cumulative, totFreq); } } else { break; } } - mFrequencyThreshold = ((double)(mTopologyFrequency[--mNCommonTopologies].first)) / mTotClusters; - while (std::fabs(((double)mTopologyFrequency[mNCommonTopologies].first) / mTotClusters - mFrequencyThreshold) < 1.e-15) { - mNCommonTopologies--; + freqthres = ((double)(tfreq[--ncommon].first)) / ntot; + while (std::fabs(((double)tfreq[ncommon--].first) / ntot - freqthres) < 1.e-15) { } - mFrequencyThreshold = ((double)mTopologyFrequency[mNCommonTopologies++].first) / mTotClusters; + freqthres = ((double)tfreq[ncommon++].first) / ntot; } void BuildTopologyDictionary::groupRareTopologies() { LOG(info) << "Dictionary finalisation"; - LOG(info) << "Number of clusters: " << mTotClusters; + LOG(info) << "Number of IB clusters: " << mTotClustersIB; + LOG(info) << "Number of OB clusters: " << mTotClustersOB; + + groupRareTopologiesImpl(mTopologyFrequencyIB, mMapInfoIB, mTopologyMapIB, mNCommonTopologiesIB, mFrequencyThresholdIB, mDictionary.mDataIB, 
mNCommonTopologiesIB); + groupRareTopologiesImpl(mTopologyFrequencyOB, mMapInfoOB, mTopologyMapOB, mNCommonTopologiesOB, mFrequencyThresholdOB, mDictionary.mDataOB, mNCommonTopologiesOB); + + LOG(info) << "Dictionay finalised"; + LOG(info) << "IB:"; + mDictionary.mDataIB.print(); + LOG(info) << "OB:"; + mDictionary.mDataOB.print(); +} +void BuildTopologyDictionary::groupRareTopologiesImpl(TopoFreq& tfreq, TopoInfo& tinfo, TopoStat& tstat, unsigned int& ncommon, double& freqthres, TopologyDictionaryData& data, unsigned int ntot) +{ double totFreq = 0.; - for (unsigned int j = 0; j < mNCommonTopologies; j++) { + for (unsigned int j = 0; j < ncommon; j++) { itsmft::GroupStruct gr; - gr.mHash = mTopologyFrequency[j].second; - gr.mFrequency = ((double)(mTopologyFrequency[j].first)) / mTotClusters; + gr.mHash = tfreq[j].second; + gr.mFrequency = ((double)(tfreq[j].first)) / ntot; totFreq += gr.mFrequency; // rough estimation for the error considering a8 uniform distribution - const auto& topo = mMapInfo.find(gr.mHash)->second; + const auto& topo = tinfo.find(gr.mHash)->second; gr.mErrX = std::sqrt(topo.mXsigma2); gr.mErrZ = std::sqrt(topo.mZsigma2); gr.mErr2X = topo.mXsigma2; @@ -169,11 +216,11 @@ void BuildTopologyDictionary::groupRareTopologies() gr.mNpixels = topo.mNpixels; gr.mPattern = topo.mPattern; gr.mIsGroup = false; - mDictionary.mVectorOfIDs.push_back(gr); + data.mVectorOfIDs.push_back(gr); if (j == int(itsmft::CompCluster::InvalidPatternID - 1)) { LOGP(warning, "Limiting N unique topologies to {}, threshold freq. to {}, cumulative freq. 
to {} to be below InvalidPatternID", j, gr.mFrequency, totFreq); - mNCommonTopologies = j; - mFrequencyThreshold = gr.mFrequency; + ncommon = j; + freqthres = gr.mFrequency; break; } } @@ -193,8 +240,8 @@ void BuildTopologyDictionary::groupRareTopologies() // Create a structure for a group of rare topologies itsmft::GroupStruct gr; gr.mHash = (((unsigned long)(grNum)) << 32) & 0xffffffff00000000; - gr.mErrX = its3::TopologyDictionary::RowClassSpan / std::sqrt(12 * std::min(10, rowBinEdge)); - gr.mErrZ = its3::TopologyDictionary::ColClassSpan / std::sqrt(12 * std::min(10, colBinEdge)); + gr.mErrX = its3::TopologyDictionary::RowClassSpan / std::sqrt(12.f * (float)std::min(10, rowBinEdge)); + gr.mErrZ = its3::TopologyDictionary::ColClassSpan / std::sqrt(12.f * (float)std::min(10, colBinEdge)); gr.mErr2X = gr.mErrX * gr.mErrX; gr.mErr2Z = gr.mErrZ * gr.mErrZ; gr.mXCOG = 0; @@ -228,58 +275,65 @@ void BuildTopologyDictionary::groupRareTopologies() int rs{}, cs{}, index{}; // Updating the counts for the groups of rare topologies - for (auto j{mNCommonTopologies}; j < mTopologyFrequency.size(); j++) { - unsigned long hash1 = mTopologyFrequency[j].second; - rs = mTopologyMap.find(hash1)->second.topology.getRowSpan(); - cs = mTopologyMap.find(hash1)->second.topology.getColumnSpan(); + for (auto j{ncommon}; j < tfreq.size(); j++) { + unsigned long hash1 = tfreq[j].second; + rs = tstat.find(hash1)->second.topology.getRowSpan(); + cs = tstat.find(hash1)->second.topology.getColumnSpan(); index = its3::LookUp::groupFinder(rs, cs); - tmp_GroupMap[index].second += mTopologyFrequency[j].first; + tmp_GroupMap[index].second += tfreq[j].first; } for (auto&& p : tmp_GroupMap) { itsmft::GroupStruct& group = p.second.first; - group.mFrequency = ((double)p.second.second) / mTotClusters; - mDictionary.mVectorOfIDs.push_back(group); + group.mFrequency = ((double)p.second.second) / ntot; + data.mVectorOfIDs.push_back(group); } // Sorting the dictionary preserving all unique topologies - 
std::sort(mDictionary.mVectorOfIDs.begin(), mDictionary.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { + std::sort(data.mVectorOfIDs.begin(), data.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { return (!a.mIsGroup) && b.mIsGroup ? true : (a.mIsGroup && (!b.mIsGroup) ? false : (a.mFrequency > b.mFrequency)); }); - if (mDictionary.mVectorOfIDs.size() >= itsmft::CompCluster::InvalidPatternID - 1) { + if (data.mVectorOfIDs.size() >= itsmft::CompCluster::InvalidPatternID - 1) { LOGP(warning, "Max allowed {} patterns is reached, stopping", itsmft::CompCluster::InvalidPatternID - 1); - mDictionary.mVectorOfIDs.resize(itsmft::CompCluster::InvalidPatternID - 1); + data.mVectorOfIDs.resize(itsmft::CompCluster::InvalidPatternID - 1); } // Sorting the dictionary to final form - std::sort(mDictionary.mVectorOfIDs.begin(), mDictionary.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { return a.mFrequency > b.mFrequency; }); + std::sort(data.mVectorOfIDs.begin(), data.mVectorOfIDs.end(), [](const itsmft::GroupStruct& a, const itsmft::GroupStruct& b) { return a.mFrequency > b.mFrequency; }); // Creating the map for common topologies - for (int iKey = 0; iKey < mDictionary.getSize(); iKey++) { - itsmft::GroupStruct& gr = mDictionary.mVectorOfIDs[iKey]; + for (int iKey = 0; iKey < data.mVectorOfIDs.size(); iKey++) { + itsmft::GroupStruct& gr = data.mVectorOfIDs[iKey]; if (!gr.mIsGroup) { - mDictionary.mCommonMap.emplace(gr.mHash, iKey); + data.mCommonMap.emplace(gr.mHash, iKey); if (gr.mPattern.getUsedBytes() == 1) { - mDictionary.mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = iKey; + data.mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = iKey; } } else { - mDictionary.mGroupMap.emplace((int)(gr.mHash >> 32) & 0x00000000ffffffff, iKey); + data.mGroupMap.emplace((int)(gr.mHash >> 32) & 
0x00000000ffffffff, iKey); } } - LOG(info) << "Dictionay finalised"; - LOG(info) << "Number of keys: " << mDictionary.getSize(); - LOG(info) << "Number of common topologies: " << mDictionary.mCommonMap.size(); - LOG(info) << "Number of groups of rare topologies: " << mDictionary.mGroupMap.size(); } std::ostream& operator<<(std::ostream& os, const BuildTopologyDictionary& DB) { - for (unsigned int i = 0; i < DB.mNCommonTopologies; i++) { - const unsigned long& hash = DB.mTopologyFrequency[i].second; + os << "--- InnerBarrel\n"; + for (unsigned int i = 0; i < DB.mNCommonTopologiesIB; i++) { + const unsigned long& hash = DB.mTopologyFrequencyIB[i].second; + os << "Hash: " << hash << '\n'; + os << "counts: " << DB.mTopologyMapIB.find(hash)->second.countsTotal; + os << " (with bias provided: " << DB.mTopologyMapIB.find(hash)->second.countsWithBias << ")" << '\n'; + os << "sigmaX: " << std::sqrt(DB.mMapInfoIB.find(hash)->second.mXsigma2) << '\n'; + os << "sigmaZ: " << std::sqrt(DB.mMapInfoIB.find(hash)->second.mZsigma2) << '\n'; + os << DB.mTopologyMapIB.find(hash)->second.topology; + } + os << "--- OuterBarrel\n"; + for (unsigned int i = 0; i < DB.mNCommonTopologiesOB; i++) { + const unsigned long& hash = DB.mTopologyFrequencyOB[i].second; os << "Hash: " << hash << '\n'; - os << "counts: " << DB.mTopologyMap.find(hash)->second.countsTotal; - os << " (with bias provided: " << DB.mTopologyMap.find(hash)->second.countsWithBias << ")" << '\n'; - os << "sigmaX: " << std::sqrt(DB.mMapInfo.find(hash)->second.mXsigma2) << '\n'; - os << "sigmaZ: " << std::sqrt(DB.mMapInfo.find(hash)->second.mZsigma2) << '\n'; - os << DB.mTopologyMap.find(hash)->second.topology; + os << "counts: " << DB.mTopologyMapOB.find(hash)->second.countsTotal; + os << " (with bias provided: " << DB.mTopologyMapOB.find(hash)->second.countsWithBias << ")" << '\n'; + os << "sigmaX: " << std::sqrt(DB.mMapInfoOB.find(hash)->second.mXsigma2) << '\n'; + os << "sigmaZ: " << 
std::sqrt(DB.mMapInfoOB.find(hash)->second.mZsigma2) << '\n'; + os << DB.mTopologyMapOB.find(hash)->second.topology; } return os; } diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx index 90f5245bcef58..bce17b3759340 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/Clusterer.cxx @@ -12,15 +12,14 @@ /// \file Clusterer.cxx /// \brief Implementation of the ITS cluster finder -#include "ITS3Reconstruction/Clusterer.h" +#include -#include -#include "Framework/Logger.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Reconstruction/Clusterer.h" +#include "ITS3Base/SegmentationMosaix.h" #include "SimulationDataFormat/MCTruthContainer.h" #include "CommonDataFormat/InteractionRecord.h" -#include +#include "TTree.h" #ifdef WITH_OPENMP #include @@ -252,7 +251,7 @@ void Clusterer::ClustererThread::finishChip(ChipPixelData* curChipData, CompClus preClusterIndices[i2] = -1; } if (bbox.isAcceptableSize()) { - parent->streamCluster(pixArrBuff, &labelsBuff, bbox, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab); + parent->streamCluster(pixArrBuff, &labelsBuff, bbox, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab, constants::detID::isDetITS3(curChipData->getChipID())); } else { auto warnLeft = MaxHugeClusWarn - parent->mNHugeClus; if (warnLeft > 0) { @@ -278,7 +277,7 @@ void Clusterer::ClustererThread::finishChip(ChipPixelData* curChipData, CompClus } } if (!pixbuf.empty()) { // Stream a piece of cluster only if the reduced bounding box is not empty - parent->streamCluster(pixbuf, &labelsBuff, bboxT, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab, true); + parent->streamCluster(pixbuf, &labelsBuff, bboxT, parent->mPattIdConverter, compClusPtr, patternsPtr, labelsClusPtr, nlab, constants::detID::isDetITS3(curChipData->getChipID()), true); 
pixbuf.clear(); } bboxT.rowMin = bboxT.rowMax + 1; @@ -305,10 +304,12 @@ void Clusterer::ClustererThread::finishChipSingleHitFast(uint32_t hit, ChipPixel } } + auto ib = constants::detID::isDetITS3(curChipData->getChipID()); + // add to compact clusters, which must be always filled unsigned char patt[ClusterPattern::MaxPatternBytes]{0x1 << (7 - (0 % 8))}; // unrolled 1 hit version of full loop in finishChip - uint16_t pattID = (parent->mPattIdConverter.size() == 0) ? CompCluster::InvalidPatternID : parent->mPattIdConverter.findGroupID(1, 1, patt); - if ((pattID == CompCluster::InvalidPatternID || parent->mPattIdConverter.isGroup(pattID)) && patternsPtr) { + uint16_t pattID = (parent->mPattIdConverter.size(ib) == 0) ? CompCluster::InvalidPatternID : parent->mPattIdConverter.findGroupID(1, 1, ib, patt); + if ((pattID == CompCluster::InvalidPatternID || parent->mPattIdConverter.isGroup(pattID, ib)) && patternsPtr) { patternsPtr->emplace_back(1); // rowspan patternsPtr->emplace_back(1); // colspan patternsPtr->insert(patternsPtr->end(), std::begin(patt), std::begin(patt) + 1); @@ -334,7 +335,7 @@ void Clusterer::ClustererThread::initChip(const ChipPixelData* curChipData, uint size = itsmft::SegmentationAlpide::NRows + 2; int chipId = curChipData->getChipID(); if (its3::constants::detID::isDetITS3(chipId)) { - size = its3::SegmentationSuperAlpide::mNRows + 2; + size = its3::SegmentationMosaix::NRows + 2; } delete[] column1; diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx index 50e651f7f5675..58dd56ac41f95 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/IOUtils.cxx @@ -16,8 +16,6 @@ #include "DataFormatsITSMFT/ROFRecord.h" #include "ITS3Reconstruction/TopologyDictionary.h" #include "ITSBase/GeometryTGeo.h" -#include "ITSMFTBase/SegmentationAlpide.h" -#include "ITS3Base/SegmentationSuperAlpide.h" #include "ITS3Base/SpecsV2.h" 
#include "ITStracking/TrackingConfigParam.h" #include "Framework/Logger.h" @@ -80,7 +78,6 @@ int loadROFrameDataITS3(its::TimeFrame* tf, auto isITS3 = its3::constants::detID::isDetITS3(sensorID); auto layer = geom->getLayer(sensorID); - auto pattID = c.getPatternID(); float sigmaY2{0}, sigmaZ2{0}, sigmaYZ{0}; uint8_t clusterSize{0}; auto locXYZ = extractClusterData(c, pattIt, dict, sigmaY2, sigmaZ2, clusterSize); diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h b/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h index f19a7fcaba9ca..2ebd89970d9a1 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h +++ b/Detectors/Upgrades/ITS3/reconstruction/src/ITS3ReconstructionLinkDef.h @@ -16,6 +16,7 @@ #pragma link off all functions; #pragma link C++ class o2::its3::Clusterer + ; +#pragma link C++ class o2::its3::TopologyDictionaryData + ; #pragma link C++ class o2::its3::TopologyDictionary + ; #pragma link C++ class o2::its3::BuildTopologyDictionary + ; #pragma link C++ class o2::its3::LookUp + ; diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx index caabfa6f2decb..e137e091dc631 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/LookUp.cxx @@ -31,7 +31,8 @@ LookUp::LookUp(std::string fileName) void LookUp::loadDictionary(std::string fileName) { mDictionary.readFromFile(fileName); - mTopologiesOverThreshold = mDictionary.mCommonMap.size(); + mTopologiesOverThresholdIB = mDictionary.mDataIB.mCommonMap.size(); + mTopologiesOverThresholdOB = mDictionary.mDataOB.mCommonMap.size(); } void LookUp::setDictionary(const its3::TopologyDictionary* dict) @@ -39,7 +40,8 @@ void LookUp::setDictionary(const its3::TopologyDictionary* dict) if (dict != nullptr) { mDictionary = *dict; } - mTopologiesOverThreshold = mDictionary.mCommonMap.size(); + 
mTopologiesOverThresholdIB = mDictionary.mDataIB.mCommonMap.size(); + mTopologiesOverThresholdOB = mDictionary.mDataOB.mCommonMap.size(); } int LookUp::groupFinder(int nRow, int nCol) @@ -61,25 +63,26 @@ int LookUp::groupFinder(int nRow, int nCol) return grNum; } -int LookUp::findGroupID(int nRow, int nCol, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const +int LookUp::findGroupID(int nRow, int nCol, bool IB, const unsigned char patt[itsmft::ClusterPattern::MaxPatternBytes]) const { + const auto& data = (IB) ? mDictionary.mDataIB : mDictionary.mDataOB; int nBits = nRow * nCol; if (nBits < 9) { // Small unique topology - int ID = mDictionary.mSmallTopologiesLUT[(nCol - 1) * 255 + (int)patt[0]]; + int ID = data.mSmallTopologiesLUT[(nCol - 1) * 255 + (int)patt[0]]; if (ID >= 0) { return ID; } } else { // Big unique topology unsigned long hash = itsmft::ClusterTopology::getCompleteHash(nRow, nCol, patt); - auto ret = mDictionary.mCommonMap.find(hash); - if (ret != mDictionary.mCommonMap.end()) { + auto ret = data.mCommonMap.find(hash); + if (ret != data.mCommonMap.end()) { return ret->second; } } - if (!mDictionary.mGroupMap.empty()) { // rare valid topology group + if (!data.mGroupMap.empty()) { // rare valid topology group int index = groupFinder(nRow, nCol); - auto res = mDictionary.mGroupMap.find(index); - return res == mDictionary.mGroupMap.end() ? itsmft::CompCluster::InvalidPatternID : res->second; + auto res = data.mGroupMap.find(index); + return res == data.mGroupMap.end() ? 
itsmft::CompCluster::InvalidPatternID : res->second; } return itsmft::CompCluster::InvalidPatternID; } diff --git a/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx b/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx index 66a4b0a6878cd..61ab051ffb565 100644 --- a/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx +++ b/Detectors/Upgrades/ITS3/reconstruction/src/TopologyDictionary.cxx @@ -12,7 +12,7 @@ /// \file TopologyDictionary.cxx #include "ITS3Reconstruction/TopologyDictionary.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" #include "ITSMFTBase/SegmentationAlpide.h" #include "CommonUtils/StringUtils.h" #include @@ -23,9 +23,16 @@ ClassImp(o2::its3::TopologyDictionary); namespace o2::its3 { +void TopologyDictionaryData::print() const noexcept +{ + LOG(info) << "Number of keys: " << mVectorOfIDs.size(); + LOG(info) << "Number of common topologies: " << mCommonMap.size(); + LOG(info) << "Number of groups of rare topologies: " << mGroupMap.size(); +} + TopologyDictionary::TopologyDictionary() { - memset(mSmallTopologiesLUT, -1, STopoSize * sizeof(int)); + reset(); } TopologyDictionary::TopologyDictionary(const std::string& fileName) @@ -33,10 +40,43 @@ TopologyDictionary::TopologyDictionary(const std::string& fileName) readFromFile(fileName); } +void TopologyDictionary::print() const noexcept +{ + LOG(info) << "ITS3 TopologyDictionary"; + LOG(info) << "InnerBarrel"; + mDataIB.print(); + LOG(info) << "OuterBarrel"; + mDataOB.print(); +} + +void TopologyDictionary::reset() noexcept +{ + mDataIB.mSmallTopologiesLUT.fill(-1); + mDataOB.mSmallTopologiesLUT.fill(-1); + mDataIB.mVectorOfIDs.clear(); + mDataOB.mVectorOfIDs.clear(); +} + +void TopologyDictionary::resetMaps(bool IB) noexcept +{ + auto& data = (IB) ? 
mDataIB : mDataOB; + data.mCommonMap.clear(); + data.mGroupMap.clear(); +} + std::ostream& operator<<(std::ostream& os, const its3::TopologyDictionary& dict) { int ID = 0; - for (auto& p : dict.mVectorOfIDs) { + os << "--- InnerBarrel:\n"; + for (auto& p : dict.mDataIB.mVectorOfIDs) { + os << "ID: " << ID++ << " Hash: " << p.mHash << " ErrX: " << p.mErrX << " ErrZ : " << p.mErrZ << " xCOG: " << p.mXCOG << " zCOG: " << p.mZCOG << " Npixles: " << p.mNpixels << " Frequency: " << p.mFrequency << " isGroup : " << std::boolalpha << p.mIsGroup << '\n' + << p.mPattern << '\n' + << "*********************************************************" << '\n' + << '\n'; + } + ID = 0; + os << "--- OuterBarrel:\n"; + for (auto& p : dict.mDataOB.mVectorOfIDs) { os << "ID: " << ID++ << " Hash: " << p.mHash << " ErrX: " << p.mErrX << " ErrZ : " << p.mErrZ << " xCOG: " << p.mXCOG << " zCOG: " << p.mZCOG << " Npixles: " << p.mNpixels << " Frequency: " << p.mFrequency << " isGroup : " << std::boolalpha << p.mIsGroup << '\n' << p.mPattern << '\n' << "*********************************************************" << '\n' @@ -48,24 +88,36 @@ std::ostream& operator<<(std::ostream& os, const its3::TopologyDictionary& dict) void TopologyDictionary::writeBinaryFile(const std::string& outputfile) { std::ofstream file_output(outputfile, std::ios::out | std::ios::binary); - for (auto& p : mVectorOfIDs) { - file_output.write(reinterpret_cast(&p.mHash), sizeof(unsigned long)); - file_output.write(reinterpret_cast(&p.mErrX), sizeof(float)); - file_output.write(reinterpret_cast(&p.mErrZ), sizeof(float)); - file_output.write(reinterpret_cast(&p.mErr2X), sizeof(float)); - file_output.write(reinterpret_cast(&p.mErr2Z), sizeof(float)); - file_output.write(reinterpret_cast(&p.mXCOG), sizeof(float)); - file_output.write(reinterpret_cast(&p.mZCOG), sizeof(float)); - file_output.write(reinterpret_cast(&p.mNpixels), sizeof(int)); - file_output.write(reinterpret_cast(&p.mFrequency), sizeof(double)); - 
file_output.write(reinterpret_cast(&p.mIsGroup), sizeof(bool)); - file_output.write(const_cast(reinterpret_cast(&p.mPattern.getPattern())), - sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); + if (!file_output) { + throw std::runtime_error(fmt::format("Cannot open output file %s!", outputfile)); } + + auto writeData = [](auto& file_output, auto& data) { + auto size = data.mVectorOfIDs.size(); + file_output.write(reinterpret_cast(&size), sizeof(size)); + for (auto& p : data.mVectorOfIDs) { + file_output.write(reinterpret_cast(&p.mHash), sizeof(unsigned long)); + file_output.write(reinterpret_cast(&p.mErrX), sizeof(float)); + file_output.write(reinterpret_cast(&p.mErrZ), sizeof(float)); + file_output.write(reinterpret_cast(&p.mErr2X), sizeof(float)); + file_output.write(reinterpret_cast(&p.mErr2Z), sizeof(float)); + file_output.write(reinterpret_cast(&p.mXCOG), sizeof(float)); + file_output.write(reinterpret_cast(&p.mZCOG), sizeof(float)); + file_output.write(reinterpret_cast(&p.mNpixels), sizeof(int)); + file_output.write(reinterpret_cast(&p.mFrequency), sizeof(double)); + file_output.write(reinterpret_cast(&p.mIsGroup), sizeof(bool)); + file_output.write(const_cast(reinterpret_cast(&p.mPattern.getPattern())), + sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); + } + }; + + writeData(file_output, mDataIB); + writeData(file_output, mDataOB); + file_output.close(); } -int TopologyDictionary::readFromFile(const std::string& fname) +void TopologyDictionary::readFromFile(const std::string& fname) { LOGP(info, "Reading TopologyDictionary from File '{}'", fname); if (o2::utils::Str::endsWith(fname, ".root")) { @@ -76,59 +128,63 @@ int TopologyDictionary::readFromFile(const std::string& fname) } else { throw std::runtime_error(fmt::format("Unrecognized format {}", fname)); } - return 0; } -int TopologyDictionary::readBinaryFile(const std::string& fname) +void TopologyDictionary::readBinaryFile(const std::string& fname) { - 
mVectorOfIDs.clear(); - mCommonMap.clear(); - for (auto& p : mSmallTopologiesLUT) { - p = -1; - } + reset(); + std::ifstream in(fname.data(), std::ios::in | std::ios::binary); - itsmft::GroupStruct gr; - int groupID = 0; if (!in.is_open()) { LOG(error) << "The file " << fname << " coud not be opened"; throw std::runtime_error("The file coud not be opened"); } else { - while (in.read(reinterpret_cast(&gr.mHash), sizeof(unsigned long))) { - in.read(reinterpret_cast(&gr.mErrX), sizeof(float)); - in.read(reinterpret_cast(&gr.mErrZ), sizeof(float)); - in.read(reinterpret_cast(&gr.mErr2X), sizeof(float)); - in.read(reinterpret_cast(&gr.mErr2Z), sizeof(float)); - in.read(reinterpret_cast(&gr.mXCOG), sizeof(float)); - in.read(reinterpret_cast(&gr.mZCOG), sizeof(float)); - in.read(reinterpret_cast(&gr.mNpixels), sizeof(int)); - in.read(reinterpret_cast(&gr.mFrequency), sizeof(double)); - in.read(reinterpret_cast(&gr.mIsGroup), sizeof(bool)); - in.read(const_cast(reinterpret_cast(&gr.mPattern.getPattern())), sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); - mVectorOfIDs.push_back(gr); - if (!gr.mIsGroup) { - mCommonMap.insert(std::make_pair(gr.mHash, groupID)); - if (gr.mPattern.getUsedBytes() == 1) { - mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = groupID; + + auto readData = [](auto& in, auto& data) { + int groupID = 0; + std::size_t size{}, cur{}; + itsmft::GroupStruct gr; + in.read(reinterpret_cast(&size), sizeof(std::size_t)); + while (cur++ != size) { + in.read(reinterpret_cast(&gr.mHash), sizeof(unsigned long)); + in.read(reinterpret_cast(&gr.mErrX), sizeof(float)); + in.read(reinterpret_cast(&gr.mErrZ), sizeof(float)); + in.read(reinterpret_cast(&gr.mErr2X), sizeof(float)); + in.read(reinterpret_cast(&gr.mErr2Z), sizeof(float)); + in.read(reinterpret_cast(&gr.mXCOG), sizeof(float)); + in.read(reinterpret_cast(&gr.mZCOG), sizeof(float)); + in.read(reinterpret_cast(&gr.mNpixels), sizeof(int)); + 
in.read(reinterpret_cast(&gr.mFrequency), sizeof(double)); + in.read(reinterpret_cast(&gr.mIsGroup), sizeof(bool)); + in.read(const_cast(reinterpret_cast(&gr.mPattern.getPattern())), sizeof(unsigned char) * (itsmft::ClusterPattern::kExtendedPatternBytes)); + data.mVectorOfIDs.push_back(gr); + if (!gr.mIsGroup) { + data.mCommonMap.insert(std::make_pair(gr.mHash, groupID)); + if (gr.mPattern.getUsedBytes() == 1) { + data.mSmallTopologiesLUT[(gr.mPattern.getColumnSpan() - 1) * 255 + (int)gr.mPattern.getByte(2)] = groupID; + } + } else { + data.mGroupMap.insert(std::make_pair((int)(gr.mHash >> 32) & 0x00000000ffffffff, groupID)); } - } else { - mGroupMap.insert(std::make_pair((int)(gr.mHash >> 32) & 0x00000000ffffffff, groupID)); + groupID++; } - groupID++; - } + }; + + readData(in, mDataIB); + readData(in, mDataOB); } in.close(); - return 0; } -TH1F* TopologyDictionary::getTopologyDistribution(const std::string_view hname) const +TH1F* TopologyDictionary::getTopologyDistribution(const std::string_view hname, bool IB) const { - int dictSize = getSize(); - auto* histo = new TH1F(hname.data(), ";Topology ID;Frequency", dictSize, -0.5, dictSize - 0.5); + int dictSize = getSize(IB); + auto* histo = new TH1F(hname.data(), Form("%s;Topology ID;Frequency", (IB) ? 
"InnerBarrel" : "OuterBarrel"), dictSize, -0.5, dictSize - 0.5); histo->SetFillColor(kRed); histo->SetFillStyle(3005); histo->SetDrawOption("histo"); for (int i = 0; i < dictSize; i++) { - histo->Fill(i, getFrequency(i)); + histo->Fill(i, getFrequency(i, IB)); } return histo; } @@ -136,18 +192,19 @@ TH1F* TopologyDictionary::getTopologyDistribution(const std::string_view hname) template math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::CompClusterExt& cl) const { + static std::array mIBSegmentations{0, 1, 2}; math_utils::Point3D locCl; if (!its3::constants::detID::isDetITS3(cl.getSensorID())) { o2::itsmft::SegmentationAlpide::detectorToLocalUnchecked(cl.getRow(), cl.getCol(), locCl); - locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID()) * itsmft::SegmentationAlpide::PitchRow); - locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID()) * itsmft::SegmentationAlpide::PitchCol); + locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID(), false) * itsmft::SegmentationAlpide::PitchRow); + locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID(), false) * itsmft::SegmentationAlpide::PitchCol); } else { auto layer = its3::constants::detID::getDetID2Layer(cl.getSensorID()); - its3::SuperSegmentations[layer].detectorToLocalUnchecked(cl.getRow(), cl.getCol(), locCl); - locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID()) * its3::SegmentationSuperAlpide::mPitchRow); - locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID()) * its3::SegmentationSuperAlpide::mPitchCol); + mIBSegmentations[layer].detectorToLocalUnchecked(cl.getRow(), cl.getCol(), locCl); + locCl.SetX(locCl.X() + this->getXCOG(cl.getPatternID(), true) * its3::SegmentationMosaix::PitchRow); + locCl.SetZ(locCl.Z() + this->getZCOG(cl.getPatternID(), true) * its3::SegmentationMosaix::PitchCol); float xCurved{0.f}, yCurved{0.f}; - its3::SuperSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, yCurved); + mIBSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, 
yCurved); locCl.SetXYZ(xCurved, yCurved, locCl.Z()); } return locCl; @@ -156,6 +213,7 @@ math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::C template math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::CompClusterExt& cl, const itsmft::ClusterPattern& patt, bool isGroup) { + static std::array mIBSegmentations{0, 1, 2}; auto refRow = cl.getRow(); auto refCol = cl.getCol(); float xCOG = 0, zCOG = 0; @@ -169,9 +227,9 @@ math_utils::Point3D TopologyDictionary::getClusterCoordinates(const itsmft::C o2::itsmft::SegmentationAlpide::detectorToLocalUnchecked(refRow + xCOG, refCol + zCOG, locCl); } else { auto layer = its3::constants::detID::getDetID2Layer(cl.getSensorID()); - its3::SuperSegmentations[layer].detectorToLocalUnchecked(refRow + xCOG, refCol + zCOG, locCl); + mIBSegmentations[layer].detectorToLocalUnchecked(refRow + xCOG, refCol + zCOG, locCl); float xCurved{0.f}, yCurved{0.f}; - its3::SuperSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, yCurved); + mIBSegmentations[layer].flatToCurved(locCl.X(), locCl.Y(), xCurved, yCurved); locCl.SetXYZ(xCurved, yCurved, locCl.Z()); } return locCl; diff --git a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt index 17a8fd2748b87..2fad72a96426d 100644 --- a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt @@ -14,6 +14,7 @@ o2_add_library(ITS3Simulation src/ITS3Services.cxx src/DescriptorInnerBarrelITS3.cxx src/Digitizer.cxx + src/DigiParams.cxx PUBLIC_LINK_LIBRARIES O2::SimulationDataFormat O2::ITSBase O2::ITSMFTSimulation ROOT::Physics) @@ -23,6 +24,7 @@ o2_target_root_dictionary(ITS3Simulation include/ITS3Simulation/ITS3Services.h include/ITS3Simulation/DescriptorInnerBarrelITS3.h include/ITS3Simulation/Digitizer.h + include/ITS3Simulation/DigiParams.h ) -o2_data_file(COPY data DESTINATION Detectors/ITS3/simulation) \ No newline at end of file 
+o2_data_file(COPY data DESTINATION Detectors/ITS3/simulation) diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h index 80536a14d99c2..d1b54f81face4 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DescriptorInnerBarrelITS3.h @@ -45,9 +45,9 @@ class DescriptorInnerBarrelITS3 : public o2::its::DescriptorInnerBarrel int mNumLayers{constants::nLayers}; // wrapper volume properties - double mWrapperMinRadiusITS3{1.8}; - double mWrapperMaxRadiusITS3{4.}; - double mWrapperZSpanITS3{20.}; + static constexpr double mWrapperMinRadiusITS3{1.8}; + static constexpr double mWrapperMaxRadiusITS3{4.}; + static constexpr double mWrapperZSpanITS3{constants::segment::length + 5.}; private: std::array, constants::nLayers> mIBLayers; diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h new file mode 100644 index 0000000000000..eca0a71949ba7 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h @@ -0,0 +1,45 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef ITS3_DIGIPARAMS_H +#define ITS3_DIGIPARAMS_H + +#include "ITSMFTSimulation/DigiParams.h" + +namespace o2::its3 +{ + +class DigiParams final : public o2::itsmft::DigiParams +{ + public: + const o2::itsmft::AlpideSimResponse* getAlpSimResponse() const = delete; + void setAlpSimResponse(const o2::itsmft::AlpideSimResponse* par) = delete; + + const o2::itsmft::AlpideSimResponse* getOBSimResponse() const { return mOBSimResponse; } + void setOBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mOBSimResponse = response; } + + const o2::itsmft::AlpideSimResponse* getIBSimResponse() const { return mIBSimResponse; } + void setIBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mIBSimResponse = response; } + + bool hasResponseFunctions() const { return mIBSimResponse != nullptr && mOBSimResponse != nullptr; } + + void print() const final; + + private: + const o2::itsmft::AlpideSimResponse* mOBSimResponse = nullptr; //!< pointer to external response + const o2::itsmft::AlpideSimResponse* mIBSimResponse = nullptr; //!< pointer to external response + + ClassDef(DigiParams, 1); +}; + +} // namespace o2::its3 + +#endif diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h index 7ece842b6f61f..8d0f06a27343b 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h @@ -18,15 +18,15 @@ #include #include -#include "Rtypes.h" // for Digitizer::Class -#include "TObject.h" // for TObject +#include "Rtypes.h" +#include "TObject.h" #include "ITSMFTSimulation/ChipDigitsContainer.h" #include "ITSMFTSimulation/AlpideSimResponse.h" -#include "ITSMFTSimulation/DigiParams.h" #include "ITSMFTSimulation/Hit.h" #include "ITSBase/GeometryTGeo.h" -#include "ITS3Base/SegmentationSuperAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" +#include 
"ITS3Simulation/DigiParams.h" #include "DataFormatsITSMFT/Digit.h" #include "DataFormatsITSMFT/ROFRecord.h" #include "CommonDataFormat/InteractionRecord.h" @@ -35,6 +35,7 @@ namespace o2::its3 { + class Digitizer : public TObject { using ExtraDig = std::vector; ///< container for extra contributions to PreDigits @@ -44,8 +45,8 @@ class Digitizer : public TObject void setMCLabels(o2::dataformats::MCTruthContainer* mclb) { mMCLabels = mclb; } void setROFRecords(std::vector* rec) { mROFRecords = rec; } - o2::itsmft::DigiParams& getParams() { return (o2::itsmft::DigiParams&)mParams; } - const o2::itsmft::DigiParams& getParams() const { return mParams; } + o2::its3::DigiParams& getParams() { return mParams; } + const o2::its3::DigiParams& getParams() const { return mParams; } void init(); @@ -62,9 +63,6 @@ class Digitizer : public TObject bool isContinuous() const { return mParams.isContinuous(); } void fillOutputContainer(uint32_t maxFrame = 0xffffffff); - void setDigiParams(const o2::itsmft::DigiParams& par) { mParams = par; } - const o2::itsmft::DigiParams& getDigitParams() const { return mParams; } - // provide the common itsmft::GeometryTGeo to access matrices and segmentation void setGeometry(const o2::its::GeometryTGeo* gm) { mGeometry = gm; } @@ -97,7 +95,7 @@ class Digitizer : public TObject static constexpr float sec2ns = 1e9; - o2::itsmft::DigiParams mParams; ///< digitization parameters + o2::its3::DigiParams mParams; ///< digitization parameters o2::InteractionTimeRecord mEventTime; ///< global event time and interaction record o2::InteractionRecord mIRFirstSampledTF; ///< IR of the 1st sampled IR, noise-only ROFs will be inserted till this IR only double mCollisionTimeWrtROF{}; @@ -108,7 +106,15 @@ class Digitizer : public TObject uint32_t mEventROFrameMin = 0xffffffff; ///< lowest RO frame for processed events (w/o automatic noise ROFs) uint32_t mEventROFrameMax = 0; ///< highest RO frame forfor processed events (w/o automatic noise ROFs) - 
o2::itsmft::AlpideSimResponse* mAlpSimResp = nullptr; // simulated response + static constexpr std::array mIBSegmentations{0, 1, 2}; + + o2::itsmft::AlpideSimResponse* mSimRespIB = nullptr; // simulated response for IB + o2::itsmft::AlpideSimResponse* mSimRespOB = nullptr; // simulated response for OB + bool mSimRespIBOrientation{false}; // wether the orientation in the IB response function is flipped + float mSimRespIBShift{0.f}; // adjusting the Y-shift in the IB response function to match sensor local coord. + float mSimRespIBScaleX{1.f}; // scale x-local coordinate to response function x-coordinate + float mSimRespIBScaleZ{1.f}; // scale z-local coordinate to response function z-coordinate + float mSimRespOBShift{0.f}; // adjusting the Y-shift in the OB response function to match sensor local coord. const o2::its::GeometryTGeo* mGeometry = nullptr; ///< ITS3 geometry @@ -121,8 +127,9 @@ class Digitizer : public TObject const o2::itsmft::NoiseMap* mDeadChanMap = nullptr; - ClassDef(Digitizer, 4); + ClassDef(Digitizer, 5); }; + } // namespace o2::its3 #endif /* ALICEO2_ITS3_DIGITIZER_H */ diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h index 7543650e04a71..fd9195f9ee228 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3Layer.h @@ -20,13 +20,12 @@ #include #include -#include "Framework/Logger.h" -#include +#include "ITS3Base/SpecsV2.h" namespace o2::its3 { -/// This class defines the Geometry for the ITS3 using TGeo. +/// This class defines the geometry for the ITS3 IB layers. class ITS3Layer { // The hierarchy will be the following: @@ -45,8 +44,8 @@ class ITS3Layer kTile, kRSU, kSegment, - kCarbonForm, kChip, + kCarbonForm, kLayer, kAll, }; @@ -56,11 +55,10 @@ class ITS3Layer return mNames[static_cast((b == BuildLevel::kAll) ? 
BuildLevel::kLayer : b)]; } - explicit ITS3Layer(int layer = 0) : mNLayer(layer) - { - LOGP(debug, "Called on {} layer {}", layer, mNLayer); - init(); - } + explicit ITS3Layer(int layer = 0) : mNLayer(layer), + mR(o2::its3::constants::radii[mNLayer]), + mRmin(o2::its3::constants::radiiInner[mNLayer]), + mRmax(o2::its3::constants::radiiOuter[mNLayer]) {} explicit ITS3Layer(TGeoVolume* motherVolume, int layer = 0) : ITS3Layer(layer) { @@ -82,6 +80,7 @@ class ITS3Layer TGeoMedium* mSilicon{nullptr}; TGeoMedium* mAir{nullptr}; TGeoMedium* mCarbon{nullptr}; + TGeoMedium* mCopper{nullptr}; void getMaterials(bool create = false); TGeoMedium* getMaterial(const char* matName, bool create = false); @@ -97,10 +96,12 @@ class ITS3Layer uint8_t mNLayer{0}; // Layer number double mR{0}; // Middle Radius - double mRmin{}; // Minimum Radius + double mRmin{0}; // Minimum Radius double mRmax{0}; // Maximum Radius - // Individual Pieces + // Individual pieces + // since TGeo manages the resources itself one should not use these pointers + // after initializition anymore! 
TGeoVolume* mPixelArray{nullptr}; TGeoVolumeAssembly* mTile{nullptr}; TGeoVolumeAssembly* mRSU{nullptr}; @@ -109,7 +110,7 @@ class ITS3Layer TGeoVolumeAssembly* mCarbonForm{nullptr}; TGeoVolumeAssembly* mLayer{nullptr}; - ClassDef(ITS3Layer, 2); + ClassDef(ITS3Layer, 3); }; } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx b/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx index 6d1bc621b5287..540e1d41f1c62 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/DescriptorInnerBarrelITS3.cxx @@ -18,14 +18,14 @@ ClassImp(DescriptorInnerBarrelITS3); void DescriptorInnerBarrelITS3::createLayer(int iLayer, TGeoVolume* dest) { - LOGP(info, "ITS3-IB: Creating Layer {}", iLayer); + LOGP(debug, "ITS3-IB: Creating Layer {}", iLayer); mIBLayers[iLayer] = std::make_unique(iLayer); mIBLayers[iLayer]->createLayer(dest); } void DescriptorInnerBarrelITS3::createServices(TGeoVolume* dest) { - LOGP(info, "ITS3-IB: Creating Services"); + LOGP(debug, "ITS3-IB: Creating Services"); mServices = std::make_unique(); mServices->createCYSSAssembly(dest); } diff --git a/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx new file mode 100644 index 0000000000000..a9f17a544b3c4 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx @@ -0,0 +1,40 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file DigiParams.cxx +/// \brief Implementation of the ITS3 digitization steering params + +#include "Framework/Logger.h" +#include "ITS3Simulation/DigiParams.h" + +ClassImp(o2::its3::DigiParams); + +namespace o2::its3 +{ + +void DigiParams::print() const +{ + // print settings + LOGF(info, "ITS3 DigiParams settings:"); + LOGF(info, "Continuous readout : %s", isContinuous() ? "ON" : "OFF"); + LOGF(info, "Readout Frame Length(ns) : %f", getROFrameLength()); + LOGF(info, "Strobe delay (ns) : %f", getStrobeDelay()); + LOGF(info, "Strobe length (ns) : %f", getStrobeLength()); + LOGF(info, "Threshold (N electrons) : %d", getChargeThreshold()); + LOGF(info, "Min N electrons to account : %d", getMinChargeToAccount()); + LOGF(info, "Number of charge sharing steps : %d", getNSimSteps()); + LOGF(info, "ELoss to N electrons factor : %e", getEnergyToNElectrons()); + LOGF(info, "Noise level per pixel : %e", getNoisePerPixel()); + LOGF(info, "Charge time-response:\n"); + getSignalShape().print(); +} + +} // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx index f1519c1d04063..3c75bf3e8f680 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx @@ -14,6 +14,7 @@ #include "ITSMFTBase/SegmentationAlpide.h" #include "ITS3Simulation/Digitizer.h" +#include "ITS3Base/ITS3Params.h" #include "MathUtils/Cartesian.h" #include "SimulationDataFormat/MCTruthContainer.h" #include "DetectorsRaw/HBFUtils.h" @@ -21,12 +22,12 @@ #include "Framework/Logger.h" #include +#include #include #include using o2::itsmft::Hit; -using Segmentation = o2::itsmft::SegmentationAlpide; -using SuperSegmentation = o2::its3::SegmentationSuperAlpide; 
+using SegmentationAlpide = o2::itsmft::SegmentationAlpide; using o2::itsmft::AlpideRespSimMat; using o2::itsmft::PreDigit; @@ -44,14 +45,44 @@ void Digitizer::init() } } - if (mParams.getAlpSimResponse() == nullptr) { - std::string responseFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; - LOGP(info, "Loading AlpideSimRespnse from file: {}", responseFile); - auto file = TFile::Open(responseFile.data()); - mAlpSimResp = (o2::itsmft::AlpideSimResponse*)file->Get("response0"); // We use by default the alpide response for Vbb=0V - mParams.setAlpSimResponse(mAlpSimResp); + if (!mParams.hasResponseFunctions()) { + auto loadSetResponseFunc = [&](const char* name, const char* fileIB, const char* nameIB, const char* fileOB, const char* nameOB) { + LOGP(info, "Loading response function for {}: IB={}:{} ; OB={}:{}", name, nameIB, fileIB, nameOB, fileOB); + auto fIB = TFile::Open(fileIB, "READ"); + if (!fIB || fIB->IsZombie() || !fIB->IsOpen()) { + LOGP(fatal, "Cannot open file {}", fileIB); + } + auto fOB = TFile::Open(fileOB, "READ"); + if (!fOB || fOB->IsZombie() || !fOB->IsOpen()) { + LOGP(fatal, "Cannot open file {}", fileOB); + } + mParams.setIBSimResponse(mSimRespIB = fIB->Get(nameIB)); + mParams.setOBSimResponse(mSimRespOB = fOB->Get(nameOB)); + fIB->Close(); + fOB->Close(); + }; + + if (const auto& func = ITS3Params::Instance().chipResponseFunction; func == "Alpide") { + constexpr const char* responseFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; + loadSetResponseFunc("Alpide", responseFile, "response0", responseFile, "response1"); + mSimRespIBShift = mSimRespIB->getDepthMax() - SegmentationMosaix::SensorLayerThickness / 2.f + 10.e-4f; + mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; + } else if (func == "APTS") { + constexpr const char* responseFileIB = 
"$(O2_ROOT)/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/APTSResponseData.root"; + constexpr const char* responseFileOB = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; + loadSetResponseFunc("APTS", responseFileIB, "response1", responseFileOB, "response1"); + mSimRespIBShift = mSimRespIB->getDepthMax() + (float)constants::pixelarray::pixels::apts::responseYShift; + mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; + mSimRespIBScaleX = 0.5f * constants::pixelarray::pixels::apts::pitchX / SegmentationMosaix::PitchRow; + mSimRespIBScaleZ = 0.5f * constants::pixelarray::pixels::apts::pitchZ / SegmentationMosaix::PitchCol; + mSimRespIBOrientation = true; + } else { + LOGP(fatal, "ResponseFunction '{}' not implemented!", func); + } } mParams.print(); + LOGP(info, "IBShift = {} ; OBShift = {}", mSimRespIBShift, mSimRespOBShift); + LOGP(info, "IB-Scale: X={} ; Z={}", mSimRespIBScaleX, mSimRespIBScaleZ); mIRFirstSampledTF = o2::raw::HBFUtils::Instance().getFirstSampledTFIR(); } @@ -143,7 +174,7 @@ void Digitizer::fillOutputContainer(uint32_t frameLast) for (size_t iChip{0}; iChip < mChips.size(); ++iChip) { auto& chip = mChips[iChip]; if (constants::detID::isDetITS3(iChip)) { // Check if this is a chip of ITS3 - chip.addNoise(mROFrameMin, mROFrameMin, &mParams, SuperSegmentation::mNRows, SuperSegmentation::mNCols); + chip.addNoise(mROFrameMin, mROFrameMin, &mParams, SegmentationMosaix::NRows, SegmentationMosaix::NCols); } else { chip.addNoise(mROFrameMin, mROFrameMin, &mParams); } @@ -238,8 +269,8 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID if (innerBarrel) { // transform the point on the curved surface to a flat one float xFlatE{0.f}, yFlatE{0.f}, xFlatS{0.f}, yFlatS{0.f}; - SuperSegmentations[layer].curvedToFlat(xyzLocS.X(), xyzLocS.Y(), xFlatS, yFlatS); - SuperSegmentations[layer].curvedToFlat(xyzLocE.X(), xyzLocE.Y(), xFlatE, yFlatE); + 
mIBSegmentations[layer].curvedToFlat(xyzLocS.X(), xyzLocS.Y(), xFlatS, yFlatS); + mIBSegmentations[layer].curvedToFlat(xyzLocE.X(), xyzLocE.Y(), xFlatE, yFlatE); // update the local coordinates with the flattened ones xyzLocS.SetXYZ(xFlatS, yFlatS, xyzLocS.Z()); xyzLocE.SetXYZ(xFlatE, yFlatE, xyzLocE.Z()); @@ -255,14 +286,14 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID int rowS = -1, colS = -1, rowE = -1, colE = -1, nSkip = 0; if (innerBarrel) { // get entrance pixel row and col - while (!SuperSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + while (!mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } xyzLocS += step; } // get exit pixel row and col - while (!SuperSegmentations[layer].localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? + while (!mIBSegmentations[layer].localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } @@ -270,14 +301,14 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } } else { // get entrance pixel row and col - while (!Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + while (!SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } xyzLocS += step; } // get exit pixel row and col - while (!Segmentation::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? + while (!SegmentationAlpide::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? 
if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } @@ -294,23 +325,17 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } rowS -= AlpideRespSimMat::NPix / 2; rowE += AlpideRespSimMat::NPix / 2; - if (rowS < 0) { - rowS = 0; - } + rowS = std::max(rowS, 0); - int maxNrows{innerBarrel ? SuperSegmentation::mNRows : Segmentation::NRows}; - int maxNcols{innerBarrel ? SuperSegmentation::mNCols : Segmentation::NCols}; - if (rowE >= maxNrows) { - rowE = maxNrows - 1; - } + const int maxNrows{innerBarrel ? SegmentationMosaix::NRows : SegmentationAlpide::NRows}; + const int maxNcols{innerBarrel ? SegmentationMosaix::NCols : SegmentationAlpide::NCols}; + + rowE = std::min(rowE, maxNrows - 1); colS -= AlpideRespSimMat::NPix / 2; colE += AlpideRespSimMat::NPix / 2; - if (colS < 0) { - colS = 0; - } - if (colE >= maxNcols) { - colE = maxNcols - 1; - } + colS = std::max(colS, 0); + colE = std::min(colE, maxNcols - 1); + int rowSpan = rowE - rowS + 1, colSpan = colE - colS + 1; // size of plaquet where some response is expected float respMatrix[rowSpan][colSpan]; // response accumulated here std::fill(&respMatrix[0][0], &respMatrix[0][0] + rowSpan * colSpan, 0.f); @@ -327,22 +352,22 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID // take into account that the AlpideSimResponse depth defintion has different min/max boundaries // although the max should coincide with the surface of the epitaxial layer, which in the chip // local coordinates has Y = +SensorLayerThickness/2 - float thickness = innerBarrel ? SuperSegmentation::mSensorLayerThickness : Segmentation::SensorLayerThickness; - xyzLocS.SetY(xyzLocS.Y() + mAlpSimResp->getDepthMax() - thickness / 2.); + xyzLocS.SetY(xyzLocS.Y() + ((innerBarrel) ? 
mSimRespIBShift : mSimRespOBShift)); + // collect charge in evey pixel which might be affected by the hit for (int iStep = nSteps; iStep--;) { // Get the pixel ID if (innerBarrel) { - SuperSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } else { - Segmentation::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } if (row != rowPrev || col != colPrev) { // update pixel and coordinates of its center if (innerBarrel) { - if (!SuperSegmentations[layer].detectorToLocal(row, col, cRowPix, cColPix)) { + if (!mIBSegmentations[layer].detectorToLocal(row, col, cRowPix, cColPix)) { continue; } - } else if (!Segmentation::detectorToLocal(row, col, cRowPix, cColPix)) { + } else if (!SegmentationAlpide::detectorToLocal(row, col, cRowPix, cColPix)) { continue; // should not happen } rowPrev = row; @@ -350,9 +375,17 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } bool flipCol = false, flipRow = false; // note that response needs coordinates along column row (locX) (locZ) then depth (locY) - double rowMax{0.5f * (innerBarrel ? SuperSegmentation::mPitchRow : Segmentation::PitchRow)}; - double colMax{0.5f * (innerBarrel ? 
SuperSegmentation::mPitchCol : Segmentation::PitchCol)}; - auto rspmat = mAlpSimResp->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); + float rowMax{}, colMax{}; + const AlpideRespSimMat* rspmat{nullptr}; + if (innerBarrel) { + rowMax = 0.5f * SegmentationMosaix::PitchRow; + colMax = 0.5f * SegmentationMosaix::PitchCol; + rspmat = mSimRespIB->getResponse(mSimRespIBScaleX * (xyzLocS.X() - cRowPix), mSimRespIBScaleZ * (xyzLocS.Z() - cColPix), xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); + } else { + rowMax = 0.5f * SegmentationAlpide::PitchRow; + colMax = 0.5f * SegmentationAlpide::PitchCol; + rspmat = mSimRespOB->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol, rowMax, colMax); + } xyzLocS += step; if (rspmat == nullptr) { @@ -369,7 +402,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID if (colDest < 0 || colDest >= colSpan) { continue; } - respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, flipRow, flipCol); + respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, ((innerBarrel && mSimRespIBOrientation) ? 
!flipRow : flipRow), flipCol); } } } diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx b/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx index 26e47e03057c2..8dc94e339c793 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3Layer.cxx @@ -18,11 +18,11 @@ #include "TGeoVolume.h" #include "TGeoCompositeShape.h" +#include "Framework/Logger.h" #include "CommonConstants/MathConstants.h" #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/SpecsV2.h" #include "ITS3Simulation/ITS3Layer.h" -#include "fairlogger/Logger.h" namespace o2m = o2::constants::math; namespace its3c = o2::its3::constants; @@ -31,13 +31,6 @@ namespace o2::its3 { using its3TGeo = o2::its::GeometryTGeo; -void ITS3Layer::init() -{ - mR = its3c::radii[mNLayer]; - mRmin = its3c::radiiInner[mNLayer]; - mRmax = its3c::radiiOuter[mNLayer]; -} - void ITS3Layer::getMaterials(bool create) { if (gGeoManager == nullptr) { @@ -47,6 +40,7 @@ void ITS3Layer::getMaterials(bool create) mSilicon = getMaterial("IT3_SI$", create); mAir = getMaterial("IT3_AIR$", create); mCarbon = getMaterial("IT3_CARBON$", create); + mCopper = getMaterial("IT3_COPPER$", create); } TGeoMedium* ITS3Layer::getMaterial(const char* matName, bool create) @@ -58,11 +52,11 @@ TGeoMedium* ITS3Layer::getMaterial(const char* matName, bool create) } else { // create dummy auto matDummy = gGeoManager->GetMaterial("MAT_DUMMY$"); if (matDummy == nullptr) { - LOGP(info, "Created Dummy material"); + LOGP(warn, "Created Dummy material"); matDummy = new TGeoMaterial("MAT_DUMMY$", 26.98, 13, 2.7); } mat = new TGeoMedium(matName, 1, matDummy); - LOGP(info, "Created medium {}", matName); + LOGP(warn, "Created medium {}", matName); } } return mat; @@ -75,12 +69,10 @@ void ITS3Layer::createLayer(TGeoVolume* motherVolume) createLayerImpl(); mBuilt = true; - LOGP(info, "ITS3-Layer: Created Layer {} with mR={} (minR={}, maxR={})", mNLayer, mR, mRmin, mRmax); if (motherVolume == 
nullptr) { return; } // Add it to motherVolume - LOGP(debug, " `-> Attaching to motherVolume '{}'", motherVolume->GetName()); auto* trans = new TGeoTranslation(0, 0, -constants::segment::lengthSensitive / 2.); motherVolume->AddNode(mLayer, 0, trans); } @@ -91,15 +83,9 @@ void ITS3Layer::createPixelArray() return; } // A pixel array is pure silicon and the sensitive part of our detector. - // It will be segmented into a 442x156 matrix by the - // SuperSegmentationAlpide. - // Pixel Array is just a longer version of the biasing but starts in phi at - // biasPhi2. using namespace its3c::pixelarray; - double pixelArrayPhi1 = constants::tile::readout::width / mR * o2m::Rad2Deg; - double pixelArrayPhi2 = width / mR * o2m::Rad2Deg + pixelArrayPhi1; - auto pixelArray = new TGeoTubeSeg(mRmin, mRmax, length / 2., - pixelArrayPhi1, pixelArrayPhi2); + double pixelArrayPhi = width / mR * o2m::Rad2Deg; + auto pixelArray = new TGeoTubeSeg(mRmin, mRmax, length / 2., 0, pixelArrayPhi); mPixelArray = new TGeoVolume(its3TGeo::getITS3PixelArrayPattern(mNLayer), pixelArray, mSilicon); mPixelArray->SetLineColor(color); mPixelArray->RegisterYourself(); @@ -131,8 +117,9 @@ void ITS3Layer::createTile() mTile->AddNode(readoutVol, 0, zMoveReadout); // Pixel Array is just a longer version of the biasing but starts in phi at - // biasPhi2. - mTile->AddNode(mPixelArray, 0); + // readoutPhi2. 
+ auto phiRotPixelArray = new TGeoRotation(Form("its3PhiPixelArrayOffset_%d", mNLayer), readoutPhi2, 0, 0); + mTile->AddNode(mPixelArray, 0, phiRotPixelArray); // Biasing double biasPhi1 = constants::pixelarray::width / mR * o2m::Rad2Deg + readoutPhi2; @@ -199,7 +186,7 @@ void ITS3Layer::createRSU() // Rotation for top half and vertical mirroring double phi = width / mR * o2m::Rad2Deg; - auto rot = new TGeoRotation("", 0, 0, -phi); + auto rot = new TGeoRotation(Form("its3RotHalfBarrel_%d", mNLayer), 0, 0, -phi); rot->ReflectY(true); // Upper Left @@ -276,11 +263,19 @@ void ITS3Layer::createChip() mChip = new TGeoVolumeAssembly(its3TGeo::getITS3ChipPattern(mNLayer)); mChip->VisibleDaughters(); + auto phiOffset = constants::segment::width / mR * o2m::Rad2Deg; for (unsigned int i{0}; i < constants::nSegments[mNLayer]; ++i) { - double phiOffset = constants::segment::width / mR * o2m::Rad2Deg; - auto rot = new TGeoRotation("", 0, 0, phiOffset * i); + auto rot = new TGeoRotation(Form("its3PhiSegmentOffset_%d_%d", mNLayer, i), 0, 0, phiOffset * i); mChip->AddNode(mSegment, i, rot); } + + // Add metal stack positioned radially outward + auto zMoveMetal = new TGeoTranslation(0, 0, constants::metalstack::length / 2. 
- constants::segment::lec::length); + auto metal = new TGeoTubeSeg(mRmax, mRmax + constants::metalstack::thickness, constants::metalstack::length / 2., 0, constants::nSegments[mNLayer] * phiOffset); + auto metalVol = new TGeoVolume(Form("metal%d", mNLayer), metal, mCopper); + metalVol->SetLineColor(constants::metalstack::color); + metalVol->RegisterYourself(); + mChip->AddNode(metalVol, 0, zMoveMetal); } void ITS3Layer::createCarbonForm() @@ -296,7 +291,7 @@ void ITS3Layer::createCarbonForm() mCarbonForm->VisibleDaughters(); double dRadius = -1; if (mNLayer < 2) { - dRadius = constants::radii[mNLayer + 1] - constants::radii[mNLayer] - constants::thickness; + dRadius = constants::radii[mNLayer + 1] - constants::radii[mNLayer] - constants::totalThickness; } else { dRadius = 0.7; // TODO: lack of carbon foam radius for layer 2, use 0.7mm as a temporary value } @@ -372,8 +367,8 @@ void ITS3Layer::createLayerImpl() // The offset is the right angle triangle of the middle radius with the // transverse axis. double phiOffset = std::asin(constants::equatorialGap / 2. 
/ mR) * o2m::Rad2Deg; - auto rotTop = new TGeoRotation("", 0, 0, +phiOffset); - auto rotBot = new TGeoRotation("", 0, 0, phiOffset + 180); + auto rotTop = new TGeoRotation(Form("its3CarbonPhiOffsetTop_%d", mNLayer), 0, 0, +phiOffset); + auto rotBot = new TGeoRotation(Form("its3CarbonPhiOffsetBot_%d", mNLayer), 0, 0, phiOffset + 180); mLayer->AddNode(mCarbonForm, 0, rotTop); mLayer->AddNode(mCarbonForm, 1, rotBot); @@ -412,8 +407,7 @@ void ITS3Layer::buildPartial(TGeoVolume* motherVolume, TGeoMatrix* mat, BuildLev case BuildLevel::kLayer: [[fallthrough]]; default: - createLayerImpl(); - motherVolume->AddNode(mLayer, 0, mat); + createLayer(motherVolume); } LOGP(info, "Partially built ITS3-{}-{}", mNLayer, getName(level)); } diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h index b9af595018a34..fca3f5d63c2c4 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h +++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h @@ -18,6 +18,7 @@ #pragma link C++ class o2::its3::ITS3Layer + ; #pragma link C++ class o2::its3::ITS3Services + ; #pragma link C++ class o2::its3::DescriptorInnerBarrelITS3 + ; +#pragma link C++ class o2::its3::DigiParams + ; #pragma link C++ class o2::its3::Digitizer + ; #endif From f0eebb64b1ef1ce4c1ebf79fa531ae06bf7049f5 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Sat, 12 Apr 2025 12:45:08 +0200 Subject: [PATCH 0375/1914] ITS3: remove unnecessary reader/writers (#14157) Signed-off-by: Felix Schlepper --- .../Upgrades/ITS3/workflow/CMakeLists.txt | 25 ---- .../include/ITS3Workflow/ClusterReaderSpec.h | 76 ----------- .../include/ITS3Workflow/ClusterWriterSpec.h | 31 ----- .../ITS3Workflow/ClusterWriterWorkflow.h | 31 ----- .../include/ITS3Workflow/TrackReaderSpec.h | 79 ------------ .../include/ITS3Workflow/TrackWriterSpec.h | 31 ----- .../include/ITS3Workflow/VertexReaderSpec.h | 65 ---------- 
.../ITS3/workflow/src/ClusterReaderSpec.cxx | 122 ------------------ .../ITS3/workflow/src/ClusterWriterSpec.cxx | 72 ----------- .../workflow/src/ClusterWriterWorkflow.cxx | 36 ------ .../ITS3/workflow/src/RecoWorkflow.cxx | 8 +- .../ITS3/workflow/src/TrackReaderSpec.cxx | 120 ----------------- .../ITS3/workflow/src/TrackWriterSpec.cxx | 82 ------------ .../ITS3/workflow/src/VertexReaderSpec.cxx | 84 ------------ 14 files changed, 4 insertions(+), 858 deletions(-) delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx delete mode 100644 Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx diff --git a/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt b/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt index 649e4d737d42c..bcb3cf46375e9 100644 --- a/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/workflow/CMakeLists.txt @@ -15,13 +15,8 @@ o2_add_library(ITS3Workflow SOURCES src/DigitReaderSpec.cxx src/DigitWriterSpec.cxx src/RecoWorkflow.cxx - src/ClusterWriterWorkflow.cxx src/ClustererSpec.cxx - src/ClusterWriterSpec.cxx src/TrackerSpec.cxx - 
src/TrackWriterSpec.cxx - src/TrackReaderSpec.cxx - src/VertexReaderSpec.cxx PUBLIC_LINK_LIBRARIES O2::Framework O2::SimConfig O2::DataFormatsITSMFT @@ -35,27 +30,7 @@ o2_add_library(ITS3Workflow O2::GPUTracking O2::ITSBase) -# o2_add_executable(digit-writer-workflow -# SOURCES src/digit-writer-workflow.cxx -# COMPONENT_NAME its3 -# PUBLIC_LINK_LIBRARIES O2::ITS3Workflow) - -# o2_add_executable(digit-reader-workflow -# SOURCES src/digit-reader-workflow.cxx -# COMPONENT_NAME its3 -# PUBLIC_LINK_LIBRARIES O2::ITS3Workflow) - o2_add_executable(reco-workflow SOURCES src/its3-reco-workflow.cxx COMPONENT_NAME its3 PUBLIC_LINK_LIBRARIES O2::ITS3Workflow) - -# o2_add_executable(cluster-writer-workflow -# SOURCES src/its-cluster-writer-workflow.cxx -# COMPONENT_NAME its -# PUBLIC_LINK_LIBRARIES O2::ITSWorkflow) - -# o2_add_executable(cluster-reader-workflow -# SOURCES src/its-cluster-reader-workflow.cxx -# COMPONENT_NAME its -# PUBLIC_LINK_LIBRARIES O2::ITSWorkflow) diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h deleted file mode 100644 index c411c2accace1..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterReaderSpec.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file ClusterReaderSpec.h - -#ifndef O2_ITSMFT_CLUSTERREADER -#define O2_ITSMFT_CLUSTERREADER - -#include "TFile.h" -#include "TTree.h" - -#include "Framework/DataProcessorSpec.h" -#include "Framework/Task.h" -#include "Headers/DataHeader.h" -#include "DataFormatsITSMFT/CompCluster.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" -#include "DataFormatsITSMFT/ROFRecord.h" - -using namespace o2::framework; - -namespace o2::its3 -{ - -class ClusterReader : public Task -{ - public: - ClusterReader(bool useMC, bool usePatterns = true); - - void init(InitContext& ic) final; - void run(ProcessingContext& pc) final; - - protected: - void connectTree(const std::string& filename); - - std::vector mClusROFRec, *mClusROFRecPtr = &mClusROFRec; - std::vector mClusterCompArray, *mClusterCompArrayPtr = &mClusterCompArray; - std::vector mPatternsArray, *mPatternsArrayPtr = &mPatternsArray; - o2::dataformats::MCTruthContainer mClusterMCTruth, *mClusterMCTruthPtr = &mClusterMCTruth; - std::vector mClusMC2ROFs, *mClusMC2ROFsPtr = &mClusMC2ROFs; - - o2::header::DataOrigin mOrigin = o2::header::gDataOriginITS; - - std::unique_ptr mFile; - std::unique_ptr mTree; - - bool mUseMC = true; // use MC truth - bool mUsePatterns = true; // send patterns - - std::string mDetName = "ITS"; // pretending to be ITS - std::string mDetNameLC = "its"; - std::string mDetNameReal = "IT3"; - std::string mFileName = ""; - std::string mClusTreeName = "o2sim"; - std::string mClusROFBranchName = "ClustersROF"; - std::string mClusterPattBranchName = "ClusterPatt"; - std::string mClusterCompBranchName = "ClusterComp"; - std::string mClustMCTruthBranchName = "ClusterMCTruth"; - std::string mClustMC2ROFBranchName = "ClustersMC2ROF"; -}; - -/// create a processor spec -/// read ITS/MFT cluster data from a root file -framework::DataProcessorSpec getITS3ClusterReaderSpec(bool useMC = true, bool usePatterns = true); - -} // namespace o2::its3 - -#endif /* 
O2_ITSMFT_CLUSTERREADER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h deleted file mode 100644 index 49106871d89d5..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterSpec.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file ClusterWriterSpec.h - -#ifndef O2_ITS_CLUSTERWRITER -#define O2_ITS_CLUSTERWRITER - -#include "Framework/DataProcessorSpec.h" - -namespace o2 -{ -namespace its3 -{ - -/// create a processor spec -/// write ITS clusters to ROOT file -framework::DataProcessorSpec getClusterWriterSpec(bool useMC); - -} // namespace its3 -} // namespace o2 - -#endif /* O2_ITS_CLUSTERWRITER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h deleted file mode 100644 index 05268e7ca3a1e..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/ClusterWriterWorkflow.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -#ifndef O2_ITS_CLUSTER_WRITER_WORKFLOW_H -#define O2_ITS_CLUSTER_WRITER_WORKFLOW_H - -/// @file ClusterWriterWorkflow.h - -#include "Framework/WorkflowSpec.h" - -namespace o2 -{ -namespace its3 -{ - -namespace cluster_writer_workflow -{ -framework::WorkflowSpec getWorkflow(bool useMC); -} - -} // namespace its3 -} // namespace o2 -#endif diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h deleted file mode 100644 index 1686b7c275941..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackReaderSpec.h +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file TrackReaderSpec.h - -#ifndef O2_ITS3_TRACKREADER -#define O2_ITS3_TRACKREADER - -#include "TFile.h" -#include "TTree.h" - -#include "Framework/DataProcessorSpec.h" -#include "Framework/Task.h" -#include "Headers/DataHeader.h" -#include "DataFormatsITS/TrackITS.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" -#include "DataFormatsITSMFT/ROFRecord.h" -#include "ReconstructionDataFormats/Vertex.h" - -namespace o2 -{ -namespace its3 -{ - -class TrackReader : public o2::framework::Task -{ - using Vertex = o2::dataformats::Vertex>; - - public: - TrackReader(bool useMC = true); - ~TrackReader() override = default; - void init(o2::framework::InitContext& ic) final; - void run(o2::framework::ProcessingContext& pc) final; - - protected: - void connectTree(const std::string& filename); - - std::vector mROFRec, *mROFRecInp = &mROFRec; - std::vector mVerticesROFRec, *mVerticesROFRecInp = &mVerticesROFRec; - std::vector mTracks, *mTracksInp = &mTracks; - std::vector mVertices, *mVerticesInp = &mVertices; - std::vector mClusInd, *mClusIndInp = &mClusInd; - std::vector mMCTruth, *mMCTruthInp = &mMCTruth; - std::vector mMCVertTruth, *mMCVTruthInp = &mMCTruth; - - o2::header::DataOrigin mOrigin = o2::header::gDataOriginITS; - - bool mUseMC = true; // use MC truth - - std::unique_ptr mFile; - std::unique_ptr mTree; - std::string mInputFileName = ""; - std::string mTrackTreeName = "o2sim"; - std::string mROFBranchName = "ITSTracksROF"; - std::string mTrackBranchName = "ITSTrack"; - std::string mClusIdxBranchName = "ITSTrackClusIdx"; - std::string mVertexBranchName = "Vertices"; - std::string mVertexROFBranchName = "VerticesROF"; - std::string mTrackMCTruthBranchName = "ITSTrackMCTruth"; - std::string mTrackMCVertTruthBranchName = "ITSVertexMCTruth"; -}; - -/// create a processor spec -/// read ITS track data from a root file -framework::DataProcessorSpec getITS3TrackReaderSpec(bool useMC = true); - -} // namespace its3 
-} // namespace o2 - -#endif /* O2_ITS3_TRACKREADER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h deleted file mode 100644 index 32f704fd61b45..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/TrackWriterSpec.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file TrackWriterSpec.h - -#ifndef O2_ITS3_TRACKWRITER -#define O2_ITS3_TRACKWRITER - -#include "Framework/DataProcessorSpec.h" - -namespace o2 -{ -namespace its3 -{ - -/// create a processor spec -/// write ITS tracks to ROOT file -o2::framework::DataProcessorSpec getTrackWriterSpec(bool useMC); - -} // namespace its3 -} // namespace o2 - -#endif /* O2_ITS_TRACKWRITER */ diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h deleted file mode 100644 index 6c723d07982f2..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/VertexReaderSpec.h +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file VertexReaderSpec.h - -#ifndef O2_ITS3_VERTEXREADER -#define O2_ITS3_VERTEXREADER - -#include "TFile.h" -#include "TTree.h" - -#include "Framework/DataProcessorSpec.h" -#include "Framework/Task.h" -#include "ReconstructionDataFormats/Vertex.h" -#include "DataFormatsITSMFT/ROFRecord.h" - -namespace o2 -{ -namespace its3 -{ -// read ITS vertices from the output tree of ITS tracking - -class VertexReader : public o2::framework::Task -{ - using Vertex = o2::dataformats::Vertex>; - - public: - VertexReader() = default; - ~VertexReader() override = default; - void init(o2::framework::InitContext& ic) final; - void run(o2::framework::ProcessingContext& pc) final; - - protected: - void connectTree(const std::string& filename); - void accumulate(); - - std::vector mVerticesROFRec, *mVerticesROFRecPtr = &mVerticesROFRec; - std::vector mVertices, *mVerticesPtr = &mVertices; - - o2::header::DataOrigin mOrigin = o2::header::gDataOriginITS; - - std::unique_ptr mFile; - std::unique_ptr mTree; - std::string mFileName = ""; - std::string mVertexTreeName = "o2sim"; - std::string mVertexBranchName = "Vertices"; - std::string mVertexROFBranchName = "VerticesROF"; -}; - -/// create a processor spec -/// read ITS vertex data from a root file -o2::framework::DataProcessorSpec getITS3VertexReaderSpec(); - -} // namespace its3 -} // namespace o2 - -#endif /* O2_ITS3_VERTEXREADER */ diff --git a/Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx deleted file mode 100644 index 5030b1fcdd30a..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/ClusterReaderSpec.cxx +++ /dev/null @@ -1,122 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file ClusterReaderSpec.cxx - -#include - -#include "TTree.h" - -#include "Framework/ControlService.h" -#include "Framework/ConfigParamRegistry.h" -#include "Framework/Logger.h" -#include "ITS3Workflow/ClusterReaderSpec.h" -#include -#include "CommonUtils/NameConf.h" - -using namespace o2::framework; -using namespace o2::itsmft; - -namespace o2 -{ -namespace its3 -{ - -ClusterReader::ClusterReader(bool useMC, bool usePatterns) -{ - mUseMC = useMC; - mUsePatterns = usePatterns; -} - -void ClusterReader::init(InitContext& ic) -{ - mFileName = o2::utils::concat_string(o2::base::NameConf::rectifyDirectory(ic.options().get("input-dir")), - ic.options().get((mDetNameLC + "-cluster-infile").c_str())); - connectTree(mFileName); -} - -void ClusterReader::run(ProcessingContext& pc) -{ - auto ent = mTree->GetReadEntry() + 1; - assert(ent < mTree->GetEntries()); // this should not happen - mTree->GetEntry(ent); - LOG(info) << mDetNameReal << "ClusterReader pushes " << mClusROFRec.size() << " ROFRecords," - << mClusterCompArray.size() << " compact clusters at entry " << ent; - - // This is a very ugly way of providing DataDescription, which anyway does not need to contain detector name. 
- // To be fixed once the names-definition class is ready - pc.outputs().snapshot(Output{mOrigin, "CLUSTERSROF", 0}, mClusROFRec); - pc.outputs().snapshot(Output{mOrigin, "COMPCLUSTERS", 0}, mClusterCompArray); - if (mUsePatterns) { - pc.outputs().snapshot(Output{mOrigin, "PATTERNS", 0}, mPatternsArray); - } - if (mUseMC) { - pc.outputs().snapshot(Output{mOrigin, "CLUSTERSMCTR", 0}, mClusterMCTruth); - pc.outputs().snapshot(Output{mOrigin, "CLUSTERSMC2ROF", 0}, mClusMC2ROFs); - } - - if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { - pc.services().get().endOfStream(); - pc.services().get().readyToQuit(QuitRequest::Me); - } -} - -void ClusterReader::connectTree(const std::string& filename) -{ - mTree.reset(nullptr); // in case it was already loaded - mFile.reset(TFile::Open(filename.c_str())); - assert(mFile && !mFile->IsZombie()); - mTree.reset((TTree*)mFile->Get(mClusTreeName.c_str())); - assert(mTree); - - mTree->SetBranchAddress((mDetName + mClusROFBranchName).c_str(), &mClusROFRecPtr); - mTree->SetBranchAddress((mDetName + mClusterCompBranchName).c_str(), &mClusterCompArrayPtr); - if (mUsePatterns) { - mTree->SetBranchAddress((mDetName + mClusterPattBranchName).c_str(), &mPatternsArrayPtr); - } - if (mUseMC) { - if (mTree->GetBranch((mDetName + mClustMCTruthBranchName).c_str()) && - mTree->GetBranch((mDetName + mClustMC2ROFBranchName).c_str())) { - mTree->SetBranchAddress((mDetName + mClustMCTruthBranchName).c_str(), &mClusterMCTruthPtr); - mTree->SetBranchAddress((mDetName + mClustMC2ROFBranchName).c_str(), &mClusMC2ROFsPtr); - } else { - LOG(info) << "MC-truth is missing"; - mUseMC = false; - } - } - LOG(info) << "Loaded tree from " << filename << " with " << mTree->GetEntries() << " entries"; -} - -DataProcessorSpec getITS3ClusterReaderSpec(bool useMC, bool usePatterns) -{ - std::vector outputSpec; - outputSpec.emplace_back("IT3", "CLUSTERSROF", 0, Lifetime::Timeframe); - outputSpec.emplace_back("IT3", "COMPCLUSTERS", 0, Lifetime::Timeframe); - if 
(usePatterns) { - outputSpec.emplace_back("IT3", "PATTERNS", 0, Lifetime::Timeframe); - } - if (useMC) { - outputSpec.emplace_back("IT3", "CLUSTERSMCTR", 0, Lifetime::Timeframe); - outputSpec.emplace_back("IT3", "CLUSTERSMC2ROF", 0, Lifetime::Timeframe); - } - - return DataProcessorSpec{ - "its3-cluster-reader", - Inputs{}, - outputSpec, - AlgorithmSpec{adaptFromTask(useMC, usePatterns)}, - Options{ - {"its-cluster-infile", VariantType::String, "o2clus_its.root", {"Name of the input cluster file"}}, - {"input-dir", VariantType::String, "none", {"Input directory"}}}}; -} - -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx deleted file mode 100644 index 0231560b3ac25..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterSpec.cxx +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file ClusterWriterSpec.cxx - -#include - -#include "ITS3Workflow/ClusterWriterSpec.h" -#include "DPLUtils/MakeRootTreeWriterSpec.h" -#include "DataFormatsITSMFT/CompCluster.h" -#include "DataFormatsITSMFT/ROFRecord.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" - -using namespace o2::framework; - -namespace o2 -{ -namespace its3 -{ - -template -using BranchDefinition = MakeRootTreeWriterSpec::BranchDefinition; -using CompClusType = std::vector; -using PatternsType = std::vector; -using ROFrameRType = std::vector; -using LabelsType = o2::dataformats::MCTruthContainer; -using ROFRecLblT = std::vector; -using namespace o2::header; - -DataProcessorSpec getClusterWriterSpec(bool useMC) -{ - // Spectators for logging - // this is only to restore the original behavior - auto compClustersSize = std::make_shared(0); - auto compClustersSizeGetter = [compClustersSize](CompClusType const& compClusters) { - *compClustersSize = compClusters.size(); - }; - auto logger = [compClustersSize](std::vector const& rofs) { - LOG(info) << "ITS3ClusterWriter pulled " << *compClustersSize << " clusters, in " << rofs.size() << " RO frames"; - }; - return MakeRootTreeWriterSpec("its3-cluster-writer", - "o2clus_its.root", - MakeRootTreeWriterSpec::TreeAttributes{"o2sim", "Tree with ITS clusters"}, - BranchDefinition{InputSpec{"compclus", "ITS", "COMPCLUSTERS", 0}, - "ITSClusterComp", - compClustersSizeGetter}, - BranchDefinition{InputSpec{"patterns", "ITS", "PATTERNS", 0}, - "ITSClusterPatt"}, - BranchDefinition{InputSpec{"ROframes", "ITS", "CLUSTERSROF", 0}, - "ITSClustersROF", - logger}, - BranchDefinition{InputSpec{"labels", "ITS", "CLUSTERSMCTR", 0}, - "ITSClusterMCTruth", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""}, - BranchDefinition{InputSpec{"MC2ROframes", "ITS", "CLUSTERSMC2ROF", 0}, - "ITSClustersMC2ROF", - (useMC ? 
1 : 0), // one branch if mc labels enabled - ""})(); -} - -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx deleted file mode 100644 index ae79b7797d57d..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/ClusterWriterWorkflow.cxx +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file ClusterWriterWorkflow.cxx - -#include "ITS3Workflow/ClusterWriterWorkflow.h" -#include "ITS3Workflow/ClusterWriterSpec.h" - -namespace o2 -{ -namespace its3 -{ - -namespace cluster_writer_workflow -{ - -framework::WorkflowSpec getWorkflow(bool useMC) -{ - framework::WorkflowSpec specs; - - specs.emplace_back(getClusterWriterSpec(useMC)); - - return specs; -} - -} // namespace cluster_writer_workflow -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx index 721ef36335631..21ae5b4a72345 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx @@ -11,9 +11,9 @@ #include "ITS3Workflow/RecoWorkflow.h" #include "ITS3Workflow/ClustererSpec.h" -#include "ITS3Workflow/ClusterWriterSpec.h" #include "ITS3Workflow/TrackerSpec.h" -#include "ITS3Workflow/TrackWriterSpec.h" +#include "ITSWorkflow/ClusterWriterSpec.h" +#include 
"ITSWorkflow/TrackWriterSpec.h" #include "ITS3Workflow/DigitReaderSpec.h" #include "Framework/Logger.h" @@ -34,14 +34,14 @@ framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::g } if (!disableRootOutput) { - specs.emplace_back(o2::its3::getClusterWriterSpec(useMC)); + specs.emplace_back(o2::its::getClusterWriterSpec(useMC)); } if (trmode != "off") { specs.emplace_back(o2::its3::getTrackerSpec(useMC, useGeom, useTrig, trmode, overrideBeamPosition, dtype)); if (!disableRootOutput) { - specs.emplace_back(o2::its3::getTrackWriterSpec(useMC)); + specs.emplace_back(o2::its::getTrackWriterSpec(useMC)); } } diff --git a/Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx deleted file mode 100644 index 409fa69e7815b..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/TrackReaderSpec.cxx +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// @file TrackReaderSpec.cxx - -#include -#include -#include "Framework/ControlService.h" -#include "Framework/ConfigParamRegistry.h" -#include "ITS3Workflow/TrackReaderSpec.h" -#include "CommonUtils/NameConf.h" - -using namespace o2::framework; -using namespace o2::its3; - -namespace o2 -{ -namespace its3 -{ - -TrackReader::TrackReader(bool useMC) -{ - mUseMC = useMC; -} - -void TrackReader::init(InitContext& ic) -{ - mInputFileName = o2::utils::Str::concat_string(o2::utils::Str::rectifyDirectory(ic.options().get("input-dir")), - ic.options().get("its3-tracks-infile")); - connectTree(mInputFileName); -} - -void TrackReader::run(ProcessingContext& pc) -{ - auto ent = mTree->GetReadEntry() + 1; - assert(ent < mTree->GetEntries()); // this should not happen - mTree->GetEntry(ent); - LOG(info) << "Pushing " << mTracks.size() << " track in " << mROFRec.size() << " ROFs at entry " << ent; - pc.outputs().snapshot(Output{mOrigin, "ITSTrackROF", 0}, mROFRec); - pc.outputs().snapshot(Output{mOrigin, "TRACKS", 0}, mTracks); - pc.outputs().snapshot(Output{mOrigin, "TRACKCLSID", 0}, mClusInd); - pc.outputs().snapshot(Output{"ITS", "VERTICES", 0}, mVertices); - pc.outputs().snapshot(Output{"ITS", "VERTICESROF", 0}, mVerticesROFRec); - if (mUseMC) { - pc.outputs().snapshot(Output{mOrigin, "TRACKSMCTR", 0}, mMCTruth); - pc.outputs().snapshot(Output{mOrigin, "VERTICESMCTR", 0}, mMCVertTruth); - } - - if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { - pc.services().get().endOfStream(); - pc.services().get().readyToQuit(QuitRequest::Me); - } -} - -void TrackReader::connectTree(const std::string& filename) -{ - mTree.reset(nullptr); // in case it was already loaded - mFile.reset(TFile::Open(filename.c_str())); - assert(mFile && !mFile->IsZombie()); - mTree.reset((TTree*)mFile->Get(mTrackTreeName.c_str())); - assert(mTree); - assert(mTree->GetBranch(mROFBranchName.c_str())); - - mTree->SetBranchAddress(mROFBranchName.c_str(), &mROFRecInp); - 
mTree->SetBranchAddress(mTrackBranchName.c_str(), &mTracksInp); - mTree->SetBranchAddress(mClusIdxBranchName.c_str(), &mClusIndInp); - if (!mTree->GetBranch(mVertexBranchName.c_str())) { - LOG(warning) << "No " << mVertexBranchName << " branch in " << mTrackTreeName << " -> vertices will be empty"; - } else { - mTree->SetBranchAddress(mVertexBranchName.c_str(), &mVerticesInp); - } - if (!mTree->GetBranch(mVertexROFBranchName.c_str())) { - LOG(warning) << "No " << mVertexROFBranchName << " branch in " << mTrackTreeName - << " -> vertices ROFrecords will be empty"; - } else { - mTree->SetBranchAddress(mVertexROFBranchName.c_str(), &mVerticesROFRecInp); - } - if (mUseMC) { - if (mTree->GetBranch(mTrackMCTruthBranchName.c_str())) { - mTree->SetBranchAddress(mTrackMCTruthBranchName.c_str(), &mMCTruthInp); - } else { - LOG(warning) << "MC-truth is missing, message will be empty"; - } - } - LOG(info) << "Loaded tree from " << filename << " with " << mTree->GetEntries() << " entries"; -} - -DataProcessorSpec getITS3TrackReaderSpec(bool useMC) -{ - std::vector outputSpec; - outputSpec.emplace_back("ITS", "ITSTrackROF", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "TRACKS", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "TRACKCLSID", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "VERTICES", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "VERTICESROF", 0, Lifetime::Timeframe); - if (useMC) { - outputSpec.emplace_back("ITS", "TRACKSMCTR", 0, Lifetime::Timeframe); - outputSpec.emplace_back("ITS", "VERTICESMCTR", 0, Lifetime::Timeframe); - } - - return DataProcessorSpec{ - "its3-track-reader", - Inputs{}, - outputSpec, - AlgorithmSpec{adaptFromTask(useMC)}, - Options{ - {"its-tracks-infile", VariantType::String, "o2trac_its.root", {"Name of the input ITS3 track file"}}, - {"input-dir", VariantType::String, "none", {"Input directory"}}}}; -} - -} // namespace its3 -} // namespace o2 diff --git 
a/Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx deleted file mode 100644 index 856c806e74247..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/TrackWriterSpec.cxx +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file TrackWriterSpec.cxx - -#include - -#include "ITS3Workflow/TrackWriterSpec.h" -#include "DPLUtils/MakeRootTreeWriterSpec.h" -#include "DataFormatsITS/TrackITS.h" -#include "DataFormatsITSMFT/ROFRecord.h" -#include "SimulationDataFormat/MCCompLabel.h" -#include "SimulationDataFormat/MCTruthContainer.h" -#include "ReconstructionDataFormats/Vertex.h" - -using namespace o2::framework; - -namespace o2 -{ -namespace its3 -{ -using Vertex = o2::dataformats::Vertex>; - -template -using BranchDefinition = MakeRootTreeWriterSpec::BranchDefinition; -using LabelsType = std::vector; -using ROFRecLblT = std::vector; -using namespace o2::header; - -DataProcessorSpec getTrackWriterSpec(bool useMC) -{ - // Spectators for logging - // this is only to restore the original behavior - auto tracksSize = std::make_shared(0); - auto tracksSizeGetter = [tracksSize](std::vector const& tracks) { - *tracksSize = tracks.size(); - }; - auto logger = [tracksSize](std::vector const& rofs) { - LOG(info) << "ITS3TrackWriter pulled " << *tracksSize << " tracks, in " << rofs.size() << " RO frames"; - }; - // NOTE: We name the branches as ITS and not IT3 to ensure matching works. 
- return MakeRootTreeWriterSpec("its3-track-writer", - "o2trac_its.root", - MakeRootTreeWriterSpec::TreeAttributes{"o2sim", "Tree with ITS3 tracks"}, - BranchDefinition>{InputSpec{"tracks", "ITS", "TRACKS", 0}, - "ITSTrack", - tracksSizeGetter}, - BranchDefinition>{InputSpec{"trackClIdx", "ITS", "TRACKCLSID", 0}, - "ITSTrackClusIdx"}, - BranchDefinition>{InputSpec{"vertices", "ITS", "VERTICES", 0}, - "Vertices"}, - BranchDefinition>{InputSpec{"vtxROF", "ITS", "VERTICESROF", 0}, - "VerticesROF"}, - BranchDefinition>{InputSpec{"ROframes", "ITS", "ITSTrackROF", 0}, - "ITSTracksROF", - logger}, - BranchDefinition{InputSpec{"labels", "ITS", "TRACKSMCTR", 0}, - "ITSTrackMCTruth", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""}, - BranchDefinition{InputSpec{"labelsVertices", "ITS", "VERTICESMCTR", 0}, - "ITSVertexMCTruth", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""}, - BranchDefinition>{InputSpec{"purityVertices", "ITS", "VERTICESMCPUR", 0}, - "ITSVertexMCPurity", (useMC ? 1 : 0), ""}, - BranchDefinition{InputSpec{"MC2ROframes", "ITS", "ITSTrackMC2ROF", 0}, - "ITSTracksMC2ROF", - (useMC ? 1 : 0), // one branch if mc labels enabled - ""})(); -} - -} // namespace its3 -} // namespace o2 diff --git a/Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx b/Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx deleted file mode 100644 index 25fd3e530161a..0000000000000 --- a/Detectors/Upgrades/ITS3/workflow/src/VertexReaderSpec.cxx +++ /dev/null @@ -1,84 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
-// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// @file VertexReaderSpec.cxx - -#include - -#include "Framework/ControlService.h" -#include "Framework/ConfigParamRegistry.h" -#include "Framework/Logger.h" -#include "ITSWorkflow/VertexReaderSpec.h" -#include "CommonUtils/NameConf.h" - -using namespace o2::framework; -using namespace o2::its; - -namespace o2 -{ -namespace its -{ - -void VertexReader::init(InitContext& ic) -{ - mFileName = o2::utils::Str::concat_string(o2::utils::Str::rectifyDirectory(ic.options().get("input-dir")), - ic.options().get("its-vertex-infile")); - connectTree(mFileName); -} - -void VertexReader::run(ProcessingContext& pc) -{ - auto ent = mTree->GetReadEntry() + 1; - assert(ent < mTree->GetEntries()); // this should not happen - mTree->GetEntry(ent); - LOG(info) << "Pushing " << mVerticesPtr->size() << " vertices in " << mVerticesROFRecPtr->size() - << " ROFs at entry " << ent; - pc.outputs().snapshot(Output{"IT3", "VERTICES", 0}, mVertices); - pc.outputs().snapshot(Output{"IT3", "VERTICESROF", 0}, mVerticesROFRec); - - if (mTree->GetReadEntry() + 1 >= mTree->GetEntries()) { - pc.services().get().endOfStream(); - pc.services().get().readyToQuit(QuitRequest::Me); - } -} - -void VertexReader::connectTree(const std::string& filename) -{ - mTree.reset(nullptr); // in case it was already loaded - mFile.reset(TFile::Open(filename.c_str())); - assert(mFile && !mFile->IsZombie()); - mTree.reset((TTree*)mFile->Get(mVertexTreeName.c_str())); - assert(mTree); - assert(mTree->GetBranch(mVertexBranchName.c_str())); - assert(mTree->GetBranch(mVertexROFBranchName.c_str())); - mTree->SetBranchAddress(mVertexBranchName.c_str(), &mVerticesPtr); - mTree->SetBranchAddress(mVertexROFBranchName.c_str(), &mVerticesROFRecPtr); - LOG(info) << "Loaded tree from " << filename << " with " << 
mTree->GetEntries() << " entries"; -} - -DataProcessorSpec getITS3VertexReaderSpec() -{ - std::vector outputSpec; - outputSpec.emplace_back("IT3", "VERTICES", 0, Lifetime::Timeframe); - outputSpec.emplace_back("IT3", "VERTICESROF", 0, Lifetime::Timeframe); - - return DataProcessorSpec{ - "its3-vertex-reader", - Inputs{}, - outputSpec, - AlgorithmSpec{adaptFromTask()}, - Options{ - {"its3-vertex-infile", VariantType::String, "o2trac_its3.root", {"Name of the input ITS3 vertex file"}}, - {"input-dir", VariantType::String, "none", {"Input directory"}}}}; -} - -} // namespace its -} // namespace o2 From 7e212e1e87659b8f075ef961a0e70a11cbf104cc Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 14:02:18 +0200 Subject: [PATCH 0376/1914] jobutils: Don't treat bogus Geant message about exceptions as error --- Utilities/Tools/jobutils.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Utilities/Tools/jobutils.sh b/Utilities/Tools/jobutils.sh index 4d51ac8522c6d..54f1394197815 100644 --- a/Utilities/Tools/jobutils.sh +++ b/Utilities/Tools/jobutils.sh @@ -189,10 +189,12 @@ taskwrapper() { -e \"terminate called without an active\" \ -e \"\*\*\* Error in\"" # <--- LIBC fatal error messages - grepcommand="grep -a -H ${pattern} $logfile ${JOBUTILS_JOB_SUPERVISEDFILES} >> encountered_exceptions_list 2>/dev/null" + exclude_pattern="-e \"To change the tolerance or the exception severity\"" + + grepcommand="grep -a -H ${pattern} $logfile ${JOBUTILS_JOB_SUPERVISEDFILES} | grep -a -v ${exclude_pattern} >> encountered_exceptions_list 2>/dev/null" eval ${grepcommand} - grepcommand="grep -a -h --count ${pattern} $logfile ${JOBUTILS_JOB_SUPERVISEDFILES} 2>/dev/null" + grepcommand="cat encountered_exceptions_list 2>/dev/null | wc -l" # using eval here since otherwise the pattern is translated to a # a weirdly quoted stringlist RC=$(eval ${grepcommand}) From 7872ee5a85ac0762faa1dbb50da8dd0ddd904104 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 
Apr 2025 13:07:26 +0200 Subject: [PATCH 0377/1914] GPU CMake: cleanup and fix some todos --- GPU/GPUTracking/CMakeLists.txt | 43 ++++++++++++++-------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index e722d375e4b93..44a630fe19f48 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -230,31 +230,21 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") Interface/GPUO2InterfaceConfigurableParam.cxx) endif() +set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h + Base/GPUReconstructionKernelIncludes.template.h + Base/GPUReconstructionIncludesDeviceAll.template.h + cmake/GPUNoFastMathKernels.template.h + Definitions/GPUDefParameters.template.h + Definitions/GPUDefParametersLoad.template.inc) +set(GENERATED_HEADERS_LIST "") + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) -file(GENERATE # TODO: Do this as a list - OUTPUT include_gpu_onthefly/GPUReconstructionKernelList.h - INPUT Base/GPUReconstructionKernelList.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUReconstructionKernelIncludes.h - INPUT Base/GPUReconstructionKernelIncludes.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUReconstructionIncludesDeviceAll.h - INPUT Base/GPUReconstructionIncludesDeviceAll.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUNoFastMathKernels.h - INPUT cmake/GPUNoFastMathKernels.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUDefParameters.h - INPUT Definitions/GPUDefParameters.template.h -) -file(GENERATE - OUTPUT include_gpu_onthefly/GPUDefParametersLoad.inc - INPUT Definitions/GPUDefParametersLoad.template.inc -) +foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) + get_filename_component(OUTPUT_FILE_NAME ${TEMPLATE_FILE} NAME) + string(REPLACE ".template" "" OUTPUT_FILE_NAME ${OUTPUT_FILE_NAME}) + file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} 
INPUT ${TEMPLATE_FILE}) + list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME}) +endforeach() file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase CONTENT "$,REPLACE,[^A-Za-z0-9]+,_>,\n>" @@ -266,6 +256,7 @@ add_custom_command( VERBATIM COMMAND_EXPAND_LISTS ) +list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) include(kernels.cmake) @@ -405,11 +396,11 @@ set_source_files_properties(Base/GPUReconstructionLibrary.cxx PROPERTIES INCLUDE_DIRECTORIES "${CMAKE_CURRENT_BINARY_DIR}") -# Make sure header files generated with add_custom_command are built +# Make sure header files generated with add_custom_command are built before being used target_sources(${targetName} PRIVATE FILE_SET "generatedHeaders" TYPE HEADERS - FILES ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h # TODO: build file list for this + FILES ${GENERATED_HEADERS_LIST} BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}) # Add compile definitions and libraries depending on available optional dependencies From e3b82a84be0aff2dea342a291b6e50e177ec6b90 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 Apr 2025 12:29:50 +0200 Subject: [PATCH 0378/1914] GPU: Fix typo in variable name, fix comments, fix debug messages --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 10 +++++----- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 ++-- GPU/GPUTracking/Global/GPUChainTracking.cxx | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 2 +- 5 files changed, 11 insertions(+), 11 
deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index d714c6833d18d..ed47358cc9d5c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -68,7 +68,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu int32_t nThreads = getNKernelHostThreads(false); if (nThreads > 1) { if (mProcessingSettings.debugLevel >= 5) { - printf("Running %d Threads\n", nThreads); + printf("Running %d Threads\n", mThreading->activeThreads->max_concurrency()); } tbb::this_task_arena::isolate([&] { mThreading->activeThreads->execute([&] { @@ -91,10 +91,10 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu template <> inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { - int32_t nnThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); - if (nnThreads > 1) { - tbb::parallel_for(0, nnThreads, [&](int iThread) { - size_t threadSize = size / nnThreads; + int32_t nThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); + if (nThreads > 1) { + tbb::parallel_for(0, nThreads, [&](int iThread) { + size_t threadSize = size / nThreads; if (threadSize % 4096) { threadSize += 4096 - threadSize % 4096; } diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index dd72119e3b56f..e17f1fcd7091e 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -77,8 +77,8 @@ add_custom_command( create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done - COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain standard headers 1>&2 && exit 1" - COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src" + COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done || bash -c "echo ERROR: CUDA RTC sources contain system headers 1>&2 && exit 1" + COMMENT "Checking CUDA RTC File ${GPU_RTC_BIN}.src for system headers" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_CUDA_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_CUDA_SRC_CHK.done) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 5a68df8ac9527..9a9b1e36a167c 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -115,8 +115,8 @@ add_custom_command( create_binary_resource(${GPU_RTC_BIN}.src ${GPU_RTC_BIN}.src.o) add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done - COMMAND ! grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain standard headers 1>&2 && exit 1" - COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src" + COMMAND ! 
grep "# [0-9]* \"\\(/usr/\\|.*GCC-Toolchain\\)" ${GPU_RTC_BIN}.src > ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done || bash -c "echo ERROR: HIP RTC sources contain system headers 1>&2 && exit 1" + COMMENT "Checking HIP RTC File ${GPU_RTC_BIN}.src for system headers" DEPENDS ${GPU_RTC_BIN}.src VERBATIM) add_custom_target(${MODULE}_HIP_SRC_CHK ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${MODULE}_HIP_SRC_CHK.done) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 37ad164d20a60..6753db280d5bf 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -267,7 +267,7 @@ bool GPUChainTracking::ValidateSettings() return false; } if ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) && std::max(GetProcessingSettings().nTPCClustererLanes + 1, GetProcessingSettings().nTPCClustererLanes * 2) + (GetProcessingSettings().doublePipeline ? 1 : 0) > (int32_t)mRec->NStreams()) { - GPUError("NStreams (%d) must be > nTPCClustererLanes (%d)", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); + GPUError("NStreams of %d insufficient for %d nTPCClustererLanes", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } if (GetProcessingSettings().noGPUMemoryRegistration && GetProcessingSettings().tpcCompressionGatherMode != 3) { diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 3c1ad9658566b..7faab410d20ea 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -76,7 +76,6 @@ function(o2_gpu_add_kernel kernel_name kernel_files) set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_NAMES "${kernel_name}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_INCLUDES "${TMP_KERNEL_CLASS_FILE}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_FILES "${TMP_KERNEL_CLASS_FILE}.cxx") - # 
add_custom_command OUTPUT option does not support target-dependend generator expressions, thus this workaround set(O2_GPU_KERNEL_TEMPLATE_FILES "GPUConstantMem.h") if (GPUCA_BUILD_DEBUG) @@ -102,6 +101,7 @@ function(o2_gpu_add_kernel kernel_name kernel_files) list(TRANSFORM O2_GPU_KERNEL_TEMPLATE_FILES PREPEND "#include \"") list(JOIN O2_GPU_KERNEL_TEMPLATE_FILES "\n" O2_GPU_KERNEL_TEMPLATE_FILES) + # add_custom_command OUTPUT option does not support target-dependend generator expressions, thus this workaround to create CUDA and HIP files string(REPLACE ", " "_" TMP_FILENAME "${kernel_name}") if(CUDA_ENABLED) set(TMP_FILENAMEA "${O2_GPU_KERNEL_WRAPPER_FOLDER}/krnl_${TMP_FILENAME}.cu") From 4c4e0044afb9a37b1117d7a773815a3abb86a306 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 Apr 2025 13:28:55 +0200 Subject: [PATCH 0379/1914] GPU: Clean up some unused parameters --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 10 +++++----- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 4 ++-- GPU/GPUTracking/Definitions/GPUDefParametersDefault.h | 7 ------- GPU/GPUTracking/Standalone/Benchmark/standalone.cxx | 8 ++++---- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 9b6562d8e77ee..2f643706647ee 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -449,7 +449,7 @@ int32_t GPUReconstruction::Exit() if (mMemoryResources[i].mReuse >= 0) { continue; } - operator delete(mMemoryResources[i].mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(mMemoryResources[i].mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); mMemoryResources[i].mPtr = mMemoryResources[i].mPtrDevice = nullptr; } } @@ -577,7 +577,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || 
control->useInternal())) { if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { if (res->mPtrDevice && res->mReuse < 0) { - operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize); if (res->mReuse >= 0) { @@ -587,7 +587,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, } res->mPtrDevice = mMemoryResources[res->mReuse].mPtrDevice; } else { - res->mPtrDevice = operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT GPUCA_OPERATOR_NEW_ALIGNMENT); + res->mPtrDevice = operator new(res->mSize + GPUCA_BUFFER_ALIGNMENT, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = GPUProcessor::alignPointer(res->mPtrDevice); res->SetPointers(res->mPtr); @@ -775,7 +775,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n"; } if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) { - operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = nullptr; res->mPtrDevice = nullptr; @@ -825,7 +825,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) for (uint32_t i = std::get<2>(mNonPersistentMemoryStack.back()); i < mNonPersistentIndividualAllocations.size(); i++) { GPUMemoryResource* res = mNonPersistentIndividualAllocations[i]; if (res->mReuse < 0) { - operator delete(res->mPtrDevice GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = nullptr; res->mPtrDevice = nullptr; diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index ed47358cc9d5c..9b569d3e88f3c 
100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -202,7 +202,7 @@ int32_t GPUReconstructionCPU::InitDevice() if (mDeviceMemorySize > mHostMemorySize) { mHostMemorySize = mDeviceMemorySize; } - mHostMemoryBase = operator new(mHostMemorySize GPUCA_OPERATOR_NEW_ALIGNMENT); + mHostMemoryBase = operator new(mHostMemorySize, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } mHostMemoryPermanent = mHostMemoryBase; ClearAllocatedMemory(); @@ -218,7 +218,7 @@ int32_t GPUReconstructionCPU::ExitDevice() { if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { - operator delete(mHostMemoryBase GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(mHostMemoryBase, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } mHostMemoryPool = mHostMemoryBase = mHostMemoryPoolEnd = mHostMemoryPermanent = nullptr; mHostMemorySize = 0; diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h index 4435e69c60ff6..44f3eb299d4c1 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h @@ -594,14 +594,11 @@ #define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters #define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets #define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit -#define GPUCA_TRACKER_CONSTANT_MEM ((size_t) 63 * 1024) // Amount of Constant Memory to reserve #define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device #define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host #define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread #define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread -#define 
GPUCA_MAX_SECTOR_NTRACK (2 << 24) // Maximum number of tracks per sector (limited by track id format) - // #define GPUCA_KERNEL_DEBUGGER_OUTPUT // Some assertions to make sure the parameters are not invalid @@ -614,14 +611,10 @@ // Derived parameters #ifdef GPUCA_USE_TEXTURES #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache - #define GPUCA_TEXTURE_FETCH_NEIGHBORS // Fetch also in Neighbours Finder #endif #if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE) #define GPUCA_SORT_STARTHITS #endif -#define GPUCA_NEW_ALIGNMENT (std::align_val_t{GPUCA_BUFFER_ALIGNMENT}) -#define GPUCA_OPERATOR_NEW_ALIGNMENT ,GPUCA_NEW_ALIGNMENT - // clang-format on #endif // GPUDEFPARAMETERSDEFAULT_H diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index 2e3e4725bd6aa..b32db2bfebf11 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -71,7 +71,7 @@ GPUChainTracking *chainTracking, *chainTrackingAsync, *chainTrackingPipeline; GPUChainITS *chainITS, *chainITSAsync, *chainITSPipeline; void unique_ptr_aligned_delete(char* v) { - operator delete(v GPUCA_OPERATOR_NEW_ALIGNMENT); + operator delete(v, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } std::unique_ptr outputmemory(nullptr, unique_ptr_aligned_delete), outputmemoryPipeline(nullptr, unique_ptr_aligned_delete), inputmemory(nullptr, unique_ptr_aligned_delete); std::unique_ptr eventDisplay; @@ -221,20 +221,20 @@ int32_t ReadConfiguration(int argc, char** argv) if (configStandalone.outputcontrolmem) { bool forceEmptyMemory = getenv("LD_PRELOAD") && strstr(getenv("LD_PRELOAD"), "valgrind") != nullptr; - outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT)); + outputmemory.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT))); if (forceEmptyMemory) { printf("Valgrind 
detected, emptying GPU output memory to avoid false positive undefined reads"); memset(outputmemory.get(), 0, configStandalone.outputcontrolmem); } if (configStandalone.proc.doublePipeline) { - outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT)); + outputmemoryPipeline.reset((char*)operator new(configStandalone.outputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT))); if (forceEmptyMemory) { memset(outputmemoryPipeline.get(), 0, configStandalone.outputcontrolmem); } } } if (configStandalone.inputcontrolmem) { - inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem GPUCA_OPERATOR_NEW_ALIGNMENT)); + inputmemory.reset((char*)operator new(configStandalone.inputcontrolmem, std::align_val_t(GPUCA_BUFFER_ALIGNMENT))); } configStandalone.proc.showOutputStat = true; From 4952cee09a99f788e070dc513d14e9ebdfb04165 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 8 Apr 2025 11:26:52 +0200 Subject: [PATCH 0380/1914] ITS3: GPU tracking Signed-off-by: Felix Schlepper --- .../include/ITS3Workflow/RecoWorkflow.h | 1 + .../ITS3/workflow/src/RecoWorkflow.cxx | 40 +++++++++++++++++-- .../ITS3/workflow/src/its3-reco-workflow.cxx | 4 +- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h index bac8a128c5b39..1760aa1d850eb 100644 --- a/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h +++ b/Detectors/Upgrades/ITS3/workflow/include/ITS3Workflow/RecoWorkflow.h @@ -26,6 +26,7 @@ namespace o2::its3::reco_workflow framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::gpu::GPUDataTypes::DeviceType dtype, + bool useGPUWorkflow, bool upstreamDigits, bool upstreamClusters, bool disableRootOutput, diff --git a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx 
index 21ae5b4a72345..947e53f80ddf1 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/RecoWorkflow.cxx @@ -15,12 +15,18 @@ #include "ITSWorkflow/ClusterWriterSpec.h" #include "ITSWorkflow/TrackWriterSpec.h" #include "ITS3Workflow/DigitReaderSpec.h" -#include "Framework/Logger.h" +#include "GPUWorkflow/GPUWorkflowSpec.h" +#include "Framework/CCDBParamSpec.h" + +// Dummy pointers +using CompletionPolicyData = std::vector; +static CompletionPolicyData gPolicyData; +static std::shared_ptr gTask; namespace o2::its3::reco_workflow { -framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::gpu::GPUDataTypes::DeviceType dtype, +framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::gpu::GPUDataTypes::DeviceType dtype, bool useGPUWorkflow, bool upstreamDigits, bool upstreamClusters, bool disableRootOutput, bool useGeom, int useTrig, bool overrideBeamPosition) { framework::WorkflowSpec specs; @@ -38,8 +44,36 @@ framework::WorkflowSpec getWorkflow(bool useMC, const std::string& trmode, o2::g } if (trmode != "off") { - specs.emplace_back(o2::its3::getTrackerSpec(useMC, useGeom, useTrig, trmode, overrideBeamPosition, dtype)); + if (useGPUWorkflow) { + o2::gpu::GPURecoWorkflowSpec::Config cfg; + cfg.runITSTracking = true; + cfg.isITS3 = true; + cfg.itsTriggerType = useTrig; + cfg.itsOverrBeamEst = overrideBeamPosition; + cfg.processMC = useMC; + Inputs ggInputs; + auto ggRequest = std::make_shared(false, true, false, true, true, + useGeom ? 
o2::base::GRPGeomRequest::Aligned : o2::base::GRPGeomRequest::None, + ggInputs, true); + if (!useGeom) { + ggRequest->addInput({"itsTGeo", "ITS", "GEOMTGEO", 0, Lifetime::Condition, framework::ccdbParamSpec("ITS/Config/Geometry")}, ggInputs); + } + + auto task = std::make_shared(&gPolicyData, cfg, std::vector(), 0, ggRequest); + gTask = task; + Inputs taskInputs = task->inputs(); + Options taskOptions = task->options(); + std::move(ggInputs.begin(), ggInputs.end(), std::back_inserter(taskInputs)); + specs.emplace_back(DataProcessorSpec{ + "its3-gpu-tracker", + taskInputs, + task->outputs(), + AlgorithmSpec{adoptTask(task)}, + taskOptions}); + } else { + specs.emplace_back(o2::its3::getTrackerSpec(useMC, useGeom, useTrig, trmode, overrideBeamPosition, dtype)); + } if (!disableRootOutput) { specs.emplace_back(o2::its::getTrackWriterSpec(useMC)); } diff --git a/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx b/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx index b8aec2541d31f..2f0eda73742cb 100644 --- a/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx +++ b/Detectors/Upgrades/ITS3/workflow/src/its3-reco-workflow.cxx @@ -48,6 +48,7 @@ void customize(std::vector& workflowOptions) {"tracking-mode", o2::framework::VariantType::String, "off", {"off,sync,async,cosmics"}}, {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}, {"use-full-geometry", o2::framework::VariantType::Bool, false, {"use full geometry instead of the light-weight IT3 part"}}, + {"use-gpu-workflow", o2::framework::VariantType::Bool, false, {"use GPU workflow (default: false)"}}, {"gpu-device", o2::framework::VariantType::Int, 1, {"use gpu device: CPU=1,CUDA=2,HIP=3 (default: CPU)"}}}; o2::raw::HBFUtilsInitializer::addConfigOption(options); std::swap(workflowOptions, options); @@ -67,6 +68,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) auto extClusters = configcontext.options().get("clusters-from-upstream"); 
auto disableRootOutput = configcontext.options().get("disable-root-output"); auto useGeom = configcontext.options().get("use-full-geometry"); + auto useGPUWfx = configcontext.options().get("use-gpu-workflow"); std::transform(trmode.begin(), trmode.end(), trmode.begin(), [](unsigned char c) { return std::tolower(c); }); o2::conf::ConfigurableParam::updateFromString(configcontext.options().get("configKeyValues")); @@ -80,7 +82,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) LOG(fatal) << "Unknown trigger type requested for events prescaling: " << selTrig; } } - auto wf = o2::its3::reco_workflow::getWorkflow(useMC, trmode, gpuDevice, extDigits, extClusters, disableRootOutput, useGeom, trType, beamPosOVerride); + auto wf = o2::its3::reco_workflow::getWorkflow(useMC, trmode, gpuDevice, useGPUWfx, extDigits, extClusters, disableRootOutput, useGeom, trType, beamPosOVerride); // configure dpl timer to inject correct firstTForbit: start from the 1st orbit of TF containing 1st sampled orbit o2::raw::HBFUtilsInitializer hbfIni(configcontext, wf); From fad8881705586ffaadde0f965c937e05d3fd766c Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 8 Apr 2025 11:26:15 +0200 Subject: [PATCH 0381/1914] GPU: Add ITS3 to workflow Signed-off-by: Felix Schlepper --- GPU/Workflow/CMakeLists.txt | 3 ++- .../include/GPUWorkflow/GPUWorkflowSpec.h | 1 + GPU/Workflow/src/GPUWorkflowITS.cxx | 16 ++++++++++++++++ GPU/Workflow/src/GPUWorkflowSpec.cxx | 10 +++++++--- 4 files changed, 26 insertions(+), 4 deletions(-) diff --git a/GPU/Workflow/CMakeLists.txt b/GPU/Workflow/CMakeLists.txt index aa725168b9db2..db8d22dda1bae 100644 --- a/GPU/Workflow/CMakeLists.txt +++ b/GPU/Workflow/CMakeLists.txt @@ -27,7 +27,8 @@ o2_add_library(GPUWorkflow O2::DataFormatsGlobalTracking O2::DataFormatsTRD PRIVATE_LINK_LIBRARIES O2::GPUTracking - O2::ITSTrackingInterface) + O2::ITSTrackingInterface + $<$:O2::ITS3TrackingInterface>) o2_add_executable(reco-workflow COMPONENT_NAME gpu 
diff --git a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h index eda3b28c6cff6..0038233f1c376 100644 --- a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h +++ b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h @@ -130,6 +130,7 @@ class GPURecoWorkflowSpec : public o2::framework::Task bool runITSTracking = false; bool itsOverrBeamEst = false; bool tpcTriggerHandling = false; + bool isITS3 = false; }; GPURecoWorkflowSpec(CompletionPolicyData* policyData, Config const& specconfig, std::vector const& tpcsectors, uint64_t tpcSectorMask, std::shared_ptr& ggr, std::function** gPolicyOrder = nullptr); diff --git a/GPU/Workflow/src/GPUWorkflowITS.cxx b/GPU/Workflow/src/GPUWorkflowITS.cxx index db9303c431ae7..e56958cba2c9b 100644 --- a/GPU/Workflow/src/GPUWorkflowITS.cxx +++ b/GPU/Workflow/src/GPUWorkflowITS.cxx @@ -20,6 +20,10 @@ #include "ITStracking/TrackingInterface.h" +#ifdef ENABLE_UPGRADES +#include "ITS3Reconstruction/TrackingInterface.h" +#endif + namespace o2::gpu { @@ -35,6 +39,18 @@ void GPURecoWorkflowSpec::initFunctionITS(o2::framework::InitContext& ic) { o2::its::VertexerTraits* vtxTraits = nullptr; o2::its::TrackerTraits* trkTraits = nullptr; +#ifdef ENABLE_UPGRADES + if (mSpecConfig.isITS3) { + mITSTrackingInterface = std::make_unique(mSpecConfig.processMC, + mSpecConfig.itsTriggerType, + mSpecConfig.itsOverrBeamEst); + } else +#endif + { + mITSTrackingInterface = std::make_unique(mSpecConfig.processMC, + mSpecConfig.itsTriggerType, + mSpecConfig.itsOverrBeamEst); + } mITSTrackingInterface = std::make_unique(mSpecConfig.processMC, mSpecConfig.itsTriggerType, mSpecConfig.itsOverrBeamEst); diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx b/GPU/Workflow/src/GPUWorkflowSpec.cxx index aa4f3cfca1289..7ad03ec58ae80 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -1174,9 +1174,13 @@ Inputs GPURecoWorkflowSpec::inputs() } else if (mSpecConfig.itsTriggerType 
== 2) { inputs.emplace_back("phystrig", "TRD", "TRKTRGRD", 0, Lifetime::Timeframe); } - inputs.emplace_back("itscldict", "ITS", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("ITS/Calib/ClusterDictionary")); - inputs.emplace_back("itsalppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); - + if (mSpecConfig.isITS3) { + inputs.emplace_back("cldict", "IT3", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("IT3/Calib/ClusterDictionary")); + inputs.emplace_back("alppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + } else { + inputs.emplace_back("itscldict", "ITS", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("ITS/Calib/ClusterDictionary")); + inputs.emplace_back("itsalppar", "ITS", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + } if (mSpecConfig.itsOverrBeamEst) { inputs.emplace_back("meanvtx", "GLO", "MEANVERTEX", 0, Lifetime::Condition, ccdbParamSpec("GLO/Calib/MeanVertex", {}, 1)); } From 304938e708db28c837d8fc14a6248df3db3ce185 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 26 Feb 2025 15:41:21 +0100 Subject: [PATCH 0382/1914] ITS3: propagating split dict to external Signed-off-by: Felix Schlepper --- Detectors/GlobalTracking/src/MatchTPCITS.cxx | 8 ++++---- .../include/StrangenessTracking/StrangenessTracker.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Detectors/GlobalTracking/src/MatchTPCITS.cxx b/Detectors/GlobalTracking/src/MatchTPCITS.cxx index 403b7dbbb0e09..f689caed87351 100644 --- a/Detectors/GlobalTracking/src/MatchTPCITS.cxx +++ b/Detectors/GlobalTracking/src/MatchTPCITS.cxx @@ -671,7 +671,8 @@ bool MatchTPCITS::prepareITSData() auto pattID = clus.getPatternID(); unsigned int npix; #ifdef ENABLE_UPGRADES - if ((pattID == o2::itsmft::CompCluster::InvalidPatternID) || ((withITS3) ? 
mIT3Dict->isGroup(pattID) : mITSDict->isGroup(pattID))) { // braces guarantee evaluation order + auto ib = o2::its3::constants::detID::isDetITS3(clus.getChipID()); + if ((pattID == o2::itsmft::CompCluster::InvalidPatternID) || ((withITS3) ? mIT3Dict->isGroup(pattID, ib) : mITSDict->isGroup(pattID))) { // braces guarantee evaluation order #else if (pattID == o2::itsmft::CompCluster::InvalidPatternID || mITSDict->isGroup(pattID)) { #endif @@ -681,7 +682,7 @@ bool MatchTPCITS::prepareITSData() } else { #ifdef ENABLE_UPGRADES if (withITS3) { - npix = mIT3Dict->getNpixels(pattID); + npix = mIT3Dict->getNpixels(pattID, ib); } else { npix = mITSDict->getNpixels(pattID); } @@ -1444,8 +1445,7 @@ void MatchTPCITS::refitWinners(pmr::vector& matche #ifdef WITH_OPENMP #pragma omp parallel for schedule(dynamic) num_threads(mNThreads) \ - reduction(+ \ - : nFailedRefit) + reduction(+ : nFailedRefit) #endif for (int ifit = 0; ifit < nToFit; ifit++) { int iTPC = tpcToFit[ifit], iITS; diff --git a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h index 9ca34e548f1c8..11feac64c59ae 100644 --- a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h +++ b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h @@ -279,20 +279,20 @@ class StrangenessTracker for (unsigned int iClus{0}; iClus < ITSclus.size(); ++iClus) { auto& clus = ITSclus[iClus]; auto pattID = clus.getPatternID(); + auto ib = o2::its3::constants::detID::isDetITS3(clus.getChipID()); int npix; o2::itsmft::ClusterPattern patt; - if (pattID == o2::itsmft::CompCluster::InvalidPatternID || mdict->isGroup(pattID)) { + if (pattID == o2::itsmft::CompCluster::InvalidPatternID || mdict->isGroup(pattID, ib)) { patt.acquirePattern(pattIt); npix = patt.getNPixels(); } else { - npix = mdict->getNpixels(pattID); - patt = 
mdict->getPattern(pattID); + npix = mdict->getNpixels(pattID, ib); + patt = mdict->getPattern(pattID, ib); } clusSizeVec[iClus] = npix; } - // LOG(info) << " Patt Npixel: " << pattVec[0].getNPixels(); } #endif From 9a0a760ec1f251b6728807901e50511d49ed6ab7 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Mon, 14 Apr 2025 10:54:36 +0200 Subject: [PATCH 0383/1914] ITS: CellSeed forced cast to int for chi2 (#14173) @mconcas @mpuccio is there a reason why there the `getChi2()` method casts the chi2 to int? Seems to me that this is a bit broken. --- Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h index fa0473ae88462..9ed5daed447df 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cell.h @@ -97,7 +97,7 @@ class CellSeed final : public o2::track::TrackParCovF GPUhd() void setFirstTrackletIndex(int trkl) { mTracklets[0] = trkl; }; GPUhd() int getSecondTrackletIndex() const { return mTracklets[1]; }; GPUhd() void setSecondTrackletIndex(int trkl) { mTracklets[1] = trkl; }; - GPUhd() int getChi2() const { return mChi2; }; + GPUhd() float getChi2() const { return mChi2; }; GPUhd() void setChi2(float chi2) { mChi2 = chi2; }; GPUhd() int getLevel() const { return mLevel; }; GPUhd() void setLevel(int level) { mLevel = level; }; From ad25169b756bf60e0c46e3cc69ed1cb2ec28e50f Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sun, 13 Apr 2025 23:34:34 +0200 Subject: [PATCH 0384/1914] Fix decay It looks like the comment is correct and the PDG id is wrong. 
--- Steer/src/O2MCApplication.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Steer/src/O2MCApplication.cxx b/Steer/src/O2MCApplication.cxx index e1bba03e81c50..e44758241b8d2 100644 --- a/Steer/src/O2MCApplication.cxx +++ b/Steer/src/O2MCApplication.cxx @@ -887,7 +887,7 @@ void addSpecialParticles() mode4s[1][1] = 111; // pion0 bratio4s[2] = 40.; mode4s[2][0] = 1000010030; // tritium - mode4s[2][2] = 2212; // pion+ + mode4s[2][2] = 211; // pion+ mode4s[2][1] = 2112; // neutron TVirtualMC::GetMC()->SetDecayMode(1110020040, bratio4s, mode4s); From 0f9450ef8ec19b2dbd2c3b809f5cfcd716913880 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 14 Apr 2025 16:01:21 +0200 Subject: [PATCH 0385/1914] ONNX: Use CMake defines not env variables --- Common/ML/CMakeLists.txt | 16 ++++++++-------- Common/ML/src/OrtInterface.cxx | 32 +++++++++++++------------------- 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index 74be306c8b6a5..540fe8ebf271c 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -10,17 +10,17 @@ # or submit itself to any jurisdiction. 
# Pass ORT variables as a preprocessor definition -if(DEFINED ENV{ORT_ROCM_BUILD}) - add_compile_definitions(ORT_ROCM_BUILD=$ENV{ORT_ROCM_BUILD}) +if(ORT_ROCM_BUILD) + add_compile_definitions(ORT_ROCM_BUILD=1) endif() -if(DEFINED ENV{ORT_CUDA_BUILD}) - add_compile_definitions(ORT_CUDA_BUILD=$ENV{ORT_CUDA_BUILD}) +if(ORT_CUDA_BUILD) + add_compile_definitions(ORT_CUDA_BUILD=1) endif() -if(DEFINED ENV{ORT_MIGRAPHX_BUILD}) - add_compile_definitions(ORT_MIGRAPHX_BUILD=$ENV{ORT_MIGRAPHX_BUILD}) +if(ORT_MIGRAPHX_BUILD) + add_compile_definitions(ORT_MIGRAPHX_BUILD=1) endif() -if(DEFINED ENV{ORT_TENSORRT_BUILD}) - add_compile_definitions(ORT_TENSORRT_BUILD=$ENV{ORT_TENSORRT_BUILD}) +if(ORT_TENSORRT_BUILD) + add_compile_definitions(ORT_TENSORRT_BUILD=1) endif() o2_add_library(ML diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index fc784dd14d2dc..88f548bd4fe7b 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -59,29 +59,23 @@ void OrtModel::reset(std::unordered_map optionsMap) std::string dev_mem_str = "Hip"; #if defined(ORT_ROCM_BUILD) -#if ORT_ROCM_BUILD == 1 - if (device == "ROCM") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) ROCM execution provider set"; - } -#endif + if (device == "ROCM") { + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId)); + LOG(info) << "(ORT) ROCM execution provider set"; + } #endif #if defined(ORT_MIGRAPHX_BUILD) -#if ORT_MIGRAPHX_BUILD == 1 - if (device == "MIGRAPHX") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) MIGraphX execution provider set"; - } -#endif + if (device == "MIGRAPHX") { + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId)); + LOG(info) << "(ORT) MIGraphX execution provider set"; + } #endif #if 
defined(ORT_CUDA_BUILD) -#if ORT_CUDA_BUILD == 1 - if (device == "CUDA") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) CUDA execution provider set"; - dev_mem_str = "Cuda"; - } -#endif + if (device == "CUDA") { + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId)); + LOG(info) << "(ORT) CUDA execution provider set"; + dev_mem_str = "Cuda"; + } #endif if (allocateDeviceMemory) { From 013e1514fc94bc13564dfcf7bafd0c3f633f6a69 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 10 Apr 2025 15:10:20 +0200 Subject: [PATCH 0386/1914] Split GPUDefParameters file into constants, and RTC-dynamic parameters with a defaults file, and a wrapper! --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 2 +- GPU/GPUTracking/CMakeLists.txt | 8 +- GPU/GPUTracking/Definitions/GPUDef.h | 2 +- .../Definitions/GPUDefParametersConstants.h | 87 ++++++++++++ ...rsDefault.h => GPUDefParametersDefaults.h} | 132 ++---------------- .../GPUDefParametersLoad.template.inc | 2 +- ...e.h => GPUDefParametersRuntime.template.h} | 8 +- .../Definitions/GPUDefParametersWrapper.h | 66 +++++++++ .../Standalone/tools/dumpGPUDefParam.C | 7 +- 9 files changed, 182 insertions(+), 132 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParametersConstants.h rename GPU/GPUTracking/Definitions/{GPUDefParametersDefault.h => GPUDefParametersDefaults.h} (81%) rename GPU/GPUTracking/Definitions/{GPUDefParameters.template.h => GPUDefParametersRuntime.template.h} (87%) create mode 100644 GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 9b569d3e88f3c..39507beda8a55 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -16,7 +16,7 @@ #include "GPUReconstructionIncludes.h" #include 
"GPUReconstructionThreading.h" #include "GPUChain.h" -#include "GPUDefParameters.h" +#include "GPUDefParametersRuntime.h" #include "GPUTPCClusterData.h" #include "GPUTPCSectorOutCluster.h" #include "GPUTPCGMMergedTrack.h" diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 44a630fe19f48..f428d982394e0 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -129,7 +129,9 @@ set(HDRS_INSTALL DataTypes/GPUTriggerOutputs.h Debug/GPUROOTDump.h Definitions/GPUDefConstantsAndSettings.h - Definitions/GPUDefParametersDefault.h + Definitions/GPUDefParametersWrapper.h + Definitions/GPUDefParametersConstants.h + Definitions/GPUDefParametersDefaults.h Definitions/GPUDef.h Definitions/GPUDefMacros.h Definitions/GPULogging.h @@ -234,7 +236,7 @@ set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h Base/GPUReconstructionKernelIncludes.template.h Base/GPUReconstructionIncludesDeviceAll.template.h cmake/GPUNoFastMathKernels.template.h - Definitions/GPUDefParameters.template.h + Definitions/GPUDefParametersRuntime.template.h Definitions/GPUDefParametersLoad.template.inc) set(GENERATED_HEADERS_LIST "") @@ -258,7 +260,7 @@ add_custom_command( ) list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) -set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) +set(HDRS_INSTALL ${HDRS_INSTALL} ${GENERATED_HEADERS_LIST}) include(kernels.cmake) # Optional sources depending on optional dependencies diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index 404f35f971c94..c77b9ce159306 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ 
b/GPU/GPUTracking/Definitions/GPUDef.h @@ -18,7 +18,7 @@ #include "GPUCommonDef.h" #include "GPUDefConstantsAndSettings.h" -#include "GPUDefParametersDefault.h" +#include "GPUDefParametersWrapper.h" #include "GPUCommonRtypes.h" // Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h new file mode 100644 index 0000000000000..3a16d02ecf7c6 --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h @@ -0,0 +1,87 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDefParametersConstants.h +/// \author David Rohr + +// This file contains compile-time constants, independent from the backend + +#ifndef GPUDEFPARAMETERSCONSTANTS_H +#define GPUDEFPARAMETERSCONSTANTS_H +// clang-format off + +#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! 
+ +#if defined(__CUDACC__) || defined(__HIPCC__) + #define GPUCA_SPECIALIZE_THRUST_SORTS +#endif + +#define GPUCA_MAX_THREADS 1024 +#define GPUCA_MAX_STREAMS 36 + +#if defined(GPUCA_GPUCODE) + #define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU +#endif + +#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid +#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers +#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks + +// Default maximum numbers +#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters +#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets +#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit +#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device +#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host +#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread +#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread + +#ifdef GPUCA_GPUCODE + #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 + #endif + #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 + #endif + #ifndef GPUCA_ALTERNATE_BORDER_SORT + #define GPUCA_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_SORT_BEFORE_FIT + #define GPUCA_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_COMP_GATHER_KERNEL + #define GPUCA_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_COMP_GATHER_MODE + #define GPUCA_COMP_GATHER_MODE 2 + #endif +#else + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 + #define GPUCA_ALTERNATE_BORDER_SORT 0 + #define 
GPUCA_SORT_BEFORE_FIT 0 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #define GPUCA_THREAD_COUNT_FINDER 1 + #define GPUCA_COMP_GATHER_KERNEL 0 + #define GPUCA_COMP_GATHER_MODE 0 +#endif +#ifndef GPUCA_DEDX_STORAGE_TYPE + #define GPUCA_DEDX_STORAGE_TYPE float +#endif +#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float +#endif + +// clang-format on +#endif // GPUDEFPARAMETERSCONSTANTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h similarity index 81% rename from GPU/GPUTracking/Definitions/GPUDefParametersDefault.h rename to GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 44f3eb299d4c1..360fb1ffb5269 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefault.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -9,28 +9,22 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUDefParametersDefault.h +/// \file GPUDefParametersDefaults.h /// \author David Rohr -// This files contains compile-time constants affecting the GPU performance. -// Many of these constants are GPU-architecture specific. -// This file also contains all constants describing memory limitations, essentially limiting the total number of tracks, etc. -// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h +// This file contains compile-time constants affecting the GPU performance. 
-#ifndef GPUDEFPARAMETERSDEFAULT_H -#define GPUDEFPARAMETERSDEFAULT_H +#ifndef GPUDEFPARAMETERSDEFAULTS_H +#define GPUDEFPARAMETERSDEFAULTS_H // clang-format off -#include "GPUCommonDef.h" -#include "GPUDefMacros.h" - // Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds) // GPU Run Configuration #ifdef GPUCA_GPUCODE #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT 256 + #define GPUCA_THREAD_COUNT_DEFAULT 256 #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 @@ -93,7 +87,7 @@ #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT 256 + #define GPUCA_THREAD_COUNT_DEFAULT 256 #define GPUCA_LB_GPUTPCCreateTrackingData 128 #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 #define GPUCA_LB_GPUTPCStartHitsFinder 1024 @@ -156,7 +150,7 @@ #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT 512 + #define GPUCA_THREAD_COUNT_DEFAULT 512 #define GPUCA_LB_GPUTPCCreateTrackingData 384 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 @@ -219,7 +213,7 @@ #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT 512 + #define GPUCA_THREAD_COUNT_DEFAULT 512 #define GPUCA_LB_GPUTPCCreateTrackingData 256 #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 #define GPUCA_LB_GPUTPCStartHitsFinder 512 @@ -281,8 +275,8 @@ #ifdef GPUCA_GPUCODE // Default settings for GPU, if not already set for selected GPU type - #ifndef GPUCA_THREAD_COUNT - #define GPUCA_THREAD_COUNT 256 + #ifndef GPUCA_THREAD_COUNT_DEFAULT + #define GPUCA_THREAD_COUNT_DEFAULT 256 #endif #ifndef GPUCA_LB_GPUTPCCreateTrackingData #define 
GPUCA_LB_GPUTPCCreateTrackingData 256 @@ -486,13 +480,11 @@ #define GPUCA_LB_GPUTrackingRefitKernel_mode1asTrackParCov 256 #endif #ifndef GPUCA_LB_GPUMemClean16 - #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT, 1 + #define GPUCA_LB_GPUMemClean16 GPUCA_THREAD_COUNT_DEFAULT, 1 #endif #ifndef GPUCA_LB_GPUitoa - #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT, 1 + #define GPUCA_LB_GPUitoa GPUCA_THREAD_COUNT_DEFAULT, 1 #endif - #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) - // These kernel launch-bounds are derrived from one of the constants set above #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression @@ -516,105 +508,7 @@ #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER -#else - #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. -#endif - -#define GPUCA_GET_WARP_COUNT(...) (GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) - -#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! 
- -#if defined(__CUDACC__) || defined(__HIPCC__) - #define GPUCA_SPECIALIZE_THRUST_SORTS -#endif - -#ifndef GPUCA_NEIGHBORSFINDER_REGS - #define GPUCA_NEIGHBORSFINDER_REGS NONE, 0 -#endif -#ifdef GPUCA_GPUCODE - #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 - #endif - #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 - #endif - #ifndef GPUCA_ALTERNATE_BORDER_SORT - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #endif - #ifndef GPUCA_SORT_BEFORE_FIT - #define GPUCA_SORT_BEFORE_FIT 0 - #endif - #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #endif - #ifndef GPUCA_COMP_GATHER_KERNEL - #define GPUCA_COMP_GATHER_KERNEL 0 - #endif - #ifndef GPUCA_COMP_GATHER_MODE - #define GPUCA_COMP_GATHER_MODE 2 - #endif -#else - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #define GPUCA_SORT_BEFORE_FIT 0 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_THREAD_COUNT_FINDER 1 - #define GPUCA_COMP_GATHER_KERNEL 0 - #define GPUCA_COMP_GATHER_MODE 0 -#endif -#ifndef GPUCA_DEDX_STORAGE_TYPE - #define GPUCA_DEDX_STORAGE_TYPE float -#endif -#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float -#endif -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) -#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) - -#ifndef GPUCA_WARP_SIZE - #ifdef GPUCA_GPUCODE - #define GPUCA_WARP_SIZE 32 - #else - #define GPUCA_WARP_SIZE 1 - #endif -#endif - -#define GPUCA_MAX_THREADS 1024 -#define GPUCA_MAX_STREAMS 36 - -#define GPUCA_SORT_STARTHITS_GPU // Sort the start hits when running on GPU -#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid -#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers 
obtained from SetPointers -#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks - -// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling - -// Default maximum numbers -#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters -#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets -#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit -#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device -#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host -#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread -#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread - -// #define GPUCA_KERNEL_DEBUGGER_OUTPUT - -// Some assertions to make sure the parameters are not invalid -#if defined(GPUCA_GPUCODE) - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); - static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); -#endif - -// Derived parameters -#ifdef GPUCA_USE_TEXTURES - #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache -#endif -#if defined(GPUCA_SORT_STARTHITS_GPU) && defined(GPUCA_GPUCODE) - #define GPUCA_SORT_STARTHITS #endif // clang-format on -#endif // GPUDEFPARAMETERSDEFAULT_H +#endif // GPUDEFPARAMETERSDEFAULTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index c17244572ee0c..edec6530c1c5d 100644 --- 
a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -18,7 +18,7 @@ #define GPUCA_M_LB_EMPTY_1(...) __VA_ARGS__ #define GPUCA_M_LB_EMPTY0(...) GPUCA_M_CAT(GPUCA_M_LB_EMPTY_, __VA_OPT__(1))(__VA_ARGS__) -#include "GPUDefParameters.h" +#include "GPUDefParametersRuntime.h" #include "GPUDefMacros.h" #include #include diff --git a/GPU/GPUTracking/Definitions/GPUDefParameters.template.h b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h similarity index 87% rename from GPU/GPUTracking/Definitions/GPUDefParameters.template.h rename to GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h index 731cb76b89193..f3537c058a824 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParameters.template.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h @@ -9,11 +9,11 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. -/// \file GPUDefParameters.h +/// \file GPUDefParametersRuntime.h /// \author David Rohr -#ifndef GPUDEFPARAMETERS_H -#define GPUDEFPARAMETERS_H +#ifndef GPUDEFPARAMETERSRUNTIME_H +#define GPUDEFPARAMETERSRUNTIME_H namespace o2::gpu { @@ -24,4 +24,4 @@ struct GPUDefParameters { // clang-format off }; // clang-format on } // namespace o2::gpu -#endif +#endif // GPUDEFPARAMETERSRUNTIME_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h new file mode 100644 index 0000000000000..7f2bb271d18c8 --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -0,0 +1,66 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDefParametersWrapper.h +/// \author David Rohr + +// Wrapper file to load all compile-time parameters (architecture / rtc - dependent ones, and constant ones) +// Compile-time constants affecting the tracking algorithms / results are located in GPUDefConstantsAndSettings.h + +#ifndef GPUDEFPARAMETERSWRAPPER_H +#define GPUDEFPARAMETERSWRAPPER_H +// clang-format off + +#include "GPUCommonDef.h" +#include "GPUDefMacros.h" + +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUDefParametersDefaults.h" +#endif +#include "GPUDefParametersConstants.h" + +#ifdef GPUCA_GPUCODE + #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) +#else + #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. +#endif + +#define GPUCA_GET_WARP_COUNT(...) 
(GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) + +#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) +#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) + +#ifndef GPUCA_WARP_SIZE + #ifdef GPUCA_GPUCODE + #define GPUCA_WARP_SIZE 32 + #else + #define GPUCA_WARP_SIZE 1 + #endif +#endif + +// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling + +// #define GPUCA_KERNEL_DEBUGGER_OUTPUT + +// Some assertions to make sure the parameters are not invalid +#if defined(GPUCA_GPUCODE) + static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); + static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); + static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); +#endif + +// Derived parameters +#ifdef GPUCA_USE_TEXTURES + #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache +#endif + +// clang-format on +#endif // GPUDEFPARAMETERSWRAPPER_H diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index 4a72b0cef31a3..785c049816252 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -16,14 +16,15 @@ // ROOT_INCLUDE_PATH="`pwd`/include" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' // Logic for testing to load the default parameters -/*#define GPUCA_GPUCODE +/* #define GPUCA_GPUCODE #define GPUCA_GPUTYPE_AMPERE #define GPUCA_MAXN 40 #define GPUCA_ROW_COUNT 152 #define GPUCA_TPC_COMP_CHUNK_SIZE 1024 -#include "GPUDefParametersDefault.h"*/ +#include "GPUDefParametersConstants.h" +#include 
"GPUDefParametersDefaults.h" */ -// Load file that sets GPUDefParameters +// Alternatively, logic to load file that sets GPUDefParameters #include "testParam.h" #include "GPUDefParametersLoad.inc" From 260d7d579864cb8ab91fd2d40278b52b66e7b2d8 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 20:05:43 +0200 Subject: [PATCH 0387/1914] GPU CUDA/HIP: Set warp size automatically, not hard-coded --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 9 +++------ .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 3 ++- GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h | 3 +++ GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h | 8 -------- 4 files changed, 8 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index f475929d49d50..47a9b675d27f6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -111,6 +111,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() constexpr int32_t reqVerMin = 0; #endif if (mProcessingSettings.rtc.enable && mProcessingSettings.rtctech.runTest == 2) { + mWarpSize = GPUCA_WARP_SIZE; genAndLoadRTC(); exit(0); } @@ -244,16 +245,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUInfo("\ttextureAlignment = %ld", (uint64_t)deviceProp.textureAlignment); GPUInfo(" "); } - if (deviceProp.warpSize != GPUCA_WARP_SIZE) { + if (deviceProp.warpSize != GPUCA_WARP_SIZE && !mProcessingSettings.rtc.enable) { throw std::runtime_error("Invalid warp size on GPU"); } + mWarpSize = deviceProp.warpSize; mBlockCount = deviceProp.multiProcessorCount; mMaxBackendThreads = std::max(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount); -#ifndef __HIPCC__ // CUDA - mWarpSize = 32; -#else // HIP - mWarpSize = 64; -#endif mDeviceName = deviceProp.name; mDeviceName += " (CUDA GPU)"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx 
b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 5f481d2cb9058..abcd47ca01c90 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -73,7 +73,8 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } fclose(fp); } - const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true); + const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) + + "#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n"; if (mProcessingSettings.rtctech.printLaunchBounds || mProcessingSettings.debugLevel >= 3) { GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str()); } diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 360fb1ffb5269..396934e17da82 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -275,6 +275,9 @@ #ifdef GPUCA_GPUCODE // Default settings for GPU, if not already set for selected GPU type + #ifndef GPUCA_WARP_SIZE + #define GPUCA_WARP_SIZE 32 + #endif #ifndef GPUCA_THREAD_COUNT_DEFAULT #define GPUCA_THREAD_COUNT_DEFAULT 256 #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index 7f2bb271d18c8..114c8d229b493 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -38,14 +38,6 @@ #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) #define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) -#ifndef GPUCA_WARP_SIZE - #ifdef GPUCA_GPUCODE - #define GPUCA_WARP_SIZE 32 - #else - #define GPUCA_WARP_SIZE 1 - #endif -#endif - // #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // 
Output Profiling Data for Tracklet Constructor Tracklet Scheduling // #define GPUCA_KERNEL_DEBUGGER_OUTPUT From a87ed76713dd648eecba500001afc682663ad46f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 20:54:30 +0200 Subject: [PATCH 0388/1914] GPU: Switch some preprocessor defines to constexpr --- .../Definitions/GPUDefParametersWrapper.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index 114c8d229b493..ce7889fe7fadd 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -27,14 +27,17 @@ #endif #include "GPUDefParametersConstants.h" -#ifdef GPUCA_GPUCODE - #define GPUCA_GET_THREAD_COUNT(...) GPUCA_M_FIRST(__VA_ARGS__) +namespace o2::gpu +{ +#if defined(GPUCA_GPUCODE) + GPUhdi() static constexpr uint32_t GPUCA_GET_THREAD_COUNT(uint32_t val, ...) { return val; } + GPUhdi() static constexpr uint32_t GPUCA_GET_WARP_COUNT(uint32_t val, ...) { return val / GPUCA_WARP_SIZE; } #else - #define GPUCA_GET_THREAD_COUNT(...) 1 // On the host, a thread is a block, and we run 1 "device thread" per block. + static constexpr uint32_t GPUCA_WARP_SIZE = 1; // On the host, a thread is a block is a warp, and we run 1 "device thread" per block. + #define GPUCA_GET_THREAD_COUNT(...) 1 // This must be a define not a constexpr function + #define GPUCA_GET_WARP_COUNT(...) 1 // since launch bound constants are not defined in host-code, and must evaluate to 1! #endif -#define GPUCA_GET_WARP_COUNT(...) 
(GPUCA_GET_THREAD_COUNT(__VA_ARGS__) / GPUCA_WARP_SIZE) - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) #define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) @@ -46,7 +49,7 @@ #if defined(GPUCA_GPUCODE) static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - static_assert(GPUCA_M_FIRST(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); #endif // Derived parameters @@ -54,5 +57,7 @@ #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache #endif +} // namespace o2::gpu + // clang-format on #endif // GPUDEFPARAMETERSWRAPPER_H From 504bb9cb6102b535ff8f69e137b8d056cdbe043f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 20:55:00 +0200 Subject: [PATCH 0389/1914] GPU: Get rid of GPUCA_RTC_LB_..., use only GPUCA_LB_... 
--- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- .../Base/cuda/GPUReconstructionCUDA.h | 3 +- .../GPUReconstructionCUDAIncludesSystem.h | 6 ++-- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 11 ------- .../cuda/GPUReconstructionCUDARTCCalls.cu | 32 +++++++++++++++++++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +-- .../hip/GPUReconstructionHIPIncludesSystem.h | 6 ++-- .../Definitions/GPUDefParametersDefaults.h | 2 +- .../GPUDefParametersLoad.template.inc | 22 ++++++------- .../Definitions/GPUDefParametersWrapper.h | 4 +-- 10 files changed, 56 insertions(+), 36 deletions(-) create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index e17f1fcd7091e..3655eaf66055e 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -17,7 +17,7 @@ if(DEFINED CUDA_COMPUTETARGET) endif() message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}") -set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu) +set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDARTCCalls.cu) set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesSystem.h) # -------------------------------- Prepare RTC ------------------------------------------------------- enable_language(ASM) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index ac5920f769f25..3441c6b9a4fd6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -45,8 +45,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase template void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); - void getRTCKernelCalls(std::vector& kernels); - template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); GPUReconstructionCUDAInternals* mInternals; @@ -91,6 +89,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels& kernels); void genAndLoadRTC(); void loadKernelModules(bool perKernel); const char *mRtcSrcExtension = ".src", *mRtcBinExtension = ".o"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h index 1cb3679fc30dc..3f072059a9ad7 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h @@ -12,8 +12,8 @@ /// \file GPUReconstructionCUDAIncludesSystem.h /// \author David Rohr -#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H -#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H +#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H +#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H #include #include @@ -32,4 +32,4 @@ #include #include -#endif +#endif // O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index f8efd8428f035..cf08785e6b3d5 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -118,14 +118,3 @@ static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstS return retVal; }); #endif - -void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector& kernels) -{ -#undef GPUCA_KRNL_LB -#undef __launch_bounds__ -#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); -#define GPUCA_KRNL_LB(x_class, x_attributes, ...) 
GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_RTC_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL -#undef GPUCA_KRNL_LB -} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu new file mode 100644 index 0000000000000..571428dc39e21 --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu @@ -0,0 +1,32 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCUDARTCCalls.cu +/// \author David Rohr + +#define GPUCA_GPUCODE_HOSTONLY +#define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS + +#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args)) + +#include "GPUReconstructionCUDAIncludesSystem.h" +#include "GPUReconstructionCUDADef.h" +#include "GPUReconstructionCUDA.h" + +using namespace o2::gpu; + +void GPUReconstructionCUDA::getRTCKernelCalls(std::vector& kernels) +{ +#undef GPUCA_KRNL +#define GPUCA_KRNL(...) 
kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__))); +#undef __launch_bounds__ +#include "GPUReconstructionKernelList.h" +} diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 9a9b1e36a167c..55211e5ff10f6 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesSystem.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) @@ -61,7 +61,7 @@ else() get_filename_component(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ABSOLUTE) endif() -set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip) +set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPRTCCalls.hip) set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx) set(HDRS 
${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesSystem.h) diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h index cfe1121ef1089..1a3a1ff0108af 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h @@ -12,8 +12,8 @@ /// \file GPUReconstructionHIPIncludesSystem.h /// \author David Rohr -#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDES_H -#define O2_GPU_RECONSTRUCTIONHIPINCLUDES_H +#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H +#define O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H #include #include @@ -25,4 +25,4 @@ #include #pragma GCC diagnostic pop -#endif +#endif // O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 396934e17da82..406fb5e8048d6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -14,7 +14,7 @@ // This file contains compile-time constants affecting the GPU performance. -#ifndef GPUDEFPARAMETERSDEFAULTS_H +#if !defined(GPUDEFPARAMETERSDEFAULTS_H) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
#define GPUDEFPARAMETERSDEFAULTS_H // clang-format off diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index edec6530c1c5d..938cedbdacc93 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -37,17 +37,17 @@ static GPUDefParameters GPUDefParametersLoad() }; } -#define GPUCA_EXPORT_KERNEL(name) \ - if (par.par_LB_maxThreads[i] > 0) { \ - o << "#define GPUCA_" << (forRTC ? "RTC_" : "") << "LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ - if (par.par_LB_minBlocks[i] > 0) { \ - o << ", " << par.par_LB_minBlocks[i]; \ - } \ - if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ - o << ", " << par.par_LB_forceBlocks[i]; \ - } \ - o << "\n"; \ - } \ +#define GPUCA_EXPORT_KERNEL(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define GPUCA_LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << par.par_LB_minBlocks[i]; \ + } \ + if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ i++; static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index ce7889fe7fadd..b61312b7b04be 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,14 +22,14 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#ifndef GPUCA_GPUCODE_GENRTC +#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" #endif #include "GPUDefParametersConstants.h" namespace o2::gpu { -#if defined(GPUCA_GPUCODE) +#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) GPUhdi() static constexpr uint32_t GPUCA_GET_THREAD_COUNT(uint32_t val, 
...) { return val; } GPUhdi() static constexpr uint32_t GPUCA_GET_WARP_COUNT(uint32_t val, ...) { return val / GPUCA_WARP_SIZE; } #else From 731144f9b7ea49b5001e3c0acfca7edd2970b184 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 22:17:17 +0200 Subject: [PATCH 0390/1914] GPU HIP: Don't hipify cxx files --- GPU/GPUTracking/Base/hip/CMakeLists.txt | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 55211e5ff10f6..3a03a054d4a7e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -30,14 +30,24 @@ if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") foreach(file ${GPUCA_HIP_FILE_LIST}) get_filename_component(ABS_CUDA_SORUCE ../cuda/${file} ABSOLUTE) get_filename_component(CUDA_SOURCE ${file} NAME) + get_filename_component(CUDA_SOURCE_EXT ${file} EXT) string(REPLACE ".cu" ".hip" HIP_SOURCE1 ${CUDA_SOURCE}) string(REPLACE "CUDA" "HIP" HIP_SOURCE ${HIP_SOURCE1}) - add_custom_command( - OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} - COMMAND ${hip_HIPIFY_PERL_EXECUTABLE} --quiet-warnings ${ABS_CUDA_SORUCE} | sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} - DEPENDS ${ABS_CUDA_SORUCE} - COMMENT "Hippifying ${HIP_SOURCE}" - ) + if(CUDA_SOURCE_EXT STREQUAL ".cu" OR CUDA_SOURCE_EXT STREQUAL ".h") + add_custom_command( + OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + COMMAND ${hip_HIPIFY_PERL_EXECUTABLE} --quiet-warnings ${ABS_CUDA_SORUCE} | sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + DEPENDS ${ABS_CUDA_SORUCE} + COMMENT "Hippifying ${HIP_SOURCE}" + ) + else() + add_custom_command( + OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + COMMAND sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' ${ABS_CUDA_SORUCE} > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE} + DEPENDS ${ABS_CUDA_SORUCE} + COMMENT "Generating HIP source 
${HIP_SOURCE}" + ) + endif() list(APPEND HIP_SOURCES "${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}") endforeach() foreach(file ${GPUCA_HIP_LOCAL_FILE_LIST}) From 3812d54df627c334249c6ca29a4b7615694f7dbb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 22:31:00 +0200 Subject: [PATCH 0391/1914] GPU: Fix some global preprocessor defines that were used incorrectly --- GPU/Common/GPUCommonDefAPI.h | 2 +- .../GPUTPCCompressionKernels.cxx | 3 +++ .../DataCompression/GPUTPCCompressionKernels.h | 18 +++++++++++------- .../Definitions/GPUDefParametersDefaults.h | 2 -- .../Definitions/GPUDefParametersWrapper.h | 7 ------- .../SectorTracker/GPUTPCNeighboursFinder.h | 1 + .../SectorTracker/GPUTPCTrackletSelector.h | 1 + .../GPUTPCCFNoiseSuppression.h | 4 +++- .../GPUTPCCFStreamCompaction.h | 7 +++++++ .../GPUTPCNNClusterizerKernels.h | 8 +++++++- 10 files changed, 34 insertions(+), 19 deletions(-) diff --git a/GPU/Common/GPUCommonDefAPI.h b/GPU/Common/GPUCommonDefAPI.h index f7efbf7e976d4..b029038a3b521 100644 --- a/GPU/Common/GPUCommonDefAPI.h +++ b/GPU/Common/GPUCommonDefAPI.h @@ -43,7 +43,7 @@ #define GPUhd() // Host and device function, inlined during GPU compilation to avoid symbol clashes in host code #define GPUhdi() inline // Host and device function, to-be-inlined on host and device #define GPUhdni() // Host and device function, not to-be-inlined automatically - #define GPUg() INVALID_TRIGGER_ERROR_NO_HOST_CODE // GPU kernel + #define GPUg() INVALID_TRIGGER_ERROR_NO_GPU_CODE // GPU kernel #define GPUshared() // shared memory variable declaration #define GPUglobal() // global memory variable declaration (only used for kernel input pointers) #define GPUconstant() // constant memory variable declaraion diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 445c03113cd39..5dbbf63ca8264 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ 
b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -258,6 +258,9 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->clusters[iSector][iRow])); } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h index b0bb8a6c12ecc..81817abf1e6d6 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h @@ -72,15 +72,19 @@ class GPUTPCCompressionGatherKernels : public GPUKernelTemplate using Vec64 = uint64_t; using Vec128 = uint4; - struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock)); + struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { union { - uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)]; - Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; - Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; - Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; + uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)]; + Vec32 
buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; + Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; + Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; struct { - uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; - uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE]; + uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; + uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE]; } unbuffered; }; diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 406fb5e8048d6..83ef7d8cf810b 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -492,14 +492,12 @@ #define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression #define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression - #ifdef GPUCA_HAS_ONNX #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #endif #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp 
GPUCA_THREAD_COUNT_SCAN diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index b61312b7b04be..beeefa4eb5f9d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -45,13 +45,6 @@ namespace o2::gpu // #define GPUCA_KERNEL_DEBUGGER_OUTPUT -// Some assertions to make sure the parameters are not invalid -#if defined(GPUCA_GPUCODE) - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP"); - static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE"); - static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE"); -#endif - // Derived parameters #ifdef GPUCA_USE_TEXTURES #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 41b5eb8a4ffb8..1bf5000cfbe5c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -41,6 +41,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number #if GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 + static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP); float mA1[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index 5009c672b030e..f487931bdaf4b 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -37,6 +37,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate int32_t mNTracklets; // n of tracklets int32_t mReserved; // for alignment reasons #if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE); GPUTPCHitId mHits[GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; #endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h index f5d8f533df651..71236bc317443 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h @@ -34,7 +34,9 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate noiseSuppression = 0, updatePeaks = 1, }; - static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression); + static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks)); + struct GPUSharedMemory { ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_NOISE_N]; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h index 25d3588be6d17..a72907fe55e89 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h @@ -37,6 +37,13 @@ class 
GPUTPCCFStreamCompaction : public GPUKernelTemplate struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { }; +#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown)); + static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits)); +#endif typedef GPUTPCClusterFinder processorType; GPUhdi() static processorType* Processor(GPUConstantMem& processors) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index e6c1dc508d6e4..a1d641fdb0b93 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -37,7 +37,13 @@ class MCLabelAccumulator; class GPUTPCNNClusterizerKernels : public GPUKernelTemplate { public: - static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels); + // Must all have same number of threads, since they use a common SCRATCH_PAD_WORK_GROUP_SIZE below + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels) == 
GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer); struct GPUSharedMemory { // Regular cluster finder ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; From da00550e828dad4617bc4730797d154e4bf79858 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 13 Apr 2025 23:10:44 +0200 Subject: [PATCH 0392/1914] GPU: Temporarily move some defines back to the wrapper, to be cleaned up in another PR --- .../Definitions/GPUDefParametersDefaults.h | 40 --------------- .../Definitions/GPUDefParametersWrapper.h | 50 ++++++++++++++++++- 2 files changed, 49 insertions(+), 41 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 83ef7d8cf810b..ce703e2ceba4a 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -75,16 +75,6 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) 
#define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT_DEFAULT 256 @@ -138,16 +128,6 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 512 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -201,16 +181,6 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 384 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -256,16 +226,6 @@ #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half // #define GPUCA_USE_TEXTURES #elif defined(GPUCA_GPUTYPE_OPENCL) #else diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index beeefa4eb5f9d..8d8815d8a8044 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,7 +22,55 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#if defined(GPUCA_GPUCODE) +#ifdef GPUCA_GPUCODE +#if defined(GPUCA_GPUTYPE_MI2xx) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 +#elif defined(GPUCA_GPUTYPE_VEGA) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 +#elif defined(GPUCA_GPUTYPE_AMPERE) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 +#elif defined(GPUCA_GPUTYPE_TURING) + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 
+ #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half +#endif +#endif + +#ifdef GPUCA_GPUCODE #include "GPUDefParametersDefaults.h" #endif #include "GPUDefParametersConstants.h" From a946be89a02079eb7d8169b23137303cbbab8aa7 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 14 Apr 2025 11:17:23 +0200 Subject: [PATCH 0393/1914] GPU: remove automatic workaround for MI100 memory errors --- prodtests/full-system-test/dpl-workflow.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 2dfc74e3ecfb3..bb2712bedd92e 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -265,7 +265,6 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi - [[ $EPNSYNCMODE == 1 || ! 
-z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From 466ba06a25b9cebf15601a5db4a33b835fee170f Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 15 Apr 2025 13:21:21 +0200 Subject: [PATCH 0394/1914] DPL Analysis: introduce binned expression (#14174) --- .../include/Framework/ExpressionHelpers.h | 12 -- .../Core/include/Framework/Expressions.h | 131 +++++++++++++++++- Framework/Core/src/Expressions.cxx | 34 ++--- Framework/Core/test/test_Expressions.cxx | 27 +++- 4 files changed, 162 insertions(+), 42 deletions(-) diff --git a/Framework/Core/include/Framework/ExpressionHelpers.h b/Framework/Core/include/Framework/ExpressionHelpers.h index b531a39519272..f881abf7b0e6c 100644 --- a/Framework/Core/include/Framework/ExpressionHelpers.h +++ b/Framework/Core/include/Framework/ExpressionHelpers.h @@ -75,18 +75,6 @@ struct ColumnOperationSpec { result.type = type; } }; - -/// helper struct used to parse trees -struct NodeRecord { - /// pointer to the actual tree node - Node* node_ptr = nullptr; - size_t index = 0; - explicit NodeRecord(Node* node_, size_t index_) : node_ptr(node_), index{index_} {} - bool operator!=(NodeRecord const& rhs) - { - return this->node_ptr != rhs.node_ptr; - } -}; } // namespace o2::framework::expressions #endif // O2_FRAMEWORK_EXPRESSIONS_HELPERS_H_ diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 1d2883418de71..af89e56f85835 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -41,6 +41,7 @@ class Projector; #include #include #include +#include namespace gandiva { using Selection = std::shared_ptr; @@ -114,6 +115,8 @@ struct LiteralNode { { } + LiteralNode(LiteralNode const& other) = default; + using var_t = LiteralValue::stored_type; var_t 
value; atype::type type = atype::NA; @@ -132,6 +135,7 @@ struct BindingNode { /// An expression tree node corresponding to binary or unary operation struct OpNode { OpNode(BasicOp op_) : op{op_} {} + OpNode(OpNode const& other) = default; BasicOp op; }; @@ -147,6 +151,8 @@ struct PlaceholderNode : LiteralNode { } } + PlaceholderNode(PlaceholderNode const& other) = default; + void reset(InitContext& context) { value = retrieve(context, name.data()); @@ -156,6 +162,28 @@ struct PlaceholderNode : LiteralNode { LiteralNode::var_t (*retrieve)(InitContext&, char const*); }; +/// A placeholder node for parameters taken from an array +struct ParameterNode : LiteralNode { + ParameterNode(int index_ = -1) + : LiteralNode((float)0), + index{index_} + { + } + + ParameterNode(ParameterNode const&) = default; + + template + void reset(T value_, int index_ = -1) + { + (*static_cast(this)) = LiteralNode(value_); + if (index_ > 0) { + index = index_; + } + } + + int index; +}; + /// A conditional node struct ConditionalNode { }; @@ -178,6 +206,10 @@ struct Node { { } + Node(ParameterNode&& p) : self{std::forward(p)}, left{nullptr}, right{nullptr}, condition{nullptr} + { + } + Node(ConditionalNode op, Node&& then_, Node&& else_, Node&& condition_) : self{op}, left{std::make_unique(std::forward(then_))}, @@ -196,16 +228,70 @@ struct Node { right{nullptr}, condition{nullptr} {} + Node(Node const& other) + : self{other.self}, + index{other.index} + { + if (other.left != nullptr) { + left = std::make_unique(*other.left); + } + if (other.right != nullptr) { + right = std::make_unique(*other.right); + } + if (other.condition != nullptr) { + condition = std::make_unique(*other.condition); + } + } + /// variant with possible nodes - using self_t = std::variant; + using self_t = std::variant; self_t self; size_t index = 0; /// pointers to children - std::unique_ptr left; - std::unique_ptr right; - std::unique_ptr condition; + std::unique_ptr left = nullptr; + std::unique_ptr right = nullptr; 
+ std::unique_ptr condition = nullptr; +}; + +/// helper struct used to parse trees +struct NodeRecord { + /// pointer to the actual tree node + Node* node_ptr = nullptr; + size_t index = 0; + explicit NodeRecord(Node* node_, size_t index_) : node_ptr(node_), index{index_} {} + bool operator!=(NodeRecord const& rhs) + { + return this->node_ptr != rhs.node_ptr; + } }; +/// Tree-walker helper +template +void walk(Node* head, L const& pred) +{ + std::stack path; + path.emplace(head, 0); + while (!path.empty()) { + auto& top = path.top(); + pred(top.node_ptr); + + auto* leftp = top.node_ptr->left.get(); + auto* rightp = top.node_ptr->right.get(); + auto* condp = top.node_ptr->condition.get(); + path.pop(); + + if (leftp != nullptr) { + path.emplace(leftp, 0); + } + if (rightp != nullptr) { + path.emplace(rightp, 0); + } + if (condp != nullptr) { + path.emplace(condp, 0); + } + } +} + /// overloaded operators to build the tree from an expression #define BINARY_OP_NODES(_operator_, _operation_) \ @@ -402,6 +488,43 @@ inline Node ifnode(Node&& condition_, Configurable const& then_, Configurabl return Node{ConditionalNode{}, PlaceholderNode{then_}, PlaceholderNode{else_}, std::forward(condition_)}; } +/// Parameters +inline Node par(int index) +{ + return Node{ParameterNode{index}}; +} + +/// binned functional +template +inline Node binned(std::vector const& binning, std::vector const& parameters, Node&& binned, Node&& pexp, Node&& out) +{ + int bins = binning.size() - 1; + const auto binned_copy = binned; + const auto out_copy = out; + auto root = ifnode(Node{binned_copy} < binning[0], Node{out_copy}, LiteralNode{-1}); + auto* current = &root; + for (auto i = 0; i < bins; ++i) { + current->right = std::make_unique(ifnode(Node{binned_copy} < binning[i + 1], updateParameters(pexp, bins, parameters, i), LiteralNode{-1})); + current = current->right.get(); + } + current->right = std::make_unique(out); + return root; +} + +template +Node updateParameters(Node const& pexp, int 
bins, std::vector const& parameters, int bin) +{ + Node result{pexp}; + auto updateParameter = [&bins, ¶meters, &bin](Node* node) { + if (node->self.index() == 5) { + auto* n = std::get_if<5>(&node->self); + n->reset(parameters[n->index * bins + bin]); + } + }; + walk(&result, updateParameter); + return result; +} + /// A struct, containing the root of the expression tree struct Filter { Filter() = default; diff --git a/Framework/Core/src/Expressions.cxx b/Framework/Core/src/Expressions.cxx index 45bb120b6eb74..6f646515b7837 100644 --- a/Framework/Core/src/Expressions.cxx +++ b/Framework/Core/src/Expressions.cxx @@ -118,6 +118,13 @@ struct PlaceholderNodeHelper { return DatumSpec{node.value, node.type}; } }; + +struct ParameterNodeHelper { + DatumSpec operator()(ParameterNode const& node) const + { + return DatumSpec{node.value, node.type}; + } +}; } // namespace std::shared_ptr concreteArrowType(atype::type type) @@ -189,37 +196,13 @@ std::ostream& operator<<(std::ostream& os, DatumSpec const& spec) void updatePlaceholders(Filter& filter, InitContext& context) { - std::stack path; - - // insert the top node into stack - path.emplace(filter.node.get(), 0); - auto updateNode = [&](Node* node) { if (node->self.index() == 3) { std::get_if<3>(&node->self)->reset(context); } }; - // while the stack is not empty - while (!path.empty()) { - auto& top = path.top(); - updateNode(top.node_ptr); - - auto* leftp = top.node_ptr->left.get(); - auto* rightp = top.node_ptr->right.get(); - auto* condp = top.node_ptr->condition.get(); - path.pop(); - - if (leftp != nullptr) { - path.emplace(leftp, 0); - } - if (rightp != nullptr) { - path.emplace(rightp, 0); - } - if (condp != nullptr) { - path.emplace(condp, 0); - } - } + expressions::walk(filter.node.get(), updateNode); } const char* stringType(atype::type t) @@ -267,6 +250,7 @@ Operations createOperations(Filter const& expression) [lh = LiteralNodeHelper{}](LiteralNode const& node) { return lh(node); }, [bh = 
BindingNodeHelper{}](BindingNode const& node) { return bh(node); }, [ph = PlaceholderNodeHelper{}](PlaceholderNode const& node) { return ph(node); }, + [pr = ParameterNodeHelper{}](ParameterNode const& node) { return pr(node); }, [](auto&&) { return DatumSpec{}; }}, node->self); }; diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 8b08a9a38aa63..2296b5dcbfbc4 100644 --- a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -12,7 +12,6 @@ #include "Framework/Configurable.h" #include "Framework/ExpressionHelpers.h" #include "Framework/AnalysisDataModel.h" -#include "Framework/AODReaderHelpers.h" #include #include @@ -283,3 +282,29 @@ TEST_CASE("TestConditionalExpressions") auto gandiva_filter2 = createFilter(schema2, gandiva_condition2); REQUIRE(gandiva_tree2->ToString() == "bool greater_than((float) fSigned1Pt, (const float) 0 raw(0)) && if (bool less_than(float absf((float) fEta), (const float) 1 raw(3f800000)) && if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { bool greater_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) } else { bool less_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) }) { bool greater_than(float absf((float) fX), (const float) 1 raw(3f800000)) } else { bool greater_than(float absf((float) fY), (const float) 1 raw(3f800000)) }"); } + +TEST_CASE("TestBinnedExpressions") +{ + std::vector bins{0.5, 1.5, 2.5, 3.5, 4.5}; + std::vector params{1.0, 1.1, 1.2, 1.3, 2.0, 2.1, 2.2, 2.3, 3.0, 3.1, 3.2, 3.3, 4.0, 4.1, 4.2, 4.3}; + Projector p = binned(bins, params, o2::aod::track::pt, par(0) * o2::aod::track::x + par(1) * o2::aod::track::y + par(2) * o2::aod::track::z + par(3) * o2::aod::track::phi, LiteralNode{0.f}); + auto pspecs = createOperations(p); + auto schema = std::make_shared(std::vector{o2::aod::track::Pt::asArrowField(), o2::aod::track::X::asArrowField(), o2::aod::track::Y::asArrowField(), o2::aod::track::Z::asArrowField(), 
o2::aod::track::Phi::asArrowField()}); + auto tree = createExpressionTree(pspecs, schema); + REQUIRE(tree->ToString() == "if (bool less_than((float) fPt, (const float) 0.5 raw(3f000000))) { (const float) 0 raw(0) } else { if (bool less_than((float) fPt, (const float) 1.5 raw(3fc00000))) { float add(float add(float add(float multiply((const float) 1 raw(3f800000), (float) fX), float multiply((const float) 2 raw(40000000), (float) fY)), float multiply((const float) 3 raw(40400000), (float) fZ)), float multiply((const float) 4 raw(40800000), (float) fPhi)) } else { if (bool less_than((float) fPt, (const float) 2.5 raw(40200000))) { float add(float add(float add(float multiply((const float) 1.1 raw(3f8ccccd), (float) fX), float multiply((const float) 2.1 raw(40066666), (float) fY)), float multiply((const float) 3.1 raw(40466666), (float) fZ)), float multiply((const float) 4.1 raw(40833333), (float) fPhi)) } else { if (bool less_than((float) fPt, (const float) 3.5 raw(40600000))) { float add(float add(float add(float multiply((const float) 1.2 raw(3f99999a), (float) fX), float multiply((const float) 2.2 raw(400ccccd), (float) fY)), float multiply((const float) 3.2 raw(404ccccd), (float) fZ)), float multiply((const float) 4.2 raw(40866666), (float) fPhi)) } else { if (bool less_than((float) fPt, (const float) 4.5 raw(40900000))) { float add(float add(float add(float multiply((const float) 1.3 raw(3fa66666), (float) fX), float multiply((const float) 2.3 raw(40133333), (float) fY)), float multiply((const float) 3.3 raw(40533333), (float) fZ)), float multiply((const float) 4.3 raw(4089999a), (float) fPhi)) } else { (const float) 0 raw(0) } } } } }"); + + std::vector binning{0, o2::constants::math::PIHalf, o2::constants::math::PI, o2::constants::math::PI + o2::constants::math::PIHalf, o2::constants::math::TwoPI}; + std::vector parameters{1.0, 1.1, 1.2, 1.3, // par 0 + 2.0, 2.1, 2.2, 2.3, // par 1 + 3.0, 3.1, 3.2, 3.3, // par 2 + 4.0, 4.1, 4.2, 4.3}; // par 3 + + Projector p2 
= binned((std::vector)binning, + (std::vector)parameters, + o2::aod::track::phi, par(0) * o2::aod::track::x * o2::aod::track::x + par(1) * o2::aod::track::y * o2::aod::track::y + par(2) * o2::aod::track::z * o2::aod::track::z, + LiteralNode{-1.f}); + auto p2specs = createOperations(p2); + auto schema2 = std::make_shared(std::vector{o2::aod::track::Phi::asArrowField(), o2::aod::track::X::asArrowField(), o2::aod::track::Y::asArrowField(), o2::aod::track::Z::asArrowField()}); + auto tree2 = createExpressionTree(p2specs, schema2); + REQUIRE(tree2->ToString() == "if (bool less_than((float) fPhi, (const float) 0 raw(0))) { (const float) -1 raw(bf800000) } else { if (bool less_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb))) { float add(float add(float multiply(float multiply((const float) 1 raw(3f800000), (float) fX), (float) fX), float multiply(float multiply((const float) 2 raw(40000000), (float) fY), (float) fY)), float multiply(float multiply((const float) 3 raw(40400000), (float) fZ), (float) fZ)) } else { if (bool less_than((float) fPhi, (const float) 3.14159 raw(40490fdb))) { float add(float add(float multiply(float multiply((const float) 1.1 raw(3f8ccccd), (float) fX), (float) fX), float multiply(float multiply((const float) 2.1 raw(40066666), (float) fY), (float) fY)), float multiply(float multiply((const float) 3.1 raw(40466666), (float) fZ), (float) fZ)) } else { if (bool less_than((float) fPhi, (const float) 4.71239 raw(4096cbe4))) { float add(float add(float multiply(float multiply((const float) 1.2 raw(3f99999a), (float) fX), (float) fX), float multiply(float multiply((const float) 2.2 raw(400ccccd), (float) fY), (float) fY)), float multiply(float multiply((const float) 3.2 raw(404ccccd), (float) fZ), (float) fZ)) } else { if (bool less_than((float) fPhi, (const float) 6.28319 raw(40c90fdb))) { float add(float add(float multiply(float multiply((const float) 1.3 raw(3fa66666), (float) fX), (float) fX), float multiply(float multiply((const float) 2.3 
raw(40133333), (float) fY), (float) fY)), float multiply(float multiply((const float) 3.3 raw(40533333), (float) fZ), (float) fZ)) } else { (const float) -1 raw(bf800000) } } } } }"); +} From 64ef6b929885f2f422d22eb6d2d824a50b40e463 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 15 Apr 2025 14:10:51 +0200 Subject: [PATCH 0395/1914] Revert "GPU: Temporarily move some defines back to the wrapper, to be cleaned up in another PR" This reverts commit da00550e828dad4617bc4730797d154e4bf79858. --- .../Definitions/GPUDefParametersDefaults.h | 40 +++++++++++++++ .../Definitions/GPUDefParametersWrapper.h | 50 +------------------ 2 files changed, 41 insertions(+), 49 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index ce703e2ceba4a..83ef7d8cf810b 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -75,6 +75,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_VEGA) #define GPUCA_WARP_SIZE 64 #define GPUCA_THREAD_COUNT_DEFAULT 256 @@ -128,6 +138,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 512 #define GPUCA_LB_GPUTPCCFClusterizer 512 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define 
GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_AMPERE) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -181,6 +201,16 @@ #define GPUCA_LB_GPUTPCCFDeconvolution 384 #define GPUCA_LB_GPUTPCCFClusterizer 448 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 #elif defined(GPUCA_GPUTYPE_TURING) #define GPUCA_WARP_SIZE 32 #define GPUCA_THREAD_COUNT_DEFAULT 512 @@ -226,6 +256,16 @@ #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_ALTERNATE_BORDER_SORT 1 + #define GPUCA_SORT_BEFORE_FIT 1 + #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_NO_ATOMIC_PRECHECK 1 + #define GPUCA_COMP_GATHER_KERNEL 4 + #define GPUCA_COMP_GATHER_MODE 3 + #define GPUCA_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half // #define GPUCA_USE_TEXTURES #elif defined(GPUCA_GPUTYPE_OPENCL) #else diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index 8d8815d8a8044..beeefa4eb5f9d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ 
b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,55 +22,7 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#ifdef GPUCA_GPUCODE -#if defined(GPUCA_GPUTYPE_MI2xx) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_VEGA) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_AMPERE) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_TURING) - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half -#endif -#endif - -#ifdef GPUCA_GPUCODE +#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" #endif #include "GPUDefParametersConstants.h" From efbf1474d8f2dcf42572bd7abdd7439580e8f7ba Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 15 Apr 2025 14:10:39 +0200 Subject: [PATCH 0396/1914] GPU: Move compile-time constant parameters to new scheme with runtimeParameter struct and automatic RTC-generated defines With this, we can revert the workaround to have the parameters as defines in the wrapper file --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 - GPU/GPUTracking/Base/GPUReconstruction.h | 3 +- .../Base/GPUReconstructionIncludes.h | 19 +- .../Base/GPUReconstructionProcessing.h | 1 + .../Base/cuda/GPUReconstructionCUDA.cu | 5 - .../Base/cuda/GPUReconstructionCUDA.h | 1 - .../Base/opencl/GPUReconstructionOCL.cxx | 5 - .../Base/opencl/GPUReconstructionOCL.h | 1 - GPU/GPUTracking/CMakeLists.txt | 9 +- .../DataCompression/GPUTPCCompression.cxx | 16 +- .../Definitions/GPUDefConstantsAndSettings.h | 2 +- .../Definitions/GPUDefParametersConstants.h | 45 +- .../Definitions/GPUDefParametersDefaults.h | 567 ++++++++++-------- .../GPUDefParametersLoad.template.inc | 35 +- .../GPUDefParametersRuntime.template.h | 4 + .../Definitions/GPUDefParametersWrapper.h | 6 +- GPU/GPUTracking/Global/GPUChainTracking.cxx | 8 +- .../Global/GPUChainTrackingCompression.cxx | 25 +- .../Global/GPUChainTrackingMerger.cxx | 10 +- .../Global/GPUChainTrackingSectorTracker.cxx | 5 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 10 +- .../SectorTracker/GPUTPCNeighboursFinder.cxx | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.h | 10 +- .../SectorTracker/GPUTPCStartHitsFinder.cxx | 4 +- .../SectorTracker/GPUTPCTracker.cxx | 6 +- .../SectorTracker/GPUTPCTrackletSelector.cxx | 18 +- 
.../SectorTracker/GPUTPCTrackletSelector.h | 8 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 19 + GPU/GPUTracking/dEdx/GPUdEdx.cxx | 4 +- GPU/GPUTracking/dEdx/GPUdEdx.h | 14 +- GPU/GPUTracking/kernels.cmake | 13 + 34 files changed, 466 insertions(+), 419 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 2f643706647ee..b4dac39ae1cd2 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -292,8 +292,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.delayedOutput = false; } - UpdateAutomaticProcessingSettings(); - GPUCA_GPUReconstructionUpdateDefaults(); if (!mProcessingSettings.rtc.enable) { mProcessingSettings.rtc.optConstexpr = false; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 18098396e1349..23fb6e4d9ff06 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -48,6 +48,7 @@ struct GPUReconstructionPipelineContext; struct GPUReconstructionThreading; class GPUROOTDumpCore; class ThrustVolatileAllocator; +struct GPUDefParameters; namespace gpu_reconstruction_kernels { @@ -205,6 +206,7 @@ class GPUReconstruction GPUOutputControl& OutputControl() { return mOutputControl; } uint32_t NStreams() const { return mNStreams; } const void* DeviceMemoryBase() const { return mDeviceMemoryBase; } + virtual const GPUDefParameters& getGPUParameters(bool doGPU) const = 0; RecoStepField GetRecoSteps() const { return mRecoSteps.steps; } RecoStepField GetRecoStepsGPU() const { return mRecoSteps.stepsGPUMask; } @@ -239,7 +241,6 @@ class GPUReconstruction void FreeRegisteredMemory(GPUMemoryResource* res); GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor int32_t InitPhaseBeforeDevice(); - virtual void UpdateAutomaticProcessingSettings() {} virtual int32_t InitDevice() = 0; int32_t InitPhasePermanentMemory(); 
int32_t InitPhaseAfterDevice(); diff --git a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h index 6aba7e30a49d7..d3f11d86a731d 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIncludes.h +++ b/GPU/GPUTracking/Base/GPUReconstructionIncludes.h @@ -29,21 +29,4 @@ #include #include -#define GPUCA_GPUReconstructionUpdateDefaults() \ - if (mProcessingSettings.alternateBorderSort < 0) { \ - mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \ - } \ - if (mProcessingSettings.mergerSortTracks < 0) { \ - mProcessingSettings.mergerSortTracks = GPUCA_SORT_BEFORE_FIT; \ - } \ - if (param().rec.tpc.looperInterpolationInExtraPass < 0) { \ - param().rec.tpc.looperInterpolationInExtraPass = GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION; \ - } \ - if (mProcessingSettings.tpcCompressionGatherModeKernel < 0) { \ - mProcessingSettings.tpcCompressionGatherModeKernel = GPUCA_COMP_GATHER_KERNEL; \ - } \ - if (mProcessingSettings.tpcCompressionGatherMode < 0) { \ - mProcessingSettings.tpcCompressionGatherMode = GPUCA_COMP_GATHER_MODE; \ - } - -#endif +#endif // GPURECONSTRUCTIONINCLUDES_H diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 2428027118c0a..e8892c4be702b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -101,6 +101,7 @@ class GPUReconstructionProcessing : public GPUReconstruction uint32_t countToGPU = 0; uint32_t countToHost = 0; }; + const GPUDefParameters& getGPUParameters(bool doGPU) const override { return *(doGPU ? 
mParDevice : mParCPU); } protected: GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 47a9b675d27f6..3bea91994ba86 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -99,11 +99,6 @@ void GPUReconstructionCUDA::GetITSTraits(std::unique_ptr } } -void GPUReconstructionCUDA::UpdateAutomaticProcessingSettings() -{ - GPUCA_GPUReconstructionUpdateDefaults(); -} - int32_t GPUReconstructionCUDA::InitDevice_Runtime() { #ifndef __HIPCC__ // CUDA diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 3441c6b9a4fd6..b1a3a53a6a62f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -59,7 +59,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels GetThreadContext() override; void SynchronizeGPU() override; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index af26bfc7aeca8..e276f83413bbc 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -60,11 +60,6 @@ int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, cons return error != CL_SUCCESS; } -void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings() -{ - GPUCA_GPUReconstructionUpdateDefaults(); -} - int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() { if (mMaster == nullptr) { diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 16ef9b5e87fe8..abde42f01f073 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -37,7 +37,6 @@ class 
GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; - void UpdateAutomaticProcessingSettings() override; virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index f428d982394e0..eaeec508ff27a 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -247,14 +247,17 @@ foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE}) list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME}) endforeach() +set(GPUDEFPARAMETERSLBLIST "$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,LB_>,\n>\n") +string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>\n") +string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>") file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase - CONTENT "$,REPLACE,[^A-Za-z0-9]+,_>,\n>" -) + CONTENT ${GPUDEFPARAMETERSLBLIST}) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h - COMMAND awk "{print(\"#ifndef GPUCA_LB_\" $0 \"\\n#define GPUCA_LB_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h + COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h COMMENT "Generating GPUDefParametersLoadPrepare.h" + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase VERBATIM COMMAND_EXPAND_LISTS ) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx 
b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 335b201d11d07..8a22545314252 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -16,6 +16,7 @@ #include "GPUReconstruction.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" +#include "GPUDefParametersRuntime.h" using namespace o2::gpu; @@ -36,11 +37,12 @@ void* GPUTPCCompression::SetPointersOutputHost(void* mem) void* GPUTPCCompression::SetPointersScratch(void* mem) { + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; computePointerWithAlignment(mem, mClusterStatus, mMaxClusters); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode >= 2) { + if (gatherMode >= 2) { computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTracks); } - if (mRec->GetProcessingSettings().tpcCompressionGatherMode != 1) { + if (gatherMode != 1) { SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false); } return mem; @@ -48,8 +50,9 @@ void* GPUTPCCompression::SetPointersScratch(void* mem) void* GPUTPCCompression::SetPointersOutput(void* mem) { + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? 
mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTrackClusters); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false); } return mem; @@ -102,12 +105,13 @@ void* GPUTPCCompression::SetPointersMemory(void* mem) void GPUTPCCompression::RegisterMemoryAllocation() { AllocateAndInitializeLate(); + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; mMemoryResOutputHost = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputHost, GPUMemoryResource::MEMORY_OUTPUT_FLAG | GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_CUSTOM, "TPCCompressionOutputHost"); - if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { mMemoryResOutputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputGPU, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCCompressionOutputGPU"); } - uint32_t stackScratch = (mRec->GetProcessingSettings().tpcCompressionGatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0; - if (mRec->GetProcessingSettings().tpcCompressionGatherMode < 2) { + uint32_t stackScratch = (gatherMode != 3) ? 
GPUMemoryResource::MEMORY_STACK : 0; + if (gatherMode < 2) { mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | stackScratch, "TPCCompressionOutput"); } mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersScratch, GPUMemoryResource::MEMORY_SCRATCH | stackScratch, "TPCCompressionScratch"); diff --git a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h index 2d7aca8d71b92..48218dd7859e6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h +++ b/GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h @@ -13,7 +13,7 @@ /// \author David Rohr // This files contains compile-time constants affecting the GPU algorithms / reconstruction results. -// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters.h +// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters*.h #ifndef GPUDEFCONSTANTSANDSETTINGS_H #define GPUDEFCONSTANTSANDSETTINGS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h index 3a16d02ecf7c6..dd4a5dcbe7ba8 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h @@ -21,16 +21,12 @@ #define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! 
#if defined(__CUDACC__) || defined(__HIPCC__) - #define GPUCA_SPECIALIZE_THRUST_SORTS + #define GPUCA_SPECIALIZE_THRUST_SORTS // Not compiled with RTC, so must be compile-time constant #endif #define GPUCA_MAX_THREADS 1024 #define GPUCA_MAX_STREAMS 36 -#if defined(GPUCA_GPUCODE) - #define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU -#endif - #define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid #define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers #define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks @@ -44,44 +40,5 @@ #define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread #define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread -#ifdef GPUCA_GPUCODE - #ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 - #endif - #ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12 - #endif - #ifndef GPUCA_ALTERNATE_BORDER_SORT - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #endif - #ifndef GPUCA_SORT_BEFORE_FIT - #define GPUCA_SORT_BEFORE_FIT 0 - #endif - #ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #endif - #ifndef GPUCA_COMP_GATHER_KERNEL - #define GPUCA_COMP_GATHER_KERNEL 0 - #endif - #ifndef GPUCA_COMP_GATHER_MODE - #define GPUCA_COMP_GATHER_MODE 2 - #endif -#else - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0 - #define GPUCA_ALTERNATE_BORDER_SORT 0 - #define GPUCA_SORT_BEFORE_FIT 0 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0 - #define GPUCA_THREAD_COUNT_FINDER 1 - #define GPUCA_COMP_GATHER_KERNEL 0 - #define GPUCA_COMP_GATHER_MODE 0 -#endif -#ifndef GPUCA_DEDX_STORAGE_TYPE - #define GPUCA_DEDX_STORAGE_TYPE float -#endif -#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float -#endif - // clang-format on #endif // 
GPUDEFPARAMETERSCONSTANTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 83ef7d8cf810b..b212abbcd2707 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -14,266 +14,265 @@ // This file contains compile-time constants affecting the GPU performance. -#if !defined(GPUDEFPARAMETERSDEFAULTS_H) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. +#if !defined(GPUDEFPARAMETERSDEFAULTS_H) #define GPUDEFPARAMETERSDEFAULTS_H // clang-format off // Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds) // GPU Run Configuration -#ifdef GPUCA_GPUCODE -#if defined(GPUCA_GPUTYPE_MI2xx) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512 - #define 
GPUCA_LB_GPUTPCGMMergerResolve_step2 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 448 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define 
GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_VEGA) - #define GPUCA_WARP_SIZE 64 - #define GPUCA_THREAD_COUNT_DEFAULT 256 - #define GPUCA_LB_GPUTPCCreateTrackingData 128 - #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 - #define GPUCA_LB_GPUTPCStartHitsFinder 1024 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 - #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 896 - #define GPUCA_LB_GPUTPCExtrapolationTracking 256 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 512 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define 
GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 - #define GPUCA_LB_GPUTPCCFPeakFinder 512 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 - #define GPUCA_LB_GPUTPCCFDeconvolution 512 - #define GPUCA_LB_GPUTPCCFClusterizer 512 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_AMPERE) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 384 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define 
GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448 - #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448 - #define GPUCA_LB_GPUTPCCFPeakFinder 128 - #define GPUCA_LB_GPUTPCCFNoiseSuppression 
448 - #define GPUCA_LB_GPUTPCCFDeconvolution 384 - #define GPUCA_LB_GPUTPCCFClusterizer 448 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 -#elif defined(GPUCA_GPUTYPE_TURING) - #define GPUCA_WARP_SIZE 32 - #define GPUCA_THREAD_COUNT_DEFAULT 512 - #define GPUCA_LB_GPUTPCCreateTrackingData 256 - #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 - #define GPUCA_LB_GPUTPCStartHitsFinder 512 - #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 - #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 - #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 - #define GPUCA_LB_GPUTPCNeighboursCleaner 512 - #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 - #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 - #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE - #define GPUCA_LB_GPUTPCCFGather 1024, 1 - #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 - #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 - #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 - #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 - #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 - #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 - #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 - #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 - #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 - 
#define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 - #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 - #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 - #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 - #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 - #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 - #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 - #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128 - #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 - #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 - #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 - #define GPUCA_LB_COMPRESSION_GATHER 1024 - #define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 - #define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 20 - #define GPUCA_ALTERNATE_BORDER_SORT 1 - #define GPUCA_SORT_BEFORE_FIT 1 - #define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 1 - #define GPUCA_NO_ATOMIC_PRECHECK 1 - #define GPUCA_COMP_GATHER_KERNEL 4 - #define GPUCA_COMP_GATHER_MODE 3 - #define GPUCA_DEDX_STORAGE_TYPE uint16_t - #define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE half - // #define GPUCA_USE_TEXTURES -#elif defined(GPUCA_GPUTYPE_OPENCL) -#else - #error GPU TYPE NOT SET -#endif -#endif // GPUCA_GPUCODE +#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
+ // GPU-architecture-dependent default settings + #if defined(GPUCA_GPUTYPE_MI2xx) + #define GPUCA_WARP_SIZE 64 + #define GPUCA_THREAD_COUNT_DEFAULT 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 + #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024 + #define GPUCA_LB_GPUTPCTrackletConstructor 512, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 + #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 896 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 128, 1 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 512 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 512 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 512 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 512 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 512 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define 
GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 + #define GPUCA_LB_GPUTPCCFDeconvolution 512 + #define GPUCA_LB_GPUTPCCFClusterizer 448 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_VEGA) + #define GPUCA_WARP_SIZE 64 + #define GPUCA_THREAD_COUNT_DEFAULT 256 + #define GPUCA_LB_GPUTPCCreateTrackingData 128 + #define GPUCA_LB_GPUTPCStartHitsSorter 1024, 2 + #define GPUCA_LB_GPUTPCStartHitsFinder 1024 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 256, 8 + #define GPUCA_LB_GPUTPCNeighboursFinder 1024, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 896 + #define GPUCA_LB_GPUTPCExtrapolationTracking 256 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 4 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define 
GPUCA_LB_GPUTPCGMMergerTrackFit 64, 1 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 256, 4, 200 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 256 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 512 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 192, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 128, 2 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 64, 2 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 512 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 512 + #define GPUCA_LB_GPUTPCCFPeakFinder 512 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 512 + #define GPUCA_LB_GPUTPCCFDeconvolution 512 + #define GPUCA_LB_GPUTPCCFClusterizer 512 + #define 
GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 5 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_AMPERE) + #define GPUCA_WARP_SIZE 32 + #define GPUCA_THREAD_COUNT_DEFAULT 512 + #define GPUCA_LB_GPUTPCCreateTrackingData 384 + #define GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 512 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 // best single-kernel: 128, 4 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 // best single-kernel: 128, 4 + #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 // best single-kernel: 768, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 512 + #define GPUCA_LB_GPUTPCExtrapolationTracking 128, 4 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 10 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 64, 4 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 64, 12 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 32, 6 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 + #define 
GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 64, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 256, 2 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 64, 2 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 3 + #define GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 + #define GPUCA_LB_GPUTPCCFCheckPadBaseline 64,8 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillIndexMap 448 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_fillFromDigits 448 + #define GPUCA_LB_GPUTPCCFChargeMapFiller_findFragmentStart 448 + #define GPUCA_LB_GPUTPCCFPeakFinder 128 + #define GPUCA_LB_GPUTPCCFNoiseSuppression 448 + #define GPUCA_LB_GPUTPCCFDeconvolution 384 + #define GPUCA_LB_GPUTPCCFClusterizer 448 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #elif defined(GPUCA_GPUTYPE_TURING) + #define GPUCA_WARP_SIZE 32 + #define GPUCA_THREAD_COUNT_DEFAULT 512 + #define GPUCA_LB_GPUTPCCreateTrackingData 256 + #define 
GPUCA_LB_GPUTPCStartHitsSorter 512, 1 + #define GPUCA_LB_GPUTPCStartHitsFinder 512 + #define GPUCA_LB_GPUTPCTrackletConstructor 256, 2 + #define GPUCA_LB_GPUTPCTrackletSelector 192, 3 + #define GPUCA_LB_GPUTPCNeighboursFinder 640, 1 + #define GPUCA_LB_GPUTPCNeighboursCleaner 512 + #define GPUCA_LB_GPUTPCExtrapolationTracking 192, 2 + #define GPUCA_LB_GPUTPCCFDecodeZS 64, 8 + #define GPUCA_LB_GPUTPCCFDecodeZSLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFDecodeZSDenseLink GPUCA_WARP_SIZE + #define GPUCA_LB_GPUTPCCFGather 1024, 1 + #define GPUCA_LB_GPUTPCGMMergerTrackFit 32, 8 + #define GPUCA_LB_GPUTPCGMMergerFollowLoopers 128, 4 + #define GPUCA_LB_GPUTPCGMMergerSectorRefit 64, 5 + #define GPUCA_LB_GPUTPCGMMergerUnpackResetIds 256 + #define GPUCA_LB_GPUTPCGMMergerUnpackGlobal 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step0 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step1 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step2 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step3 256 + #define GPUCA_LB_GPUTPCGMMergerResolve_step4 256, 4 + #define GPUCA_LB_GPUTPCGMMergerClearLinks 256 + #define GPUCA_LB_GPUTPCGMMergerMergeWithinPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerMergeSectorsPrepare 256, 2 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step0 192 + #define GPUCA_LB_GPUTPCGMMergerMergeBorders_step2 256 + #define GPUCA_LB_GPUTPCGMMergerMergeCE 256 + #define GPUCA_LB_GPUTPCGMMergerLinkExtrapolatedTracks 256 + #define GPUCA_LB_GPUTPCGMMergerCollect 128, 2 + #define GPUCA_LB_GPUTPCGMMergerSortTracksPrepare 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step0 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step1 256 + #define GPUCA_LB_GPUTPCGMMergerPrepareClusters_step2 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_0 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_1 256 + #define GPUCA_LB_GPUTPCGMMergerFinalize_2 256 + #define GPUCA_LB_GPUTPCCompressionKernels_step0attached 128 + #define GPUCA_LB_GPUTPCCompressionKernels_step1unattached 512, 2 + #define 
GPUCA_LB_GPUTPCDecompressionKernels_step0attached 32, 1 + #define GPUCA_LB_GPUTPCDecompressionKernels_step1unattached 32, 1 + #define GPUCA_LB_COMPRESSION_GATHER 1024 + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 4 + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 20 + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 1 + #define GPUCA_PAR_SORT_BEFORE_FIT 1 + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 1 + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 1 + #define GPUCA_PAR_COMP_GATHER_KERNEL 4 + #define GPUCA_PAR_COMP_GATHER_MODE 3 + #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half + // #define GPUCA_USE_TEXTURES + #elif defined(GPUCA_GPUTYPE_OPENCL) + #else + #error GPU TYPE NOT SET + #endif -#ifdef GPUCA_GPUCODE // Default settings for GPU, if not already set for selected GPU type #ifndef GPUCA_WARP_SIZE #define GPUCA_WARP_SIZE 32 @@ -509,7 +508,67 @@ #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock GPUCA_LB_COMPRESSION_GATHER -#endif + + // Defaults for non-LB parameters + #ifndef GPUCA_PAR_SORT_STARTHITS + #define GPUCA_PAR_SORT_STARTHITS 1 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 + #endif + #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 12 + #endif + #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_PAR_SORT_BEFORE_FIT + #define GPUCA_PAR_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_KERNEL + #define GPUCA_PAR_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_MODE + #define GPUCA_PAR_COMP_GATHER_MODE 2 + #endif 
+#endif // defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) + +#ifndef GPUCA_GPUCODE_GENRTC + // Defaults (also for CPU) for non-LB parameters + #ifndef GPUCA_PAR_SORT_STARTHITS + #define GPUCA_PAR_SORT_STARTHITS 0 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP + #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 + #endif + #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE + #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 0 + #endif + #ifndef GPUCA_PAR_ALTERNATE_BORDER_SORT + #define GPUCA_PAR_ALTERNATE_BORDER_SORT 0 + #endif + #ifndef GPUCA_PAR_SORT_BEFORE_FIT + #define GPUCA_PAR_SORT_BEFORE_FIT 0 + #endif + #ifndef GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION + #define GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_KERNEL + #define GPUCA_PAR_COMP_GATHER_KERNEL 0 + #endif + #ifndef GPUCA_PAR_COMP_GATHER_MODE + #define GPUCA_PAR_COMP_GATHER_MODE 0 + #endif + #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE + #define GPUCA_PAR_DEDX_STORAGE_TYPE float + #endif + #ifndef GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE + #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE float + #endif +#endif // GPUCA_GPUCODE_GENRTC // clang-format on #endif // GPUDEFPARAMETERSDEFAULTS_H diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc index 938cedbdacc93..ac71adc6232a6 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc +++ b/GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc @@ -32,29 +32,38 @@ static GPUDefParameters GPUDefParametersLoad() // clang-format off {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_LB_EMPTY0(GPUCA_M_FIRST(GPUCA_LB_>,APPEND,))>,$>}, {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0))>,$>}, - {$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>} + 
{$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_SHIFT(GPUCA_LB_>,APPEND,$0$0)))>,$>}, + $,PREPEND,GPUCA_PAR_>,$>, + $,PREPEND,GPUCA_M_STR(GPUCA_PAR_>,APPEND,)>,$> // clang-format on }; } -#define GPUCA_EXPORT_KERNEL(name) \ - if (par.par_LB_maxThreads[i] > 0) { \ - o << "#define GPUCA_LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ - if (par.par_LB_minBlocks[i] > 0) { \ - o << ", " << par.par_LB_minBlocks[i]; \ - } \ - if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ - o << ", " << par.par_LB_forceBlocks[i]; \ - } \ - o << "\n"; \ - } \ +#define GPUCA_EXPORT_KERNEL_LB(name) \ + if (par.par_LB_maxThreads[i] > 0) { \ + o << "#define GPUCA_LB_" GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \ + if (par.par_LB_minBlocks[i] > 0) { \ + o << ", " << par.par_LB_minBlocks[i]; \ + } \ + if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \ + o << ", " << par.par_LB_forceBlocks[i]; \ + } \ + o << "\n"; \ + } \ i++; +#define GPUCA_EXPORT_KERNEL_PARAM(name) \ + o << "#define GPUCA_PAR_" GPUCA_M_STR(name) " " << GPUCA_M_CAT(par.par_, name) << "\n"; + static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC) { std::stringstream o; // clang-format off int32_t i = 0; - $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL(>,APPEND,)>, + $,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,GPUCA_EXPORT_KERNEL_LB(>,APPEND,)>, + > + $,PREPEND,GPUCA_EXPORT_KERNEL_PARAM(>,APPEND,)>, + > + $,PREPEND,GPUCA_EXPORT_KERNEL_PARAM(>,APPEND,)>, > return o.str(); // clang-format on } diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h index f3537c058a824..d023de7916676 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersRuntime.template.h @@ -21,6 +21,10 @@ struct GPUDefParameters { // clang-format off int32_t par_LB_maxThreads[$>] = {}; int32_t par_LB_minBlocks[$>] = {}; int32_t 
par_LB_forceBlocks[$>] = {}; + $,PREPEND,int32_t par_>,APPEND, = 0>,$ + >; + $,PREPEND,char par_>,APPEND,[128] = "">,$ + >; }; // clang-format on } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index beeefa4eb5f9d..b2c08d689aeb2 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -22,9 +22,7 @@ #include "GPUCommonDef.h" #include "GPUDefMacros.h" -#if defined(GPUCA_GPUCODE) #include "GPUDefParametersDefaults.h" -#endif #include "GPUDefParametersConstants.h" namespace o2::gpu @@ -38,8 +36,8 @@ namespace o2::gpu #define GPUCA_GET_WARP_COUNT(...) 1 // since launch bound constants are not defined in host-code, and must evaluate to 1! #endif -#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE) -#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE) +#define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE) +#define GPUCA_PAR_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_PAR_DEDX_STORAGE_TYPE) // #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 6753db280d5bf..43fa49ff74817 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -40,6 +40,7 @@ #include "GPUTrackingInputProvider.h" #include "GPUNewCalibValues.h" #include "GPUTriggerOutputs.h" +#include "GPUDefParametersRuntime.h" #include "GPUTPCClusterStatistics.h" #include "GPUHostDataTypes.h" @@ -254,6 +255,7 @@ bool GPUChainTracking::ValidateSteps() bool GPUChainTracking::ValidateSettings() { + int32_t gatherMode = 
mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; if ((param().rec.tpc.nWays & 1) == 0) { GPUError("nWay setting musst be odd number!"); return false; @@ -270,7 +272,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("NStreams of %d insufficient for %d nTPCClustererLanes", mRec->NStreams(), (int32_t)GetProcessingSettings().nTPCClustererLanes); return false; } - if (GetProcessingSettings().noGPUMemoryRegistration && GetProcessingSettings().tpcCompressionGatherMode != 3) { + if (GetProcessingSettings().noGPUMemoryRegistration && gatherMode != 3) { GPUError("noGPUMemoryRegistration only possible with gather mode 3"); return false; } @@ -286,7 +288,7 @@ bool GPUChainTracking::ValidateSettings() GPUError("Must use external output for double pipeline mode"); return false; } - if (GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { GPUError("Double pipeline incompatible to compression mode 1"); return false; } @@ -295,7 +297,7 @@ bool GPUChainTracking::ValidateSettings() return false; } } - if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || GetProcessingSettings().tpcCompressionGatherMode == 3)) { + if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (gatherMode == 1 || gatherMode == 3)) { // NOTE(review): was `A && !A` (never true); assumes GetRecoSteps() reports all enabled steps - confirm GPUError("Invalid tpcCompressionGatherMode for compression on CPU"); return false; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 03d319f42fd6b..8fb6fc4771658 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++
b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -18,6 +18,7 @@ #include "GPUTrackingInputProvider.h" #include "GPUTPCCFChainContext.h" #include "TPCClusterDecompressor.h" +#include "GPUDefParametersRuntime.h" #include "utils/strtag.h" #include @@ -30,6 +31,7 @@ int32_t GPUChainTracking::RunTPCCompression() mRec->PushNonPersistentMemory(qStr2Tag("TPCCOMPR")); RecoStep myStep = RecoStep::TPCCompression; bool doGPU = GetRecoStepsGPU() & RecoStep::TPCCompression; + int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode; GPUTPCCompression& Compressor = processors()->tpcCompressor; GPUTPCCompression& CompressorShadow = doGPU ? processorsShadow()->tpcCompressor : Compressor; const auto& threadContext = GetThreadContext(); @@ -37,7 +39,7 @@ int32_t GPUChainTracking::RunTPCCompression() RecordMarker(&mEvents->single, 0); } - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile } SetupGPUProcessor(&Compressor, true); @@ -70,7 +72,7 @@ int32_t GPUChainTracking::RunTPCCompression() Compressor.mOutputFlat->set(outputSize, *Compressor.mOutput); char* hostFlatPtr = (char*)Compressor.mOutput->qTotU; // First array as allocated in GPUTPCCompression::SetPointersCompressedClusters size_t copySize = 0; - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { CompressorShadow.mOutputA = Compressor.mOutput; copySize = AllocateRegisteredMemory(Compressor.mMemoryResOutputGPU); // We overwrite Compressor.mOutput with the allocated output pointers on the GPU } @@ -81,8 +83,8 @@ int32_t GPUChainTracking::RunTPCCompression() SynchronizeStream(OutputStream()); // Synchronize output copies running in parallel from memory that might be released, only the following async copy from stacked memory is 
safe after the chain finishes. outputStream = OutputStream(); } - if (GetProcessingSettings().tpcCompressionGatherMode >= 2) { - if (GetProcessingSettings().tpcCompressionGatherMode == 2) { + if (gatherMode >= 2) { + if (gatherMode == 2) { void* devicePtr = mRec->getGPUPointer(Compressor.mOutputFlat); if (devicePtr != Compressor.mOutputFlat) { CompressedClustersPtrs& ptrs = *Compressor.mOutput; // We need to update the ptrs with the gpu-mapped version of the host address space @@ -94,7 +96,8 @@ int32_t GPUChainTracking::RunTPCCompression() TransferMemoryResourcesToGPU(myStep, &Compressor, outputStream); constexpr uint32_t nBlocksDefault = 2; constexpr uint32_t nBlocksMulti = 1 + 2 * 200; - switch (GetProcessingSettings().tpcCompressionGatherModeKernel) { + int32_t gatherModeKernel = mRec->GetProcessingSettings().tpcCompressionGatherModeKernel == -1 ? mRec->getGPUParameters(doGPU).par_COMP_GATHER_KERNEL : mRec->GetProcessingSettings().tpcCompressionGatherModeKernel; // -1: fall back to the per-GPU parameter default; otherwise honor the configured kernel + switch (gatherModeKernel) { case 0: runKernel(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression)); getKernelTimer(RecoStep::TPCCompression, 0, outputSize, false); @@ -117,10 +120,10 @@ int32_t GPUChainTracking::RunTPCCompression() getKernelTimer(RecoStep::TPCCompression, 0, outputSize, false); break; default: - GPUError("Invalid compression kernel %d selected.", (int32_t)GetProcessingSettings().tpcCompressionGatherModeKernel); + GPUError("Invalid compression kernel %d selected.", (int32_t)gatherModeKernel); return 1; } - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { RecordMarker(&mEvents->stream[outputStream], outputStream); char* deviceFlatPts = (char*)Compressor.mOutput->qTotU; if (GetProcessingSettings().doublePipeline) { @@ -135,9 +138,9 @@ int32_t GPUChainTracking::RunTPCCompression() } } else { int8_t direction = 0; - if (GetProcessingSettings().tpcCompressionGatherMode == 0) { + if (gatherMode == 0) { P = &CompressorShadow.mPtrs; - } else if
(GetProcessingSettings().tpcCompressionGatherMode == 1) { + } else if (gatherMode == 1) { P = &Compressor.mPtrs; direction = -1; gatherTimer = &getTimer("GPUTPCCompression_GatherOnCPU", 0); @@ -181,11 +184,11 @@ int32_t GPUChainTracking::RunTPCCompression() GPUMemCpyAlways(myStep, O->timeA, P->timeA, O->nTracks * sizeof(O->timeA[0]), outputStream, direction); GPUMemCpyAlways(myStep, O->padA, P->padA, O->nTracks * sizeof(O->padA[0]), outputStream, direction); } - if (GetProcessingSettings().tpcCompressionGatherMode == 1) { + if (gatherMode == 1) { gatherTimer->Stop(); } mIOPtrs.tpcCompressedClusters = Compressor.mOutputFlat; - if (GetProcessingSettings().tpcCompressionGatherMode == 3) { + if (gatherMode == 3) { SynchronizeEventAndRelease(mEvents->stream[outputStream]); mRec->ReturnVolatileDeviceMemory(); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index ffab3ba0be063..a647c213660c9 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -14,6 +14,7 @@ #include "GPUChainTracking.h" #include "GPULogging.h" +#include "GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" #include "GPUQA.h" #include "utils/strtag.h" @@ -31,7 +32,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto runKernel({{nBorderTracks, -WarpSize(), 0, deviceType}}, 0); } uint32_t n = withinSector == -1 ? NSECTORS / 2 : NSECTORS; - if (GetProcessingSettings().alternateBorderSort && (!mRec->IsGPU() || doGPU)) { + if (GetProcessingSettings().alternateBorderSort == -1 ? 
mRec->getGPUParameters(doGPU).par_ALTERNATE_BORDER_SORT : GetProcessingSettings().alternateBorderSort) { RecordMarker(&mEvents->single, 0); TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->init); for (uint32_t i = 0; i < n; i++) { @@ -176,7 +177,8 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) waitForTransfer = 1; } - if (GetProcessingSettings().mergerSortTracks) { + const bool mergerSortTracks = GetProcessingSettings().mergerSortTracks == -1 ? mRec->getGPUParameters(doGPU).par_SORT_BEFORE_FIT : GetProcessingSettings().mergerSortTracks; + if (mergerSortTracks) { runKernel(GetGridAuto(0, deviceType)); CondWaitEvent(waitForTransfer, &mEvents->single); runKernel(GetGridAuto(0, deviceType)); @@ -212,11 +214,11 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mOutputQueue.clear(); } - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), GetProcessingSettings().mergerSortTracks ? 1 : 0); + runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } - if (param().rec.tpc.looperInterpolationInExtraPass) { + if (param().rec.tpc.looperInterpolationInExtraPass == -1 ? 
mRec->getGPUParameters(doGPU).par_MERGER_SPLIT_LOOP_INTERPOLATION : param().rec.tpc.looperInterpolationInExtraPass) { runKernel(GetGridAuto(0)); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 3e7447892307a..64a9179baf0e6 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -19,6 +19,7 @@ #include "GPUTPCClusterData.h" #include "GPUTrackingInputProvider.h" #include "GPUTPCClusterOccupancyMap.h" +#include "GPUDefParametersRuntime.h" #include "utils/strtag.h" #include @@ -200,11 +201,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); -#ifdef GPUCA_SORT_STARTHITS_GPU - if (doGPU) { + if (mRec->getGPUParameters(doGPU).par_SORT_STARTHITS) { runKernel({GetGridAuto(useStream), {iSector}}); } -#endif if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {iSector}}); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index bdf60f744b9ca..f42e5f35b1dc9 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -37,6 +37,7 @@ #include "TPCFastTransform.h" #include "GPUTPCConvertImpl.h" #include "GPUTPCGeometry.h" +#include "GPUDefParametersRuntime.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" @@ -288,7 +289,8 @@ void* GPUTPCGMMerger::SetPointersMemory(void* mem) void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) { computePointerWithAlignment(mem, mTrackOrderAttach, mNMaxTracks); - if (mRec->GetProcessingSettings().mergerSortTracks) { + const bool mergerSortTracks = mRec->GetProcessingSettings().mergerSortTracks == -1 ? 
mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCMerging).par_SORT_BEFORE_FIT : mRec->GetProcessingSettings().mergerSortTracks; + if (mergerSortTracks) { computePointerWithAlignment(mem, mTrackOrderProcess, mNMaxTracks); } return mem; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h index 238b04510862e..ba251ce34a3eb 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerTypes.h @@ -32,7 +32,7 @@ enum attachTypes { attachAttached = 0x40000000, struct InterpolationErrorHit { float posY, posZ; - GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; + GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A errorY, errorZ; }; struct InterpolationErrors { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index f1aac3da9a7a2..1617ac7b828af 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -663,7 +663,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, GPUCA_DEBUG_STREAMER_CHECK(if (debugVals) { debugVals->err2Y = err2Y; debugVals->err2Z = err2Z; }); if (rejectChi2 >= rejectInterFill) { - if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { + if (rejectChi2 == rejectInterReject && inter->errorY < (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { rejectChi2 = rejectDirect; } else { int32_t retVal = InterpolateReject(param, posY, posZ, clusterState, rejectChi2, inter, err2Y, err2Z); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 260c64db052af..77453a87b3763 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -309,7 +309,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ } else { int8_t rejectChi2 = 
attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? (GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 - if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { + if (iWay == nWays - 1 && interpolation.hit[ihit].errorY > (GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE_A)0) { const float Iz0 = interpolation.hit[ihit].posY - mP[0]; const float Iz1 = interpolation.hit[ihit].posZ - mP[1]; float Iw0 = mC[2] + (float)interpolation.hit[ihit].errorZ; @@ -631,7 +631,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric for (uint32_t ih = hitFst; ih < hitLst; ih++) { int32_t id = idOffset + ids[ih]; GPUAtomic(uint32_t)* const weight = weights + id; -#if !defined(GPUCA_NO_ATOMIC_PRECHECK) && GPUCA_NO_ATOMIC_PRECHECK < 1 +#if GPUCA_NO_ATOMIC_PRECHECK == 0 if (myWeight <= *weight) { continue; } @@ -757,7 +757,8 @@ GPUdic(0, 1) int32_t GPUTPCGMTrackParam::FollowCircle(const GPUTPCGMMerger* GPUr if (Merger->Param().rec.tpc.disableRefitAttachment & 4) { return 1; } - if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { + const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; + if (inExtraPass && phase2 == false) { StoreAttachMirror(Merger, sector, iRow, iTrack, toAlpha, toY, toX, toSector, toRow, inFlyDirection, prop.GetAlpha()); return 1; } @@ -862,7 +863,8 @@ GPUdni() void GPUTPCGMTrackParam::AttachClustersMirror(const GPUTPCGMMerger* GPU if (Merger->Param().rec.tpc.disableRefitAttachment & 8) { return; } - if (Merger->Param().rec.tpc.looperInterpolationInExtraPass && phase2 == false) { + const bool inExtraPass = Merger->Param().rec.tpc.looperInterpolationInExtraPass == -1 ? 
GPUCA_PAR_MERGER_SPLIT_LOOP_INTERPOLATION : Merger->Param().rec.tpc.looperInterpolationInExtraPass; + if (inExtraPass && phase2 == false) { StoreAttachMirror(Merger, sector, iRow, iTrack, 0, toY, 0, -1, 0, 0, prop.GetAlpha()); return; } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index ec348b59ce7a5..d76c079bb406f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -76,7 +76,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } #define UnrollGlobal 4 -#define MaxShared GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP +#define MaxShared GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP #if MaxShared < GPUCA_MAXN #define MaxGlobal ((GPUCA_MAXN - MaxShared - 1) / UnrollGlobal + 1) * UnrollGlobal #else diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 1bf5000cfbe5c..0ecd230a67415 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -40,11 +40,11 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate int32_t mIRow; // row number int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number -#if GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 - static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP); - float mA1[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; - float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; - calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; +#if GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 + static_assert(GPUCA_MAXN >= GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP); + float 
mA1[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; + float mA2[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; + calink mB[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; #endif GPUTPCRow mRow, mRowUp, mRowDown; }; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx index af79dddae554e..20dfd69864816 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx @@ -39,7 +39,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr uint32_t linkUpData = tracker.mData.mLinkUpData[lHitNumberOffset + ih]; if (tracker.mData.mLinkDownData[lHitNumberOffset + ih] == CALINK_INVAL && linkUpData != CALINK_INVAL && tracker.mData.mLinkUpData[rowUp.mHitNumberOffset + linkUpData] != CALINK_INVAL) { -#ifdef GPUCA_SORT_STARTHITS +#if GPUCA_PAR_SORT_STARTHITS > 0 GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; uint32_t nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); if (nextRowStartHits >= tracker.mNMaxRowStartHits) { @@ -61,7 +61,7 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr } GPUbarrier(); -#ifdef GPUCA_SORT_STARTHITS +#if GPUCA_PAR_SORT_STARTHITS > 0 if (iThread == 0) { uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx index 6c1b4eda0d7f5..e923e126e1841 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.cxx @@ -22,6 +22,7 @@ #include "GPUTPCTrackParam.h" 
#include "GPUParam.inc" #include "GPUTPCConvertImpl.h" +#include "GPUDefParametersRuntime.h" #if !defined(GPUCA_GPUCODE) #include @@ -143,13 +144,12 @@ void GPUTPCTracker::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxRowHits = mRec->MemoryScalers()->NTPCTrackletHits(mData.NumberOfHits()); mNMaxTracks = mRec->MemoryScalers()->NTPCSectorTracks(mData.NumberOfHits()); mNMaxTrackHits = mRec->MemoryScalers()->NTPCSectorTrackHits(mData.NumberOfHits(), mRec->GetProcessingSettings().tpcInputWithClusterRejection); -#ifdef GPUCA_SORT_STARTHITS_GPU - if (mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking) { + + if (mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCSectorTracking).par_SORT_STARTHITS) { if (mNMaxStartHits > mNMaxRowStartHits * GPUCA_ROW_COUNT) { mNMaxStartHits = mNMaxRowStartHits * GPUCA_ROW_COUNT; } } -#endif mData.SetMaxData(); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index 8810b692e1377..e27a8f66ae754 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -33,7 +33,7 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread } GPUbarrier(); - GPUTPCHitId trackHits[GPUCA_ROW_COUNT - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE]; + GPUTPCHitId trackHits[GPUCA_ROW_COUNT - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; const float maxSharedFrac = tracker.Param().rec.tpc.trackletMaxSharedFraction; for (int32_t itr = s.mItr0 + iThread; itr < s.mNTracklets; itr += s.mNThreadsTotal) { @@ -67,13 +67,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! 
gap = 0; -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (nHits < GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE) { +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + if (nHits < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { s.mHits[nHits][iThread].Set(irow, ih); } else -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 { - trackHits[nHits - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); + trackHits[nHits - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); } nHits++; if (!own) { @@ -101,13 +101,13 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread tracker.Tracks()[itrout].SetFirstHitID(nFirstTrackHit); tracker.Tracks()[itrout].SetNHits(nHits); for (int32_t jh = 0; jh < nHits; jh++) { -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (jh < GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE) { +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + if (jh < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; } else -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 { - tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE]; + tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; } } } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index f487931bdaf4b..e5a28c80f37f9 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -36,10 +36,10 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate int32_t mNThreadsTotal; // total n threads int32_t mNTracklets; // n of tracklets int32_t mReserved; // for alignment reasons -#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - static_assert(GPUCA_ROW_COUNT >= 
GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE); - GPUTPCHitId mHits[GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; -#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 +#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 + static_assert(GPUCA_ROW_COUNT >= GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE); + GPUTPCHitId mHits[GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; +#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; typedef GPUconstantref() GPUTPCTracker processorType; diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 7faab410d20ea..35f2915d9486a 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -18,6 +18,8 @@ define_property(TARGET PROPERTY O2_GPU_KERNEL_NAMES) define_property(TARGET PROPERTY O2_GPU_KERNEL_INCLUDES) define_property(TARGET PROPERTY O2_GPU_KERNEL_FILES) define_property(TARGET PROPERTY O2_GPU_KERNEL_NO_FAST_MATH) +define_property(TARGET PROPERTY O2_GPU_KERNEL_PARAMS) +define_property(TARGET PROPERTY O2_GPU_KERNEL_STRING_PARAMS) set(O2_GPU_KERNEL_WRAPPER_FOLDER "${CMAKE_CURRENT_BINARY_DIR}/GPU/include_gpu_onthefly") file(MAKE_DIRECTORY ${O2_GPU_KERNEL_WRAPPER_FOLDER}) set(O2_GPU_BASE_DIR "${CMAKE_CURRENT_LIST_DIR}/../") @@ -167,3 +169,20 @@ function(o2_gpu_kernel_set_deterministic) endif() endforeach() endfunction() + +function(o2_gpu_kernel_add_parameter) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + message(STATUS "Adding ${ARGV${i}}") + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_PARAMS "${ARGV${i}}") + endforeach() +endfunction() +function(o2_gpu_kernel_add_string_parameter) + list(LENGTH ARGV n) + math(EXPR n "${n} - 1") + foreach(i RANGE 0 ${n}) + message(STATUS "Adding ${ARGV${i}}") + set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_STRING_PARAMS "${ARGV${i}}") + 
endforeach() +endfunction() diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.cxx b/GPU/GPUTracking/dEdx/GPUdEdx.cxx index fd2aeda2828e3..340463b9ec7f7 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.cxx +++ b/GPU/GPUTracking/dEdx/GPUdEdx.cxx @@ -55,7 +55,7 @@ GPUd() void GPUdEdx::computedEdx(GPUdEdxInfo& GPUrestrict() output, const GPUPar output.NHitsSubThresholdOROC3 = countOROC3; } -GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) +GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_PAR_DEDX_STORAGE_TYPE_A* GPUrestrict() array, int32_t count, int32_t trunclow, int32_t trunchigh) { trunclow = count * trunclow / 128; trunchigh = count * trunchigh / 128; @@ -65,7 +65,7 @@ GPUd() float GPUdEdx::GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* GPUrestrict() CAAlgo::sort(array, array + count); float mean = 0; for (int32_t i = trunclow; i < trunchigh; i++) { - mean += (float)array[i] * (1.f / scalingFactor::factor); + mean += (float)array[i] * (1.f / scalingFactor::factor); } return (mean / (trunchigh - trunclow)); } diff --git a/GPU/GPUTracking/dEdx/GPUdEdx.h b/GPU/GPUTracking/dEdx/GPUdEdx.h index 4d3b652bdc5d1..e556fd3845d42 100644 --- a/GPU/GPUTracking/dEdx/GPUdEdx.h +++ b/GPU/GPUTracking/dEdx/GPUdEdx.h @@ -37,7 +37,7 @@ class GPUdEdx GPUd() void computedEdx(GPUdEdxInfo& output, const GPUParam& param); private: - GPUd() float GetSortTruncMean(GPUCA_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); + GPUd() float GetSortTruncMean(GPUCA_PAR_DEDX_STORAGE_TYPE_A* array, int32_t count, int32_t trunclow, int32_t trunchigh); GPUd() void checkSubThresh(int32_t roc); template @@ -62,8 +62,8 @@ class GPUdEdx static constexpr int32_t MAX_NCL = GPUCA_ROW_COUNT; // Must fit in mNClsROC (uint8_t)! 
- GPUCA_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory - GPUCA_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory + GPUCA_PAR_DEDX_STORAGE_TYPE_A mChargeTot[MAX_NCL]; // No need for default, just some memory + GPUCA_PAR_DEDX_STORAGE_TYPE_A mChargeMax[MAX_NCL]; // No need for default, just some memory float mSubThreshMinTot = 0.f; float mSubThreshMinMax = 0.f; uint8_t mNClsROC[4] = {0}; @@ -78,8 +78,8 @@ GPUdi() void GPUdEdx::checkSubThresh(int32_t roc) if (roc != mLastROC) { if (mNSubThresh && mCount + mNSubThresh <= MAX_NCL) { for (int32_t i = 0; i < mNSubThresh; i++) { - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(mSubThreshMinTot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(mSubThreshMinMax * scalingFactor::factor + scalingFactor::round); } mNClsROC[mLastROC] += mNSubThresh; mNClsROCSubThresh[mLastROC] += mNSubThresh; @@ -151,8 +151,8 @@ GPUdnii() void GPUdEdx::fillCluster(float qtot, float qmax, int32_t padRow, uint qmax /= residualGainMapGain; qtot /= residualGainMapGain; - mChargeTot[mCount] = (GPUCA_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); - mChargeMax[mCount++] = (GPUCA_DEDX_STORAGE_TYPE_A)(qmax * scalingFactor::factor + scalingFactor::round); + mChargeTot[mCount] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(qtot * scalingFactor::factor + scalingFactor::round); + mChargeMax[mCount++] = (GPUCA_PAR_DEDX_STORAGE_TYPE_A)(qmax * scalingFactor::factor + scalingFactor::round); mNClsROC[roc]++; if (qtot < mSubThreshMinTot) { mSubThreshMinTot = qtot; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 994f10a516b10..ee3af2b87d925 100644 --- 
a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -134,3 +134,16 @@ o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFD o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) + +o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP + TRACKLET_SELECTOR_HITS_REG_SIZE + ALTERNATE_BORDER_SORT + SORT_BEFORE_FIT + MERGER_SPLIT_LOOP_INTERPOLATION + NO_ATOMIC_PRECHECK + COMP_GATHER_KERNEL + COMP_GATHER_MODE + SORT_STARTHITS) + +o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE + MERGER_INTERPOLATION_ERROR_TYPE) From 9cb8054883602562b7f62034bb468bb3ed707017 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 16 Apr 2025 10:15:47 +0200 Subject: [PATCH 0397/1914] DPL GUI: do not sent any state when GUI is disabled (#14184) --- Framework/Core/src/CommonServices.cxx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index e13f1cb2094b7..5c333bbb85e3b 100644 --- a/Framework/Core/src/CommonServices.cxx +++ b/Framework/Core/src/CommonServices.cxx @@ -44,6 +44,7 @@ #include "Framework/DeviceConfig.h" #include "Framework/DefaultsHelpers.h" #include "Framework/Signpost.h" +#include "Framework/DriverConfig.h" #include "TextDriverClient.h" #include "WSDriverClient.h" @@ -800,6 +801,9 @@ auto sendRelayerMetrics(ServiceRegistryRef registry, DataProcessingStats& stats) auto flushStates(ServiceRegistryRef registry, DataProcessingStates& states) -> void { + if (!registry.get().driverHasGUI) { + return; + } states.flushChangedStates([&states, registry](std::string const& spec, int64_t timestamp, std::string_view value) mutable -> void { auto& client = registry.get(); client.push(spec, value, timestamp); From 39b766b44eb9a19c9f8dc4c39f880273ea1205f1 Mon Sep 17 
00:00:00 2001 From: Marco Giacalone Date: Mon, 14 Apr 2025 15:06:19 +0200 Subject: [PATCH 0398/1914] Updated EPOS4 example using different versions and HQ --- run/SimExamples/HepMC_EPOS4/README.md | 7 ++-- run/SimExamples/HepMC_EPOS4/epos.sh | 36 +++++++++++++++---- run/SimExamples/HepMC_EPOS4/example.optns | 3 +- run/SimExamples/HepMC_EPOS4/rundpg.sh | 42 ++++++++++++++++++++-- run/SimExamples/HepMC_EPOS4/rundpl.sh | 43 +++++++++++++++++++++-- run/SimExamples/HepMC_EPOS4/runo2sim.sh | 42 ++++++++++++++++++++-- 6 files changed, 156 insertions(+), 17 deletions(-) mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/README.md mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/example.optns mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/rundpg.sh mode change 100644 => 100755 run/SimExamples/HepMC_EPOS4/runo2sim.sh diff --git a/run/SimExamples/HepMC_EPOS4/README.md b/run/SimExamples/HepMC_EPOS4/README.md old mode 100644 new mode 100755 index 94c50572cff9f..8609501a6981a --- a/run/SimExamples/HepMC_EPOS4/README.md +++ b/run/SimExamples/HepMC_EPOS4/README.md @@ -7,10 +7,12 @@ An in-depth explanation of the mechanisms behind the HepMC(3) data handling can HepMC_fifo folder of the MC examples. The scripts use the `cmd` parameter of `GeneratorHepMC` to spawn the EPOS4 generation via the `epos.sh` script. -EPOS4 uses the outdated HepMC2 libraries, so this had to be specified in the steering scripts +EPOS 4.0.0 uses the outdated HepMC2 libraries, so this had to be specified in the steering scripts of the generators configuration. If `HepMC.version=2` is removed then the scripts will not work anymore. This is to say that the balance achieved with the configurations provided is easily destroyed if the user base edits parts that are not understood completely. +The latest EPOS 4.0.3 and EPOS4HQ both use HepMC3, so the version is automatically +updated when these generators are used. 
# Scripts description @@ -47,6 +49,7 @@ If no parameters are provided to the scripts, they will run with default values - **-n , --nevents** → changes the number of events in the .optns file or gets the one in the file if no events are provided - **-i , --input** → .optns filename to feed EPOS4, no extension must be set in the filename - **-j , --jobs** → sets the number of workers (jobs) +- **-hq** → enables EPOS4HQ generation - **-h , --help** → prints usage instructions - **-e , --ecm** → sets the center-of-mass energy in the options file @@ -62,6 +65,6 @@ Now the three scripts start to differ: - **rundpg.sh** → first the o2dpg_sim_workflow.py script will be launched generating the json configuration, then the o2_dpg_workflow_runner.py script will start the workflow - **rundpl.sh** → o2-sim-dpl-eventgen is executed piping its results to o2-sim-mctracks-to-aod and afterwards to o2-analysis-mctracks-to-aod-simple-task -The last few lines of the scripts contain the execution of o2-sim, DPG worflow creator/runner and DPL software respectively, so this part can be modified by the users following their requirements. It's important not to delete from the configuration keys `GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;` and it would be better to provide additional configurations via the -m flag. EPOS4 cannot set a maximum impact parameter value, so it's better to leave the bMaxSwitch to none, while the others serve the sole purpose of running successfully the generator using auto generated FIFOs. +The last few lines of the scripts contain the execution of o2-sim, DPG workflow creator/runner and DPL software respectively, so this part can be modified by the users following their requirements. It's important not to delete from the configuration keys `GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;` and it would be better to provide additional configurations via the -m flag. 
EPOS4 cannot set a maximum impact parameter value, so it's better to leave the bMaxSwitch to none, while the others serve the sole purpose of running successfully the generator using auto generated FIFOs. diff --git a/run/SimExamples/HepMC_EPOS4/epos.sh b/run/SimExamples/HepMC_EPOS4/epos.sh index 46a7dbfa27e5c..a4185b756bf56 100755 --- a/run/SimExamples/HepMC_EPOS4/epos.sh +++ b/run/SimExamples/HepMC_EPOS4/epos.sh @@ -1,11 +1,23 @@ -#!/bin/sh +#!/bin/bash # Script based on CRMC example # EPOS4 option files must contain ihepmc set to 2 to print HepMC # data on stdout. -hepmc flag is not needed anymore, but -hepstd is fundamental # in order not to print useless information on stdout (a z-*optns*.mtr file will be created) optns="example" -seed=$RANDOM +seed=1 +EPOS4="" + +if [ -z "$EPO4VSN" ]; then + # Error: EPO4VSN environment variable is not set + exit 1 +fi + +if [ "$EPO4VSN" = "4.0.0" ]; then + EPOS4="$EPOS4_ROOT/epos4/scripts/epos" +else + EPOS4="$EPOS4_ROOT/bin/epos" +fi while test $# -gt 0 ; do case $1 in @@ -18,13 +30,23 @@ done if [ ! 
-f $optns.optns ]; then echo "Error: Options file $optns.optns not found" - exit 1 + exit 2 +fi + +if grep -Fq "set ihq 1" $optns.optns; then + if [ -z "$EPO4HQVSN" ]; then + # Error: EPOS4HQ version not found + exit 3 + else + # Running with EPOS4HQ + EPOS4="$EPO4HQ/bin/eposhq" + fi fi if [ $seed -eq 0 ]; then - echo "Seed can't be 0, random number will be used" - seed=$RANDOM + # Seed can't be 0, random number will be used + seed="$RANDOM" fi -# Or filters the stdout with only HepMC2 useful data -$EPOS4_ROOT/epos4/scripts/epos -hepstd -s $seed $optns | sed -n 's/^\(HepMC::\|[EAUWVP] \)/\1/p' +# OR filters the stdout with only HepMC useful data +$EPOS4 -hepstd -s $seed $optns | sed -n 's/^\(HepMC::\|[EAUWVP] \)/\1/p' diff --git a/run/SimExamples/HepMC_EPOS4/example.optns b/run/SimExamples/HepMC_EPOS4/example.optns old mode 100644 new mode 100755 index c2b067941e4e8..9df738d15fcff --- a/run/SimExamples/HepMC_EPOS4/example.optns +++ b/run/SimExamples/HepMC_EPOS4/example.optns @@ -29,4 +29,5 @@ set nfreeze 1 !number of freeze out events per hydro event set modsho 1 !printout every modsho events set centrality 0 !0=min bias set ihepmc 2 !HepMC output enabled on stdout -set nfull 10 +set nfull 10 !Total nEvents to be generated +set ihq 1 !Enable EPOS4HQ diff --git a/run/SimExamples/HepMC_EPOS4/rundpg.sh b/run/SimExamples/HepMC_EPOS4/rundpg.sh old mode 100644 new mode 100755 index 93993f66bfbd6..ea6d29ce0fa54 --- a/run/SimExamples/HepMC_EPOS4/rundpg.sh +++ b/run/SimExamples/HepMC_EPOS4/rundpg.sh @@ -23,6 +23,18 @@ optns="example" TF=1 eCM=-1 JOBS=2 +HEPMC="" +HQ=false + +if [ -z "$EPO4VSN" ]; then + echo "Error: EPOS4 version not found" + exit 7 +fi +if [ "$EPO4VSN" == "4.0.0" ]; then + HEPMC=";HepMC.version=2" +else + HEPMC=";HepMC.version=3" +fi usage() { @@ -38,6 +50,7 @@ Options: -h,--help Print these instructions -e,--ecm ENERGY Center-of-Mass energy -t,--tf TF Timeframes ($TF) + -hq HQ Enable EPOS4HQ -- Rest of command line sent to o2-sim COMMAND must be quoted if 
it contains spaces or other special @@ -59,6 +72,7 @@ while test $# -gt 0 ; do -i|--input) optns=$2 ; shift ;; -j|--jobs) JOBS=$2 ; shift ;; -e|--ecm) eCM=$2 ; shift ;; + -hq) HQ=true ; shift ;; -h|--help) usage; ${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow.py --help ; exit 0 ;; -t|--tf) TF=$2 ; shift ;; --) shift ; break ;; @@ -115,6 +129,30 @@ else fi fi +# Set HQ mode + +if [ "$HQ" = true ]; then + echo "Setting HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 1" $optns.optns + else + echo "set ihq 1" >> $optns.optns + fi + if [ -z "$EPO4HQVSN" ]; then + echo "Error: EPOS4HQ version not found" + exit 7 + else + HEPMC=";HepMC.version=3" + fi +else + echo "Turning OFF HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 0" $optns.optns + else + echo "set ihq 0" >> $optns.optns + fi +fi + # Copy options file in each timeframe folder for i in $(seq 1 $TF); do if [ ! -d tf$i ]; then @@ -125,8 +163,8 @@ done # create workflow -${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow.py -eCM $eCM -ns $NEV -gen hepmc -tf $TF -j $JOBS \ - -interactionRate 500000 -confKey "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;${more}" +${O2DPG_ROOT}/MC/bin/o2dpg_sim_workflow.py -eCM $eCM -ns $NEV -gen hepmc -tf $TF -j $JOBS -seed $RANDOM \ + -interactionRate 500000 -confKey "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;${more}" # Run workflow ${O2DPG_ROOT}/MC/bin/o2_dpg_workflow_runner.py -f workflow.json -tt aod --stdout-on-failure diff --git a/run/SimExamples/HepMC_EPOS4/rundpl.sh b/run/SimExamples/HepMC_EPOS4/rundpl.sh index c3851175d08f4..919eedf1a2340 100755 --- a/run/SimExamples/HepMC_EPOS4/rundpl.sh +++ b/run/SimExamples/HepMC_EPOS4/rundpl.sh @@ -21,6 +21,18 @@ more="" optns="example" eCM=-1 JOBS=2 +HEPMC="" +HQ=false + +if [ -z "$EPO4VSN" ]; then + echo "Error: EPOS4 version not found" + exit 7 +fi +if [ "$EPO4VSN" == "4.0.0" ]; then + HEPMC=";HepMC.version=2" +else + 
HEPMC=";HepMC.version=3" +fi usage() { @@ -35,6 +47,7 @@ Options: -j,--jobs JOBS Number of jobs ($JOBS) -e,--ecm ENERGY Center-of-Mass energy -h,--help Print these instructions + -hq HQ Enable EPOS4HQ -- Rest of command line sent to o2-sim COMMAND must be quoted if it contains spaces or other special @@ -56,6 +69,7 @@ while test $# -gt 0 ; do -i|--input) optns=$2 ; shift ;; -j|--jobs) JOBS=$2 ; shift ;; -e|--ecm) eCM=$2 ; shift ;; + -hq) HQ=true ; shift ;; -h|--help) usage; o2-sim-dpl-eventgen --help full ; exit 0 ;; --) shift ; break ;; *) echo "Unknown option '$1', did you forget '--'?" >/dev/stderr @@ -111,9 +125,32 @@ else fi fi -# Starting simulation => seed is fed automatically to epos with the --seed flag. HepMC.version = 2 is mandatory +# Set HQ mode + +if [ "$HQ" = true ]; then + echo "Setting HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 1" $optns.optns + else + echo "set ihq 1" >> $optns.optns + fi + if [ -z "$EPO4HQVSN" ]; then + echo "Error: EPOS4HQ version not found" + exit 7 + else + HEPMC=";HepMC.version=3" + fi +else + echo "Turning OFF HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 0" $optns.optns + else + echo "set ihq 0" >> $optns.optns + fi +fi + +# Starting simulation => seed is fed automatically to epos with the --seed flag. HepMC.version = 2 is mandatory for version 4.0.0 # otherwise the simulation won't work. 
# Seed is automatically set to Random by the epos.sh script because the --seed option with o2-sim-dpl-eventgen does not feed the number to GeneratorHepMC - -o2-sim-dpl-eventgen -b --nEvents ${NEV} --generator hepmc --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;${more}" |\ +o2-sim-dpl-eventgen -b --nEvents ${NEV} --generator hepmc --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;${more}" |\ o2-sim-mctracks-to-aod -b | o2-analysis-mctracks-to-aod-simple-task -b diff --git a/run/SimExamples/HepMC_EPOS4/runo2sim.sh b/run/SimExamples/HepMC_EPOS4/runo2sim.sh old mode 100644 new mode 100755 index 31698f39a87f0..a241f9affba19 --- a/run/SimExamples/HepMC_EPOS4/runo2sim.sh +++ b/run/SimExamples/HepMC_EPOS4/runo2sim.sh @@ -21,6 +21,18 @@ more="" optns="example" eCM=-1 JOBS=2 +HEPMC="" +HQ=false + +if [ -z "$EPO4VSN" ]; then + echo "Error: EPOS4 version not found" + exit 7 +fi +if [ "$EPO4VSN" == "4.0.0" ]; then + HEPMC=";HepMC.version=2" +else + HEPMC=";HepMC.version=3" +fi usage() { @@ -35,6 +47,7 @@ Options: -j,--jobs JOBS Number of jobs ($JOBS) -e,--ecm ENERGY Center-of-Mass energy -h,--help Print these instructions + -hq HQ Enable EPOS4HQ -- Rest of command line sent to o2-sim COMMAND must be quoted if it contains spaces or other special @@ -56,6 +69,7 @@ while test $# -gt 0 ; do -i|--input) optns=$2 ; shift ;; -j|--jobs) JOBS=$2 ; shift ;; -e|--ecm) eCM=$2 ; shift ;; + -hq) HQ=true ; shift ;; -h|--help) usage; o2-sim --help full ; exit 0 ;; --) shift ; break ;; *) echo "Unknown option '$1', did you forget '--'?" >/dev/stderr @@ -111,7 +125,31 @@ else fi fi -# Starting simulation => seed is fed automatically to epos with the --seed flag. 
HepMC.version = 2 is mandatory +# Set HQ mode + +if [ "$HQ" = true ]; then + echo "Setting HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 1" $optns.optns + else + echo "set ihq 1" >> $optns.optns + fi + if [ -z "$EPO4HQVSN" ]; then + echo "Error: EPOS4HQ version not found" + exit 7 + else + HEPMC=";HepMC.version=3" + fi +else + echo "Turning OFF HQ mode" + if grep -Fq "ihq" $optns.optns; then + sed -i "/ihq/c\set ihq 0" $optns.optns + else + echo "set ihq 0" >> $optns.optns + fi +fi + +# Starting simulation => seed is fed automatically to epos with the --seed flag. HepMC.version = 2 is mandatory for version 4.0.0 # otherwise the simulation won't work o2-sim -j $JOBS -n ${NEV} -g hepmc --seed $RANDOM \ - --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none;HepMC.version=2;${more}" + --configKeyValues "GeneratorFileOrCmd.cmd=$cmd -i $optns;GeneratorFileOrCmd.bMaxSwitch=none$HEPMC;${more}" From 455f7df21328b0d74a86f36a4f92811f4bc09726 Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Wed, 16 Apr 2025 16:31:21 +0200 Subject: [PATCH 0399/1914] Include pO and OO example configurations for Pythia8 --- Generators/share/egconfig/pythia8_OO.cfg | 8 ++++++++ Generators/share/egconfig/pythia8_pO.cfg | 8 ++++++++ 2 files changed, 16 insertions(+) create mode 100644 Generators/share/egconfig/pythia8_OO.cfg create mode 100644 Generators/share/egconfig/pythia8_pO.cfg diff --git a/Generators/share/egconfig/pythia8_OO.cfg b/Generators/share/egconfig/pythia8_OO.cfg new file mode 100644 index 0000000000000..ff098e6b65135 --- /dev/null +++ b/Generators/share/egconfig/pythia8_OO.cfg @@ -0,0 +1,8 @@ +### beams +Beams:idA 1000080160 # Oxygen +Beams:idB 1000080160 # Oxygen +Beams:eCM 10720. # GeV + +### decays +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. 
diff --git a/Generators/share/egconfig/pythia8_pO.cfg b/Generators/share/egconfig/pythia8_pO.cfg new file mode 100644 index 0000000000000..aff9d3337cd9d --- /dev/null +++ b/Generators/share/egconfig/pythia8_pO.cfg @@ -0,0 +1,8 @@ +### beams +Beams:idA 2212 # proton +Beams:idB 1000080160 # Oxygen +Beams:eCM 13600. # GeV + +### decays +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. From 10cd81636c902b15283a504eae413445e97e6d84 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 16 Apr 2025 15:15:27 +0200 Subject: [PATCH 0400/1914] GPU TPC: When running cluster rejection based on interpolation, also reject during update with current cluster position --- GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 34 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 5 ++- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 1617ac7b828af..9e23f9af3cf43 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -691,7 +691,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, return 0; } - return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect, err2Y, err2Z, ¶m); + return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || rejectChi2 == rejectInterReject, err2Y, err2Z, ¶m); } GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z) @@ -704,7 +704,7 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict inter->errorY = mC[0]; inter->errorZ = mC[2]; } else if (rejectChi2 == rejectInterReject) { - float chiY, chiZ; + float chi2Y, chi2Z; if (mFitInProjections || mT->NDF() <= 0) { const float 
Iz0 = inter->posY - mP[0]; const float Iz1 = inter->posZ - mP[1]; @@ -721,8 +721,8 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict const float Jz1 = posZ - ImP1; const float Jw0 = 1.f / (ImC0 + err2Y); const float Jw2 = 1.f / (ImC2 + err2Z); - chiY = Jw0 * Jz0 * Jz0; - chiZ = Jw2 * Jz1 * Jz1; + chi2Y = Jw0 * Jz0 * Jz0; + chi2Z = Jw2 * Jz1 * Jz1; } else { const float Iz0 = inter->posY - mP[0]; const float Iz1 = inter->posZ - mP[1]; @@ -751,11 +751,11 @@ GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict Jw0 *= Jdet; const float Jw1 = ImC1 * Jdet; Jw2 *= Jdet; - chiY = CAMath::Abs((Jw0 * Jz0 + Jw1 * Jz1) * Jz0); - chiZ = CAMath::Abs((Jw1 * Jz0 + Jw2 * Jz1) * Jz1); + chi2Y = CAMath::Abs((Jw0 * Jz0 + Jw1 * Jz1) * Jz0); + chi2Z = CAMath::Abs((Jw1 * Jz0 + Jw2 * Jz1) * Jz1); } - if (RejectCluster(chiY * param.rec.tpc.clusterRejectChi2TolleranceY, chiZ * param.rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { // TODO: Relative Pt resolution decreases slightly, why? - return updateErrorClusterRejected; + if (RejectCluster(chi2Y * param.rec.tpc.clusterRejectChi2TolleranceY, chi2Z * param.rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { // TODO: Relative Pt resolution decreases slightly, why? 
+ return updateErrorClusterRejectedInInterpolation; } } return 0; @@ -771,13 +771,13 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int16_t cluste const float z0 = posY - mP[0]; const float z1 = posZ - mP[1]; - float w0, w1, w2, chiY, chiZ; + float w0, w1, w2, chi2Y, chi2Z; if (mFitInProjections || mT->NDF() <= 0) { w0 = 1.f / (err2Y + d00); w1 = 0; w2 = 1.f / (err2Z + d11); - chiY = w0 * z0 * z0; - chiZ = w2 * z1 * z1; + chi2Y = w0 * z0 * z0; + chi2Z = w2 * z1 * z1; } else { w0 = d11 + err2Z, w1 = d10, w2 = d00 + err2Y; { // Invert symmetric matrix @@ -790,13 +790,13 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int16_t cluste w1 = -w1 * det; w2 = w2 * det; } - chiY = CAMath::Abs((w0 * z0 + w1 * z1) * z0); - chiZ = CAMath::Abs((w1 * z0 + w2 * z1) * z1); + chi2Y = CAMath::Abs((w0 * z0 + w1 * z1) * z0); + chi2Z = CAMath::Abs((w1 * z0 + w2 * z1) * z1); } - float dChi2 = chiY + chiZ; - // GPUInfo("hits %d chi2 %f, new %f %f (dy %f dz %f)", N, mChi2, chiY, chiZ, z0, z1); - if (rejectChi2 == 1 && RejectCluster(chiY * param->rec.tpc.clusterRejectChi2TolleranceY, chiZ * param->rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { - return updateErrorClusterRejected; + float dChi2 = chi2Y + chi2Z; + // GPUInfo("hits %d chi2 %f, new %f %f (dy %f dz %f)", N, mChi2, chi2Y, chi2Z, z0, z1); + if (rejectChi2 && RejectCluster(chi2Y * param->rec.tpc.clusterRejectChi2TolleranceY, chi2Z * param->rec.tpc.clusterRejectChi2TolleranceZ, clusterState)) { + return updateErrorClusterRejectedInUpdate; } mT->Chi2() += dChi2; mT->NDF() += 2; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index a2369bafc9751..d2d06df7b5710 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -52,7 +52,10 @@ class GPUTPCGMPropagator enum UpdateRetVal { updateErrorFitFailed = -1, updateErrorClusterRejected = 2, - updateErrorEdgeCluster = 3 + 
updateErrorClusterRejectedDistance = 2, + updateErrorEdgeCluster = 3, + updateErrorClusterRejectedInInterpolation = 4, + updateErrorClusterRejectedInUpdate = 5 }; enum RejectChi2Mode { rejectDirect = 1, diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 77453a87b3763..3b50bec45a41e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -305,7 +305,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ int32_t retVal; float threshold = 3.f + (lastUpdateX >= 0 ? (CAMath::Abs(mX - lastUpdateX) / 2) : 0.f); if (mNDF > 5 && (CAMath::Abs(yy - mP[0]) > threshold || CAMath::Abs(zz - mP[1]) > threshold)) { - retVal = GPUTPCGMPropagator::updateErrorClusterRejected; + retVal = GPUTPCGMPropagator::updateErrorClusterRejectedDistance; } else { int8_t rejectChi2 = attempt ? 0 : ((param.rec.tpc.mergerInterpolateErrors && CAMath::Abs(ihit - ihitMergeFirst) <= 1) ? (refit ? 
(GPUTPCGMPropagator::rejectInterFill + ((nWays - iWay) & 1)) : 0) : (allowModification && goodRows > 5)); #if EXTRACT_RESIDUALS == 1 From 84714d9838b3a07f4ed88903e6628071e322b549 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 16 Apr 2025 18:37:13 +0200 Subject: [PATCH 0401/1914] GPU: Fix track buffer size for 0 magnetic field --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f42e5f35b1dc9..bf8d5294bb7c4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -378,8 +378,8 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) } } mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.01f * gpu_common_constants::kCLight)) { - mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { + mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { mNMaxTracks = mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks); } From 63bc2e3893851ef0f849bb4c98c65eae1ba21e47 Mon Sep 17 00:00:00 2001 From: noferini <9963644+noferini@users.noreply.github.com> Date: Sat, 4 Jan 2025 11:17:58 +0100 Subject: [PATCH 0402/1914] fix in FT0 digitization (time wrt BC) --- Detectors/FIT/FT0/simulation/src/Digitizer.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/FIT/FT0/simulation/src/Digitizer.cxx b/Detectors/FIT/FT0/simulation/src/Digitizer.cxx index a261475df31f5..aca012f1bc5a9 100644 --- a/Detectors/FIT/FT0/simulation/src/Digitizer.cxx +++ b/Detectors/FIT/FT0/simulation/src/Digitizer.cxx @@ -220,7 +220,7 @@ void Digitizer::process(const 
std::vector* hits, // Subtract time-of-flight from hit time const Float_t timeOfFlight = hit.GetPos().R() / o2::constants::physics::LightSpeedCm2NS; const Float_t timeOffset = is_A_side ? params.hitTimeOffsetA : params.hitTimeOffsetC; - Double_t hit_time = hit.GetTime() - timeOfFlight + timeOffset; + Double_t hit_time = hit.GetTime() - timeOfFlight + timeOffset + mIntRecord.getTimeOffsetWrtBC(); if (hit_time > 150) { continue; // not collect very slow particles @@ -285,7 +285,7 @@ void Digitizer::storeBC(BCCache& bc, if (mCalibOffset) { miscalib = mCalibOffset->mTimeOffsets[ipmt]; } - int smeared_time = 1000. * (*cfd.particle - params.mCfdShift) * params.mChannelWidthInverse + miscalib + int(1000. * mIntRecord.getTimeOffsetWrtBC() * params.mChannelWidthInverse); + int smeared_time = 1000. * (*cfd.particle - params.mCfdShift) * params.mChannelWidthInverse + miscalib; // + int(1000. * mIntRecord.getTimeOffsetWrtBC() * params.mChannelWidthInverse); bool is_time_in_signal_gate = (smeared_time > -params.mTime_trg_gate && smeared_time < params.mTime_trg_gate); float charge = measure_amplitude(channel_times) * params.mCharge2amp; float amp = is_time_in_signal_gate ? 
params.mMV_2_Nchannels * charge : 0; From d98f5354db54f8410925a3578a8d7cead2079fae Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 17 Apr 2025 01:55:38 +0200 Subject: [PATCH 0403/1914] Fix margin for 0 B-field in the GPU code --- GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx index 7793fac7e03ac..cf35a7f261167 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx @@ -195,7 +195,7 @@ int32_t GPUTPCGMPolynomialFieldManager::GetPolynomialField(float nominalFieldkG, StoredField_t type = kUnknown; - if (fabsf(nominalFieldkG) < 0.01f) { + if (fabsf(nominalFieldkG) < 0.013f) { type = kUniform; nominalFieldkG = 0; } else if (fabsf(fabsf(nominalFieldkG) - 5.00668f) <= fabsf(fabsf(nominalFieldkG) - 2.f)) { From 9a197c20a013b43f11c1605214307306a78a9c1a Mon Sep 17 00:00:00 2001 From: shahoian Date: Wed, 16 Apr 2025 10:06:28 +0200 Subject: [PATCH 0404/1914] Use common FatalWhenNull setting in getSpecificForRun --- CCDB/include/CCDB/BasicCCDBManager.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CCDB/include/CCDB/BasicCCDBManager.h b/CCDB/include/CCDB/BasicCCDBManager.h index 8af1817718fa2..b7bf6920a5c7c 100644 --- a/CCDB/include/CCDB/BasicCCDBManager.h +++ b/CCDB/include/CCDB/BasicCCDBManager.h @@ -330,7 +330,7 @@ T* CCDBManagerInstance::getForRun(std::string const& path, int runNumber, bool s template T* CCDBManagerInstance::getSpecificForRun(std::string const& path, int runNumber, MD metaData) { - auto [start, stop] = getRunDuration(runNumber); + auto [start, stop] = getRunDuration(runNumber, mFatalWhenNull); if (start < 0 || stop < 0) { if (mFatalWhenNull) { reportFatal(std::string("Failed to get run duration for run ") + std::to_string(runNumber) + std::string(" from CCDB")); 
From 919e8f2c8b81d80d3d249e289dcdea657b4377ae Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Mon, 14 Apr 2025 11:02:43 +0200 Subject: [PATCH 0405/1914] DPL: set runNumber in DataHeader from raw data readers --- Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx | 7 ++++++- Detectors/Raw/src/RawFileReaderWorkflow.cxx | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx index ef59d94b26048..07a62a7fd4a58 100644 --- a/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx +++ b/Detectors/Raw/TFReaderDD/src/TFReaderSpec.cxx @@ -327,7 +327,12 @@ void TFReaderSpec::stopProcessing(o2f::ProcessingContext& ctx) auto device = ctx.services().get().device(); o2f::SourceInfoHeader exitHdr; exitHdr.state = o2f::InputChannelState::Completed; - const auto exitStack = o2h::Stack(o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0), o2f::DataProcessingHeader(), exitHdr); + o2h::DataHeader dh = o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0); + try { + dh.runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) 
{ + } + const auto exitStack = o2h::Stack(dh, o2f::DataProcessingHeader(), exitHdr); auto fmqFactory = device->GetChannel(mInput.rawChannelConfig, 0).Transport(); auto hdEOSMessage = fmqFactory->CreateMessage(exitStack.size(), fair::mq::Alignment{64}); auto plEOSMessage = fmqFactory->CreateMessage(0, fair::mq::Alignment{64}); diff --git a/Detectors/Raw/src/RawFileReaderWorkflow.cxx b/Detectors/Raw/src/RawFileReaderWorkflow.cxx index 46f7ba2a5c7f1..a7313f3154ac2 100644 --- a/Detectors/Raw/src/RawFileReaderWorkflow.cxx +++ b/Detectors/Raw/src/RawFileReaderWorkflow.cxx @@ -347,7 +347,12 @@ void RawReaderSpecs::run(o2f::ProcessingContext& ctx) if (!mRawChannelName.empty()) { // send endOfStream message to raw channel o2f::SourceInfoHeader exitHdr; exitHdr.state = o2f::InputChannelState::Completed; - const auto exitStack = o2::header::Stack(o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0), o2f::DataProcessingHeader(), exitHdr); + o2h::DataHeader dh = o2h::DataHeader(o2h::gDataDescriptionInfo, o2h::gDataOriginAny, 0, 0); + try { + dh.runNumber = strtoul(device->fConfig->GetProperty("runNumber", "").c_str(), nullptr, 10); + } catch (...) 
{ + } + const auto exitStack = o2::header::Stack(dh, o2f::DataProcessingHeader(), exitHdr); auto fmqFactory = device->GetChannel(mRawChannelName, 0).Transport(); auto hdEOSMessage = fmqFactory->CreateMessage(exitStack.size(), fair::mq::Alignment{64}); auto plEOSMessage = fmqFactory->CreateMessage(0, fair::mq::Alignment{64}); From a7246d0f2f8c01fd893483fe83e902a1ff5c7e47 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 09:33:09 +0200 Subject: [PATCH 0406/1914] GPU: Use a unified constant instead of copy and paste for 0 field cut --- GPU/Common/GPUCommonConstants.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/GPU/Common/GPUCommonConstants.h b/GPU/Common/GPUCommonConstants.h index 01d67eab1f9d3..1a7e34885c34a 100644 --- a/GPU/Common/GPUCommonConstants.h +++ b/GPU/Common/GPUCommonConstants.h @@ -20,6 +20,7 @@ namespace o2::gpu::gpu_common_constants { static constexpr const float kCLight = 0.000299792458f; // TODO: Duplicate of MathConstants, fix this now that we use only OpenCL CPP +static constexpr const float kZeroFieldCut = 0.013f; } #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index bf8d5294bb7c4..82b21e2045b8e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -378,7 +378,7 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) } } mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { mNMaxTracks = 
mRec->MemoryScalers()->NTPCMergedTracks(mNTotalSectorTracks); @@ -1743,7 +1743,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.DzDs() = p2.DzDs(); p1.QPt() = p2.QPt(); mergedTrack.SetAlpha(p2.Alpha()); - if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (0.013f * gpu_common_constants::kCLight)) { + if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx index cf35a7f261167..6717ac775b077 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPolynomialFieldManager.cxx @@ -195,7 +195,7 @@ int32_t GPUTPCGMPolynomialFieldManager::GetPolynomialField(float nominalFieldkG, StoredField_t type = kUnknown; - if (fabsf(nominalFieldkG) < 0.013f) { + if (fabsf(nominalFieldkG) < gpu_common_constants::kZeroFieldCut) { type = kUniform; nominalFieldkG = 0; } else if (fabsf(fabsf(nominalFieldkG) - 5.00668f) <= fabsf(fabsf(nominalFieldkG) - 2.f)) { From b45085c120b07fd313e9d0d546705a0f9c0f7c3e Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Thu, 17 Apr 2025 11:04:40 +0200 Subject: [PATCH 0407/1914] Include 20Neon in Pythia8 particles database --- Generators/share/egconfig/pythia8_NeNe.cfg | 8 ++++++++ Generators/src/GeneratorPythia8.cxx | 2 ++ 2 files changed, 10 insertions(+) create mode 100644 Generators/share/egconfig/pythia8_NeNe.cfg diff --git a/Generators/share/egconfig/pythia8_NeNe.cfg b/Generators/share/egconfig/pythia8_NeNe.cfg new file mode 100644 index 0000000000000..fff1dbb5f3d59 --- /dev/null +++ b/Generators/share/egconfig/pythia8_NeNe.cfg @@ -0,0 +1,8 @@ +### beams +Beams:idA 1000100200 # Neon +Beams:idB 1000100200 # Neon +Beams:eCM 10720. # GeV + +### decays +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. 
diff --git a/Generators/src/GeneratorPythia8.cxx b/Generators/src/GeneratorPythia8.cxx index 385eb148412ef..e883ceb6470a9 100644 --- a/Generators/src/GeneratorPythia8.cxx +++ b/Generators/src/GeneratorPythia8.cxx @@ -209,6 +209,8 @@ Bool_t GeneratorPythia8::Init() mPythia.setUserHooksPtr((Pythia8::UserHooksPtr)powhegHooks); } } + /** Add 20Neon to collision particle database */ + mPythia.particleData.addParticle(1000100200, "20Ne", 6, 30, 0, 19.992440); /** initialise **/ if (!mPythia.init()) { LOG(fatal) << "Failed to init \'Pythia8\': init returned with error"; From 99c08d385ed02f7dfb2f8e2032f6a2882bd24171 Mon Sep 17 00:00:00 2001 From: pillot Date: Thu, 17 Apr 2025 17:08:29 +0200 Subject: [PATCH 0408/1914] improve track extrapolation to vtx wo MCS correction (#14189) --- .../include/MCHTracking/TrackExtrap.h | 16 +++++++++----- .../MUON/MCH/Tracking/src/TrackExtrap.cxx | 21 ++++++++++++++++++- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h b/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h index 709d2c991a1be..37b3cafb90523 100644 --- a/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h +++ b/Detectors/MUON/MCH/Tracking/include/MCHTracking/TrackExtrap.h @@ -18,6 +18,7 @@ #define O2_MCH_TRACKEXTRAP_H_ #include +#include #include @@ -70,17 +71,21 @@ class TrackExtrap /// Add branson correction resolution to parameter covariances return extrapToVertex(trackParam, xVtx, yVtx, zVtx, errXVtx, errYVtx, true, false); } - static bool extrapToVertexWithoutBranson(TrackParam& trackParam, double zVtx) + static bool extrapToVertexWithoutBranson(TrackParam& trackParam, double zVtx, + double xUpstream = 0., double yUpstream = 0., + std::optional zUpstream = std::nullopt) { /// Extrapolate track parameters to vertex, corrected for energy loss effects only /// Add dispersion due to multiple scattering and energy loss fluctuation to parameter covariances - return 
extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, true); + return extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, true, xUpstream, yUpstream, zUpstream); } - static bool extrapToVertexUncorrected(TrackParam& trackParam, double zVtx) + static bool extrapToVertexUncorrected(TrackParam& trackParam, double zVtx, + double xUpstream = 0., double yUpstream = 0., + std::optional zUpstream = std::nullopt) { /// Extrapolate track parameters to vertex without multiple scattering and energy loss corrections /// Add dispersion due to multiple scattering to parameter covariances - return extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, false); + return extrapToVertex(trackParam, 0., 0., zVtx, 0., 0., false, false, xUpstream, yUpstream, zUpstream); } static bool extrapToMID(TrackParam& trackParam); @@ -92,7 +97,8 @@ class TrackExtrap private: static bool extrapToVertex(TrackParam& trackParam, double xVtx, double yVtx, double zVtx, - double errXVtx, double errYVtx, bool correctForMCS, bool correctForEnergyLoss); + double errXVtx, double errYVtx, bool correctForMCS, bool correctForEnergyLoss, + double xUpstream = 0., double yUpstream = 0., std::optional zUpstream = std::nullopt); static bool getAbsorberCorrectionParam(double trackXYZIn[3], double trackXYZOut[3], double pTotal, double& pathLength, double& f0, double& f1, double& f2, diff --git a/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx b/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx index a91d42aca0f3b..09b8d6d3b330f 100644 --- a/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx +++ b/Detectors/MUON/MCH/Tracking/src/TrackExtrap.cxx @@ -276,7 +276,8 @@ bool TrackExtrap::extrapToMID(TrackParam& trackParam) //__________________________________________________________________________ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVtx, double zVtx, - double errXVtx, double errYVtx, bool correctForMCS, bool correctForEnergyLoss) + double errXVtx, double errYVtx, bool 
correctForMCS, bool correctForEnergyLoss, + double xUpstream, double yUpstream, std::optional zUpstream) { /// Main method for extrapolation to the vertex: /// Returns the track parameters and covariances resulting from the extrapolation of the current trackParam @@ -285,6 +286,8 @@ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVt /// if correctForMCS=false: add parameter dispersion due to MCS in parameter covariances /// if correctForEnergyLoss=true: correct parameters for energy loss and add energy loss fluctuation to covariances /// if correctForEnergyLoss=false: do nothing about energy loss + /// In case correctForMCS=false and the position of the track upstream the absorber is provided, it is used + /// to compute the absorber correction parameters, instead of the extrapolated track position from downstream if (trackParam.getZ() == zVtx) { return true; // nothing to be done if already at vertex @@ -301,6 +304,18 @@ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVt } } + // check the upstream track position with respect to the absorber if provided and used (spectro z<0) + // zUpstream must be >= SAbsZBeg with 100 µm tolerance to account for numerical precision + if (!correctForMCS && zUpstream && *zUpstream < SAbsZBeg - 0.01) { + if (*zUpstream < SAbsZEnd) { + LOG(warning) << "Upstream Z (" << *zUpstream << ") downstream the front absorber (zAbsorberEnd = " << SAbsZEnd << ")"; + return false; + } else { + LOG(warning) << "Upstream Z (" << *zUpstream << ") inside the front absorber (" << SAbsZBeg << ", " << SAbsZEnd << ")"; + return false; + } + } + // Check the track position with respect to the vertex and the absorber (spectro z<0) if (trackParam.getZ() > SAbsZEnd) { if (trackParam.getZ() > zVtx) { @@ -328,6 +343,10 @@ bool TrackExtrap::extrapToVertex(TrackParam& trackParam, double xVtx, double yVt trackXYZIn[2] = SAbsZBeg; trackXYZIn[0] = trackXYZOut[0] + (xVtx - trackXYZOut[0]) / (zVtx - 
trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); trackXYZIn[1] = trackXYZOut[1] + (yVtx - trackXYZOut[1]) / (zVtx - trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); + } else if (zUpstream) { // or linear propagation to the upstream track position + trackXYZIn[2] = SAbsZBeg; + trackXYZIn[0] = trackXYZOut[0] + (xUpstream - trackXYZOut[0]) / (*zUpstream - trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); + trackXYZIn[1] = trackXYZOut[1] + (yUpstream - trackXYZOut[1]) / (*zUpstream - trackXYZOut[2]) * (trackXYZIn[2] - trackXYZOut[2]); } else { // or standard propagation without vertex constraint TrackParam trackParamIn(trackParam); if (!extrapToZ(trackParamIn, SAbsZBeg)) { From 8c94458e83debdbc773d4e1a57923a6e63733c2d Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 17 Apr 2025 20:36:15 +0200 Subject: [PATCH 0409/1914] Saner publishing period for FLP case (#14198) --- Framework/Core/src/CommonServices.cxx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/CommonServices.cxx b/Framework/Core/src/CommonServices.cxx index 5c333bbb85e3b..22324cd84b390 100644 --- a/Framework/Core/src/CommonServices.cxx +++ b/Framework/Core/src/CommonServices.cxx @@ -891,6 +891,11 @@ o2::framework::ServiceSpec CommonServices::dataProcessingStats() if (!DefaultsHelpers::onlineDeploymentMode() && DefaultsHelpers::deploymentMode() != DeploymentMode::FST) { arrowAndResourceLimitingMetrics = true; } + + int64_t consumedTimeframesPublishInterval = 0; + if (DefaultsHelpers::deploymentMode() == DeploymentMode::OnlineECS) { + consumedTimeframesPublishInterval = 5000; + } // Input proxies should not report cpu_usage_fraction, // because of the rate limiting which biases the measurement. 
auto& spec = services.get(); @@ -950,7 +955,7 @@ o2::framework::ServiceSpec CommonServices::dataProcessingStats() MetricSpec{.name = "consumed-timeframes", .metricId = (int)ProcessingStatsId::CONSUMED_TIMEFRAMES, .kind = Kind::UInt64, - .minPublishInterval = 0, + .minPublishInterval = consumedTimeframesPublishInterval, .maxRefreshLatency = quickRefreshInterval, .sendInitialValue = true}, MetricSpec{.name = "min_input_latency_ms", From b856a634468cfe8e80890ce3d6b77fb7bbba9dd6 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 15:52:54 +0200 Subject: [PATCH 0410/1914] GPU CMake: Fix compile flags, particularly HIP deterministic mode was missing -ffp-contract=off --- dependencies/FindO2GPU.cmake | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 95db55041184f..d50705d106bf3 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -162,7 +162,7 @@ if(ENABLE_CUDA) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CUDA_NO_FAST_MATH_FLAGS}") elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math --ftz=true") + string(APPEND CMAKE_CUDA_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -use_fast_math ${GPUCA_CUDA_DENORMALS_FLAGS}") endif() if(CMAKE_CXX_FLAGS MATCHES "(^| )-Werror( |$)") string(APPEND CMAKE_CUDA_FLAGS " -Werror=cross-execution-space-call") @@ -293,11 +293,10 @@ if(ENABLE_HIP) if(GPUCA_KERNEL_RESOURCE_USAGE_VERBOSE) string(APPEND CMAKE_HIP_FLAGS " -Rpass-analysis=kernel-resource-usage") endif() - if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) - string(APPEND CMAKE_HIP_FLAGS " -ffast-math") - endif() - if(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -O3") + 
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) + string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") + elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") + string(APPEND CMAKE_HIP_FLAGS_${CMAKE_BUILD_TYPE_UPPER} " -ffast-math -O3") endif() string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics if(HIP_AMDGPUTARGET) From fc52b5db290f2ebeced93820c8de20379f9f3283 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 16 Apr 2025 18:44:10 +0200 Subject: [PATCH 0411/1914] GPU TPC: Change some code in sector tracking from prepreccor #if to if constexpr, to avoid preprocessing the RTC code --- .../SectorTracker/GPUTPCNeighboursFinder.cxx | 153 +++++++++--------- .../SectorTracker/GPUTPCNeighboursFinder.h | 2 - .../SectorTracker/GPUTPCStartHitsFinder.cxx | 50 +++--- .../SectorTracker/GPUTPCTrackletSelector.cxx | 48 +++--- .../SectorTracker/GPUTPCTrackletSelector.h | 2 - 5 files changed, 127 insertions(+), 128 deletions(-) diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index d76c079bb406f..4cdb4d151eba4 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -75,14 +75,10 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh return; } -#define UnrollGlobal 4 -#define MaxShared GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP -#if MaxShared < GPUCA_MAXN -#define MaxGlobal ((GPUCA_MAXN - MaxShared - 1) / UnrollGlobal + 1) * UnrollGlobal -#else -#define MaxGlobal 0 -#endif -#define MaxTotal MaxShared + MaxGlobal + static constexpr uint32_t UNROLL_GLOBAL = 4; + static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP; + static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < GPUCA_MAXN) ? 
(((GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0; + static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL; const float chi2Cut = 3.f * 3.f * 4 * (s.mUpDx * s.mUpDx + s.mDnDx * s.mDnDx); // float chi2Cut = 3.f*3.f*(s.mUpDx*s.mUpDx + s.mDnDx*s.mDnDx ); //SG @@ -117,10 +113,8 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh const float kAreaSlopeZUp = kAngularMultiplier != 0.f ? 1.f : s.mUpTx; const float kAreaSlopeZDn = kAngularMultiplier != 0.f ? 1.f : s.mDnTx; -#if MaxGlobal > 0 - calink neighUp[MaxGlobal]; - float yzUp[2 * MaxGlobal]; -#endif + calink neighUp[MAX_GLOBAL]; + float yzUp[2 * MAX_GLOBAL]; for (int32_t ih = iThread; ih < s.mNHits; ih += nThreads) { @@ -128,7 +122,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh const float y = y0 + hitData.x * stepY; const float z = z0 + hitData.y * stepZ; - int32_t nNeighUp = 0; + uint32_t nNeighUp = 0; float minZ, maxZ, minY, maxY; int32_t binYmin, binYmax, binZmin, binZmax; int32_t nY; @@ -145,11 +139,11 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh nY = rowUp.Grid().Ny(); } - for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MaxTotal); k1++) { + for (int32_t k1 = binZmin; k1 <= binZmax && (nNeighUp < MAX_TOTAL); k1++) { int32_t iMin = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmin]; int32_t iMax = lFirstHitInBin[lFirstHitInBinOffsetUp + k1 * nY + binYmax + 1]; GPUCA_UNROLL(U(4), U(2)) - for (int32_t i = iMin; i < iMax && (nNeighUp < MaxTotal); i++) { + for (int32_t i = iMin; i < iMax && (nNeighUp < MAX_TOTAL); i++) { const GPUglobalref() cahit2& hitDataUp = pHitData[lHitNumberOffsetUp + i]; GPUTPCHit h; h.mY = y0Up + (hitDataUp.x) * stepYUp; @@ -159,51 +153,48 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh continue; } -#if MaxGlobal > 0 -#if MaxShared == 0 - if (true) { -#else - if (nNeighUp >= MaxShared) { -#endif - 
neighUp[nNeighUp - MaxShared] = (calink)i; - yzUp[2 * (nNeighUp - MaxShared)] = s.mDnDx * (h.Y() - y); - yzUp[2 * (nNeighUp - MaxShared) + 1] = s.mDnDx * (h.Z() - z); - } else -#endif - { -#if MaxShared > 0 - s.mB[nNeighUp][iThread] = (calink)i; - s.mA1[nNeighUp][iThread] = s.mDnDx * (h.Y() - y); - s.mA2[nNeighUp][iThread] = s.mDnDx * (h.Z() - z); -#endif + const bool inGlobal = nNeighUp >= MAX_SHARED; + if constexpr (MAX_GLOBAL > 0) { + if (inGlobal) { + neighUp[nNeighUp - MAX_SHARED] = (calink)i; + yzUp[2 * (nNeighUp - MAX_SHARED)] = s.mDnDx * (h.Y() - y); + yzUp[2 * (nNeighUp - MAX_SHARED) + 1] = s.mDnDx * (h.Z() - z); + } + } + if constexpr (MAX_SHARED > 0) { + if (!inGlobal) { + s.mB[nNeighUp][iThread] = (calink)i; + s.mA1[nNeighUp][iThread] = s.mDnDx * (h.Y() - y); + s.mA2[nNeighUp][iThread] = s.mDnDx * (h.Z() - z); + } } nNeighUp++; } } -#if MaxShared > 0 // init a rest of the shared array - for (int32_t iUp = nNeighUp; iUp < MaxShared; iUp++) { - s.mA1[iUp][iThread] = -1.e10f; - s.mA2[iUp][iThread] = -1.e10f; - s.mB[iUp][iThread] = (calink)-1; + if constexpr (MAX_SHARED > 0) { // init the rest of the shared array + for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) { + s.mA1[iUp][iThread] = -1.e10f; + s.mA2[iUp][iThread] = -1.e10f; + s.mB[iUp][iThread] = (calink)-1; + } } -#endif -#if MaxGlobal > 0 // init a rest of the UnrollGlobal chunk of the global array - int32_t Nrest = nNeighUp - MaxShared; - int32_t N4 = (Nrest / UnrollGlobal) * UnrollGlobal; - if (N4 < Nrest) { - N4 += UnrollGlobal; - GPUCA_UNROLL(U(UnrollGlobal - 1), U(UnrollGlobal - 1)) - for (int32_t k = 0; k < UnrollGlobal - 1; k++) { - if (Nrest + k < N4) { - yzUp[2 * (Nrest + k)] = -1.e10f; - yzUp[2 * (Nrest + k) + 1] = -1.e10f; - neighUp[Nrest + k] = (calink)-1; + const uint32_t Nrest = nNeighUp - MAX_SHARED; + uint32_t N4 = (Nrest / UNROLL_GLOBAL) * UNROLL_GLOBAL; + if constexpr (MAX_GLOBAL > 0) { // init the rest of the UNROLL_GLOBAL chunk of the global array + if (nNeighUp > 
MAX_SHARED && N4 < Nrest) { + N4 += UNROLL_GLOBAL; + GPUCA_UNROLL(U(UNROLL_GLOBAL - 1), U(UNROLL_GLOBAL - 1)) + for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) { + if (Nrest + k < N4) { + yzUp[2 * (Nrest + k)] = -1.e10f; + yzUp[2 * (Nrest + k) + 1] = -1.e10f; + neighUp[Nrest + k] = (calink)-1; + } } } } -#endif { // area in the lower row const float yy = y * s.mDnTx; @@ -236,47 +227,49 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh float yDnProjUp = s.mUpDx * (yDn - y); float zDnProjUp = s.mUpDx * (zDn - z); -#if MaxShared > 0 - GPUCA_UNROLL(U(MaxShared), U(MaxShared)) - for (int32_t iUp = 0; iUp < MaxShared; iUp++) { - const float dy = yDnProjUp - s.mA1[iUp][iThread]; - const float dz = zDnProjUp - s.mA2[iUp][iThread]; - const float d = dy * dy + dz * dz; - if (d < bestD) { - bestD = d; - linkDn = i; - linkUp = iUp; - } - } -#endif - -#if MaxGlobal > 0 - for (int32_t iUp = 0; iUp < N4; iUp += UnrollGlobal) { - GPUCA_UNROLL(U(UnrollGlobal), U(UnrollGlobal)) - for (int32_t k = 0; k < UnrollGlobal; k++) { - int32_t jUp = iUp + k; - const float dy = yDnProjUp - yzUp[2 * jUp]; - const float dz = zDnProjUp - yzUp[2 * jUp + 1]; + if constexpr (MAX_SHARED > 0) { + GPUCA_UNROLL(U(MAX_SHARED), U(MAX_SHARED)) + for (uint32_t iUp = 0; iUp < MAX_SHARED; iUp++) { + const float dy = yDnProjUp - s.mA1[iUp][iThread]; + const float dz = zDnProjUp - s.mA2[iUp][iThread]; const float d = dy * dy + dz * dz; if (d < bestD) { bestD = d; linkDn = i; - linkUp = MaxShared + jUp; + linkUp = iUp; + } + } + } + + if constexpr (MAX_GLOBAL > 0) { + if (nNeighUp > MAX_SHARED) { + for (uint32_t iUp = 0; iUp < N4; iUp += UNROLL_GLOBAL) { + GPUCA_UNROLL(U(UNROLL_GLOBAL), U(UNROLL_GLOBAL)) + for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) { + const uint32_t jUp = iUp + k; + const float dy = yDnProjUp - yzUp[2 * jUp]; + const float dz = zDnProjUp - yzUp[2 * jUp + 1]; + const float d = dy * dy + dz * dz; + if (d < bestD) { + bestD = d; + linkDn = i; + linkUp = 
MAX_SHARED + jUp; + } + } } } } -#endif } } if (linkUp >= 0) { -#if MaxShared > 0 && MaxGlobal > 0 - linkUp = (linkUp >= MaxShared) ? neighUp[linkUp - MaxShared] : s.mB[linkUp][iThread]; -#elif MaxShared > 0 - linkUp = s.mB[linkUp][iThread]; -#else - linkUp = neighUp[linkUp]; -#endif + if constexpr (MAX_SHARED > 0 && MAX_GLOBAL > 0) { + linkUp = ((uint32_t)linkUp >= MAX_SHARED) ? neighUp[linkUp - MAX_SHARED] : s.mB[linkUp][iThread]; + } else if constexpr (MAX_SHARED > 0) { + linkUp = s.mB[linkUp][iThread]; + } else { + linkUp = neighUp[linkUp]; + } } tracker.mData.mLinkUpData[lHitNumberOffset + ih] = linkUp; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h index 0ecd230a67415..6bdc637b6bad6 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h @@ -40,12 +40,10 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate int32_t mIRow; // row number int32_t mIRowUp; // next row number int32_t mIRowDn; // previous row number -#if GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0 static_assert(GPUCA_MAXN >= GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP); float mA1[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; float mA2[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; calink mB[GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)]; -#endif GPUTPCRow mRow, mRowUp, mRowDown; }; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx index 20dfd69864816..06dac4a68c540 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCStartHitsFinder.cxx @@ -39,36 +39,38 @@ GPUdii() void GPUTPCStartHitsFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nThr uint32_t linkUpData = 
tracker.mData.mLinkUpData[lHitNumberOffset + ih]; if (tracker.mData.mLinkDownData[lHitNumberOffset + ih] == CALINK_INVAL && linkUpData != CALINK_INVAL && tracker.mData.mLinkUpData[rowUp.mHitNumberOffset + linkUpData] != CALINK_INVAL) { -#if GPUCA_PAR_SORT_STARTHITS > 0 - GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; - uint32_t nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); - if (nextRowStartHits >= tracker.mNMaxRowStartHits) { - tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); - CAMath::AtomicExchShared(&s.mNRowStartHits, tracker.mNMaxRowStartHits); - break; + GPUglobalref() GPUTPCHitId* GPUrestrict() startHits; + uint32_t nextRowStartHits; + if constexpr (GPUCA_PAR_SORT_STARTHITS > 0) { + startHits = tracker.mTrackletTmpStartHits + s.mIRow * tracker.mNMaxRowStartHits; + nextRowStartHits = CAMath::AtomicAddShared(&s.mNRowStartHits, 1u); + if (nextRowStartHits >= tracker.mNMaxRowStartHits) { + tracker.raiseError(GPUErrors::ERROR_ROWSTARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxRowStartHits); + CAMath::AtomicExchShared(&s.mNRowStartHits, tracker.mNMaxRowStartHits); + break; + } + } else { + startHits = tracker.mTrackletStartHits; + nextRowStartHits = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, 1u); + if (nextRowStartHits >= tracker.mNMaxStartHits) { + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); + CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); + break; + } } -#else - GPUglobalref() GPUTPCHitId* const GPUrestrict() startHits = tracker.mTrackletStartHits; - uint32_t nextRowStartHits = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, 1u); - if (nextRowStartHits >= tracker.mNMaxStartHits) { - 
tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nextRowStartHits, tracker.mNMaxStartHits); - CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); - break; - } -#endif startHits[nextRowStartHits].Set(s.mIRow, ih); } } GPUbarrier(); -#if GPUCA_PAR_SORT_STARTHITS > 0 - if (iThread == 0) { - uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); - tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; - if (nOffset + s.mNRowStartHits > tracker.mNMaxStartHits) { - tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); - CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); + if constexpr (GPUCA_PAR_SORT_STARTHITS > 0) { + if (iThread == 0) { + uint32_t nOffset = CAMath::AtomicAdd(&tracker.mCommonMem->nStartHits, s.mNRowStartHits); + tracker.mRowStartHitCountOffset[s.mIRow] = s.mNRowStartHits; + if (nOffset + s.mNRowStartHits > tracker.mNMaxStartHits) { + tracker.raiseError(GPUErrors::ERROR_STARTHIT_OVERFLOW, tracker.ISector() * 1000 + s.mIRow, nOffset + s.mNRowStartHits, tracker.mNMaxStartHits); + CAMath::AtomicExch(&tracker.mCommonMem->nStartHits, tracker.mNMaxStartHits); + } } } -#endif } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx index e27a8f66ae754..0bf3448bed730 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.cxx @@ -48,11 +48,11 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread int32_t irow = firstRow; - int32_t gap = 0; - int32_t nShared = 0; - int32_t nHits = 0; - const int32_t minHits = tracker.Param().rec.tpc.minNClustersTrackSeed == -1 ? 
GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(tracklet.Param().QPt() * tracker.Param().qptB5Scaler) : tracker.Param().rec.tpc.minNClustersTrackSeed; - const int32_t sharingMinNorm = minHits * tracker.Param().rec.tpc.trackletMinSharedNormFactor; + uint32_t gap = 0; + uint32_t nShared = 0; + uint32_t nHits = 0; + const uint32_t minHits = tracker.Param().rec.tpc.minNClustersTrackSeed == -1 ? GPUCA_TRACKLET_SELECTOR_MIN_HITS_B5(tracklet.Param().QPt() * tracker.Param().qptB5Scaler) : tracker.Param().rec.tpc.minNClustersTrackSeed; + const uint32_t sharingMinNorm = minHits * tracker.Param().rec.tpc.trackletMinSharedNormFactor; float maxShared = maxSharedFrac * sharingMinNorm; GPUCA_UNROLL(, U(1)) @@ -63,16 +63,20 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread } if (ih != CALINK_INVAL && ih != CALINK_DEAD_CHANNEL) { GPUglobalref() const GPUTPCRow& row = tracker.Row(irow); - bool own = (tracker.HitWeight(row, ih) <= w); - bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); + const bool own = (tracker.HitWeight(row, ih) <= w); + const bool sharedOK = nShared <= (nHits < sharingMinNorm ? maxShared : nHits * maxSharedFrac); if (own || sharedOK) { // SG!!! 
gap = 0; -#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (nHits < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { - s.mHits[nHits][iThread].Set(irow, ih); - } else -#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + const bool inShared = nHits < (uint32_t)GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE; +#pragma GCC diagnostic pop + if constexpr (GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE > 0) { + if (inShared) { + s.mHits[nHits][iThread].Set(irow, ih); + } + } + if (!inShared) { trackHits[nHits - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE].Set(irow, ih); } nHits++; @@ -100,13 +104,17 @@ GPUdii() void GPUTPCTrackletSelector::Thread<0>(int32_t nBlocks, int32_t nThread tracker.Tracks()[itrout].SetParam(tracklet.Param()); tracker.Tracks()[itrout].SetFirstHitID(nFirstTrackHit); tracker.Tracks()[itrout].SetNHits(nHits); - for (int32_t jh = 0; jh < nHits; jh++) { -#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - if (jh < GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE) { - tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; - } else -#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 - { + for (uint32_t jh = 0; jh < nHits; jh++) { +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + const bool inShared = jh < (uint32_t)GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE; +#pragma GCC diagnostic pop + if constexpr (GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE > 0) { + if (inShared) { + tracker.TrackHits()[nFirstTrackHit + jh] = s.mHits[jh][iThread]; + } + } + if (!inShared) { tracker.TrackHits()[nFirstTrackHit + jh] = trackHits[jh - GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE]; } } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h index e5a28c80f37f9..070e02fad8222 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h +++ 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h @@ -36,10 +36,8 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate int32_t mNThreadsTotal; // total n threads int32_t mNTracklets; // n of tracklets int32_t mReserved; // for alignment reasons -#if GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 static_assert(GPUCA_ROW_COUNT >= GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE); GPUTPCHitId mHits[GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)]; -#endif // GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE != 0 }; typedef GPUconstantref() GPUTPCTracker processorType; From e966e71fcd2033de0b95fae6d1e437381b4b9a1a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 10:10:11 +0200 Subject: [PATCH 0412/1914] GPU: Add missing static_assert to guarantee correct unroll factor, will crash otherwise (apparently forgotten by AMD) --- GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx | 1 + 1 file changed, 1 insertion(+) diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index 4cdb4d151eba4..6731fed55cc9c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -76,6 +76,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } static constexpr uint32_t UNROLL_GLOBAL = 4; + static_assert(GPUCA_MAXN % UNROLL_GLOBAL == 0); static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP; static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < GPUCA_MAXN) ? 
(((GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0; static constexpr uint32_t MAX_TOTAL = MAX_SHARED + MAX_GLOBAL; From 2ab600016e4a20b2e0dfea2cae94105dab3ca942 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 17 Apr 2025 13:40:07 +0200 Subject: [PATCH 0413/1914] GPU: Make some of the optimizations AMD did for the neighbors finder for MI50 optional --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 6 ++++- .../Definitions/GPUDefParametersDefaults.h | 12 +++++++++ GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- .../SectorTracker/GPUTPCNeighboursFinder.cxx | 27 ++++++++++--------- GPU/GPUTracking/kernels.cmake | 2 ++ 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index b4dac39ae1cd2..acca74e57a80e 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -263,7 +263,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } if (mProcessingSettings.deterministicGPUReconstruction) { #ifndef GPUCA_DETERMINISTIC_MODE - GPUError("Warning, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); + GPUError("WARNING, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; @@ -274,6 +274,10 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings.createO2Output = 1; } mProcessingSettings.rtc.deterministic = 1; + } else { +#ifdef GPUCA_DETERMINISTIC_MODE + GPUError("WARNING, compiled with GPUCA_DETERMINISTIC_MODE but deterministicGPUReconstruction not set, only compile-time determinism and 
deterministic math enforced, not fully deterministic!"); +#endif } if (mProcessingSettings.deterministicGPUReconstruction && mProcessingSettings.debugLevel >= 6) { mProcessingSettings.nTPCClustererLanes = 1; diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index b212abbcd2707..4ee6b23d46b51 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -516,6 +516,12 @@ #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6 #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 4 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 1 + #endif #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 12 #endif @@ -544,6 +550,12 @@ #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP #define GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0 #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL 0 + #endif + #ifndef GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED + #define GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED 0 + #endif #ifndef GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE #define GPUCA_PAR_TRACKLET_SELECTOR_HITS_REG_SIZE 0 #endif diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 34fac6514851c..9d1772379f6bd 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -284,7 +284,7 @@ AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for m AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify 
failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") -AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6") +AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1)) AddOption(showOutputStat, bool, false, "", 0, "Print some track output statistics") AddOption(runCompressionStatistics, bool, false, "compressionStat", 0, "Run statistics and verification for cluster compression") AddOption(resetTimers, int8_t, 1, "", 0, "Reset timers every event") diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx index 6731fed55cc9c..54ce7f12c655f 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.cxx @@ -75,7 +75,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh return; } - static constexpr uint32_t UNROLL_GLOBAL = 4; + static constexpr uint32_t UNROLL_GLOBAL = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL > 1 ? GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_GLOBAL : 1; static_assert(GPUCA_MAXN % UNROLL_GLOBAL == 0); static constexpr uint32_t MAX_SHARED = GPUCA_PAR_NEIGHBOURS_FINDER_MAX_NNEIGHUP; static constexpr uint32_t MAX_GLOBAL = (MAX_SHARED < GPUCA_MAXN) ? 
(((GPUCA_MAXN - MAX_SHARED - 1) / UNROLL_GLOBAL + 1) * UNROLL_GLOBAL) : 0; @@ -173,7 +173,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } } - if constexpr (MAX_SHARED > 0) { // init the rest of the shared array + if constexpr (MAX_SHARED > 0 && GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED) { // init the rest of the shared array for (uint32_t iUp = nNeighUp; iUp < MAX_SHARED; iUp++) { s.mA1[iUp][iThread] = -1.e10f; s.mA2[iUp][iThread] = -1.e10f; @@ -181,17 +181,17 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh } } - const uint32_t Nrest = nNeighUp - MAX_SHARED; - uint32_t N4 = (Nrest / UNROLL_GLOBAL) * UNROLL_GLOBAL; - if constexpr (MAX_GLOBAL > 0) { // init the rest of the UNROLL_GLOBAL chunk of the global array - if (nNeighUp > MAX_SHARED && N4 < Nrest) { - N4 += UNROLL_GLOBAL; + const uint32_t nRest = nNeighUp - MAX_SHARED; + uint32_t nRestUnrolled = (nRest / UNROLL_GLOBAL) * UNROLL_GLOBAL; + if constexpr (MAX_GLOBAL > 1) { // init the rest of the UNROLL_GLOBAL chunk of the global array + if (nNeighUp > MAX_SHARED && nRestUnrolled < nRest) { + nRestUnrolled += UNROLL_GLOBAL; GPUCA_UNROLL(U(UNROLL_GLOBAL - 1), U(UNROLL_GLOBAL - 1)) for (uint32_t k = 0; k + 1 < UNROLL_GLOBAL; k++) { - if (Nrest + k < N4) { - yzUp[2 * (Nrest + k)] = -1.e10f; - yzUp[2 * (Nrest + k) + 1] = -1.e10f; - neighUp[Nrest + k] = (calink)-1; + if (nRest + k < nRestUnrolled) { + yzUp[2 * (nRest + k)] = -1.e10f; + yzUp[2 * (nRest + k) + 1] = -1.e10f; + neighUp[nRest + k] = (calink)-1; } } } @@ -229,8 +229,9 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh float zDnProjUp = s.mUpDx * (zDn - z); if constexpr (MAX_SHARED > 0) { + const uint32_t maxSharedUp = GPUCA_PAR_NEIGHBOURS_FINDER_UNROLL_SHARED ? 
MAX_SHARED : CAMath::Min(nNeighUp, MAX_SHARED); GPUCA_UNROLL(U(MAX_SHARED), U(MAX_SHARED)) - for (uint32_t iUp = 0; iUp < MAX_SHARED; iUp++) { + for (uint32_t iUp = 0; iUp < maxSharedUp; iUp++) { const float dy = yDnProjUp - s.mA1[iUp][iThread]; const float dz = zDnProjUp - s.mA2[iUp][iThread]; const float d = dy * dy + dz * dz; @@ -244,7 +245,7 @@ GPUdii() void GPUTPCNeighboursFinder::Thread<0>(int32_t /*nBlocks*/, int32_t nTh if constexpr (MAX_GLOBAL > 0) { if (nNeighUp > MAX_SHARED) { - for (uint32_t iUp = 0; iUp < N4; iUp += UNROLL_GLOBAL) { + for (uint32_t iUp = 0; iUp < nRestUnrolled; iUp += UNROLL_GLOBAL) { GPUCA_UNROLL(U(UNROLL_GLOBAL), U(UNROLL_GLOBAL)) for (uint32_t k = 0; k < UNROLL_GLOBAL; k++) { const uint32_t jUp = iUp + k; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index ee3af2b87d925..fcf576d828b7f 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -136,6 +136,8 @@ o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALR o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP + NEIGHBOURS_FINDER_UNROLL_GLOBAL + NEIGHBOURS_FINDER_UNROLL_SHARED TRACKLET_SELECTOR_HITS_REG_SIZE ALTERNATE_BORDER_SORT SORT_BEFORE_FIT From 233a4e4bf1f5ea05c218598c05ba734909d3155f Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Fri, 18 Apr 2025 09:32:40 +0200 Subject: [PATCH 0414/1914] Common: DCAFitter explanation to README --- Common/DCAFitter/README.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Common/DCAFitter/README.md b/Common/DCAFitter/README.md index 1699ffb4f8aca..e385378d10caf 100644 --- a/Common/DCAFitter/README.md +++ b/Common/DCAFitter/README.md @@ -93,3 +93,41 @@ In this case the relevant correlation coefficient of the cov.matrix is redefined `DCAFitterN::setBadCovPolicy(DCAFitterN::OverrideAnFlag);` continue fit with overridden cov.matrix but set the 
propagation failure flag (can be checked using the same `isPropagationFailure(int cand = 0)` method). +## Fit status +The fitter provides a fit status for each candidate, which can be retrieved using: +``` +FitStatus status = ft.getFitStatus(int cand = 0); +``` +The possible values are: +``` +enum FitStatus : uint8_t { // part of the DCAFitterN class + None, // no status set (should not be possible!) + + /* Good Conditions */ + Converged, // fit converged + MaxIter, // max iterations reached before fit convergence (can still be a good vertex) + + /* Error Conditions */ + NoCrossing, // no reasonable crossing was found + RejRadius, // radius of crossing was not acceptable + RejTrackX, // one candidate track x was below the minimum required radius + RejTrackRoughZ, // rejected by rough cut on tracks Z difference + RejChi2Max, // rejected by maximum chi2 cut + FailProp, // propagation of at least one prong to PCA failed + FailInvCov, // inversion of cov.-matrix failed + FailInvWeight, // inversion of Ti weight matrix failed + FailInv2ndDeriv, // inversion of 2nd derivatives failed + FailCorrTracks, // correction of tracks to updated x failed + FailCloserAlt, // alternative PCA is closer +}; +``` +This allows tracking where a candidate fit was abandoned. +``` +int nc = ft.process(tr0,tr1,tr2); +auto status = ft.getFitStatus(); +if (nc) { + // status can either be FitStatus::Converged or FitStatus::MaxIter +} +// status can be one of the error conditions +``` +A more thorough example is given in `testDCAFitterN.cxx`. 
From 6963217343a8c835231e1494eae7508ac1badb74 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 10 Apr 2025 10:59:24 +0200 Subject: [PATCH 0415/1914] Fix for TPC edge clusters in CTF decoding --- .../DataCompression/GPUTPCDecompressionKernels.cxx | 13 +++++++++++++ .../DataCompression/TPCClusterDecompressor.cxx | 13 +++++++++++++ GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 3 files changed, 27 insertions(+) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx index ee1a9c97cc30b..68e45f0c08c32 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx @@ -57,6 +57,19 @@ GPUdii() void GPUTPCDecompressionKernels::Thread= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[linearIndex]); TPCClusterDecompressionCore::decompressHits(cmprClusters, offsets[linearIndex], end, clout); + if (processors.param.rec.tpc.clustersEdgeFixDistance > 0.f) { + constexpr GPUTPCGeometry geo; + for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) { + auto& cluster = buffer[k]; + if (cluster.getFlags() & ClusterNative::flagEdge) { + auto padF = cluster.getPad(); + float distEdge = padF < geo.NPads(iRow) / 2 ? 
padF : geo.NPads(iRow) - 1 - padF; + if (distEdge > processors.param.rec.tpc.clustersEdgeFixDistance) { + cluster.setFlags(cluster.getFlags() ^ ClusterNative::flagEdge); + } + } + } + } if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) { for (uint32_t k = 0; k < outputAccess->nClusters[iSector][iRow]; k++) { auto& cl = buffer[k]; diff --git a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx index 296a203cf070b..cd1717faf178d 100644 --- a/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx +++ b/GPU/GPUTracking/DataCompression/TPCClusterDecompressor.cxx @@ -94,6 +94,19 @@ int32_t TPCClusterDecompressor::decompress(const CompressedClusters* clustersCom ClusterNative* clout = buffer + clusters[i][j].size(); uint32_t end = offsets[i][j] + ((i * GPUCA_ROW_COUNT + j >= clustersCompressed->nSliceRows) ? 0 : clustersCompressed->nSliceRowClusters[i * GPUCA_ROW_COUNT + j]); TPCClusterDecompressionCore::decompressHits(*clustersCompressed, offsets[i][j], end, clout); + if (param.rec.tpc.clustersEdgeFixDistance > 0.f) { + constexpr GPUTPCGeometry geo; + for (uint32_t k = 0; k < clustersNative.nClusters[i][j]; k++) { + auto& cluster = buffer[k]; + if (cluster.getFlags() & ClusterNative::flagEdge) { + auto padF = cluster.getPad(); + float distEdge = padF < geo.NPads(j) / 2 ? 
padF : geo.NPads(j) - 1 - padF; + if (distEdge > param.rec.tpc.clustersEdgeFixDistance) { + cluster.setFlags(cluster.getFlags() ^ ClusterNative::flagEdge); + } + } + } + } if (param.rec.tpc.clustersShiftTimebins != 0.f) { for (uint32_t k = 0; k < clustersNative.nClusters[i][j]; k++) { auto& cl = buffer[k]; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9d1772379f6bd..6858889f9a603 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -72,6 +72,7 @@ AddOptionRTC(tubeChi2, float, 5.f * 5.f, "", 0, "Max chi2 to mark cluster adjace AddOptionRTC(tubeMaxSize2, float, 2.5f * 2.5f, "", 0, "Square of max tube size (normally derrived from tpcTubeChi2)") AddOptionRTC(clustersShiftTimebins, float, 0, "", 0, "Shift of TPC clusters (applied during CTF cluster decoding)") AddOptionRTC(clustersShiftTimebinsClusterizer, float, 0, "", 0, "Shift of TPC clusters (applied during CTF clusterization)") +AddOptionRTC(clustersEdgeFixDistance, float, 0.f, "", 0, "If >0, revert cluster.flag edge bit distance to edge exceeds this parameter (fixed during CTF decoding)") AddOptionRTC(defaultZOffsetOverR, float, 0.5210953f, "", 0, "Shift of TPC clusters (applied during CTF cluster decoding)") AddOptionRTC(PID_EKrangeMin, float, 0.47f, "", 0, "min P of electron/K BB bands crossing") AddOptionRTC(PID_EKrangeMax, float, 0.57f, "", 0, "max P of electron/K BB bands crossing") From 186e9142adfd224096e1c84559a1a9d53c94bc87 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Sat, 19 Apr 2025 11:07:02 +0200 Subject: [PATCH 0416/1914] ctpdev: consistency checker fixes (#14202) * fix * clang --------- Co-authored-by: Roman Lietava --- Detectors/CTP/reconstruction/src/RawDataDecoder.cxx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 
faa3dbac3e934..fa7fd673c7e85 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -636,8 +636,13 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, for (auto const& digit : digits) { // if class mask => inps for (int i = 0; i < digit.CTPClassMask.size(); i++) { - if (digit.CTPClassMask[i]) { + if (digit.CTPClassMask[i] & trgclassmask) { const CTPClass* cls = mCTPConfig.getCTPClassFromHWIndex(i); + if (cls == nullptr) { + LOG(error) << "Class mask index not found in CTP config:" << i; + ret = 128; + continue; + } uint64_t clsinpmask = cls->descriptor->getInputsMask(); uint64_t diginpmask = digit.CTPInputMask.to_ullong(); if (!((clsinpmask & diginpmask) == clsinpmask)) { From fc8f84f350a60ef31a78e18310f1b1be84dd1137 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 18 Apr 2025 19:55:43 +0200 Subject: [PATCH 0417/1914] GPU RTC: Don't pass a third launch bounds parameter --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu index 571428dc39e21..3e4d3113fb995 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu @@ -15,7 +15,7 @@ #define GPUCA_GPUCODE_HOSTONLY #define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS -#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args)) +#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) #include "GPUReconstructionCUDAIncludesSystem.h" #include "GPUReconstructionCUDADef.h" From 1de28ad9206a381bcfc569c8f0c1675ada63db65 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 18 Apr 2025 22:51:43 +0200 Subject: [PATCH 0418/1914] GPU: Fix parameter that was forgotten when moving to if constexpr with GPUCA_PAR_... 
--- GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h | 3 +++ GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 4ee6b23d46b51..57ad9907ca86f 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -574,6 +574,9 @@ #ifndef GPUCA_PAR_COMP_GATHER_MODE #define GPUCA_PAR_COMP_GATHER_MODE 0 #endif + #ifndef GPUCA_PAR_NO_ATOMIC_PRECHECK + #define GPUCA_PAR_NO_ATOMIC_PRECHECK 0 + #endif #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE #define GPUCA_PAR_DEDX_STORAGE_TYPE float #endif diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 3b50bec45a41e..ddf01b586cd70 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -631,11 +631,11 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric for (uint32_t ih = hitFst; ih < hitLst; ih++) { int32_t id = idOffset + ids[ih]; GPUAtomic(uint32_t)* const weight = weights + id; -#if GPUCA_NO_ATOMIC_PRECHECK == 0 - if (myWeight <= *weight) { - continue; + if constexpr (GPUCA_PAR_NO_ATOMIC_PRECHECK == 0) { + if (myWeight <= *weight) { + continue; + } } -#endif const cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih); const float y = y0 + hh.x * stepY; const float z = z0 + hh.y * stepZ; From 8ffe16744647bf7792b3e04dfa9df8dbefcb8afd Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 19 Apr 2025 12:28:36 +0200 Subject: [PATCH 0419/1914] GPU: Cleanup, remove obsolete code --- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 1 - .../Base/cuda/GPUReconstructionCUDA.cu | 18 ---------- .../Base/cuda/GPUReconstructionCUDA.h | 1 - .../Base/cuda/GPUReconstructionCUDAKernels.cu | 5 --- .../Base/opencl/GPUReconstructionOCL.cl | 2 -- 
GPU/GPUTracking/Definitions/GPUDef.h | 6 ---- .../Definitions/GPUDefParametersDefaults.h | 1 - .../Definitions/GPUDefParametersWrapper.h | 5 --- GPU/GPUTracking/Global/GPUChain.h | 1 - .../Global/GPUChainTrackingSectorTracker.cxx | 11 ------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 8 ++--- GPU/GPUTracking/SectorTracker/GPUTPCTracker.h | 33 ++++--------------- .../SectorTracker/GPUTPCTrackingData.h | 8 +---- .../GPUTPCTrackletConstructor.cxx | 18 +++++----- .../SectorTracker/GPUTPCTrackletConstructor.h | 3 -- 15 files changed, 19 insertions(+), 102 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index 163b00c804d7f..dfd6176827484 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -112,7 +112,6 @@ class GPUReconstructionCPU : public GPUReconstructionKernels (size_t)deviceProp.maxTexture1DLinear) { - GPUError("Invalid maximum texture size of device: %ld < %ld\n", (int64_t)deviceProp.maxTexture1DLinear, (int64_t)(GPUCA_SECTOR_DATA_MEMORY * NSECTORS)); - return (1); - } -#endif #ifndef GPUCA_NO_CONSTANT_MEMORY if (gGPUConstantMemBufferSize > deviceProp.totalConstMem) { GPUError("Insufficient constant memory available on GPU %d < %d!", (int32_t)deviceProp.totalConstMem, (int32_t)gGPUConstantMemBufferSize); @@ -627,18 +621,6 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) } #ifndef __HIPCC__ // CUDA -int32_t GPUReconstructionCUDA::PrepareTextures() -{ -#ifdef GPUCA_USE_TEXTURES - cudaChannelFormatDesc channelDescu2 = cudaCreateChannelDesc(); - size_t offset; - GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu2, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu2, NSECTORS * GPUCA_SECTOR_DATA_MEMORY)); - cudaChannelFormatDesc channelDescu = cudaCreateChannelDesc(); - GPUChkErr(cudaBindTexture(&offset, &gAliTexRefu, mProcessorsShadow->tpcTrackers[0].Data().Memory(), &channelDescu, NSECTORS * 
GPUCA_SECTOR_DATA_MEMORY)); -#endif - return (0); -} - void GPUReconstructionCUDA::startGPUProfiling() { GPUChkErr(cudaProfilerStart()); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index b1a3a53a6a62f..106168ef961a5 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -79,7 +79,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels gAliTexRefu2; -texture gAliTexRefu; -#endif - #include "GPUReconstructionIncludesDeviceAll.h" #if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 10a425e4c76e8..ffdc34d6c9881 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -73,8 +73,6 @@ typedef signed char int8_t; #include "GPUConstantMem.h" #include "GPUReconstructionIncludesDeviceAll.h" -// if (gpu_mem != pTracker.GPUParametersConst()->gpumem) return; //TODO! - #define GPUCA_KRNL(...) 
GPUCA_KRNLGPU(__VA_ARGS__) #define GPUCA_CONSMEM_PTR GPUglobal() char *gpu_mem, GPUconstant() GPUConstantMem* pConstant, #define GPUCA_CONSMEM (*pConstant) diff --git a/GPU/GPUTracking/Definitions/GPUDef.h b/GPU/GPUTracking/Definitions/GPUDef.h index c77b9ce159306..69723813d384f 100644 --- a/GPU/GPUTracking/Definitions/GPUDef.h +++ b/GPU/GPUTracking/Definitions/GPUDef.h @@ -58,12 +58,6 @@ #define CA_SHARED_CACHE_REF(target, src, size, reftype, ref) GPUglobalref() const reftype* __restrict__ ref = src #endif -#ifdef GPUCA_TEXTURE_FETCH_CONSTRUCTOR - #define CA_TEXTURE_FETCH(type, texture, address, entry) tex1Dfetch(texture, ((char*) address - tracker.Data().GPUTextureBase()) / sizeof(type) + entry); -#else - #define CA_TEXTURE_FETCH(type, texture, address, entry) address[entry]; -#endif - #endif //GPUTPCDEF_H #ifdef GPUCA_CADEBUG diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index 57ad9907ca86f..19301ef2bef9f 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -267,7 +267,6 @@ #define GPUCA_PAR_COMP_GATHER_MODE 3 #define GPUCA_PAR_DEDX_STORAGE_TYPE uint16_t #define GPUCA_PAR_MERGER_INTERPOLATION_ERROR_TYPE half - // #define GPUCA_USE_TEXTURES #elif defined(GPUCA_GPUTYPE_OPENCL) #else #error GPU TYPE NOT SET diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h index b2c08d689aeb2..8a54ab2163eab 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h @@ -43,11 +43,6 @@ namespace o2::gpu // #define GPUCA_KERNEL_DEBUGGER_OUTPUT -// Derived parameters -#ifdef GPUCA_USE_TEXTURES - #define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache -#endif - } // namespace o2::gpu // clang-format on diff --git a/GPU/GPUTracking/Global/GPUChain.h 
b/GPU/GPUTracking/Global/GPUChain.h index 290ae32cafca8..c4dccb091fc95 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -216,7 +216,6 @@ class GPUChain inline GPUChain* GetNextChainInQueue() { return mRec->GetNextChainInQueue(); } - virtual int32_t PrepareTextures() { return 0; } virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; } template diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 64a9179baf0e6..962b0922eeecc 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -103,17 +103,6 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() int32_t streamInitAndOccMap = mRec->NStreams() - 1; if (doGPU) { - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - processorsShadow()->tpcTrackers[iSector].GPUParametersConst()->gpumem = (char*)mRec->DeviceMemoryBase(); - // Initialize Startup Constants - processors()->tpcTrackers[iSector].GPUParameters()->nextStartHit = (((getKernelProperties().minBlocks * BlockCount()) + NSECTORS - 1 - iSector) / NSECTORS) * getKernelProperties().nThreads; - processorsShadow()->tpcTrackers[iSector].SetGPUTextureBase(mRec->DeviceMemoryBase()); - } - - if (PrepareTextures()) { - return (2); - } - // Copy Tracker Object to GPU Memory if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Copying Tracker objects to GPU"); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index ddf01b586cd70..5bd8fd556aa3f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -580,10 +580,8 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric } const GPUTPCTracker& GPUrestrict() tracker = *(Merger -> GetConstantMem()->tpcTrackers + sector); const GPUTPCRow& 
GPUrestrict() row = tracker.Row(iRow); -#ifndef GPUCA_TEXTURE_FETCH_CONSTRUCTOR GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); -#endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR if (row.NHits() == 0) { return -1e6f; } @@ -626,8 +624,8 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric } for (int32_t k = 0; k <= nz; k++) { const int32_t mybin = bin + k * nBinsY; - const uint32_t hitFst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin); - const uint32_t hitLst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin + ny + 1); + const uint32_t hitFst = firsthit[mybin]; + const uint32_t hitLst = firsthit[mybin + ny + 1]; for (uint32_t ih = hitFst; ih < hitLst; ih++) { int32_t id = idOffset + ids[ih]; GPUAtomic(uint32_t)* const weight = weights + id; @@ -636,7 +634,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric continue; } } - const cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih); + const cahit2 hh = hits[ih]; const float y = y0 + hh.x * stepY; const float z = z0 + hh.y * stepZ; const float dy = y - uncorrectedY; diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h index e8aac872198f5..3bebdc4fa2b06 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTracker.h @@ -59,24 +59,14 @@ class GPUTPCTracker : public GPUProcessor void DumpTrackletHits(std::ostream& out); // Same for Track Hits #endif - struct StructGPUParameters { - GPUAtomic(uint32_t) nextStartHit; // Next Tracklet to process - }; - - struct StructGPUParametersConst { - GPUglobalref() char* gpumem; // Base pointer to GPU memory (Needed for OpenCL for verification) - }; - struct commonMemoryStruct { - commonMemoryStruct() : nStartHits(0), nTracklets(0), nRowHits(0), nTracks(0), nLocalTracks(0), nTrackHits(0), nLocalTrackHits(0), gpuParameters() {} - 
GPUAtomic(uint32_t) nStartHits; // number of start hits - GPUAtomic(uint32_t) nTracklets; // number of tracklets - GPUAtomic(uint32_t) nRowHits; // number of tracklet hits - GPUAtomic(uint32_t) nTracks; // number of reconstructed tracks - int32_t nLocalTracks; // number of reconstructed tracks before extrapolation tracking - GPUAtomic(uint32_t) nTrackHits; // number of track hits - int32_t nLocalTrackHits; // see above - StructGPUParameters gpuParameters; // GPU parameters + GPUAtomic(uint32_t) nStartHits = 0; // number of start hits + GPUAtomic(uint32_t) nTracklets = 0; // number of tracklets + GPUAtomic(uint32_t) nRowHits = 0; // number of tracklet hits + GPUAtomic(uint32_t) nTracks = 0; // number of reconstructed tracks + int32_t nLocalTracks = 0; // number of reconstructed tracks before extrapolation tracking + GPUAtomic(uint32_t) nTrackHits = 0; // number of track hits + int32_t nLocalTrackHits = 0; // see above }; GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const @@ -205,13 +195,6 @@ class GPUTPCTracker : public GPUProcessor GPUhd() GPUglobalref() GPUTPCRow* TrackingDataRows() const { return (mData.Rows()); } GPUhd() GPUglobalref() int32_t* RowStartHitCountOffset() const { return (mRowStartHitCountOffset); } - GPUhd() GPUglobalref() StructGPUParameters* GPUParameters() const { return (&mCommonMem->gpuParameters); } - GPUhd() StructGPUParametersConst* GPUParametersConst() - { - return (&mGPUParametersConst); - } - GPUhd() const StructGPUParametersConst* GetGPUParametersConst() const { return (&mGPUParametersConst); } - GPUhd() void SetGPUTextureBase(GPUglobalref() const void* val) { mData.SetGPUTextureBase(val); } struct trackSortData { int32_t fTtrack; // Track ID @@ -253,8 +236,6 @@ class GPUTPCTracker : public GPUProcessor GPUglobalref() GPUTPCHitId* mTrackletTmpStartHits = nullptr; // Unsorted start hits GPUglobalref() char* mGPUTrackletTemp = nullptr; // Temp Memory for GPU Tracklet Constructor - StructGPUParametersConst 
mGPUParametersConst; // Parameters for GPU if this is a GPU tracker - // event GPUglobalref() commonMemoryStruct* mCommonMem = nullptr; // common event memory GPUglobalref() GPUTPCHitId* mTrackletStartHits = nullptr; // start hits for the tracklets diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h index d7d5e76bc9d44..b08fbed4b319d 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackingData.h @@ -29,7 +29,7 @@ class GPUTPCHit; class GPUTPCTrackingData { public: - GPUTPCTrackingData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mGPUTextureBase(nullptr), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} + GPUTPCTrackingData() : mNumberOfHits(0), mNumberOfHitsPlusAlign(0), mClusterIdOffset(0), mRows(nullptr), mLinkUpData(nullptr), mLinkDownData(nullptr), mClusterData(nullptr) {} #ifndef GPUCA_GPUCODE_DEVICE ~GPUTPCTrackingData() = default; @@ -112,10 +112,6 @@ class GPUTPCTrackingData GPUhdi() GPUglobalref() GPUAtomic(uint32_t) * HitWeights() { return (mHitWeights); } - GPUhdi() void SetGPUTextureBase(GPUglobalref() const void* val) { mGPUTextureBase = val; } - GPUhdi() char* GPUTextureBase() const { return ((char*)mGPUTextureBase); } - GPUhdi() char* GPUTextureBaseConst() const { return ((char*)mGPUTextureBase); } - GPUhdi() GPUglobalref() const GPUTPCClusterData* ClusterData() const { return mClusterData; } private: @@ -135,8 +131,6 @@ class GPUTPCTrackingData int32_t mNumberOfHitsPlusAlign; int32_t mClusterIdOffset; - GPUglobalref() const void* mGPUTextureBase; // pointer to start of GPU texture - GPUglobalref() GPUTPCRow* mRows; // The row objects needed for most accessor functions GPUglobalref() calink* mLinkUpData; // hit index in the row above which is linked to the given (global) hit index diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx 
b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx index 71df683eee1dc..2660f6d8cbf44 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.cxx @@ -123,10 +123,10 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, break; // SG!!! - jump over the row } - cahit2 hh = CA_TEXTURE_FETCH(cahit22, gAliTexRefu2, tracker.HitData(row), r.mCurrIH); + cahit2 hh = tracker.HitData(row)[r.mCurrIH]; int32_t seedIH = r.mCurrIH; - r.mCurrIH = CA_TEXTURE_FETCH(calink, gAliTexRefs, tracker.HitLinkUpData(row), r.mCurrIH); + r.mCurrIH = tracker.HitLinkUpData(row)[r.mCurrIH]; float x = row.X(); float y = y0 + hh.x * stepY; @@ -282,10 +282,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, break; } -#ifndef GPUCA_TEXTURE_FETCH_CONSTRUCTOR GPUglobalref() const cahit2* hits = tracker.HitData(row); GPUglobalref() const calink* firsthit = tracker.FirstHitInBin(row); -#endif //! GPUCA_TEXTURE_FETCH_CONSTRUCTOR tracker.GetConstantMem()->calibObjects.fastTransformHelper->InverseTransformYZtoNominalYZ(tracker.ISector(), iRow, yUncorrected, zUncorrected, yUncorrected, zUncorrected); if (tracker.Param().rec.tpc.rejectEdgeClustersInSeeding && tracker.Param().rejectEdgeClusterByY(yUncorrected, iRow, CAMath::Sqrt(tParam.Err2Y()))) { @@ -318,14 +316,14 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, #endif int32_t nBinsY = row.Grid().Ny(); int32_t mybin = bin + k * nBinsY; - uint32_t hitFst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin); - uint32_t hitLst = CA_TEXTURE_FETCH(calink, gAliTexRefu, firsthit, mybin + ny + 1); + uint32_t hitFst = firsthit[mybin]; + uint32_t hitLst = firsthit[mybin + ny + 1]; #ifdef __HIPCC__ // Todo: fixme! 
for (uint32_t ih = hitFst - 1; ++ih < hitLst; /*ih++*/) { #else for (uint32_t ih = hitFst; ih < hitLst; ih++) { #endif - cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, ih); + cahit2 hh = hits[ih]; float y = y0 + hh.x * stepY; float z = z0 + hh.y * stepZ; float dy = y - yUncorrected; @@ -353,7 +351,7 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, } } - cahit2 hh = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits, best); + cahit2 hh = hits[best]; float y = y0 + hh.x * stepY + tParam.GetY() - yUncorrected; float z = z0 + hh.y * stepZ + tParam.GetZ() - zUncorrected; @@ -390,8 +388,8 @@ GPUdic(2, 1) void GPUTPCTrackletConstructor::UpdateTracklet(int32_t /*nBlocks*/, const GPUglobalref() GPUTPCRow& GPUrestrict() row2 = tracker.Row(r.mLastRow); GPUglobalref() const cahit2* hits1 = tracker.HitData(row1); GPUglobalref() const cahit2* hits2 = tracker.HitData(row2); - const cahit2 hh1 = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits1, rowHits[r.mFirstRow]); - const cahit2 hh2 = CA_TEXTURE_FETCH(cahit2, gAliTexRefu2, hits2, rowHits[r.mLastRow]); + const cahit2 hh1 = hits1[rowHits[r.mFirstRow]]; + const cahit2 hh2 = hits2[rowHits[r.mLastRow]]; const float z1 = row1.Grid().ZMin() + hh1.y * row1.HstepZ(); const float z2 = row2.Grid().ZMin() + hh2.y * row2.HstepZ(); float oldOffset = tParam.ZOffset(); diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h index af87d0276f1c7..031c32b2b4334 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackletConstructor.h @@ -63,9 +63,6 @@ class GPUTPCTrackletConstructor : public GPUKernelTemplate struct GPUSharedMemory { CA_SHARED_STORAGE(GPUTPCRow mRows[GPUCA_ROW_COUNT]); // rows - int32_t mNextStartHitFirst; // First start hit to be processed by CUDA block during next iteration - int32_t mNextStartHitCount; // Number of start hits to be processed by CUDA block 
during next iteration - int32_t mNextStartHitFirstRun; // First run for dynamic scheduler? int32_t mNStartHits; // Total number of start hits #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE From 497d53fdd0e4daaae9cf526a3c1988f8684728a9 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Sun, 20 Apr 2025 12:22:22 +0200 Subject: [PATCH 0420/1914] GPU stream implementation for ONNX runtime (#14117) * Initial set of bug-fixes and cosmetic changes * Please consider the following formatting changes * Adjusting eval sizes. Makes code neater and avoids some calculations * Adding separate functions. Now the host process only needs one instance and one initialization * First version of CCDB implementation * Working CCDB API calls (tested with test-ccdb) * Improve fetching, but have to pass settings by value, not const ref * Using const ref and moving CCDB calls to host initialization * Simplifications and renaming * Please consider the following formatting changes * First version of GPU stream implementation. Still needs testing. * Fixes * Please consider the following formatting changes * Adding the lane variable. This PR will in any case conflict with #14069 * Compiles on EPNs. Need to add shadow processors next. But for this, I will merge https://github.com/AliceO2Group/AliceO2/pull/14069 to have the changes in GPUChainTrackingClusterizer. * Adding shadow instance. Not sure if this correctly allocates GPU memory using AllocateRegisteredMemory * This runs, but will eventually fill up the VRAM. Need to include a mem clean * Found the stream allocation issue. Now starting optimizations * Improve readability and adapt for some comments * Fixing memory assignment issue.
Reconstruction runs through with FP32 networks * Major reworkings to add FP16 support * Bug-fixes * Improved data filling speeds by factor 3 * Limiting threads for ONNX evaluation * Bug-fix for correct thread assignment and input data filling * Minor changes * Adding I** inference, potentially needed for CNN + FC inference * CCDB fetching of NNs ported to GPUWorkflowSpec * Adjusting CPU threads and ORT compile definitions * About 10x speed-up due to explicit io binding * Changes for synchronization and consistency. No performance loss. * Please consider the following formatting changes * Fixing warnings (errors due to size_t) * Fixing linker issues * Adding volatile memory allocation and MockedOrtAllocator. Removing print statements and time measurements * Please consider the following formatting changes * Circumvent "unused result" warning and build failure * Adjust for comments * Please consider the following formatting changes * Fixing build flags --------- Co-authored-by: ALICE Action Bot --- Common/ML/CMakeLists.txt | 21 +- Common/ML/include/ML/3rdparty/GPUORTFloat16.h | 2 +- Common/ML/include/ML/OrtInterface.h | 86 +++- Common/ML/src/OrtInterface.cxx | 410 +++++++++++++----- Detectors/TPC/calibration/CMakeLists.txt | 2 + .../TPCCalibration/NeuralNetworkClusterizer.h | 38 ++ .../src/NeuralNetworkClusterizer.cxx | 48 ++ GPU/GPUTracking/Base/GPUReconstructionCPU.h | 8 + .../Base/GPUReconstructionProcessing.h | 5 + GPU/GPUTracking/Base/cuda/CMakeLists.txt | 6 + .../Base/cuda/GPUReconstructionCUDA.cu | 42 ++ .../Base/cuda/GPUReconstructionCUDA.h | 6 + GPU/GPUTracking/Base/hip/CMakeLists.txt | 6 + GPU/GPUTracking/CMakeLists.txt | 1 + .../Definitions/GPUDefParametersDefaults.h | 1 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 14 +- GPU/GPUTracking/Global/GPUChain.h | 1 + .../Global/GPUChainTrackingClusterizer.cxx | 250 ++++++++--- .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 78 +++- .../TPCClusterFinder/GPUTPCNNClusterizer.h | 33 +-
.../GPUTPCNNClusterizerHost.cxx | 191 +++++++- .../GPUTPCNNClusterizerHost.h | 39 +- .../GPUTPCNNClusterizerKernels.cxx | 393 ++++++++++------- .../GPUTPCNNClusterizerKernels.h | 10 +- GPU/GPUTracking/kernels.cmake | 1 + .../include/GPUWorkflow/GPUWorkflowSpec.h | 3 + GPU/Workflow/src/GPUWorkflowSpec.cxx | 45 ++ 27 files changed, 1301 insertions(+), 439 deletions(-) create mode 100644 Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h create mode 100644 Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index 540fe8ebf271c..2db91fc4f4320 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -9,21 +9,14 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. -# Pass ORT variables as a preprocessor definition -if(ORT_ROCM_BUILD) - add_compile_definitions(ORT_ROCM_BUILD=1) -endif() -if(ORT_CUDA_BUILD) - add_compile_definitions(ORT_CUDA_BUILD=1) -endif() -if(ORT_MIGRAPHX_BUILD) - add_compile_definitions(ORT_MIGRAPHX_BUILD=1) -endif() -if(ORT_TENSORRT_BUILD) - add_compile_definitions(ORT_TENSORRT_BUILD=1) -endif() - o2_add_library(ML SOURCES src/OrtInterface.cxx TARGETVARNAME targetName PRIVATE_LINK_LIBRARIES O2::Framework ONNXRuntime::ONNXRuntime) + +# Pass ORT variables as a preprocessor definition +target_compile_definitions(${targetName} PRIVATE + $<$:ORT_ROCM_BUILD> + $<$:ORT_CUDA_BUILD> + $<$:ORT_MIGRAPHX_BUILD> + $<$:ORT_TENSORRT_BUILD>) diff --git a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h index 76fd6734cf9db..9516ba5dad573 100644 --- a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h +++ b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h @@ -882,4 +882,4 @@ static_assert(sizeof(BFloat16_t) == sizeof(uint16_t), "Sizes must match"); } // namespace OrtDataType } // namespace o2 -#endif \ No newline at end of file +#endif diff --git 
a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index 93549178848ca..e37b6a69b6036 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -26,6 +26,13 @@ // O2 includes #include "Framework/Logger.h" +namespace Ort +{ +struct SessionOptions; +struct MemoryInfo; +struct Env; +} // namespace Ort + namespace o2 { @@ -36,14 +43,52 @@ class OrtModel { public: - // Constructor + // Constructors & destructors OrtModel() = default; - OrtModel(std::unordered_map optionsMap) { reset(optionsMap); } - void init(std::unordered_map optionsMap) { reset(optionsMap); } - void reset(std::unordered_map); + OrtModel(std::unordered_map optionsMap) { init(optionsMap); } + void init(std::unordered_map optionsMap) + { + initOptions(optionsMap); + initEnvironment(); + } + virtual ~OrtModel() = default; + + // General purpose + void initOptions(std::unordered_map optionsMap); + void initEnvironment(); + void initSession(); + void memoryOnDevice(int32_t = 0); bool isInitialized() { return mInitialized; } + void resetSession(); - virtual ~OrtModel() = default; + // Getters + std::vector> getNumInputNodes() const { return mInputShapes; } + std::vector> getNumOutputNodes() const { return mOutputShapes; } + std::vector getInputNames() const { return mInputNames; } + std::vector getOutputNames() const { return mOutputNames; } + Ort::SessionOptions* getSessionOptions(); + Ort::MemoryInfo* getMemoryInfo(); + Ort::Env* getEnv(); + int32_t getIntraOpNumThreads() const { return intraOpNumThreads; } + int32_t getInterOpNumThreads() const { return interOpNumThreads; } + + // Setters + void setDeviceId(int32_t id) { deviceId = id; } + void setIO(); + void setActiveThreads(int threads) { intraOpNumThreads = threads; } + void setIntraOpNumThreads(int threads) + { + if (deviceType == "CPU") { + intraOpNumThreads = threads; + } + } + void setInterOpNumThreads(int threads) + { + if (deviceType == "CPU") { + interOpNumThreads = threads; + } + } 
+ void setEnv(Ort::Env*); // Conversion template @@ -53,41 +98,36 @@ class OrtModel template // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h std::vector inference(std::vector&); - template // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h + template std::vector inference(std::vector>&); - template // class I is the input data type, e.g. float, class O is the output data type, e.g. OrtDataType::Float16_t from O2/Common/ML/include/ML/GPUORTFloat16.h - void inference(I*, size_t, O*); - - // template // class I is the input data type, e.g. float, class T the throughput data type and class O is the output data type - // std::vector inference(std::vector&); - - // Reset session - void resetSession(); + template + void inference(I*, int64_t, O*); - std::vector> getNumInputNodes() const { return mInputShapes; } - std::vector> getNumOutputNodes() const { return mOutputShapes; } - std::vector getInputNames() const { return mInputNames; } - std::vector getOutputNames() const { return mOutputNames; } + template + void inference(I**, int64_t, O*); - void setActiveThreads(int threads) { intraOpNumThreads = threads; } + void release(bool = false); private: - // ORT variables -> need to be hidden as Pimpl + // ORT variables -> need to be hidden as pImpl struct OrtVariables; OrtVariables* pImplOrt; // Input & Output specifications of the loaded network std::vector inputNamesChar, outputNamesChar; std::vector mInputNames, mOutputNames; - std::vector> mInputShapes, mOutputShapes; + std::vector> mInputShapes, mOutputShapes, inputShapesCopy, outputShapesCopy; // Input shapes + std::vector inputSizePerNode, outputSizePerNode; // Output shapes + int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs // Environment settings bool mInitialized = 
false; - std::string modelPath, device = "cpu", dtype = "float", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda - int intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = 0, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; + std::string modelPath, envName = "", deviceType = "CPU", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda + int32_t intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = -1, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; std::string printShape(const std::vector&); + std::string printShape(const std::vector>&, std::vector&); }; } // namespace ml diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index 88f548bd4fe7b..24a2fbffb252c 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -33,11 +33,12 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c Ort::SessionOptions sessionOptions; Ort::AllocatorWithDefaultOptions allocator; Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault); + std::unique_ptr ioBinding = nullptr; }; -void OrtModel::reset(std::unordered_map optionsMap) +// General purpose +void OrtModel::initOptions(std::unordered_map optionsMap) { - pImplOrt = new OrtVariables(); // Load from options map @@ -47,77 +48,60 @@ void OrtModel::reset(std::unordered_map optionsMap) if (!optionsMap["model-path"].empty()) { modelPath = optionsMap["model-path"]; - device = (optionsMap.contains("device") ? optionsMap["device"] : "CPU"); - dtype = (optionsMap.contains("dtype") ? optionsMap["dtype"] : "float"); - deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : 0); + deviceType = (optionsMap.contains("device-type") ? optionsMap["device-type"] : "CPU"); + deviceId = (optionsMap.contains("device-id") ? 
std::stoi(optionsMap["device-id"]) : -1); allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); loggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0); enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); - - std::string dev_mem_str = "Hip"; -#if defined(ORT_ROCM_BUILD) - if (device == "ROCM") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) ROCM execution provider set"; - } -#endif -#if defined(ORT_MIGRAPHX_BUILD) - if (device == "MIGRAPHX") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) MIGraphX execution provider set"; - } -#endif -#if defined(ORT_CUDA_BUILD) - if (device == "CUDA") { - Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, deviceId)); - LOG(info) << "(ORT) CUDA execution provider set"; - dev_mem_str = "Cuda"; + envName = (optionsMap.contains("onnx-environment-name") ? 
optionsMap["onnx-environment-name"] : "onnx_model_inference"); + + if (deviceType == "CPU") { + (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); + (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); + if (intraOpNumThreads > 1 || interOpNumThreads > 1) { + (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); + } else if (intraOpNumThreads == 1) { + (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); + } + if (loggingLevel < 2) { + LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; + } } -#endif - if (allocateDeviceMemory) { - pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceId, OrtMemType::OrtMemTypeDefault); - LOG(info) << "(ORT) Memory info set to on-device memory"; - } + // OrtROCMProviderOptions rocm_options{}; + // (pImplOrt->sessionOptions).AppendExecutionProvider_ROCM(rocm_options); - if (device == "CPU") { - (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); - (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); - if (intraOpNumThreads > 1 || interOpNumThreads > 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); - } else if (intraOpNumThreads == 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); - } - if (loggingLevel < 2) { - LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; - } - } - - (pImplOrt->sessionOptions).DisableMemPattern(); - (pImplOrt->sessionOptions).DisableCpuMemArena(); + (pImplOrt->sessionOptions).DisableMemPattern(); + (pImplOrt->sessionOptions).DisableCpuMemArena(); - if (enableProfiling) { - if (optionsMap.contains("profiling-output-path")) { - 
(pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); + if (enableProfiling) { + if (optionsMap.contains("profiling-output-path")) { + (pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); + } else { + LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now."; + (pImplOrt->sessionOptions).DisableProfiling(); + } } else { - LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. Disabling profiling for now."; (pImplOrt->sessionOptions).DisableProfiling(); } - } else { - (pImplOrt->sessionOptions).DisableProfiling(); - } - mInitialized = true; + (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); + (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); - (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); - (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); + mInitialized = true; + } else { + LOG(fatal) << "(ORT) Model path cannot be empty!"; + } +} +void OrtModel::initEnvironment() +{ pImplOrt->env = std::make_shared( OrtLoggingLevel(loggingLevel), - (optionsMap["onnx-environment-name"].empty() ? "onnx_model_inference" : optionsMap["onnx-environment-name"].c_str()), + (envName.empty() ? 
"ORT" : envName.c_str()), // Integrate ORT logging into Fairlogger [](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) { if (severity == ORT_LOGGING_LEVEL_VERBOSE) { @@ -136,31 +120,48 @@ void OrtModel::reset(std::unordered_map optionsMap) }, (void*)3); (pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events - pImplOrt->session = std::make_shared(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions); +} - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get()); - } - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); - } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get()); - } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); +void OrtModel::initSession() +{ + if (allocateDeviceMemory) { + memoryOnDevice(deviceId); } + pImplOrt->session = std::make_shared(*pImplOrt->env, modelPath.c_str(), pImplOrt->sessionOptions); + pImplOrt->ioBinding = std::make_unique(*pImplOrt->session); + + setIO(); - inputNamesChar.resize(mInputNames.size(), nullptr); - std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar), - [&](const std::string& str) { return str.c_str(); }); - outputNamesChar.resize(mOutputNames.size(), nullptr); - std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), - [&](const std::string& str) { return str.c_str(); }); - } if (loggingLevel < 2) { - LOG(info) << "(ORT) Model loaded successfully! 
(input: " << printShape(mInputShapes[0]) << ", output: " << printShape(mOutputShapes[0]) << ")"; + LOG(info) << "(ORT) Model loaded successfully! (inputs: " << printShape(mInputShapes, mInputNames) << ", outputs: " << printShape(mOutputShapes, mInputNames) << ")"; + } +} + +void OrtModel::memoryOnDevice(int32_t deviceIndex) +{ +#if (defined(ORT_ROCM_BUILD) || defined(ORT_MIGRAPHX_BUILD) || defined(ORT_CUDA_BUILD) || defined(ORT_TENSORRT_BUILD)) + if (deviceIndex >= 0) { + (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); + (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h + (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + (pImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + // Arena memory shrinkage comes at performance cost + /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; + // (pImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 + + 
std::string dev_mem_str = ""; + if (deviceType == "ROCM") { + dev_mem_str = "Hip"; + } + if (deviceType == "CUDA") { + dev_mem_str = "Cuda"; + } + pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault); + if (loggingLevel < 2) { + LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt; + } } +#endif } void OrtModel::resetSession() @@ -168,6 +169,22 @@ void OrtModel::resetSession() pImplOrt->session = std::make_shared(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions); } +// Getters +Ort::SessionOptions* OrtModel::getSessionOptions() +{ + return &pImplOrt->sessionOptions; +} + +Ort::MemoryInfo* OrtModel::getMemoryInfo() +{ + return &pImplOrt->memoryInfo; +} + +Ort::Env* OrtModel::getEnv() +{ + return (pImplOrt->env).get(); +} + template std::vector OrtModel::v2v(std::vector& input, bool clearInput) { @@ -183,20 +200,70 @@ std::vector OrtModel::v2v(std::vector& input, bool clearInput) } } -std::string OrtModel::printShape(const std::vector& v) +void OrtModel::setIO() { - std::stringstream ss(""); - for (size_t i = 0; i < v.size() - 1; i++) { - ss << v[i] << "x"; + for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { + mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get()); } - ss << v[v.size() - 1]; - return ss.str(); + for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { + mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + } + for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { + mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get()); + } + for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { + 
mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + } + + inputNamesChar.resize(mInputNames.size(), nullptr); + std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar), + [&](const std::string& str) { return str.c_str(); }); + outputNamesChar.resize(mOutputNames.size(), nullptr); + std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), + [&](const std::string& str) { return str.c_str(); }); + + inputShapesCopy = mInputShapes; + outputShapesCopy = mOutputShapes; + inputSizePerNode.resize(mInputShapes.size(), 1); + outputSizePerNode.resize(mOutputShapes.size(), 1); + mInputsTotal = 1; + for (size_t i = 0; i < mInputShapes.size(); ++i) { + if (mInputShapes[i].size() > 0) { + for (size_t j = 1; j < mInputShapes[i].size(); ++j) { + if (mInputShapes[i][j] > 0) { + mInputsTotal *= mInputShapes[i][j]; + inputSizePerNode[i] *= mInputShapes[i][j]; + } + } + } + } + mOutputsTotal = 1; + for (size_t i = 0; i < mOutputShapes.size(); ++i) { + if (mOutputShapes[i].size() > 0) { + for (size_t j = 1; j < mOutputShapes[i].size(); ++j) { + if (mOutputShapes[i][j] > 0) { + mOutputsTotal *= mOutputShapes[i][j]; + outputSizePerNode[i] *= mOutputShapes[i][j]; + } + } + } + } +} + +void OrtModel::setEnv(Ort::Env* env) +{ + pImplOrt->env = std::shared_ptr(env); } +// Inference template std::vector OrtModel::inference(std::vector& input) { - std::vector inputShape{(int64_t)(input.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + std::vector inputShape = mInputShapes[0]; + inputShape[0] = input.size(); + for (size_t i = 1; i < mInputShapes[0].size(); ++i) { + inputShape[0] /= mInputShapes[0][i]; + } std::vector inputTensor; if constexpr (std::is_same_v) { inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); @@ -212,51 +279,182 @@ std::vector 
OrtModel::inference(std::vector& input) } template std::vector OrtModel::inference(std::vector&); - template std::vector OrtModel::inference(std::vector&); - template std::vector OrtModel::inference(std::vector&); template -void OrtModel::inference(I* input, size_t input_size, O* output) +void OrtModel::inference(I* input, int64_t input_size, O* output) { - std::vector inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + // std::vector providers = Ort::GetAvailableProviders(); + // for (const auto& provider : providers) { + // LOG(info) << "Available Execution Provider: " << provider; + // } + std::vector inputShape{input_size, (int64_t)mInputShapes[0][1]}; Ort::Value inputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size, inputShape.data(), inputShape.size()); + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } else { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size, inputShape.data(), inputShape.size()); + inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } + (pImplOrt->ioBinding)->BindInput(mInputNames[0].c_str(), inputTensor); - std::vector outputShape{inputShape[0], mOutputShapes[0][1]}; - size_t outputSize = (int64_t)(input_size * mOutputShapes[0][1] / mInputShapes[0][1]); - Ort::Value outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size()); + std::vector outputShape{input_size, mOutputShapes[0][1]}; + Ort::Value outputTensor = Ort::Value(nullptr); + if constexpr (std::is_same_v) { + outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(output), input_size * mOutputShapes[0][1], outputShape.data(), 
outputShape.size()); + } else { + outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); + } + (pImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); - (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, outputNamesChar.size()); // TODO: Not sure if 1 is correct here + (pImplOrt->session)->Run(pImplOrt->runOptions, *pImplOrt->ioBinding); } -template void OrtModel::inference(OrtDataType::Float16_t*, size_t, float*); +template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, float*); +template void OrtModel::inference(float*, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(float*, int64_t, float*); + +template +void OrtModel::inference(I** input, int64_t input_size, O* output) +{ + std::vector inputTensors(inputShapesCopy.size()); + + for (size_t i = 0; i < inputShapesCopy.size(); ++i) { + + inputShapesCopy[i][0] = input_size; // batch-size + outputShapesCopy[i][0] = input_size; // batch-size + + if constexpr (std::is_same_v) { + inputTensors[i] = Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + reinterpret_cast(input[i]), + inputSizePerNode[i] * input_size, + inputShapesCopy[i].data(), + inputShapesCopy[i].size()); + } else { + inputTensors[i] = Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + input[i], + inputSizePerNode[i] * input_size, + inputShapesCopy[i].data(), + inputShapesCopy[i].size()); + } + } + + Ort::Value outputTensor = Ort::Value(nullptr); + if constexpr (std::is_same_v) { + outputTensor = Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + reinterpret_cast(output), + outputSizePerNode[0] * input_size, // assumes that there is only one output node + outputShapesCopy[0].data(), + outputShapesCopy[0].size()); + } else { + outputTensor = 
Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + output, + outputSizePerNode[0] * input_size, // assumes that there is only one output node + outputShapesCopy[0].data(), + outputShapesCopy[0].size()); + } + + // === Run inference === + pImplOrt->session->Run( + pImplOrt->runOptions, + inputNamesChar.data(), + inputTensors.data(), + inputNamesChar.size(), + outputNamesChar.data(), + &outputTensor, + outputNamesChar.size()); +} -template void OrtModel::inference(float*, size_t, float*); +template void OrtModel::inference(OrtDataType::Float16_t**, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(OrtDataType::Float16_t**, int64_t, float*); +template void OrtModel::inference(float**, int64_t, OrtDataType::Float16_t*); +template void OrtModel::inference(float**, int64_t, float*); template -std::vector OrtModel::inference(std::vector>& input) +std::vector OrtModel::inference(std::vector>& inputs) { - std::vector inputTensor; - for (auto i : input) { - std::vector inputShape{(int64_t)(i.size() / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]}; + std::vector input_tensors; + + for (size_t i = 0; i < inputs.size(); ++i) { + + inputShapesCopy[i][0] = inputs[i].size() / inputSizePerNode[i]; // batch-size + if constexpr (std::is_same_v) { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(i.data()), i.size(), inputShape.data(), inputShape.size())); + input_tensors.emplace_back( + Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + reinterpret_cast(inputs[i].data()), + inputSizePerNode[i] * inputShapesCopy[i][0], + inputShapesCopy[i].data(), + inputShapesCopy[i].size())); } else { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, i.data(), i.size(), inputShape.data(), inputShape.size())); + input_tensors.emplace_back( + Ort::Value::CreateTensor( + pImplOrt->memoryInfo, + inputs[i].data(), + inputSizePerNode[i] * inputShapesCopy[i][0], + inputShapesCopy[i].data(), + 
inputShapesCopy[i].size())); } } - // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); - O* outputValues = reinterpret_cast(outputTensors[0].template GetTensorMutableData()); - std::vector outputValuesVec{outputValues, outputValues + inputTensor.size() / mInputShapes[0][1] * mOutputShapes[0][1]}; - outputTensors.clear(); - return outputValuesVec; + + int32_t totalOutputSize = mOutputsTotal * inputShapesCopy[0][0]; + + // === Run inference === + auto output_tensors = pImplOrt->session->Run( + pImplOrt->runOptions, + inputNamesChar.data(), + input_tensors.data(), + input_tensors.size(), + outputNamesChar.data(), + outputNamesChar.size()); + + // === Extract output values === + O* output_data = output_tensors[0].template GetTensorMutableData(); + std::vector output_vec(output_data, output_data + totalOutputSize); + output_tensors.clear(); + return output_vec; +} + +template std::vector OrtModel::inference(std::vector>&); +template std::vector OrtModel::inference(std::vector>&); + +// Release session +void OrtModel::release(bool profilingEnabled) +{ + // if (profilingEnabled) { + // pImplOrt->session->EndProfiling(); + // } + LOG(info) << "(ORT) Size of pImplOrt: " << sizeof(*pImplOrt) << " bytes"; +} + +// private +std::string OrtModel::printShape(const std::vector& v) +{ + std::stringstream ss(""); + for (size_t i = 0; i < v.size() - 1; i++) { + ss << v[i] << "x"; + } + ss << v[v.size() - 1]; + return ss.str(); +} + +std::string OrtModel::printShape(const std::vector>& v, std::vector& n) +{ + std::stringstream ss(""); + for (size_t i = 0; i < v.size(); i++) { + ss << n[i] << " -> ("; + for (size_t j = 0; j < v[i].size() - 1; j++) { + ss << v[i][j] << "x"; + } + ss << v[i][v[i].size() - 1] << "); "; + } + return ss.str(); } } // namespace ml diff --git a/Detectors/TPC/calibration/CMakeLists.txt 
b/Detectors/TPC/calibration/CMakeLists.txt index 0ec62e5f323b3..7722fc4e2884f 100644 --- a/Detectors/TPC/calibration/CMakeLists.txt +++ b/Detectors/TPC/calibration/CMakeLists.txt @@ -25,6 +25,7 @@ o2_add_library(TPCCalibration src/CalibPadGainTracksBase.cxx src/CalibLaserTracks.cxx src/LaserTracksCalibrator.cxx + src/NeuralNetworkClusterizer.cxx src/SACDecoder.cxx src/IDCAverageGroup.cxx src/IDCAverageGroupBase.cxx @@ -82,6 +83,7 @@ o2_target_root_dictionary(TPCCalibration include/TPCCalibration/FastHisto.h include/TPCCalibration/CalibLaserTracks.h include/TPCCalibration/LaserTracksCalibrator.h + include/TPCCalibration/NeuralNetworkClusterizer.h include/TPCCalibration/SACDecoder.h include/TPCCalibration/IDCAverageGroup.h include/TPCCalibration/IDCAverageGroupBase.h diff --git a/Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h b/Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h new file mode 100644 index 0000000000000..196bba644714c --- /dev/null +++ b/Detectors/TPC/calibration/include/TPCCalibration/NeuralNetworkClusterizer.h @@ -0,0 +1,38 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file NeuralNetworkClusterizer.h +/// \brief Fetching neural networks for clusterization from CCDB +/// \author Christian Sonnabend + +#ifndef AliceO2_TPC_NeuralNetworkClusterizer_h +#define AliceO2_TPC_NeuralNetworkClusterizer_h + +#include "CCDB/CcdbApi.h" + +namespace o2::tpc +{ + +class NeuralNetworkClusterizer +{ + public: + NeuralNetworkClusterizer() = default; + void initCcdbApi(std::string url); + void loadIndividualFromCCDB(std::map settings); + + private: + o2::ccdb::CcdbApi ccdbApi; + std::map metadata; + std::map headers; +}; + +} // namespace o2::tpc +#endif diff --git a/Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx b/Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx new file mode 100644 index 0000000000000..bfbb7afc946f8 --- /dev/null +++ b/Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx @@ -0,0 +1,48 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file NeuralNetworkClusterizer.cxx +/// \brief Fetching neural networks for clusterization from CCDB +/// \author Christian Sonnabend + +#include +#include "TPCCalibration/NeuralNetworkClusterizer.h" + +using namespace o2::tpc; + +void NeuralNetworkClusterizer::initCcdbApi(std::string url) +{ + ccdbApi.init(url); +} + +void NeuralNetworkClusterizer::loadIndividualFromCCDB(std::map settings) +{ + metadata["inputDType"] = settings["inputDType"]; + metadata["outputDType"] = settings["outputDType"]; + metadata["nnCCDBEvalType"] = settings["nnCCDBEvalType"]; // classification_1C, classification_2C, regression_1C, regression_2C + metadata["nnCCDBWithMomentum"] = settings["nnCCDBWithMomentum"]; // 0, 1 -> Only for regression model + metadata["nnCCDBLayerType"] = settings["nnCCDBLayerType"]; // FC, CNN + if (settings["nnCCDBInteractionRate"] != "" && std::stoi(settings["nnCCDBInteractionRate"]) > 0) { + metadata["nnCCDBInteractionRate"] = settings["nnCCDBInteractionRate"]; + } + if (settings["nnCCDBBeamType"] != "") { + metadata["nnCCDBBeamType"] = settings["nnCCDBBeamType"]; + } + + bool retrieveSuccess = ccdbApi.retrieveBlob(settings["nnCCDBPath"], settings["outputFolder"], metadata, 1, false, settings["outputFile"]); + // headers = ccdbApi.retrieveHeaders(settings["nnPathCCDB"], metadata, 1); // potentially needed to init some local variables + + if (retrieveSuccess) { + LOG(info) << "Network " << settings["nnCCDBPath"] << " retrieved from CCDB, stored at " << settings["outputFile"]; + } else { + LOG(error) << "Failed to retrieve network from CCDB"; + } +} diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index dfd6176827484..b37bf2b75f01c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -24,6 +24,11 @@ #include "GPUReconstructionKernelIncludes.h" #include "GPUReconstructionKernels.h" +namespace Ort +{ +struct SessionOptions; +} + namespace o2::gpu { 
@@ -108,6 +113,9 @@ class GPUReconstructionCPU : public GPUReconstructionKernels #include +namespace Ort +{ +struct SessionOptions; +} + namespace o2::gpu { diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 3655eaf66055e..f595fb051db54 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -114,6 +114,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingCUDA + PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -121,6 +122,11 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) + target_compile_definitions(${targetName} PRIVATE + GPUCA_HAS_ONNX=1 + $<$:ORT_CUDA_BUILD> + $<$:ORT_TENSORRT_BUILD>) + install(FILES ${HDRS} DESTINATION include/GPU) endif() diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 3b5e257cc8000..fe2906caace80 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -37,6 +37,10 @@ #undef GPUCA_KRNL #endif +#ifdef GPUCA_HAS_ONNX +#include +#endif + static constexpr size_t REQUIRE_MIN_MEMORY = 1024L * 1024 * 1024; static constexpr size_t REQUIRE_MEMORY_RESERVED = 512L * 1024 * 1024; static constexpr size_t REQUIRE_FREE_MEMORY_RESERVED_PER_SM = 40L * 1024 * 1024; @@ -630,6 +634,29 @@ void GPUReconstructionCUDA::endGPUProfiling() { GPUChkErr(cudaProfilerStop()); } + +// Registers the CUDA execution provider on session_options, bound to the internal stream with index `stream`, and stores the current CUDA device in *deviceId; compiles to a no-op unless ORT_CUDA_BUILD is defined +void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) +{ +#ifdef ORT_CUDA_BUILD + GPUChkErr(cudaGetDevice(deviceId)); + const OrtApi& ortApi = Ort::GetApi(); // the provider-option helpers are OrtApi members, not free functions + OrtCUDAProviderOptionsV2* cuda_options = nullptr; + Ort::ThrowOnError(ortApi.CreateCUDAProviderOptions(&cuda_options)); + + // std::vector keys{"device_id", "gpu_mem_limit",
"arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"}; + // std::vector values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1", "1"}; + // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size()); + + // Setting "user_compute_stream" implicitly sets "has_user_compute_stream"; OrtCUDAProviderOptionsV2 is opaque, so its fields cannot be assigned directly + Ort::ThrowOnError(ortApi.UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream])); + session_options.AppendExecutionProvider_CUDA_V2(*cuda_options); + + // Finally, don't forget to release the provider options + ortApi.ReleaseCUDAProviderOptions(cuda_options); +#endif // ORT_CUDA_BUILD +} + #else // HIP void* GPUReconstructionHIP::getGPUPointer(void* ptr) { @@ -637,6 +664,23 @@ void* GPUReconstructionHIP::getGPUPointer(void* ptr) GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); return retVal; } + +// Registers the ROCm execution provider on session_options, bound to the internal stream with index `stream`, and stores the current device in *deviceId; compiles to a no-op unless ORT_ROCM_BUILD is defined +void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) +{ +#ifdef ORT_ROCM_BUILD + GPUChkErr(hipGetDevice(deviceId)); + // const auto& api = Ort::GetApi(); + // api.GetCurrentGpuDeviceId(deviceId); + // Create ROCm provider options + OrtROCMProviderOptions rocm_options; + rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream + rocm_options.arena_extend_strategy = 0; // kNextPowerOfTwo = 0, kSameAsRequested = 1 -> https://github.com/search?q=repo%3Amicrosoft%2Fonnxruntime%20kSameAsRequested&type=code + // rocm_options.gpu_mem_limit = 1073741824; // 0 means no limit + rocm_options.user_compute_stream = mInternals->Streams[stream]; + session_options.AppendExecutionProvider_ROCM(rocm_options); +#endif // ORT_ROCM_BUILD +} #endif // __HIPCC__ namespace o2::gpu diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 106168ef961a5..2fc4d14bba491 100644 ---
a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -25,6 +25,11 @@ extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_C extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend& cfg); #endif +namespace Ort +{ +struct SessionOptions; +} + namespace o2::gpu { struct GPUReconstructionCUDAInternals; @@ -74,6 +79,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels* trackerTraits, std::unique_ptr* vertexerTraits, std::unique_ptr* timeFrame) override; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 3a03a054d4a7e..d7adb222d547b 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -162,6 +162,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingHIP + PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -169,6 +170,11 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) + target_compile_definitions(${targetName} PRIVATE + GPUCA_HAS_ONNX=1 + $<$:ORT_ROCM_BUILD> + $<$:ORT_MIGRAPHX_BUILD>) + install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index eaeec508ff27a..e82799b9e59c3 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -341,6 +341,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML + PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h 
index 19301ef2bef9f..a56fb97771fe5 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -492,6 +492,7 @@ #define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels + #define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNNSingleElement GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 6858889f9a603..1106f96ed1cb2 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -248,7 +248,8 @@ AddOption(applyNNclusterizer, int, 0, "", 0, "(bool, default = 0), if the neural AddOption(nnInferenceDevice, std::string, "CPU", "", 0, "(std::string) Specify inference device (cpu (default), rocm, cuda)") AddOption(nnInferenceDeviceId, unsigned int, 0, "", 0, "(unsigned int) Specify inference device id") AddOption(nnInferenceAllocateDevMem, int, 0, "", 0, "(bool, default = 0), if the device memory should be allocated for inference") -AddOption(nnInferenceDtype, std::string, "fp32", "", 0, "(std::string) Specify the datatype for which inference is performed (fp32: default, fp16)") // fp32 or fp16 +AddOption(nnInferenceInputDType, std::string, "FP32", "", 0, "(std::string) Specify the datatype of the network input data used for inference (FP32: default, FP16)") // fp32 or fp16 +AddOption(nnInferenceOutputDType, std::string, "FP32", "", 0, "(std::string) Specify the datatype of the network output data produced by inference (FP32: default, FP16)") //
fp32 or fp16 AddOption(nnInferenceIntraOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetIntraOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") AddOption(nnInferenceInterOpNumThreads, int, 1, "", 0, "Number of threads used to evaluate one neural network (ONNX: SetInterOpNumThreads). 0 = auto-detect, can lead to problems on SLURM systems.") AddOption(nnInferenceEnableOrtOptimization, unsigned int, 99, "", 0, "Enables graph optimizations in ONNX Runtime. Can be [0, 1, 2, 99] -> see https://github.com/microsoft/onnxruntime/blob/3f71d637a83dc3540753a8bb06740f67e926dc13/include/onnxruntime/core/session/onnxruntime_c_api.h#L347") @@ -269,6 +270,17 @@ AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The c AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.") AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path") AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).") +AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatenation of modes, e.g.
c1:r1 (classification class 1, regression class 1)") +// CCDB +AddOption(nnLoadFromCCDB, int, 1, "", 0, "If 1 networks are fetched from ccdb, else locally") +AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched") +AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched") +AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks") +AddOption(nnCCDBWithMomentum, int, 1, "", 0, "Distinguishes between the network with and without momentum output for the regression") +AddOption(nnCCDBClassificationLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN") +AddOption(nnCCDBRegressionLayerType, std::string, "CNN", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN") +AddOption(nnCCDBBeamType, std::string, "PbPb", "", 0, "Distinguishes between networks trained for different beam types. 
Options: PbPb, pp") +AddOption(nnCCDBInteractionRate, int, 50, "", 0, "Distinguishes between networks for different interaction rates [kHz].") AddHelp("help", 'h') EndConfig() diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index c4dccb091fc95..1e99e3b73736f 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -83,6 +83,7 @@ class GPUChain inline GPUParam& param() { return mRec->param(); } inline const GPUConstantMem* processors() const { return mRec->processors(); } inline void SynchronizeStream(int32_t stream) { mRec->SynchronizeStream(stream); } + inline void SetONNXGPUStream(Ort::SessionOptions& opt, int32_t stream, int32_t* deviceId) { mRec->SetONNXGPUStream(opt, stream, deviceId); } inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); } inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true) { diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 4047dcae0a6b3..7db0ba66305e9 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -611,49 +611,89 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) RunTPCClusterizer_prepare(true); // Restore some pointers, allocated by the other pipeline, and set to 0 by SetupGPUProcessor (since not allocated in this pipeline) } + if (doGPU && mIOPtrs.tpcZS) { + processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta; + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); + } + if (doGPU) { + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * 
NSECTORS, mRec->NStreams() - 1, &mEvents->init); + } + #ifdef GPUCA_HAS_ONNX + const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; + GPUTPCNNClusterizerHost nnApplications[GetProcessingSettings().nTPCClustererLanes]; + if (GetProcessingSettings().nn.applyNNclusterizer) { - uint32_t maxClusters = -1; - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters); - } - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { - GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; - const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; - clustererNN.nnClusterizerUseCfRegression = nn_settings.nnClusterizerUseCfRegression; - clustererNN.nnClusterizerSizeInputRow = nn_settings.nnClusterizerSizeInputRow; - clustererNN.nnClusterizerSizeInputPad = nn_settings.nnClusterizerSizeInputPad; - clustererNN.nnClusterizerSizeInputTime = nn_settings.nnClusterizerSizeInputTime; - clustererNN.nnClusterizerAddIndexData = nn_settings.nnClusterizerAddIndexData; - clustererNN.nnClusterizerElementSize = ((2 * nn_settings.nnClusterizerSizeInputRow + 1) * (2 * nn_settings.nnClusterizerSizeInputPad + 1) * (2 * nn_settings.nnClusterizerSizeInputTime + 1)) + (nn_settings.nnClusterizerAddIndexData ? 
3 : 0); - clustererNN.nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode; - clustererNN.nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue; - clustererNN.nnClusterizerTotalClusters = maxClusters; - clustererNN.nnClassThreshold = nn_settings.nnClassThreshold; - clustererNN.nnSigmoidTrafoClassThreshold = nn_settings.nnSigmoidTrafoClassThreshold; - if (clustererNN.nnSigmoidTrafoClassThreshold) { - clustererNN.nnClassThreshold = (float)std::log(clustererNN.nnClassThreshold / (1.f - clustererNN.nnClassThreshold)); + int32_t deviceId = -1; + int32_t numLanes = GetProcessingSettings().nTPCClustererLanes; + int32_t maxThreads = mRec->getNKernelHostThreads(true); + // bool recreateMemoryAllocator = false; + mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) { + nnApplications[lane].init(nn_settings); + if (nnApplications[lane].modelsUsed[0]) { + SetONNXGPUStream(*(nnApplications[lane].model_class).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].model_class).setDeviceId(deviceId); + if (nnApplications[lane].model_class.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].model_class.setIntraOpNumThreads(maxThreads); + } + (nnApplications[lane].model_class).initEnvironment(); + // Registering this once seems to be enough, even with different environmnents / models. ONNX apparently uses this per device and stores the OrtAllocator internally. All models will then use the volatile allocation. + // But environment must be valid, so we init the model environment first and use it here afterwards. + // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator. 
+ // TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); + // recreateMemoryAllocator = true; + (nnApplications[lane].model_class).initSession(); } - if (nn_settings.nnClusterizerVerbosity < 0) { - clustererNN.nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity; - } else { - clustererNN.nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity; + if (nnApplications[lane].modelsUsed[1]) { + SetONNXGPUStream(*(nnApplications[lane].model_reg_1).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].model_reg_1).setDeviceId(deviceId); + if (nnApplications[lane].model_reg_1.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].model_reg_1.setIntraOpNumThreads(maxThreads); + } + // (nnApplications[lane].model_reg_1).setEnv((nnApplications[lane].model_class).getEnv()); + (nnApplications[lane].model_reg_1).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_reg_1).getEnv(), (nnApplications[lane].model_reg_1).getMemoryInfo(), mRec, recreateMemoryAllocator); + (nnApplications[lane].model_reg_1).initSession(); + } + if (nnApplications[lane].modelsUsed[2]) { + SetONNXGPUStream(*(nnApplications[lane].model_reg_2).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].model_reg_2).setDeviceId(deviceId); + if (nnApplications[lane].model_reg_2.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].model_reg_2.setIntraOpNumThreads(maxThreads); + } + (nnApplications[lane].model_reg_2).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); + (nnApplications[lane].model_reg_2).initSession(); + } + if (nn_settings.nnClusterizerVerbosity < 3) { 
+ LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId; + } + }); + mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t sector) { + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector]; + GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN; + int32_t lane = sector % numLanes; + clustererNN.deviceId = deviceId; + clustererNN.mISector = sector; + clustererNN.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + nnApplications[lane].initClusterizer(nn_settings, clustererNN); + if (doGPU) { + clustererNNShadow.deviceId = deviceId; + clustererNNShadow.mISector = sector; + clustererNNShadow.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow); } - clustererNN.nnClusterizerDtype = nn_settings.nnInferenceDtype.find("32") != std::string::npos; - GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); AllocateRegisteredMemory(clustererNN.mMemoryId); + }); + if (doGPU) { + WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); } + LOG(info) << "Size of nnApplications[lane]: " << sizeof(nnApplications[0]) << " bytes"; + LOG(info) << "Size of nnApplications: " << sizeof(GPUTPCNNClusterizerHost) * GetProcessingSettings().nTPCClustererLanes << " bytes"; } #endif - if (doGPU && mIOPtrs.tpcZS) { - processorsShadow()->ioPtrs.tpcZS = mInputsShadow->mPzsMeta; - WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->ioPtrs - (char*)processors(), &processorsShadow()->ioPtrs, sizeof(processorsShadow()->ioPtrs), mRec->NStreams() - 1); - } - if (doGPU) { - WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)processors()->tpcClusterer - (char*)processors(), 
processorsShadow()->tpcClusterer, sizeof(GPUTPCClusterFinder) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); - } - size_t nClsTotal = 0; ClusterNativeAccess* tmpNativeAccess = mClusterNativeAccess.get(); ClusterNative* tmpNativeClusters = nullptr; @@ -914,58 +954,122 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (GetProcessingSettings().nn.applyNNclusterizer) { #ifdef GPUCA_HAS_ONNX - GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector]; - const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn; - GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN); + GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[lane]; + GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; + GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; + + int withMC = (doGPU && propagateMCLabels); - if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { + if (clustererNNShadow.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } - float time_clusterizer = 0, time_fill = 0; - for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNN.nnClusterizerBatchedMode); batch++) { - uint batchStart = batch * clustererNN.nnClusterizerBatchedMode; - size_t iSize = CAMath::Min((uint)clustererNN.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); + float time_clusterizer = 0, time_fill = 0, time_networks = 0; + for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) { + uint batchStart = batch * 
clustererNNShadow.nnClusterizerBatchedMode; + size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); auto start0 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Filling the data + runKernel({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data - auto stop0 = std::chrono::high_resolution_clock::now(); - auto start1 = std::chrono::high_resolution_clock::now(); - nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNN.modelProbabilities, clustererNN.nnClusterizerDtype); + // auto stop0 = std::chrono::high_resolution_clock::now(); + // auto start1 = std::chrono::high_resolution_clock::now(); + + // NN evaluations + if (clustererNNShadow.nnInferenceInputDType == 0) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_32); + } + } else if (clustererNNShadow.nnInferenceInputDType == 1) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_32); + } + } + if (!clustererNNShadow.nnClusterizerUseCfRegression) { + if (clustererNNShadow.nnInferenceInputDType == 0) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + 
(nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_32); + } + } else if (clustererNNShadow.nnInferenceInputDType == 1) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_32); + } + } + if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { + if (clustererNNShadow.nnInferenceInputDType == 0) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_32); + } + } else if (clustererNNShadow.nnInferenceInputDType == 1) { + if (clustererNNShadow.nnInferenceOutputDType == 0) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_16); + } else if (clustererNNShadow.nnInferenceOutputDType == 1) { + (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_32); + } + } + } + } + + // auto stopNNs = std::chrono::high_resolution_clock::now(); + + // Publishing kernels if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), 
krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels } else { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels } - - if (!clustererNN.nnClusterizerUseCfRegression) { - nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnClusterizerDtype); - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 1 - if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) { - nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnClusterizerDtype); - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 2 + if (!clustererNNShadow.nnClusterizerUseCfRegression) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 1 regression results + if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results } } - auto stop1 = std::chrono::high_resolution_clock::now(); - time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; - time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; + // for(int i = 0; i < iSize; ++i) { + // if(clustererNNShadow.outputDataClass[i + batchStart] > 1) { + // LOG(info) 
<< "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.modelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.outputDataClass[i + batchStart] << " thresh " << clustererNNShadow.nnClassThreshold << " instead of 0 or 1. Please check the model and the input data."; + // // std::string input = "["; + // // for(int j = 0; j < clustererNNShadow.nnClusterizerElementSize; j++){ + // // input += std::to_string(clustererNNShadow.inputData_16[i * clustererNNShadow.nnClusterizerElementSize + j].ToFloat()) + ", "; + // // } + // // input += "]"; + // // LOG(info) << "Input is: " << input; + // } + // } + + // auto stop1 = std::chrono::high_resolution_clock::now(); + + // time_networks += std::chrono::duration_cast(stopNNs - start1).count() / 1e9; + // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; + // time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; } - auto start1 = std::chrono::high_resolution_clock::now(); - if (clustererNN.nnClusterizerUseCfRegression) { - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 - } - auto stop1 = std::chrono::high_resolution_clock::now(); - time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; - if (clustererNN.nnClusterizerVerbosity < 3) { - int acceptedClusters = 0; - for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { - acceptedClusters += clustererNN.outputDataClass[i]; - } - LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. 
--> " << clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; + if (clustererNNShadow.nnClusterizerUseCfRegression) { + // auto start1 = std::chrono::high_resolution_clock::now(); + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 + // auto stop1 = std::chrono::high_resolution_clock::now(); + // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; } + // if (clustererNNShadow.nnClusterizerVerbosity < 3) { + // int acceptedClusters = 0; + // for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { + // if(clustererNNShadow.outputDataClass[i] > 1 || clustererNNShadow.outputDataClass[i] < 0) { + // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.outputDataClass[i] << " for index " << i << " / " << clusterer.mPmemory->counters.nClusters; + // } + // acceptedClusters += clustererNNShadow.outputDataClass[i]; + // } + // LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; networks: " << time_networks << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << (int32_t)clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; + // } #else GPUFatal("Project not compiled with neural network clusterization. 
Aborting."); #endif @@ -1066,6 +1170,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { + // if (GetProcessingSettings().nn.applyNNclusterizer) { + // GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; + // nnApplication.model_class.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.model_reg_1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.model_reg_2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // } if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx index 6a9b6f546ae07..fe3202fe7b439 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -24,29 +24,73 @@ void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {} void* GPUTPCNNClusterizer::setIOPointers(void* mem) { - if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData16, nnClusterizerBatchedMode * nnClusterizerElementSize); - } else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData32, nnClusterizerBatchedMode * nnClusterizerElementSize); - } - computePointerWithAlignment(mem, peakPositions, nnClusterizerBatchedMode); - computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); - computePointerWithAlignment(mem, centralCharges, nnClusterizerBatchedMode); - computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); - if (nnClusterizerModelClassNumOutputNodes > 0) { - computePointerWithAlignment(mem, modelProbabilities, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); - } - if (!nnClusterizerUseCfRegression) { - if 
(nnClusterizerModelReg1NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg1, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + if (nnClusterizerBatchedMode > 0) { + if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData_16, nnClusterizerBatchedMode * nnClusterizerElementSize); + } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, inputData_32, nnClusterizerBatchedMode * nnClusterizerElementSize); } - if (nnClusterizerModelReg2NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg2, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); + + if (nnInferenceOutputDType == 0 && nnClusterizerElementSize > 0) { + if (nnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, modelProbabilities_16, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } + if (!nnClusterizerUseCfRegression) { + if (nnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg1_16, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + } + if (nnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg2_16, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + } + } + } else if (nnInferenceOutputDType == 1 && nnClusterizerElementSize > 0) { + if (nnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, modelProbabilities_32, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } + if (!nnClusterizerUseCfRegression) { + if (nnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, outputDataReg1_32, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + } + if (nnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, 
outputDataReg2_32, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + } + } } } + if (nnClusterizerTotalClusters > 0) { + computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); + } return mem; } +// std::vector GPUTPCNNClusterizer::pointerSizes() { +// std::vector sizes(7, -1); +// if (nnClusterizerBatchedMode > 0) { +// if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { +// sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16 +// } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { +// sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32 +// } +// sizes[2] = 2 * nnClusterizerBatchedMode; // clusterFlags +// if (nnClusterizerModelClassNumOutputNodes > 0) { +// sizes[3] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities +// } +// if (!nnClusterizerUseCfRegression) { +// if (nnClusterizerModelReg1NumOutputNodes > 0) { +// sizes[4] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1 +// } +// if (nnClusterizerModelReg2NumOutputNodes > 0) { +// sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2 +// } +// } +// } +// if (nnClusterizerTotalClusters > 0) { +// sizes[6] = nnClusterizerTotalClusters; // outputDataClass +// } +// return sizes; +// } + void GPUTPCNNClusterizer::RegisterMemoryAllocation() { AllocateAndInitializeLate(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h index ea6340dfd48bc..da490b0f94d58 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h @@ -42,33 +42,38 @@ class GPUTPCNNClusterizer : public GPUProcessor int nnClusterizerSizeInputTime = 3; int nnClusterizerElementSize = -1; bool nnClusterizerAddIndexData = true; - float nnClassThreshold = 0.16; + float 
nnClassThreshold = 0.01; bool nnSigmoidTrafoClassThreshold = 1; int nnClusterizerUseCfRegression = 0; int nnClusterizerBatchedMode = 1; int nnClusterizerTotalClusters = 1; int nnClusterizerVerbosity = 0; int nnClusterizerBoundaryFillValue = -1; - int nnClusterizerDumpDigits = 0; - int nnClusterizerApplyCfDeconvolution = 0; int nnClusterizerModelClassNumOutputNodes = -1; int nnClusterizerModelReg1NumOutputNodes = -1; int nnClusterizerModelReg2NumOutputNodes = -1; - int nnClusterizerDtype = 0; // 0: float16, 1: float32 + int nnInferenceInputDType = 0; // 0: float16, 1: float32 + int nnInferenceOutputDType = 0; // 0: float16, 1: float32 int mISector = -1; + int deviceId = -1; // Memory allocation for neural network - uint class2_elements = 0; - float* inputData32 = nullptr; - OrtDataType::Float16_t* inputData16 = nullptr; - float* outputDataClass = nullptr; - float* modelProbabilities = nullptr; - float* outputDataReg1 = nullptr; - float* outputDataReg2 = nullptr; - ChargePos* peakPositions = nullptr; - bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptrx - float* centralCharges = nullptr; + bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. 
Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptr + int* outputDataClass = nullptr; + + // FP32 + float* inputData_32 = nullptr; + float* modelProbabilities_32 = nullptr; + float* outputDataReg1_32 = nullptr; + float* outputDataReg2_32 = nullptr; + + // FP16 + OrtDataType::Float16_t* inputData_16 = nullptr; + OrtDataType::Float16_t* modelProbabilities_16 = nullptr; + OrtDataType::Float16_t* outputDataReg1_16 = nullptr; + OrtDataType::Float16_t* outputDataReg2_16 = nullptr; + int16_t mMemoryId = -1; }; // class GPUTPCNNClusterizer diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index 5002c63524020..db2f05711f537 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -12,54 +12,205 @@ /// \file GPUTPCNNClusterizerHost.cxx /// \author Christian Sonnabend +#include + #include "GPUTPCNNClusterizerHost.h" #include "GPUTPCNNClusterizer.h" #include "GPUSettings.h" #include "ML/3rdparty/GPUORTFloat16.h" +#include "GPUReconstruction.h" + +#ifdef GPUCA_HAS_ONNX +#include +#endif using namespace o2::gpu; -GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clusterer) +void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings) { + std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath; + std::vector reg_model_paths; + std::vector evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':'); + + if (settings.nnLoadFromCCDB) { + reg_model_path = settings.nnLocalFolder + "/net_regression_c1.onnx"; // Needs to be set identical to NeuralNetworkClusterizer.cxx, otherwise the networks might be loaded from the wrong place + if (evalMode[0] == "c1") { + class_model_path = settings.nnLocalFolder + "/net_classification_c1.onnx"; + } else 
if (evalMode[0] == "c2") { + class_model_path = settings.nnLocalFolder + "/net_classification_c2.onnx"; + } + + if (evalMode[1] == "r2") { + reg_model_path += ":" + settings.nnLocalFolder + "/net_regression_c2.onnx"; + } + } + OrtOptions = { - {"model-path", settings.nnClassificationPath}, - {"device", settings.nnInferenceDevice}, - {"device-id", std::to_string(settings.nnInferenceDeviceId)}, + {"model-path", class_model_path}, + {"device-type", settings.nnInferenceDevice}, {"allocate-device-memory", std::to_string(settings.nnInferenceAllocateDevMem)}, - {"dtype", settings.nnInferenceDtype}, {"intra-op-num-threads", std::to_string(settings.nnInferenceIntraOpNumThreads)}, {"inter-op-num-threads", std::to_string(settings.nnInferenceInterOpNumThreads)}, {"enable-optimizations", std::to_string(settings.nnInferenceEnableOrtOptimization)}, {"enable-profiling", std::to_string(settings.nnInferenceOrtProfiling)}, {"profiling-output-path", settings.nnInferenceOrtProfilingPath}, - {"logging-level", std::to_string(settings.nnInferenceVerbosity)}}; + {"logging-level", std::to_string(settings.nnInferenceVerbosity)}, + {"onnx-environment-name", "c1"}}; - model_class.init(OrtOptions); - clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + model_class.initOptions(OrtOptions); + modelsUsed[0] = true; - reg_model_paths = splitString(settings.nnRegressionPath, ":"); + reg_model_paths = o2::utils::Str::tokenize(reg_model_path, ':'); if (!settings.nnClusterizerUseCfRegression) { - if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) { + if (reg_model_paths.size() == 1) { OrtOptions["model-path"] = reg_model_paths[0]; - model_reg_1.init(OrtOptions); - clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + OrtOptions["onnx-environment-name"] = "r1"; + model_reg_1.initOptions(OrtOptions); + modelsUsed[1] = true; } else { OrtOptions["model-path"] = reg_model_paths[0]; - 
model_reg_1.init(OrtOptions); - clusterer.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + OrtOptions["onnx-environment-name"] = "r1"; + model_reg_1.initOptions(OrtOptions); + modelsUsed[1] = true; OrtOptions["model-path"] = reg_model_paths[1]; - model_reg_2.init(OrtOptions); - clusterer.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + OrtOptions["onnx-environment-name"] = "r2"; + model_reg_2.initOptions(OrtOptions); + modelsUsed[2] = true; } } } -void GPUTPCNNClusterizerHost::networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype) +void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN) { - if (dtype == 0) { - model.inference(clusterer.inputData16, size * clusterer.nnClusterizerElementSize, output); + clustererNN.nnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression; + clustererNN.nnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow; + clustererNN.nnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad; + clustererNN.nnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime; + clustererNN.nnClusterizerAddIndexData = settings.nnClusterizerAddIndexData; + clustererNN.nnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 
3 : 0); + clustererNN.nnClusterizerBatchedMode = settings.nnClusterizerBatchedMode; + clustererNN.nnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue; + clustererNN.nnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold; + if (clustererNN.nnSigmoidTrafoClassThreshold) { + clustererNN.nnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold)); + } else { + clustererNN.nnClassThreshold = settings.nnClassThreshold; + } + if (settings.nnClusterizerVerbosity < 0) { + clustererNN.nnClusterizerVerbosity = settings.nnInferenceVerbosity; } else { - model.inference(clusterer.inputData32, size * clusterer.nnClusterizerElementSize, output); + clustererNN.nnClusterizerVerbosity = settings.nnClusterizerVerbosity; + } + clustererNN.nnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos; + clustererNN.nnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos; + clustererNN.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + if (!settings.nnClusterizerUseCfRegression) { + if (model_class.getNumOutputNodes()[0][1] == 1 || !model_reg_2.isInitialized()) { + clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + } else { + clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + clustererNN.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + } + } +} + +// MockedOrtAllocator implementation to be able to use volatile assignment +struct MockedOrtAllocator : OrtAllocator { + MockedOrtAllocator(GPUReconstruction* = nullptr, OrtMemoryInfo* = nullptr); + ~MockedOrtAllocator(); + + void* Alloc(size_t size); + void Free(void* p); + const OrtMemoryInfo* Info() const; + void* Reserve(size_t size); + size_t NumAllocations() const; + size_t NumReserveAllocations() const; + + void LeakCheck(); + + private: + MockedOrtAllocator(const 
MockedOrtAllocator&) = delete; + MockedOrtAllocator& operator=(const MockedOrtAllocator&) = delete; + + std::atomic memory_inuse{0}; + std::atomic num_allocations{0}; + std::atomic num_reserve_allocations{0}; + OrtMemoryInfo* memory_info; + GPUReconstruction* rec; +}; + +MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) +{ + OrtAllocator::version = ORT_API_VERSION; + OrtAllocator::Alloc = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Alloc(size); }; + OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast(this_)->Free(p); }; + OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast(this_)->Info(); }; + OrtAllocator::Reserve = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Reserve(size); }; + rec = r; + memory_info = info; +} + +MockedOrtAllocator::~MockedOrtAllocator() +{ + // Ort::GetApi().ReleaseMemoryInfo(memory_info); +} + +void* MockedOrtAllocator::Alloc(size_t size) +{ + // LOG(info) << "(ORT) Allocating volatile memory of size " << size << " bytes"; + return rec->AllocateVolatileDeviceMemory(size); +} + +void* MockedOrtAllocator::Reserve(size_t size) +{ + // LOG(info) << "(ORT) Reserving volatile memory of size " << size << " bytes"; + return rec->AllocateVolatileDeviceMemory(size); +} + +void MockedOrtAllocator::Free(void* p) +{ + // LOG(info) << "(ORT) Freeing volatile memory " << p; + rec->ReturnVolatileDeviceMemory(); +} + +const OrtMemoryInfo* MockedOrtAllocator::Info() const +{ + return memory_info; +} + +size_t MockedOrtAllocator::NumAllocations() const +{ + return num_allocations.load(); +} + +size_t MockedOrtAllocator::NumReserveAllocations() const +{ + return num_reserve_allocations.load(); +} + +void MockedOrtAllocator::LeakCheck() +{ + if (memory_inuse.load()) + LOG(warning) << "memory leak!!!"; +} + +void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) +{ + mockedAlloc = 
std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); + if (recreate) { + Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo))); } + Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc.get())); + memInfo = (Ort::MemoryInfo*)mockedAlloc->Info(); +} + +const OrtMemoryInfo* GPUTPCNNClusterizerHost::getMockedMemoryInfo() +{ + return mockedAlloc->Info(); +} + +MockedOrtAllocator* GPUTPCNNClusterizerHost::getMockedAllocator() +{ + return mockedAlloc.get(); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h index 7efa0edecb893..0379b83d0ae02 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -22,6 +22,15 @@ using namespace o2::ml; +class OrtMemoryInfo; +class OrtAllocator; +struct MockedOrtAllocator; +namespace Ort +{ +struct Env; +struct MemoryInfo; +} // namespace Ort + namespace o2::OrtDataType { struct Float16_t; @@ -30,6 +39,7 @@ struct Float16_t; namespace o2::gpu { +class GPUReconstruction; class GPUTPCNNClusterizer; struct GPUSettingsProcessingNNclusterizer; @@ -37,30 +47,23 @@ class GPUTPCNNClusterizerHost { public: GPUTPCNNClusterizerHost() = default; - GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); + GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings) { init(settings); } + + void init(const GPUSettingsProcessingNNclusterizer&); + void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); - void networkInference(o2::ml::OrtModel model, GPUTPCNNClusterizer& clusterer, size_t size, float* output, int32_t dtype); + // ONNX + void volatileOrtAllocator(Ort::Env*, Ort::MemoryInfo*, GPUReconstruction*, bool = false); + MockedOrtAllocator* getMockedAllocator(); + const OrtMemoryInfo* getMockedMemoryInfo(); std::unordered_map OrtOptions; 
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters + std::vector modelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2 + int32_t deviceId = -1; std::vector reg_model_paths; - private: - // Avoid including CommonUtils/StringUtils.h - std::vector splitString(const std::string& input, const std::string& delimiter) - { - std::vector tokens; - std::size_t pos = 0; - std::size_t found; - - while ((found = input.find(delimiter, pos)) != std::string::npos) { - tokens.push_back(input.substr(pos, found - pos)); - pos = found + delimiter.length(); - } - tokens.push_back(input.substr(pos)); - - return tokens; - } + std::shared_ptr mockedAlloc = nullptr; }; // class GPUTPCNNClusterizerHost } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 379ea27443fea..2cf9ab2037007 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -35,7 +35,7 @@ using namespace o2::gpu::tpccf; // Defining individual thread functions for data filling, determining the class label and running the CF clusterizer template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) { uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; @@ -45,109 +45,26 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); - tpc::ClusterNative* 
clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow; o2::gpu::GPUTPCCFClusterizer::GPUSharedMemory smem_new; GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem_new, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow); } template <> GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - GPUTPCNNClusterizerKernels::fillInputData(nBlocks, nThreads, iBlock, iThread, processors, sector, dtype, batchStart); -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - uint glo_idx = get_global_id(0); - processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - auto& clusterer = processors.tpcNNClusterer[sector]; - uint glo_idx = get_global_id(0); - uint elem_iterator = glo_idx * clusterer.nnClusterizerModelClassNumOutputNodes; - float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, 
infty] - uint class_label = 0; - for (int pIdx = elem_iterator; pIdx < elem_iterator + clusterer.nnClusterizerModelClassNumOutputNodes; pIdx++) { - if (pIdx == elem_iterator) { - current_max_prob = clusterer.modelProbabilities[pIdx]; - } else { - class_label = (clusterer.modelProbabilities[pIdx] > current_max_prob ? pIdx : class_label); - } - } - // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clusterer.nnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins" - clusterer.outputDataClass[glo_idx + batchStart] = class_label; -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - uint glo_idx = get_global_id(0); - if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { - return; - } - GPUTPCNNClusterizerKernels::publishClustersReg1(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); -} - -template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) -{ - uint glo_idx = get_global_id(0); - if (glo_idx >= processors.tpcClusterer[sector].mPmemory->counters.nClusters) { - return; - } - GPUTPCNNClusterizerKernels::publishClustersReg2(glo_idx, smem, processors, sector, dtype, onlyMC, batchStart); -} - -// THe following arithmetic is done because the network is trained with a split between IROC and OROC boundary -GPUd() int GPUTPCNNClusterizerKernels::padOffset(int row_ref, int row_current) -{ - return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2); -} - -GPUd() int 
GPUTPCNNClusterizerKernels::rowOffset(int row, int global_shift) -{ - return (row > 62 ? global_shift : 0); -} - -GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) -{ - if (pad < 0 || row < 0) { // Faster short-circuit - return true; - } else if (row < 63) { - return (pad >= static_cast(GPUTPCGeometry::NPads(row))); - } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network - return true; - } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { - return (pad >= static_cast(GPUTPCGeometry::NPads(row - global_shift))); - } else { - return true; - } -} - -// Filling the input data for the neural network where there is no boundary -GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& processors, uint8_t sector, int8_t dtype, uint batchStart) { uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); - uint write_idx = glo_idx * clustererNN.nnClusterizerElementSize; // Potential optimization: Either choose nnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + Array2D isPeakMap(clusterer.mPpeakMap); ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); - - clustererNN.peakPositions[glo_idx] = peak; - clustererNN.centralCharges[glo_idx] = central_charge; - clustererNN.outputDataClass[glo_idx + batchStart] = -1; - int row_offset = 
GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + #ifndef GPUCA_GPUCODE GPUCA_UNROLL(U(), U()); #endif @@ -160,20 +77,20 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n if (!is_boundary) { ChargePos tmp_pos(row + r, pad + p, time + t); if (r == 0 && !clustererNN.clusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization - clustererNN.clusterFlags[2 * glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; } if (dtype == 0) { - clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); - } else { - clustererNN.inputData32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + } else if (dtype == 1) { + clustererNN.inputData_32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; } } else { // Filling boundary just to make sure that no values are left unintentionally if (dtype == 0) { - clustererNN.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.inputData_32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); } } write_idx++; @@ -182,66 +99,191 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n } if (clustererNN.nnClusterizerAddIndexData) { if (dtype == 0) { - clustererNN.inputData16[write_idx] 
= (OrtDataType::Float16_t)(clusterer.mISector / 36.f); - clustererNN.inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.inputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.inputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + } else { + clustererNN.inputData_32[write_idx] = sector / 36.f; + clustererNN.inputData_32[write_idx + 1] = row / 152.f; + clustererNN.inputData_32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); + } + } +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + auto& clusterer = processors.tpcClusterer[sector]; + auto& clustererNN = processors.tpcNNClusterer[sector]; + uint base_idx = CAMath::Floor(glo_idx / clustererNN.nnClusterizerElementSize); + uint transient_index = glo_idx % clustererNN.nnClusterizerElementSize; + + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + Array2D isPeakMap(clusterer.mPpeakMap); + ChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart]; + int row = static_cast(peak.row()), pad = static_cast(peak.pad()); + + if (clustererNN.nnClusterizerAddIndexData && transient_index == (clustererNN.nnClusterizerElementSize - 1)) { + uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; + for (uint16_t i = 0; i < 8; i++) { + Delta2 d = cfconsts::InnerNeighbors[i]; + ChargePos tmp_pos = peak.delta(d); + clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.clusterFlags[2 * glo_idx + 1] = 
clustererNN.clusterFlags[2 * glo_idx]; + } + if (dtype == 0) { + clustererNN.inputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.inputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.inputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); } else { - clustererNN.inputData32[write_idx] = clusterer.mISector / 36.f; - clustererNN.inputData32[write_idx + 1] = row / 152.f; - clustererNN.inputData32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); + clustererNN.inputData_32[top_idx - 3] = sector / 36.f; + clustererNN.inputData_32[top_idx - 2] = row / 152.f; + clustererNN.inputData_32[top_idx - 1] = static_cast(pad) / GPUTPCGeometry::NPads(row); + } + } else if (transient_index < (clustererNN.nnClusterizerElementSize - 3)) { + int time = static_cast(peak.time()); + int r = CAMath::Floor(transient_index / ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1))) - clustererNN.nnClusterizerSizeInputRow; + bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); + if (is_row_boundary) { + if (dtype == 0) { + clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + } else { + clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + } + } else { + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + int pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r); + int rest_1 = transient_index % ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1)); + int p = CAMath::Floor(rest_1 / (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - 
clustererNN.nnClusterizerSizeInputPad + pad_offset; + bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); + + if (!is_boundary) { + float central_charge = static_cast(chargeMap[peak].unpack()); + int t = (rest_1 % (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputTime; + ChargePos tmp_pos(row + r, pad + p, time + t); + if (dtype == 0) { + clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + } else if (dtype == 1) { + clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + } + } else { + if (dtype == 0) { + clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + } else { + clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + } + } } } } -GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); + if (dtype == 0) { + processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].modelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].nnClassThreshold); + } else if (dtype == 1) { + processors.tpcNNClusterer[sector].outputDataClass[glo_idx + 
batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); + } +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) { + auto& clustererNN = processors.tpcNNClusterer[sector]; + uint glo_idx = get_global_id(0); + uint elem_iterator = glo_idx * clustererNN.nnClusterizerModelClassNumOutputNodes; + float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty] + uint class_label = 0; + for (int pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes; pIdx++) { + if (pIdx == elem_iterator) { + if (dtype == 0) { + current_max_prob = static_cast(clustererNN.modelProbabilities_16[pIdx]); + } else if (dtype == 1) { + current_max_prob = clustererNN.modelProbabilities_32[pIdx]; + } + } else { + if (dtype == 0) { + current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_16[pIdx].ToFloat()); + } else if (dtype == 1) { + current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_32[pIdx]); + } + } + } + // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. 
The highest one "wins" + clustererNN.outputDataClass[glo_idx + batchStart] = class_label; + if (class_label > 1) { + clustererNN.clusterFlags[2 * glo_idx] = 1; + clustererNN.clusterFlags[2 * glo_idx + 1] = 1; + } +} + +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +{ + uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + float central_charge = static_cast(chargeMap[peak].unpack()); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); - tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; uint full_glo_idx = glo_idx + batchStart; int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg1NumOutputNodes; // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size(); - if (clustererNN.outputDataClass[full_glo_idx] == 1) { + if (clustererNN.outputDataClass[full_glo_idx] == 1 || (clustererNN.nnClusterizerModelReg2NumOutputNodes == -1 && clustererNN.outputDataClass[full_glo_idx] >= 1)) { ClusterAccumulator pc; // Publishing logic is taken from default clusterizer - if (onlyMC) { + if (withMC) { ClusterAccumulator dummy_pc; - CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + CPU_ONLY(labelAcc->collect(peak, central_charge)); GPUTPCCFClusterizer::buildCluster( clusterer.Param().rec, chargeMap, - clustererNN.peakPositions[glo_idx], + peak, smem.posBcast, smem.buf, smem.innerAboveThreshold, &dummy_pc, labelAcc); } - - if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; } return; } - pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg1[model_output_index + 4], - static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg1[model_output_index], - clustererNN.outputDataReg1[model_output_index + 2], - (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg1[model_output_index + 1], - clustererNN.outputDataReg1[model_output_index + 3], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + if (dtype == 0) { + 
pc.setFull(central_charge * clustererNN.outputDataReg1_16[model_output_index + 4].ToFloat(), + static_cast(peak.pad()) + clustererNN.outputDataReg1_16[model_output_index].ToFloat(), + clustererNN.outputDataReg1_16[model_output_index + 2].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_16[model_output_index + 1].ToFloat(), + clustererNN.outputDataReg1_16[model_output_index + 3].ToFloat(), + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } else if (dtype == 1) { + pc.setFull(central_charge * clustererNN.outputDataReg1_32[model_output_index + 4], + static_cast(peak.pad()) + clustererNN.outputDataReg1_32[model_output_index], + clustererNN.outputDataReg1_32[model_output_index + 2], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_32[model_output_index + 1], + clustererNN.outputDataReg1_32[model_output_index + 3], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -250,11 +292,11 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha } uint rowIndex = 0; - if (clusterer.mPclusterByRow != nullptr) { + if (clusterOut != nullptr) { rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( clusterer, myCluster, - clustererNN.peakPositions[glo_idx].row(), + peak.row(), clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut); @@ -264,7 +306,7 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha } else if 
(clusterer.mPclusterPosInRow) { rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; } - CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); } else { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -273,38 +315,41 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha } } -GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +template <> +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) { + uint glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; + Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + float central_charge = static_cast(chargeMap[peak].unpack()); + CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); - tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow; + tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; uint full_glo_idx = glo_idx + batchStart; int model_output_index = glo_idx * clustererNN.nnClusterizerModelReg2NumOutputNodes; - // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.nnClusterizerModelReg2NumOutputNodes << " -- " << clustererNN.peakPositions.size() << " -- " << clustererNN.centralCharges.size(); - if (clustererNN.outputDataClass[full_glo_idx] > 0) { ClusterAccumulator pc; - if (onlyMC) { + if (withMC) { ClusterAccumulator dummy_pc; - CPU_ONLY(labelAcc->collect(clustererNN.peakPositions[glo_idx], chargeMap[clustererNN.peakPositions[glo_idx]].unpack())); + CPU_ONLY(labelAcc->collect(peak, central_charge)); GPUTPCCFClusterizer::buildCluster( clusterer.Param().rec, chargeMap, - clustererNN.peakPositions[glo_idx], + peak, smem.posBcast, smem.buf, smem.innerAboveThreshold, &dummy_pc, labelAcc); } - - if ((clusterer.mPmemory->fragment).isOverlap(clustererNN.peakPositions[glo_idx].time())) { + if ((clusterer.mPmemory->fragment).isOverlap(peak.time())) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; } @@ -312,16 +357,26 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } // Cluster 1 - pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 8], - static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index], - clustererNN.outputDataReg2[model_output_index + 4], - (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 2], - clustererNN.outputDataReg2[model_output_index + 6], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + if (dtype == 0) { + pc.setFull(central_charge * clustererNN.outputDataReg2_16[model_output_index + 8].ToFloat(), + 
static_cast(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 4].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 2].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 6].ToFloat(), + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } else if (dtype == 1) { + pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 8], + static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index], + clustererNN.outputDataReg2_32[model_output_index + 4], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 2], + clustererNN.outputDataReg2_32[model_output_index + 6], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } tpc::ClusterNative myCluster; - bool rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -330,11 +385,11 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } uint rowIndex = 0; - if (clusterer.mPclusterByRow != nullptr) { + if (clusterOut != nullptr) { rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( clusterer, myCluster, - clustererNN.peakPositions[glo_idx].row(), + peak.row(), clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut); @@ -344,18 +399,28 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } else if (clusterer.mPclusterPosInRow) { rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; } - 
CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); + CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); // Cluster 2 - pc.setFull(clustererNN.centralCharges[glo_idx] * clustererNN.outputDataReg2[model_output_index + 9], - static_cast(clustererNN.peakPositions[glo_idx].pad()) + clustererNN.outputDataReg2[model_output_index + 1], - clustererNN.outputDataReg2[model_output_index + 5], - (clusterer.mPmemory->fragment).start + static_cast(clustererNN.peakPositions[glo_idx].time()) + clustererNN.outputDataReg2[model_output_index + 3], - clustererNN.outputDataReg2[model_output_index + 7], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); - - rejectCluster = !pc.toNative(clustererNN.peakPositions[glo_idx], clustererNN.centralCharges[glo_idx], myCluster, clusterer.Param(), chargeMap); + if (dtype == 0) { + pc.setFull(central_charge * clustererNN.outputDataReg2_16[model_output_index + 9].ToFloat(), + static_cast(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index + 1].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 5].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 3].ToFloat(), + clustererNN.outputDataReg2_16[model_output_index + 7].ToFloat(), + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } else if (dtype == 1) { + pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 9], + static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index + 1], + clustererNN.outputDataReg2_32[model_output_index + 5], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 3], + clustererNN.outputDataReg2_32[model_output_index + 7], + clustererNN.clusterFlags[2 * glo_idx], + clustererNN.clusterFlags[2 * glo_idx + 1]); + } + + 
rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); if (rejectCluster) { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -363,11 +428,11 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha return; } - if (clusterer.mPclusterByRow != nullptr) { + if (clusterOut != nullptr) { rowIndex = GPUTPCCFClusterizer::sortIntoBuckets( clusterer, myCluster, - clustererNN.peakPositions[glo_idx].row(), + peak.row(), clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut); @@ -377,7 +442,7 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha } else if (clusterer.mPclusterPosInRow) { rowIndex = clusterer.mPclusterPosInRow[full_glo_idx]; } - // CPU_ONLY(labelAcc->commit(clustererNN.peakPositions[glo_idx].row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters? + // CPU_ONLY(labelAcc->commit(peak.row(), rowIndex, clusterer.mNMaxClusterPerRow)); // -> Is this needed? How to handle MC labels for split clusters? } else { if (clusterer.mPclusterPosInRow) { clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow; @@ -385,3 +450,29 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha return; } } + +// THe following arithmetic is done because the network is trained with a split between IROC and OROC boundary +GPUd() int GPUTPCNNClusterizerKernels::padOffset(int row_ref, int row_current) +{ + return (int)((GPUTPCGeometry::NPads(row_current) - GPUTPCGeometry::NPads(row_ref)) / 2); +} + +GPUd() int GPUTPCNNClusterizerKernels::rowOffset(int row, int global_shift) +{ + return (row > 62 ? 
global_shift : 0); +} + +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) +{ + if (pad < 0 || row < 0) { // Faster short-circuit + return true; + } else if (row < 63) { + return (pad >= static_cast(GPUTPCGeometry::NPads(row))); + } else if (row < (63 + global_shift)) { // to account for the gap between IROC and OROC. Charge will be set to -1 in order to signal boundary to the neural network + return true; + } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + global_shift)) { + return (pad >= static_cast(GPUTPCGeometry::NPads(row - global_shift))); + } else { + return true; + } +} diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index a1d641fdb0b93..27cfba2487aed 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -39,6 +39,7 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate public: // Must all have same number of threads, since they use a common SCRATCH_PAD_WORK_GROUP_SIZE below static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); + static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNNSingleElement) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression) == 
GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer)); @@ -59,10 +60,11 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate enum K : int32_t { runCfClusterizer = 0, fillInputNN = 1, - determineClass1Labels = 2, - determineClass2Labels = 3, - publishClass1Regression = 4, - publishClass2Regression = 5, + fillInputNNSingleElement = 2, + determineClass1Labels = 3, + determineClass2Labels = 4, + publishClass1Regression = 5, + publishClass2Regression = 6, }; template diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index fcf576d828b7f..7e3ddf868af2a 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -116,6 +116,7 @@ o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUS if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) diff --git a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h index 0038233f1c376..73f1f208e8889 100644 --- a/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h +++ 
b/GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h @@ -83,6 +83,7 @@ class GPUO2Interface; struct TPCPadGainCalib; struct TPCZSLinkMapping; struct GPUSettingsO2; +struct GPUSettingsProcessingNNclusterizer; class GPUO2InterfaceQA; struct GPUTrackingInOutPointers; struct GPUTrackingInOutZS; @@ -225,6 +226,8 @@ class GPURecoWorkflowSpec : public o2::framework::Task uint32_t mNextThreadIndex = 0; bool mUpdateGainMapCCDB = true; std::unique_ptr mTFSettings; + std::unique_ptr mNNClusterizerSettings; + Config mSpecConfig; std::shared_ptr mGGR; bool mGRPGeomUpdated = false; diff --git a/GPU/Workflow/src/GPUWorkflowSpec.cxx b/GPU/Workflow/src/GPUWorkflowSpec.cxx index 7ad03ec58ae80..8a755a703705f 100644 --- a/GPU/Workflow/src/GPUWorkflowSpec.cxx +++ b/GPU/Workflow/src/GPUWorkflowSpec.cxx @@ -78,6 +78,7 @@ #include "DetectorsRaw/RDHUtils.h" #include "ITStracking/TrackingInterface.h" #include "GPUWorkflowInternal.h" +#include "TPCCalibration/NeuralNetworkClusterizer.h" // #include "Framework/ThreadPool.h" #include @@ -132,6 +133,50 @@ void GPURecoWorkflowSpec::init(InitContext& ic) { GRPGeomHelper::instance().setRequest(mGGR); GPUO2InterfaceConfiguration& config = *mConfig.get(); + GPUSettingsProcessingNNclusterizer& mNNClusterizerSettings = mConfig->configProcessing.nn; + + if (mNNClusterizerSettings.nnLoadFromCCDB) { + LOG(info) << "Loading neural networks from CCDB"; + o2::tpc::NeuralNetworkClusterizer nnClusterizerFetcher; + nnClusterizerFetcher.initCcdbApi(mNNClusterizerSettings.nnCCDBURL); + std::map ccdbSettings = { + {"nnCCDBURL", mNNClusterizerSettings.nnCCDBURL}, + {"nnCCDBPath", mNNClusterizerSettings.nnCCDBPath}, + {"inputDType", mNNClusterizerSettings.nnInferenceInputDType}, + {"outputDType", mNNClusterizerSettings.nnInferenceOutputDType}, + {"outputFolder", mNNClusterizerSettings.nnLocalFolder}, + {"nnCCDBPath", mNNClusterizerSettings.nnCCDBPath}, + {"nnCCDBWithMomentum", std::to_string(mNNClusterizerSettings.nnCCDBWithMomentum)}, + {"nnCCDBBeamType", 
mNNClusterizerSettings.nnCCDBBeamType}, + {"nnCCDBInteractionRate", std::to_string(mNNClusterizerSettings.nnCCDBInteractionRate)}}; + + std::string nnFetchFolder = mNNClusterizerSettings.nnLocalFolder; + std::vector evalMode = o2::utils::Str::tokenize(mNNClusterizerSettings.nnEvalMode, ':'); + + if (evalMode[0] == "c1") { + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBClassificationLayerType; + ccdbSettings["nnCCDBEvalType"] = "classification_c1"; + ccdbSettings["outputFile"] = "net_classification_c1.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + } else if (evalMode[0] == "c2") { + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBClassificationLayerType; + ccdbSettings["nnCCDBEvalType"] = "classification_c2"; + ccdbSettings["outputFile"] = "net_classification_c2.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + } + + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBRegressionLayerType; + ccdbSettings["nnCCDBEvalType"] = "regression_c1"; + ccdbSettings["outputFile"] = "net_regression_c1.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + if (evalMode[1] == "r2") { + ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBRegressionLayerType; + ccdbSettings["nnCCDBEvalType"] = "regression_c2"; + ccdbSettings["outputFile"] = "net_regression_c2.onnx"; + nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings); + } + LOG(info) << "Neural network loading done!"; + } // Create configuration object and fill settings mConfig->configGRP.solenoidBzNominalGPU = 0; From 760831f9229c18525c9442ae053b8b348afb2cf3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sat, 19 Apr 2025 15:24:00 +0200 Subject: [PATCH 0421/1914] GPU: Simplify kernel Call interface, remove intermediate classes and headers where possible --- .../workflow/src/TRDGlobalTrackingSpec.cxx | 2 + GPU/GPUTracking/Base/GPUProcessor.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstruction.cxx | 257 
++++++++++-------- GPU/GPUTracking/Base/GPUReconstruction.h | 107 +++----- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 49 ++-- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 111 +------- .../Base/GPUReconstructionCPUKernels.h | 98 +++++++ .../Base/GPUReconstructionConvert.cxx | 2 +- .../Base/GPUReconstructionDeviceBase.cxx | 21 +- .../Base/GPUReconstructionDeviceBase.h | 8 +- GPU/GPUTracking/Base/GPUReconstructionIO.h | 40 ++- .../Base/GPUReconstructionKernelMacros.h | 2 +- .../Base/GPUReconstructionKernels.h | 115 -------- .../Base/GPUReconstructionLibrary.cxx | 1 + .../Base/GPUReconstructionProcessing.cxx | 19 +- .../Base/GPUReconstructionProcessing.h | 98 ++++++- .../GPUReconstructionProcessingKernels.inc | 41 +++ .../Base/GPUReconstructionTimeframe.cxx | 2 + .../Base/cuda/GPUReconstructionCUDA.cu | 101 ++++--- .../Base/cuda/GPUReconstructionCUDA.h | 24 +- .../Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 62 ++--- .../cuda/GPUReconstructionCUDAInternals.h | 8 +- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 14 +- .../Base/opencl/GPUReconstructionOCL.cxx | 109 ++++---- .../Base/opencl/GPUReconstructionOCL.h | 13 +- .../opencl/GPUReconstructionOCLIncludesHost.h | 6 +- .../opencl/GPUReconstructionOCLKernels.cxx | 19 +- GPU/GPUTracking/CMakeLists.txt | 8 +- .../DataCompression/GPUTPCCompression.cxx | 3 +- .../DataTypes/GPUKernelClassesFwd.h | 40 +++ GPU/GPUTracking/Global/GPUChain.h | 46 +++- GPU/GPUTracking/Global/GPUChainITS.cxx | 1 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 4 +- GPU/GPUTracking/Global/GPUChainTracking.h | 41 +-- .../Global/GPUChainTrackingClusterizer.cxx | 37 ++- .../Global/GPUChainTrackingCompression.cxx | 3 + .../GPUChainTrackingDebugAndProfiling.cxx | 6 +- GPU/GPUTracking/Global/GPUChainTrackingDefs.h | 2 +- .../Global/GPUChainTrackingGetters.inc | 36 +++ GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 1 + .../Global/GPUChainTrackingMerger.cxx | 5 + .../Global/GPUChainTrackingRefit.cxx | 4 + .../Global/GPUChainTrackingSectorTracker.cxx 
| 10 + .../Global/GPUChainTrackingTRD.cxx | 10 +- .../Global/GPUChainTrackingTransformation.cxx | 4 + .../Global/GPUTrackingInputProvider.cxx | 3 + GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 + GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 8 +- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 1 + GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- .../SectorTracker/GPUTPCTrackerDump.cxx | 1 + .../Standalone/Benchmark/standalone.cxx | 25 +- GPU/GPUTracking/Standalone/tools/createGeo.C | 1 + GPU/GPUTracking/Standalone/tools/createLUT.C | 1 + .../{Array2D.h => CfArray2D.h} | 18 +- .../{ChargePos.h => CfChargePos.h} | 14 +- GPU/GPUTracking/TPCClusterFinder/CfUtils.h | 18 +- .../TPCClusterFinder/ClusterAccumulator.cxx | 4 +- .../TPCClusterFinder/ClusterAccumulator.h | 8 +- .../GPUTPCCFChargeMapFiller.cxx | 16 +- .../GPUTPCCFChargeMapFiller.h | 8 +- .../GPUTPCCFCheckPadBaseline.cxx | 14 +- .../GPUTPCCFCheckPadBaseline.h | 2 +- .../TPCClusterFinder/GPUTPCCFClusterizer.cxx | 2 +- .../TPCClusterFinder/GPUTPCCFClusterizer.h | 12 +- .../TPCClusterFinder/GPUTPCCFClusterizer.inc | 16 +- .../TPCClusterFinder/GPUTPCCFDecodeZS.cxx | 14 +- .../GPUTPCCFDeconvolution.cxx | 14 +- .../TPCClusterFinder/GPUTPCCFDeconvolution.h | 6 +- .../GPUTPCCFNoiseSuppression.cxx | 32 +-- .../GPUTPCCFNoiseSuppression.h | 12 +- .../TPCClusterFinder/GPUTPCCFPeakFinder.cxx | 20 +- .../TPCClusterFinder/GPUTPCCFPeakFinder.h | 10 +- .../GPUTPCCFStreamCompaction.cxx | 4 +- .../TPCClusterFinder/GPUTPCClusterFinder.cxx | 6 +- .../TPCClusterFinder/GPUTPCClusterFinder.h | 8 +- .../GPUTPCClusterFinderDump.cxx | 8 +- .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 1 + .../TPCClusterFinder/GPUTPCNNClusterizer.h | 2 +- .../GPUTPCNNClusterizerKernels.cxx | 28 +- .../GPUTPCNNClusterizerKernels.h | 4 +- .../TPCClusterFinder/MCLabelAccumulator.cxx | 2 +- .../TPCClusterFinder/MCLabelAccumulator.h | 6 +- GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx | 1 + GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx | 1 + 
GPU/GPUTracking/display/GPUDisplay.cxx | 3 +- GPU/GPUTracking/display/GPUDisplay.h | 16 +- .../display/backend/GPUDisplayBackend.cxx | 1 + .../backend/GPUDisplayBackendOpenGL.cxx | 2 + .../backend/GPUDisplayBackendVulkan.cxx | 2 + .../display/frontend/GPUDisplayFrontend.cxx | 3 + .../frontend/GPUDisplayFrontendWayland.cxx | 1 + .../display/frontend/GPUDisplayInfo.inc | 36 +++ .../display/frontend/GPUDisplayKeys.cxx | 1 + .../display/helpers/GPUDisplayAnimation.cxx | 2 + .../GPUDisplayBackendOpenGLMagneticField.cxx | 1 + .../display/helpers/GPUDisplayHelpers.cxx | 3 + .../helpers/GPUDisplayInterpolation.cxx | 1 + .../display/helpers/GPUDisplayLoader.cxx | 1 + .../display/helpers/GPUDisplayROOT.cxx | 3 + .../display/render/GPUDisplayDraw.cxx | 3 +- .../display/render/GPUDisplayImportEvent.cxx | 1 + GPU/GPUTracking/kernels.cmake | 2 +- GPU/GPUTracking/qa/GPUQA.cxx | 3 +- GPU/GPUTracking/qa/genEvents.cxx | 3 +- 105 files changed, 1136 insertions(+), 899 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h delete mode 100644 GPU/GPUTracking/Base/GPUReconstructionKernels.h create mode 100644 GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc create mode 100644 GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h create mode 100644 GPU/GPUTracking/Global/GPUChainTrackingGetters.inc rename GPU/GPUTracking/TPCClusterFinder/{Array2D.h => CfArray2D.h} (81%) rename GPU/GPUTracking/TPCClusterFinder/{ChargePos.h => CfChargePos.h} (80%) create mode 100644 GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc diff --git a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx index 424657ac19426..375fa732007cc 100644 --- a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx +++ b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx @@ -51,6 +51,8 @@ #include "GPUTRDTrackletWord.h" #include "GPUTRDInterfaces.h" #include "GPUTRDGeometry.h" +#include "GPUConstantMem.h" +#include 
"GPUTRDTrackerKernels.h" #ifdef ENABLE_UPGRADES #include "ITS3Reconstruction/IOUtils.h" diff --git a/GPU/GPUTracking/Base/GPUProcessor.cxx b/GPU/GPUTracking/Base/GPUProcessor.cxx index 8a18f71d535e3..82627fb00723c 100644 --- a/GPU/GPUTracking/Base/GPUProcessor.cxx +++ b/GPU/GPUTracking/Base/GPUProcessor.cxx @@ -14,7 +14,7 @@ #include "GPUProcessor.h" #include "GPUReconstruction.h" -#include "GPUReconstructionDeviceBase.h" +#include "GPUSettings.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index acca74e57a80e..c79c743e96ce5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -30,7 +30,9 @@ #include "GPUROOTDumpCore.h" #include "GPUConfigDump.h" #include "GPUChainTracking.h" +#include "GPUConstantMem.h" #include "GPUCommonHelpers.h" +#include "GPUSettings.h" #include "GPUMemoryResource.h" #include "GPUChain.h" @@ -75,10 +77,10 @@ constexpr GPUReconstruction::GeometryType GPUReconstruction::geometryType; static ptrdiff_t ptrDiff(void* a, void* b) { return (char*)a - (char*)b; } -GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mDeviceBackendSettings(cfg) +GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHostConstantMem(new GPUConstantMem), mGRPSettings(new GPUSettingsGRP), mDeviceBackendSettings(new GPUSettingsDeviceBackend(cfg)), mProcessingSettings(new GPUSettingsProcessing) { if (cfg.master) { - if (cfg.master->mDeviceBackendSettings.deviceType != cfg.deviceType) { + if (cfg.master->GetDeviceBackendSettings().deviceType != cfg.deviceType) { throw std::invalid_argument("device type of master and slave GPUReconstruction does not match"); } if (cfg.master->mMaster) { @@ -87,7 +89,7 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos mMaster = cfg.master; cfg.master->mSlaves.emplace_back(this); } - 
param().SetDefaults(&mGRPSettings); + param().SetDefaults(mGRPSettings.get()); mMemoryScalers.reset(new GPUMemorySizeScalers); for (uint32_t i = 0; i < NSECTORS; i++) { processors()->tpcTrackers[i].SetSector(i); // TODO: Move to a better place @@ -148,7 +150,7 @@ int32_t GPUReconstruction::Init() if (InitDevice()) { return 1; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mHostMemoryPoolEnd = (char*)mHostMemoryBase + mHostMemorySize; mDeviceMemoryPoolEnd = (char*)mDeviceMemoryBase + mDeviceMemorySize; } else { @@ -213,7 +215,7 @@ static uint32_t getDefaultNThreads() int32_t GPUReconstruction::InitPhaseBeforeDevice() { - if (mProcessingSettings.printSettings) { + if (GetProcessingSettings().printSettings) { if (mSlaves.size() || mMaster) { printf("\nConfig Dump %s\n", mMaster ? "Slave" : "Master"); } @@ -223,7 +225,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() break; } } - GPUConfigDump::dumpConfig(¶m().rec, &mProcessingSettings, chTrk ? chTrk->GetQAConfig() : nullptr, chTrk ? chTrk->GetEventDisplayConfig() : nullptr, &mDeviceBackendSettings, &mRecoSteps); + GPUConfigDump::dumpConfig(¶m().rec, mProcessingSettings.get(), chTrk ? chTrk->GetQAConfig() : nullptr, chTrk ? 
chTrk->GetEventDisplayConfig() : nullptr, mDeviceBackendSettings.get(), &mRecoSteps); } mRecoSteps.stepsGPUMask &= mRecoSteps.steps; mRecoSteps.stepsGPUMask &= AvailableGPURecoSteps(); @@ -231,95 +233,95 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mRecoSteps.stepsGPUMask.set((uint8_t)0); } - if (mProcessingSettings.forceMemoryPoolSize >= 1024 || mProcessingSettings.forceHostMemoryPoolSize >= 1024) { - mProcessingSettings.memoryAllocationStrategy = GPUMemoryResource::ALLOCATION_GLOBAL; + if (GetProcessingSettings().forceMemoryPoolSize >= 1024 || GetProcessingSettings().forceHostMemoryPoolSize >= 1024) { + mProcessingSettings->memoryAllocationStrategy = GPUMemoryResource::ALLOCATION_GLOBAL; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_AUTO) { - mProcessingSettings.memoryAllocationStrategy = IsGPU() ? GPUMemoryResource::ALLOCATION_GLOBAL : GPUMemoryResource::ALLOCATION_INDIVIDUAL; + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_AUTO) { + mProcessingSettings->memoryAllocationStrategy = IsGPU() ? 
GPUMemoryResource::ALLOCATION_GLOBAL : GPUMemoryResource::ALLOCATION_INDIVIDUAL; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - mProcessingSettings.forceMemoryPoolSize = mProcessingSettings.forceHostMemoryPoolSize = 0; + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + mProcessingSettings->forceMemoryPoolSize = mProcessingSettings->forceHostMemoryPoolSize = 0; } - if (mProcessingSettings.debugLevel >= 4) { - mProcessingSettings.keepAllMemory = true; + if (GetProcessingSettings().debugLevel >= 4) { + mProcessingSettings->keepAllMemory = true; } - if (mProcessingSettings.debugLevel >= 5 && mProcessingSettings.allocDebugLevel < 2) { - mProcessingSettings.allocDebugLevel = 2; + if (GetProcessingSettings().debugLevel >= 5 && GetProcessingSettings().allocDebugLevel < 2) { + mProcessingSettings->allocDebugLevel = 2; } - if (mProcessingSettings.eventDisplay || mProcessingSettings.keepAllMemory) { - mProcessingSettings.keepDisplayMemory = true; + if (GetProcessingSettings().eventDisplay || GetProcessingSettings().keepAllMemory) { + mProcessingSettings->keepDisplayMemory = true; } - if (mProcessingSettings.debugLevel < 6) { - mProcessingSettings.debugMask = 0; + if (GetProcessingSettings().debugLevel < 6) { + mProcessingSettings->debugMask = 0; } - if (mProcessingSettings.debugLevel < 1) { - mProcessingSettings.deviceTimers = false; + if (GetProcessingSettings().debugLevel < 1) { + mProcessingSettings->deviceTimers = false; } - if (mProcessingSettings.debugLevel > 0) { - mProcessingSettings.recoTaskTiming = true; + if (GetProcessingSettings().debugLevel > 0) { + mProcessingSettings->recoTaskTiming = true; } - if (mProcessingSettings.deterministicGPUReconstruction == -1) { - mProcessingSettings.deterministicGPUReconstruction = mProcessingSettings.debugLevel >= 6; + if (GetProcessingSettings().deterministicGPUReconstruction == -1) { + 
mProcessingSettings->deterministicGPUReconstruction = GetProcessingSettings().debugLevel >= 6; } - if (mProcessingSettings.deterministicGPUReconstruction) { + if (GetProcessingSettings().deterministicGPUReconstruction) { #ifndef GPUCA_DETERMINISTIC_MODE GPUError("WARNING, deterministicGPUReconstruction needs GPUCA_DETERMINISTIC_MODE for being fully deterministic, without only most indeterminism by concurrency is removed, but floating point effects remain!"); #endif - mProcessingSettings.overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; + mProcessingSettings->overrideClusterizerFragmentLen = TPC_MAX_FRAGMENT_LEN_GPU; param().rec.tpc.nWaysOuter = true; if (param().rec.tpc.looperInterpolationInExtraPass == -1) { param().rec.tpc.looperInterpolationInExtraPass = 0; } - if (mProcessingSettings.createO2Output > 1) { - mProcessingSettings.createO2Output = 1; + if (GetProcessingSettings().createO2Output > 1) { + mProcessingSettings->createO2Output = 1; } - mProcessingSettings.rtc.deterministic = 1; + mProcessingSettings->rtc.deterministic = 1; } else { #ifdef GPUCA_DETERMINISTIC_MODE GPUError("WARNING, compiled with GPUCA_DETERMINISTIC_MODE but deterministicGPUReconstruction not set, only compile-time determinism and deterministic math enforced, not fully deterministic!"); #endif } - if (mProcessingSettings.deterministicGPUReconstruction && mProcessingSettings.debugLevel >= 6) { - mProcessingSettings.nTPCClustererLanes = 1; + if (GetProcessingSettings().deterministicGPUReconstruction && GetProcessingSettings().debugLevel >= 6) { + mProcessingSettings->nTPCClustererLanes = 1; } - if (mProcessingSettings.createO2Output > 1 && mProcessingSettings.runQA && mProcessingSettings.qcRunFraction == 100.f) { - mProcessingSettings.createO2Output = 1; + if (GetProcessingSettings().createO2Output > 1 && GetProcessingSettings().runQA && GetProcessingSettings().qcRunFraction == 100.f) { + mProcessingSettings->createO2Output = 1; } - if (!mProcessingSettings.createO2Output || 
!IsGPU()) { - mProcessingSettings.clearO2OutputFromGPU = false; + if (!GetProcessingSettings().createO2Output || !IsGPU()) { + mProcessingSettings->clearO2OutputFromGPU = false; } if (!(mRecoSteps.stepsGPUMask & GPUDataTypes::RecoStep::TPCMerging)) { - mProcessingSettings.mergerSortTracks = false; + mProcessingSettings->mergerSortTracks = false; } - if (mProcessingSettings.debugLevel > 3 || !IsGPU() || mProcessingSettings.deterministicGPUReconstruction) { - mProcessingSettings.delayedOutput = false; + if (GetProcessingSettings().debugLevel > 3 || !IsGPU() || GetProcessingSettings().deterministicGPUReconstruction) { + mProcessingSettings->delayedOutput = false; } - if (!mProcessingSettings.rtc.enable) { - mProcessingSettings.rtc.optConstexpr = false; + if (!GetProcessingSettings().rtc.enable) { + mProcessingSettings->rtc.optConstexpr = false; } - mMemoryScalers->factor = mProcessingSettings.memoryScalingFactor; - mMemoryScalers->conservative = mProcessingSettings.conservativeMemoryEstimate; - mMemoryScalers->returnMaxVal = mProcessingSettings.forceMaxMemScalers != 0; - if (mProcessingSettings.forceMaxMemScalers > 1) { - mMemoryScalers->rescaleMaxMem(mProcessingSettings.forceMaxMemScalers); + mMemoryScalers->factor = GetProcessingSettings().memoryScalingFactor; + mMemoryScalers->conservative = GetProcessingSettings().conservativeMemoryEstimate; + mMemoryScalers->returnMaxVal = GetProcessingSettings().forceMaxMemScalers != 0; + if (GetProcessingSettings().forceMaxMemScalers > 1) { + mMemoryScalers->rescaleMaxMem(GetProcessingSettings().forceMaxMemScalers); } - if (mProcessingSettings.nHostThreads != -1 && mProcessingSettings.ompThreads != -1) { + if (GetProcessingSettings().nHostThreads != -1 && GetProcessingSettings().ompThreads != -1) { GPUFatal("Must not use both nHostThreads and ompThreads at the same time!"); - } else if (mProcessingSettings.ompThreads != -1) { - mProcessingSettings.nHostThreads = mProcessingSettings.ompThreads; + } else if 
(GetProcessingSettings().ompThreads != -1) { + mProcessingSettings->nHostThreads = GetProcessingSettings().ompThreads; GPUWarning("You are using the deprecated ompThreads option, please switch to nHostThreads!"); } - if (mProcessingSettings.nHostThreads <= 0) { - mProcessingSettings.nHostThreads = internal::getDefaultNThreads(); + if (GetProcessingSettings().nHostThreads <= 0) { + mProcessingSettings->nHostThreads = internal::getDefaultNThreads(); } else { - mProcessingSettings.autoAdjustHostThreads = false; + mProcessingSettings->autoAdjustHostThreads = false; } - mMaxHostThreads = mProcessingSettings.nHostThreads; + mMaxHostThreads = GetProcessingSettings().nHostThreads; if (mMaster == nullptr) { mThreading = std::make_shared(); mThreading->control = std::make_unique(tbb::global_control::max_allowed_parallelism, mMaxHostThreads); @@ -330,26 +332,26 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() } mMaxBackendThreads = std::max(mMaxBackendThreads, mMaxHostThreads); if (IsGPU()) { - mNStreams = std::max(mProcessingSettings.nStreams, 3); + mNStreams = std::max(GetProcessingSettings().nStreams, 3); } - if (mProcessingSettings.nTPCClustererLanes == -1) { - mProcessingSettings.nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSECTORS, mProcessingSettings.inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads)); + if (GetProcessingSettings().nTPCClustererLanes == -1) { + mProcessingSettings->nTPCClustererLanes = (GetRecoStepsGPU() & RecoStep::TPCClusterFinding) ? 3 : std::max(1, std::min(GPUCA_NSECTORS, GetProcessingSettings().inKernelParallel ? (mMaxHostThreads >= 4 ? std::min(mMaxHostThreads / 2, mMaxHostThreads >= 32 ? 
GPUCA_NSECTORS : 4) : 1) : mMaxHostThreads)); } - if (mProcessingSettings.overrideClusterizerFragmentLen == -1) { - mProcessingSettings.overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / mProcessingSettings.nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; + if (GetProcessingSettings().overrideClusterizerFragmentLen == -1) { + mProcessingSettings->overrideClusterizerFragmentLen = ((GetRecoStepsGPU() & RecoStep::TPCClusterFinding) || (mMaxHostThreads / GetProcessingSettings().nTPCClustererLanes >= 3)) ? TPC_MAX_FRAGMENT_LEN_GPU : TPC_MAX_FRAGMENT_LEN_HOST; } - if (mProcessingSettings.nTPCClustererLanes > GPUCA_NSECTORS) { - GPUError("Invalid value for nTPCClustererLanes: %d", mProcessingSettings.nTPCClustererLanes); - mProcessingSettings.nTPCClustererLanes = GPUCA_NSECTORS; + if (GetProcessingSettings().nTPCClustererLanes > GPUCA_NSECTORS) { + GPUError("Invalid value for nTPCClustererLanes: %d", GetProcessingSettings().nTPCClustererLanes); + mProcessingSettings->nTPCClustererLanes = GPUCA_NSECTORS; } - if (mProcessingSettings.doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || mProcessingSettings.memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { + if (GetProcessingSettings().doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { GPUError("Must use double pipeline mode only with exactly one chain that must support it"); return 1; } - if (mMaster == nullptr && mProcessingSettings.doublePipeline) { + if (mMaster == nullptr && GetProcessingSettings().doublePipeline) { mPipelineContext.reset(new GPUReconstructionPipelineContext); } @@ -367,16 +369,16 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mDeviceMemorySize += memPrimary; mHostMemorySize += memPageLocked; } - if 
(mProcessingSettings.forceMemoryPoolSize && mProcessingSettings.forceMemoryPoolSize <= 2 && CanQueryMaxMemory()) { - mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize; - } else if (mProcessingSettings.forceMemoryPoolSize > 2) { - mDeviceMemorySize = mProcessingSettings.forceMemoryPoolSize; + if (GetProcessingSettings().forceMemoryPoolSize && GetProcessingSettings().forceMemoryPoolSize <= 2 && CanQueryMaxMemory()) { + mDeviceMemorySize = GetProcessingSettings().forceMemoryPoolSize; + } else if (GetProcessingSettings().forceMemoryPoolSize > 2) { + mDeviceMemorySize = GetProcessingSettings().forceMemoryPoolSize; if (!IsGPU() || mOutputControl.useInternal()) { mHostMemorySize = mDeviceMemorySize; } } - if (mProcessingSettings.forceHostMemoryPoolSize) { - mHostMemorySize = mProcessingSettings.forceHostMemoryPoolSize; + if (GetProcessingSettings().forceHostMemoryPoolSize) { + mHostMemorySize = GetProcessingSettings().forceHostMemoryPoolSize; } for (uint32_t i = 0; i < mProcessors.size(); i++) { @@ -399,7 +401,7 @@ int32_t GPUReconstruction::InitPhasePermanentMemory() int32_t GPUReconstruction::InitPhaseAfterDevice() { - if (mProcessingSettings.forceMaxMemScalers <= 1 && mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().forceMaxMemScalers <= 1 && GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mMemoryScalers->rescaleMaxMem(IsGPU() ? 
mDeviceMemorySize : mHostMemorySize); } for (uint32_t i = 0; i < mChains.size(); i++) { @@ -446,7 +448,7 @@ int32_t GPUReconstruction::Exit() mChains.clear(); // Make sure we destroy a possible ITS GPU tracker before we call the destructors mHostConstantMem.reset(); // Reset these explicitly before the destruction of other members unloads the library - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { for (uint32_t i = 0; i < mMemoryResources.size(); i++) { if (mMemoryResources[i].mReuse >= 0) { continue; @@ -481,9 +483,38 @@ void GPUReconstruction::ComputeReuseMax(GPUProcessor* proc) } } +int16_t GPUReconstruction::RegisterMemoryAllocationHelper(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re) +{ + if (!(type & (GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU))) { + if ((type & GPUMemoryResource::MEMORY_SCRATCH) && !GetProcessingSettings().keepDisplayMemory) { // keepAllMemory --> keepDisplayMemory + type |= (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU ? 
GPUMemoryResource::MEMORY_HOST : GPUMemoryResource::MEMORY_GPU); + } else { + type |= GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU; + } + } + if (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU) { + type &= ~GPUMemoryResource::MEMORY_GPU; + } + mMemoryResources.emplace_back(proc, setPtr, (GPUMemoryResource::MemoryType)type, name); + if (mMemoryResources.size() >= 32768) { + throw std::bad_alloc(); + } + uint16_t retVal = mMemoryResources.size() - 1; + if (re.type != GPUMemoryReuse::NONE && !GetProcessingSettings().disableMemoryReuse) { + const auto& it = mMemoryReuse1to1.find(re.id); + if (it == mMemoryReuse1to1.end()) { + mMemoryReuse1to1[re.id] = {proc, retVal}; + } else { + mMemoryResources[retVal].mReuse = it->second.res[0]; + it->second.res.emplace_back(retVal); + } + } + return retVal; +} + size_t GPUReconstruction::AllocateRegisteredMemory(GPUProcessor* proc, bool resetCustom) { - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating memory %p", (void*)proc); } size_t total = 0; @@ -496,7 +527,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(GPUProcessor* proc, bool rese } } } - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating memory done"); } return total; @@ -504,7 +535,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(GPUProcessor* proc, bool rese size_t GPUReconstruction::AllocateRegisteredPermanentMemory() { - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating Permanent Memory"); } int32_t total = 0; @@ -515,7 +546,7 @@ size_t GPUReconstruction::AllocateRegisteredPermanentMemory() } mHostMemoryPermanent = mHostMemoryPool; mDeviceMemoryPermanent = mDeviceMemoryPool; - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Permanent Memory Done"); } return total; @@ -534,7 +565,7 @@ size_t 
GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, GPUError("Insufficient reuse memory %lu < %lu (%s) (%s)", mMemoryResources[res->mReuse].mSize, retVal, res->mName, device); throw std::bad_alloc(); } - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Reused (" << device << ") " << res->mName << ": " << retVal << "\n"; } return retVal; @@ -568,7 +599,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, std::cerr << "Memory pool size exceeded (" << device << ") (" << res->mName << ": " << (memorypoolend ? (memorysize + ptrDiff(memorypool, memorypoolend)) : ptrDiff(memorypool, memorybase)) << " > " << memorysize << "\n"; throw std::bad_alloc(); } - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (" << device << ") " << res->mName << ": " << retVal << " - available: " << (memorypoolend ? ptrDiff(memorypoolend, memorypool) : (memorysize - ptrDiff(memorypool, memorybase))) << "\n"; } return retVal; @@ -576,7 +607,7 @@ size_t GPUReconstruction::AllocateRegisteredMemoryHelper(GPUMemoryResource* res, void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUOutputControl* control, GPUReconstruction* recPool) { - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && (control == nullptr || control->useInternal())) { if (!(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { if (res->mPtrDevice && res->mReuse < 0) { operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); @@ -593,7 +624,7 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, } res->mPtr = GPUProcessor::alignPointer(res->mPtrDevice); 
res->SetPointers(res->mPtr); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << (res->mReuse >= 0 ? "Reused " : "Allocated ") << res->mName << ": " << res->mSize << "\n"; } if (res->mType & GPUMemoryResource::MEMORY_STACK) { @@ -612,13 +643,13 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, if (IsGPU() && res->mOverrideSize < GPUCA_BUFFER_ALIGNMENT) { res->mOverrideSize = GPUCA_BUFFER_ALIGNMENT; } - if ((!IsGPU() || (res->mType & GPUMemoryResource::MEMORY_HOST) || mProcessingSettings.keepDisplayMemory) && !(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { // keepAllMemory --> keepDisplayMemory + if ((!IsGPU() || (res->mType & GPUMemoryResource::MEMORY_HOST) || GetProcessingSettings().keepDisplayMemory) && !(res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { // keepAllMemory --> keepDisplayMemory if (control && control->useExternal()) { if (control->allocator) { res->mSize = std::max((size_t)res->SetPointers((void*)1) - 1, res->mOverrideSize); res->mPtr = control->allocator(CAMath::nextMultipleOf(res->mSize)); res->mSize = std::max(ptrDiff(res->SetPointers(res->mPtr), res->mPtr), res->mOverrideSize); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (from callback) " << res->mName << ": " << res->mSize << "\n"; } } else { @@ -676,7 +707,7 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) { throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); return 
GPUProcessor::alignPointer(mUnmanagedChunks.back().get()); } else { @@ -689,7 +720,7 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) throw std::bad_alloc(); } UpdateMaxMemoryUsed(); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n"; } return retVal; @@ -711,7 +742,7 @@ void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size) throw std::bad_alloc(); } UpdateMaxMemoryUsed(); - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; } @@ -773,10 +804,10 @@ void GPUReconstruction::FreeRegisteredMemory(int16_t ires) void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) { - if (mProcessingSettings.allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) { + if (GetProcessingSettings().allocDebugLevel >= 2 && (res->mPtr || res->mPtrDevice)) { std::cout << "Freeing " << res->mName << ": size " << res->mSize << " (reused " << res->mReuse << ")\n"; } - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL && res->mReuse < 0) { operator delete(res->mPtrDevice, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } res->mPtr = nullptr; @@ -789,7 +820,7 @@ void GPUReconstruction::ReturnVolatileDeviceMemory() mDeviceMemoryPool = mVolatileMemoryStart; mVolatileMemoryStart = nullptr; } - if (mProcessingSettings.allocDebugLevel >= 2) { + if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) 
<< "\n"; } } @@ -807,7 +838,7 @@ void GPUReconstruction::PushNonPersistentMemory(uint64_t tag) void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) { - if (mProcessingSettings.keepDisplayMemory || mProcessingSettings.disableMemoryReuse) { + if (GetProcessingSettings().keepDisplayMemory || GetProcessingSettings().disableMemoryReuse) { return; } if (mNonPersistentMemoryStack.size() == 0) { @@ -816,7 +847,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) if (tag != 0 && std::get<3>(mNonPersistentMemoryStack.back()) != tag) { GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str()); } - if ((mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) && (IsGPU() || mProcessingSettings.forceHostMemoryPoolSize)) { + if ((GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) { printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); PrintMemoryOverview(); printf("%76s", ""); @@ -872,7 +903,7 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); mVolatileMemoryStart = nullptr; - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); mDeviceMemoryPool = GPUProcessor::alignPointer(mDeviceMemoryPermanent); mHostMemoryPoolEnd = mHostMemoryPoolBlocked ? 
mHostMemoryPoolBlocked : ((char*)mHostMemoryBase + mHostMemorySize); @@ -895,7 +926,7 @@ void GPUReconstruction::PrintMemoryMax() void GPUReconstruction::PrintMemoryOverview() { - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { printf("Memory Allocation: Host %'13zd / %'13zu (Permanent %'13zd, Data %'13zd, Scratch %'13zd), Device %'13zd / %'13zu, (Permanent %'13zd, Data %'13zd, Scratch %'13zd) %zu chunks\n", ptrDiff(mHostMemoryPool, mHostMemoryBase) + ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), mHostMemorySize, ptrDiff(mHostMemoryPermanent, mHostMemoryBase), ptrDiff(mHostMemoryPool, mHostMemoryPermanent), ptrDiff((char*)mHostMemoryBase + mHostMemorySize, mHostMemoryPoolEnd), ptrDiff(mDeviceMemoryPool, mDeviceMemoryBase) + ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), mDeviceMemorySize, ptrDiff(mDeviceMemoryPermanent, mDeviceMemoryBase), ptrDiff(mDeviceMemoryPool, mDeviceMemoryPermanent), ptrDiff((char*)mDeviceMemoryBase + mDeviceMemorySize, mDeviceMemoryPoolEnd), @@ -934,7 +965,7 @@ void GPUReconstruction::PrintMemoryStatistics() int32_t GPUReconstruction::registerMemoryForGPU(const void* ptr, size_t size) { - if (mProcessingSettings.noGPUMemoryRegistration) { + if (GetProcessingSettings().noGPUMemoryRegistration) { return 0; } int32_t retVal = registerMemoryForGPU_internal(ptr, size); @@ -946,7 +977,7 @@ int32_t GPUReconstruction::registerMemoryForGPU(const void* ptr, size_t size) int32_t GPUReconstruction::unregisterMemoryForGPU(const void* ptr) { - if (mProcessingSettings.noGPUMemoryRegistration) { + if (GetProcessingSettings().noGPUMemoryRegistration) { return 0; } const auto& pos = mRegisteredMemoryPtrs.find(ptr); @@ -982,10 +1013,10 @@ int32_t GPUReconstruction::getGeneralStepNum(GeneralStep step, bool validCheck) void GPUReconstruction::RunPipelineWorker() { - 
if (!mInitialized || !mProcessingSettings.doublePipeline || mMaster != nullptr || !mSlaves.size()) { + if (!mInitialized || !GetProcessingSettings().doublePipeline || mMaster != nullptr || !mSlaves.size()) { throw std::invalid_argument("Cannot start double pipeline mode"); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Pipeline worker started"); } bool terminate = false; @@ -1011,7 +1042,7 @@ void GPUReconstruction::RunPipelineWorker() } q->c.notify_one(); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Pipeline worker ended"); } } @@ -1107,7 +1138,7 @@ void GPUReconstruction::DumpSettings(const char* dir) std::string f; f = dir; f += "settings.dump"; - DumpStructToFile(&mGRPSettings, f.c_str()); + DumpStructToFile(mGRPSettings.get(), f.c_str()); for (uint32_t i = 0; i < mChains.size(); i++) { mChains[i]->DumpSettings(dir); } @@ -1121,11 +1152,11 @@ void GPUReconstruction::UpdateDynamicSettings(const GPUSettingsRecDynamic* d) void GPUReconstruction::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p, const GPUSettingsRecDynamic* d) { if (g) { - mGRPSettings = *g; + *mGRPSettings = *g; } if (p) { - mProcessingSettings.debugLevel = p->debugLevel; - mProcessingSettings.resetTimers = p->resetTimers; + mProcessingSettings->debugLevel = p->debugLevel; + mProcessingSettings->resetTimers = p->resetTimers; } GPURecoStepConfiguration* w = nullptr; if (mRecoSteps.steps.isSet(GPUDataTypes::RecoStep::TPCdEdx)) { @@ -1142,11 +1173,11 @@ int32_t GPUReconstruction::ReadSettings(const char* dir) std::string f; f = dir; f += "settings.dump"; - new (&mGRPSettings) GPUSettingsGRP; - if (ReadStructFromFile(f.c_str(), &mGRPSettings)) { + new (mGRPSettings.get()) GPUSettingsGRP; + if (ReadStructFromFile(f.c_str(), mGRPSettings.get())) { return 1; } - param().UpdateSettings(&mGRPSettings); + param().UpdateSettings(mGRPSettings.get()); for (uint32_t i = 0; 
i < mChains.size(); i++) { mChains[i]->ReadSettings(dir); } @@ -1173,9 +1204,9 @@ void GPUReconstruction::SetSettings(const GPUSettingsGRP* grp, const GPUSettings GPUError("Cannot update settings while initialized"); throw std::runtime_error("Settings updated while initialized"); } - mGRPSettings = *grp; + *mGRPSettings = *grp; if (proc) { - mProcessingSettings = *proc; + *mProcessingSettings = *proc; } if (workflow) { mRecoSteps.steps = workflow->steps; @@ -1183,7 +1214,7 @@ void GPUReconstruction::SetSettings(const GPUSettingsGRP* grp, const GPUSettings mRecoSteps.inputs = workflow->inputs; mRecoSteps.outputs = workflow->outputs; } - param().SetDefaults(&mGRPSettings, rec, proc, workflow); + param().SetDefaults(mGRPSettings.get(), rec, proc, workflow); } void GPUReconstruction::SetOutputControl(void* ptr, size_t size) @@ -1193,10 +1224,14 @@ void GPUReconstruction::SetOutputControl(void* ptr, size_t size) SetOutputControl(outputControl); } -void GPUReconstruction::SetInputControl(void* ptr, size_t size) -{ - mInputControl.set(ptr, size); -} +void GPUReconstruction::SetInputControl(void* ptr, size_t size) { mInputControl.set(ptr, size); } +GPUReconstruction::DeviceType GPUReconstruction::GetDeviceType() const { return (DeviceType)GetDeviceBackendSettings().deviceType; } +const GPUParam& GPUReconstruction::GetParam() const { return mHostConstantMem->param; } +void GPUReconstruction::SetResetTimers(bool reset) { mProcessingSettings->resetTimers = reset; } +void GPUReconstruction::SetDebugLevelTmp(int32_t level) { mProcessingSettings->debugLevel = level; } +GPUParam& GPUReconstruction::param() { return mHostConstantMem->param; } +const GPUTrackingInOutPointers GPUReconstruction::GetIOPtrs() const { return mHostConstantMem->ioPtrs; } +const GPUCalibObjectsConst& GPUReconstruction::GetCalib() const { return processors()->calibObjects; } ThrustVolatileAllocator::ThrustVolatileAllocator(GPUReconstruction* r) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h 
b/GPU/GPUTracking/Base/GPUReconstruction.h index 23fb6e4d9ff06..b6256f7f8ad82 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -25,13 +25,13 @@ #include #include -#include "GPUTRDDef.h" -#include "GPUParam.h" -#include "GPUSettings.h" -#include "GPUOutputControl.h" +#include "GPUDataTypes.h" #include "GPUMemoryResource.h" -#include "GPUConstantMem.h" -#include "GPULogging.h" +#include "GPUOutputControl.h" + +/*#include "GPUParam.h" +#include "GPUSettings.h" +#include "GPULogging.h"*/ namespace o2::its { @@ -49,6 +49,13 @@ struct GPUReconstructionThreading; class GPUROOTDumpCore; class ThrustVolatileAllocator; struct GPUDefParameters; +class GPUMemoryResource; +struct GPUSettingsDeviceBackend; +struct GPUSettingsGRP; +struct GPUSettingsProcessing; +struct GPUSettingsRec; +struct GPUSettingsRecDynamic; +struct GPUMemoryReuse; namespace gpu_reconstruction_kernels { @@ -186,18 +193,20 @@ class GPUReconstruction bool slavesExist() { return mSlaves.size() || mMaster; } // Getters / setters for parameters - DeviceType GetDeviceType() const { return (DeviceType)mDeviceBackendSettings.deviceType; } + DeviceType GetDeviceType() const; bool IsGPU() const { return GetDeviceType() != DeviceType::INVALID_DEVICE && GetDeviceType() != DeviceType::CPU; } - const GPUParam& GetParam() const { return mHostConstantMem->param; } + const GPUParam& GetParam() const; const GPUConstantMem& GetConstantMem() const { return *mHostConstantMem; } - const GPUSettingsGRP& GetGRPSettings() const { return mGRPSettings; } - const GPUSettingsDeviceBackend& GetDeviceBackendSettings() { return mDeviceBackendSettings; } - const GPUSettingsProcessing& GetProcessingSettings() const { return mProcessingSettings; } + const GPUTrackingInOutPointers GetIOPtrs() const; + const GPUSettingsGRP& GetGRPSettings() const { return *mGRPSettings; } + const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return *mDeviceBackendSettings; } + const 
GPUSettingsProcessing& GetProcessingSettings() const { return *mProcessingSettings; } + const GPUCalibObjectsConst& GetCalib() const; bool IsInitialized() const { return mInitialized; } void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr); void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr); - void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init() - void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings() + void SetResetTimers(bool reset); // May update also after Init() + void SetDebugLevelTmp(int32_t level); // Temporarily, before calling SetSettings() void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr); void UpdateDynamicSettings(const GPUSettingsRecDynamic* d); void SetOutputControl(const GPUOutputControl& v) { mOutputControl = v; } @@ -272,6 +281,7 @@ class GPUReconstruction size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr* mem, InOutPointerType type, T** nonConstPtrs = nullptr); template T* AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr& u); + int16_t RegisterMemoryAllocationHelper(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re); // Private helper functions to dump / load flat objects template @@ -292,17 +302,17 @@ class GPUReconstruction // Pointers to tracker classes GPUConstantMem* processors() { return mHostConstantMem.get(); } const GPUConstantMem* processors() const { return mHostConstantMem.get(); } - GPUParam& param() { return mHostConstantMem->param; } + GPUParam& param(); std::unique_ptr mHostConstantMem; GPUConstantMem* mDeviceConstantMem = nullptr; // Settings - GPUSettingsGRP 
mGRPSettings; // Global Run Parameters - GPUSettingsDeviceBackend mDeviceBackendSettings; // Processing Parameters (at constructor level) - GPUSettingsProcessing mProcessingSettings; // Processing Parameters (at init level) - GPUOutputControl mOutputControl; // Controls the output of the individual components - GPUOutputControl mInputControl; // Prefefined input memory location for reading standalone dumps - std::unique_ptr mMemoryScalers; // Scalers how much memory will be needed + std::unique_ptr mGRPSettings; // Global Run Parameters + std::unique_ptr mDeviceBackendSettings; // Processing Parameters (at constructor level) + std::unique_ptr mProcessingSettings; // Processing Parameters (at init level) + GPUOutputControl mOutputControl; // Controls the output of the individual components + GPUOutputControl mInputControl; // Prefefined input memory location for reading standalone dumps + std::unique_ptr mMemoryScalers; // Scalers how much memory will be needed GPURecoStepConfiguration mRecoSteps; @@ -392,35 +402,6 @@ class GPUReconstruction static GPUReconstruction* GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); }; -template -inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr& u) -{ - if (n == 0) { - u.reset(nullptr); - return nullptr; - } - T* retVal; - if (mInputControl.useExternal()) { - u.reset(nullptr); - mInputControl.checkCurrent(); - GPUProcessor::computePointerWithAlignment(mInputControl.ptrCurrent, retVal, n); - if ((size_t)((char*)mInputControl.ptrCurrent - (char*)mInputControl.ptrBase) > mInputControl.size) { - throw std::bad_alloc(); - } - } else { - u.reset(new T[n]); - retVal = u.get(); - if (mProcessingSettings.registerStandaloneInputMemory) { - if (registerMemoryForGPU(u.get(), n * sizeof(T))) { - GPUError("Error registering memory for GPU: %p - %ld bytes\n", (void*)u.get(), (int64_t)(n * sizeof(T))); - throw std::bad_alloc(); - } - } - } - ptr = retVal; - return retVal; -} - template 
inline T* GPUReconstruction::AddChain(Args... args) { @@ -431,31 +412,7 @@ inline T* GPUReconstruction::AddChain(Args... args) template inline int16_t GPUReconstruction::RegisterMemoryAllocation(T* proc, void* (T::*setPtr)(void*), int32_t type, const char* name, const GPUMemoryReuse& re) { - if (!(type & (GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU))) { - if ((type & GPUMemoryResource::MEMORY_SCRATCH) && !mProcessingSettings.keepDisplayMemory) { // keepAllMemory --> keepDisplayMemory - type |= (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU ? GPUMemoryResource::MEMORY_HOST : GPUMemoryResource::MEMORY_GPU); - } else { - type |= GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU; - } - } - if (proc->mGPUProcessorType == GPUProcessor::PROCESSOR_TYPE_CPU) { - type &= ~GPUMemoryResource::MEMORY_GPU; - } - mMemoryResources.emplace_back(proc, static_cast(setPtr), (GPUMemoryResource::MemoryType)type, name); - if (mMemoryResources.size() >= 32768) { - throw std::bad_alloc(); - } - uint16_t retVal = mMemoryResources.size() - 1; - if (re.type != GPUMemoryReuse::NONE && !mProcessingSettings.disableMemoryReuse) { - const auto& it = mMemoryReuse1to1.find(re.id); - if (it == mMemoryReuse1to1.end()) { - mMemoryReuse1to1[re.id] = {proc, retVal}; - } else { - mMemoryResources[retVal].mReuse = it->second.res[0]; - it->second.res.emplace_back(retVal); - } - } - return retVal; + return RegisterMemoryAllocationHelper(proc, static_cast(setPtr), type, name, re); } template @@ -471,7 +428,7 @@ inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate) { static_assert(sizeof(T) > sizeof(GPUProcessor), "Need to setup derived class"); if (allocate) { - proc->SetMaxData(mHostConstantMem->ioPtrs); + proc->SetMaxData(GetIOPtrs()); } if (proc->mGPUProcessorType != GPUProcessor::PROCESSOR_TYPE_DEVICE && proc->mLinkedProcessor) { std::memcpy((void*)proc->mLinkedProcessor, (const void*)proc, sizeof(*proc)); diff --git 
a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 39507beda8a55..2453ce4a2328f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -30,19 +30,18 @@ #include "GPUTRDTrackletLabels.h" #include "GPUMemoryResource.h" #include "GPUConstantMem.h" +#include "GPULogging.h" #include "GPUMemorySizeScalers.h" +#include "GPUReconstructionProcessingKernels.inc" + #include #include -#define GPUCA_LOGGING_PRINTF -#include "GPULogging.h" - #ifndef _WIN32 #include #endif using namespace o2::gpu; -using namespace o2::gpu::gpu_reconstruction_kernels; constexpr GPUReconstructionCPU::krnlRunRange GPUReconstructionCPU::krnlRunRangeNone; constexpr GPUReconstructionCPU::krnlEvent GPUReconstructionCPU::krnlEventNone; @@ -55,7 +54,7 @@ GPUReconstructionCPU::~GPUReconstructionCPU() } template -inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args) { auto& x = _xyz.x; auto& y = _xyz.y; @@ -67,7 +66,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu } int32_t nThreads = getNKernelHostThreads(false); if (nThreads > 1) { - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { printf("Running %d Threads\n", mThreading->activeThreads->max_concurrency()); } tbb::this_task_arena::isolate([&] { @@ -89,7 +88,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetu } template <> -inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { int32_t nThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); if (nThreads > 1) { @@ -110,7 +109,7 @@ inline void GPUReconstructionCPUBackend::runKernelBackendInternal -void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args) { #pragma GCC diagnostic push #if defined(__clang__) @@ -121,14 +120,14 @@ void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs -gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) +GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) { if (gpu == -1) { gpu = IsGPU(); } const auto num = GetKernelNum(); const auto* p = gpu ? mParDevice : mParCPU; - gpu_reconstruction_kernels::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]}; + GPUReconstructionProcessing::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]}; if (ret.nThreads == 0) { ret.nThreads = gpu ? 
mThreadCount : 1u; } @@ -138,9 +137,9 @@ gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProper return ret; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - template void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs& args); \ - template krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + template void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args); \ + template GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL @@ -169,7 +168,7 @@ size_t GPUReconstructionCPU::TransferMemoryResourcesHelper(GPUProcessor* proc, i if (!(res.mType & GPUMemoryResource::MEMORY_GPU) || (res.mType & GPUMemoryResource::MEMORY_CUSTOM_TRANSFER)) { continue; } - if (!mProcessingSettings.keepAllMemory && !all && (res.mType & exc) && !(res.mType & inc)) { + if (!GetProcessingSettings().keepAllMemory && !all && (res.mType & exc) && !(res.mType & inc)) { continue; } if (toGPU) { @@ -197,7 +196,7 @@ int32_t GPUReconstructionCPU::InitDevice() { mActiveHostKernelThreads = mMaxHostThreads; mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { if (mDeviceMemorySize > mHostMemorySize) { mHostMemorySize = mDeviceMemorySize; @@ -207,7 +206,7 @@ int32_t GPUReconstructionCPU::InitDevice() mHostMemoryPermanent = mHostMemoryBase; ClearAllocatedMemory(); } - if (mProcessingSettings.inKernelParallel) { + if (GetProcessingSettings().inKernelParallel) { mBlockCount = mMaxHostThreads; } mProcShadow.mProcessorsProc = processors(); @@ -216,7 +215,7 @@ int32_t 
GPUReconstructionCPU::InitDevice() int32_t GPUReconstructionCPU::ExitDevice() { - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { if (mMaster == nullptr) { operator delete(mHostMemoryBase, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); } @@ -232,13 +231,13 @@ int32_t GPUReconstructionCPU::RunChains() mStatNEvents++; mNEventsProcessed++; - if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) { printf("Allocated memory when starting processing %34s", ""); PrintMemoryOverview(); } mTimerTotal.Start(); const std::clock_t cpuTimerStart = std::clock(); - if (mProcessingSettings.doublePipeline) { + if (GetProcessingSettings().doublePipeline) { int32_t retVal = EnqueuePipeline(); if (retVal) { return retVal; @@ -259,7 +258,7 @@ int32_t GPUReconstructionCPU::RunChains() } mTimerTotal.Stop(); mStatCPUTime += (double)(std::clock() - cpuTimerStart) / CLOCKS_PER_SEC; - if (mProcessingSettings.debugLevel >= 3 || mProcessingSettings.allocDebugLevel) { + if (GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) { printf("Allocated memory when ending processing %36s", ""); PrintMemoryOverview(); } @@ -281,7 +280,7 @@ int32_t GPUReconstructionCPU::RunChains() for (int32_t j = 0; j < mTimers[i]->num; j++) { HighResTimer& timer = mTimers[i]->timer[j]; time += timer.GetElapsedTime(); - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { timer.Reset(); } } @@ -297,7 +296,7 @@ int32_t GPUReconstructionCPU::RunChains() snprintf(bandwidth, 256, " (%8.3f GB/s - %'14zu bytes - %'14zu per call)", mTimers[i]->memSize / time * 1e-9, mTimers[i]->memSize / mStatNEvents, mTimers[i]->memSize / mStatNEvents / mTimers[i]->count); } printf("Execution Time: Task (%c %8ux): %50s Time: 
%'10.0f us%s\n", type == 0 ? 'K' : 'C', mTimers[i]->count, mTimers[i]->name.c_str(), time * 1000000 / mStatNEvents, bandwidth); - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { mTimers[i]->count = 0; mTimers[i]->memSize = 0; } @@ -317,7 +316,7 @@ int32_t GPUReconstructionCPU::RunChains() printf("Execution Time: Step (D %8ux): %11s %38s Time: %'10.0f us (%8.3f GB/s - %'14zu bytes - %'14zu per call)\n", mTimersRecoSteps[i].countToHost, "DMA to Host", GPUDataTypes::RECO_STEP_NAMES[i], mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1000000 / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].timerToHost.GetElapsedTime() * 1e-9, mTimersRecoSteps[i].bytesToHost / mStatNEvents, mTimersRecoSteps[i].bytesToHost / mTimersRecoSteps[i].countToHost); } - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { mTimersRecoSteps[i].bytesToGPU = mTimersRecoSteps[i].bytesToHost = 0; mTimersRecoSteps[i].timerToGPU.Reset(); mTimersRecoSteps[i].timerToHost.Reset(); @@ -340,7 +339,7 @@ int32_t GPUReconstructionCPU::RunChains() } else if (GetProcessingSettings().debugLevel >= 0) { GPUInfo("Total Wall Time: %10.0f us%s", mStatWallTime, nEventReport.c_str()); } - if (mProcessingSettings.resetTimers) { + if (GetProcessingSettings().resetTimers) { mStatNEvents = 0; mStatCPUTime = 0; mTimerTotal.Reset(); @@ -366,7 +365,7 @@ void GPUReconstructionCPU::UpdateParamOccupancyMap(const uint32_t* mapHost, cons if (!((size_t)¶m().occupancyTotal - (size_t)¶m().occupancyMap == sizeof(param().occupancyMap) && sizeof(param().occupancyMap) == sizeof(size_t) && sizeof(param().occupancyTotal) < sizeof(size_t))) { throw std::runtime_error("occupancy data not consecutive in GPUParam"); } - const auto threadContext = GetThreadContext(); + const auto holdContext = GetThreadContext(); size_t tmp[2] = {(size_t)mapGPU, 0}; memcpy(&tmp[1], &occupancyTotal, sizeof(occupancyTotal)); 
WriteToConstantMemory((char*)&processors()->param.occupancyMap - (char*)processors(), &tmp, sizeof(param().occupancyMap) + sizeof(param().occupancyTotal), stream); diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index b37bf2b75f01c..d0d8b05c4af0e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -16,14 +16,9 @@ #define GPURECONSTRUCTIONICPU_H #include "GPUReconstructionProcessing.h" -#include "GPUConstantMem.h" #include #include -#include "GPUGeneralKernels.h" -#include "GPUReconstructionKernelIncludes.h" -#include "GPUReconstructionKernels.h" - namespace Ort { struct SessionOptions; @@ -32,20 +27,7 @@ struct SessionOptions; namespace o2::gpu { -class GPUReconstructionCPUBackend : public GPUReconstructionProcessing -{ - public: - ~GPUReconstructionCPUBackend() override = default; - - protected: - GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing(cfg) {} - template - void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs& args); - template - void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args); -}; - -class GPUReconstructionCPU : public GPUReconstructionKernels +class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface { friend GPUReconstruction* GPUReconstruction::GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); friend class GPUChain; @@ -55,10 +37,10 @@ class GPUReconstructionCPU : public GPUReconstructionKernels - void runKernel(krnlSetup&& setup, Args&&... 
args); template - gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu = -1); + krnlProperties getKernelProperties(int gpu = -1); + template + void runKernelBackend(const krnlSetupArgs& args); virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } @@ -75,21 +57,10 @@ class GPUReconstructionCPU : public GPUReconstructionKernels, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ - { \ - krnlSetupArgs args(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward)); \ - const uint32_t num = GetKernelNum(); \ - if (cpuFallback) { \ - GPUReconstructionCPU::runKernelImpl(num, &args); \ - } else { \ - runKernelImpl(num, &args); \ - } \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL + GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg) {} + + template + void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override { return 0; } int32_t unregisterMemoryForGPU_internal(const void* ptr) override { return 0; } @@ -132,72 +103,10 @@ class GPUReconstructionCPU : public GPUReconstructionKernels + void runKernelInterface(krnlSetup&& setup, Args const&... args); }; -template -inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args) -{ - HighResTimer* t = nullptr; - GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); - if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { - throw std::runtime_error("Failure running general kernel without defining RecoStep"); - } - int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 
2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; - uint32_t& nThreads = setup.x.nThreads; - uint32_t& nBlocks = setup.x.nBlocks; - const uint32_t stream = setup.x.stream; - auto prop = getKernelProperties(); - const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads; - const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount)); - if (nBlocks == (uint32_t)-1) { - nBlocks = (nThreads + autoThreads - 1) / autoThreads; - nThreads = autoThreads; - } else if (nBlocks == (uint32_t)-2) { - nBlocks = nThreads; - nThreads = autoThreads; - } else if (nBlocks == (uint32_t)-3) { - nBlocks = autoBlocks; - nThreads = autoThreads; - } else if ((int32_t)nThreads < 0) { - nThreads = cpuFallback ? 1 : -nThreads; - } - if (nThreads > GPUCA_MAX_THREADS) { - throw std::runtime_error("GPUCA_MAX_THREADS exceeded"); - } - if (mProcessingSettings.debugLevel >= 3) { - GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str()); - } - if (nThreads == 0 || nBlocks == 0) { - return; - } - if (mProcessingSettings.debugLevel >= 1) { - t = &getKernelTimer(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream); - if ((!mProcessingSettings.deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) { - t->Start(); - } - } - double deviceTimerTime = 0.; - runKernelImplWrapper(gpu_reconstruction_kernels::classArgument(), cpuFallback, deviceTimerTime, std::forward(setup), std::forward(args)...); - if (GPUDebug(GetKernelName(), stream, mProcessingSettings.serializeGPU & 1)) { - throw std::runtime_error("kernel failure"); - } - if (mProcessingSettings.debugLevel >= 1) { - if (t) { - if (deviceTimerTime != 0.) 
{ - t->AddTime(deviceTimerTime); - if (t->IsRunning()) { - t->Abort(); - } - } else if (t->IsRunning()) { - t->Stop(); - } - } - if (CheckErrorCodes(cpuFallback) && !mProcessingSettings.ignoreNonFatalGPUErrors) { - throw std::runtime_error("kernel error code"); - } - } -} - } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h b/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h new file mode 100644 index 0000000000000..837516a93b6ae --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionCPUKernels.h @@ -0,0 +1,98 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCPUKernels.h +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONICPUKERNELS_H +#define GPURECONSTRUCTIONICPUKERNELS_H + +#include "GPUReconstructionCPU.h" +#include "GPUSettings.h" +#include "GPULogging.h" + +namespace o2::gpu +{ + +template +inline void GPUReconstructionCPU::runKernelInterface(krnlSetup&& setup, Args const&... args) +{ + HighResTimer* t = nullptr; + GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep(); + if (myStep == GPUDataTypes::RecoStep::NoRecoStep) { + throw std::runtime_error("Failure running general kernel without defining RecoStep"); + } + int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 
2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0; + uint32_t& nThreads = setup.x.nThreads; + uint32_t& nBlocks = setup.x.nBlocks; + const uint32_t stream = setup.x.stream; + auto prop = getKernelProperties(); + const int32_t autoThreads = cpuFallback ? 1 : prop.nThreads; + const int32_t autoBlocks = cpuFallback ? 1 : (prop.forceBlocks ? prop.forceBlocks : (prop.minBlocks * mBlockCount)); + if (nBlocks == (uint32_t)-1) { + nBlocks = (nThreads + autoThreads - 1) / autoThreads; + nThreads = autoThreads; + } else if (nBlocks == (uint32_t)-2) { + nBlocks = nThreads; + nThreads = autoThreads; + } else if (nBlocks == (uint32_t)-3) { + nBlocks = autoBlocks; + nThreads = autoThreads; + } else if ((int32_t)nThreads < 0) { + nThreads = cpuFallback ? 1 : -nThreads; + } + if (nThreads > GPUCA_MAX_THREADS) { + throw std::runtime_error("GPUCA_MAX_THREADS exceeded"); + } + if (GetProcessingSettings().debugLevel >= 3) { + GPUInfo("Running kernel %s (Stream %d, Index %d, Grid %d/%d) on %s", GetKernelName(), stream, setup.y.index, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : (cpuFallback ? "CPU (fallback)" : mDeviceName.c_str())); + } + if (nThreads == 0 || nBlocks == 0) { + return; + } + if (GetProcessingSettings().debugLevel >= 1) { + t = &getKernelTimer(myStep, !IsGPU() || cpuFallback ? 
getHostThreadIndex() : stream); + if ((!GetProcessingSettings().deviceTimers || !IsGPU() || cpuFallback) && (mNActiveThreadsOuterLoop < 2 || getHostThreadIndex() == 0)) { + t->Start(); + } + } + double deviceTimerTime = 0.; + krnlSetupArgs argPack{{}, {{setup.x, setup.y, setup.z}, deviceTimerTime}, {args...}}; + const uint32_t num = GetKernelNum(); + if (cpuFallback) { + GPUReconstructionCPU::runKernelVirtual(num, &argPack); + } else { + runKernelVirtual(num, &argPack); + } + + if (GPUDebug(GetKernelName(), stream, GetProcessingSettings().serializeGPU & 1)) { + throw std::runtime_error("kernel failure"); + } + if (GetProcessingSettings().debugLevel >= 1) { + if (t) { + if (deviceTimerTime != 0.) { + t->AddTime(deviceTimerTime); + if (t->IsRunning()) { + t->Abort(); + } + } else if (t->IsRunning()) { + t->Stop(); + } + } + if (CheckErrorCodes(cpuFallback) && !GetProcessingSettings().ignoreNonFatalGPUErrors) { + throw std::runtime_error("kernel error code"); + } + } +} + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx index e12ca7ec601ad..2dec88393f632 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionConvert.cxx @@ -25,7 +25,7 @@ #include "GPUO2DataTypes.h" #include "GPUDataTypes.h" #include "GPUTPCGeometry.h" -#include "AliHLTTPCRawCluster.h" +#include "AliHLTTPCRawCluster.h" // TODO: Is this still needed at all, or can it be removed? 
#include "GPUParam.h" #include "GPULogging.h" #include diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx index b389e99a0b2bb..9962bdf3922c1 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx @@ -14,6 +14,7 @@ #include "GPUReconstructionDeviceBase.h" #include "GPUReconstructionIncludes.h" +#include "GPUConstantMem.h" #include "GPUTPCTracker.h" @@ -93,21 +94,21 @@ int32_t GPUReconstructionDeviceBase::InitDevice() // CPU_SET(0, &mask); // sched_setaffinity(0, sizeof(mask), &mask); - if (mProcessingSettings.memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { + if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { GPUError("Individual memory allocation strategy unsupported for device\n"); return (1); } - if (mProcessingSettings.nStreams > GPUCA_MAX_STREAMS) { - GPUError("Too many straems requested %d > %d\n", mProcessingSettings.nStreams, GPUCA_MAX_STREAMS); + if (GetProcessingSettings().nStreams > GPUCA_MAX_STREAMS) { + GPUError("Too many straems requested %d > %d\n", GetProcessingSettings().nStreams, GPUCA_MAX_STREAMS); return (1); } void* semLock = nullptr; - if (mProcessingSettings.globalInitMutex && GetGlobalLock(semLock)) { + if (GetProcessingSettings().globalInitMutex && GetGlobalLock(semLock)) { return (1); } - if (mProcessingSettings.deviceTimers) { + if (GetProcessingSettings().deviceTimers) { AddGPUEvents(mDebugEvents); } @@ -117,7 +118,7 @@ int32_t GPUReconstructionDeviceBase::InitDevice() return (1); } - if (mProcessingSettings.globalInitMutex) { + if (GetProcessingSettings().globalInitMutex) { ReleaseGlobalLock(semLock); } @@ -129,7 +130,7 @@ int32_t GPUReconstructionDeviceBase::InitDevice() mProcShadow.mMemoryResProcessors = RegisterMemoryAllocation(&mProcShadow, &GPUProcessorProcessors::SetPointersDeviceProcessor, 
GPUMemoryResource::MEMORY_PERMANENT | GPUMemoryResource::MEMORY_HOST, "Processors"); AllocateRegisteredMemory(mProcShadow.mMemoryResProcessors); - if (mMaster == nullptr || mProcessingSettings.debugLevel >= 2) { + if (mMaster == nullptr || GetProcessingSettings().debugLevel >= 2) { GPUInfo("GPU Tracker initialization successfull"); // Verbosity reduced because GPU backend will print GPUImportant message! } @@ -186,13 +187,15 @@ void GPUReconstructionDeviceBase::runConstantRegistrators() size_t GPUReconstructionDeviceBase::TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) { if (!(res->Type() & GPUMemoryResource::MEMORY_GPU)) { - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Skipped transfer of non-GPU memory resource: %s", res->Name()); } return 0; } - if (mProcessingSettings.debugLevel >= 3 && (strcmp(res->Name(), "ErrorCodes") || mProcessingSettings.debugLevel >= 4)) { + if (GetProcessingSettings().debugLevel >= 3 && (strcmp(res->Name(), "ErrorCodes") || GetProcessingSettings().debugLevel >= 4)) { GPUInfo("Copying to %s: %s - %ld bytes", toGPU ? 
"GPU" : "Host", res->Name(), (int64_t)res->Size()); } return GPUMemCpy(dst, src, res->Size(), stream, toGPU, ev, evList, nEvents); } + +const GPUParam* GPUReconstructionDeviceBase::DeviceParam() const { return &mDeviceConstantMem->param; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h index f0e19f588e0f1..c8288f978f6ae 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h +++ b/GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h @@ -22,16 +22,12 @@ namespace o2::gpu { -#if !(defined(__CLING__) || defined(__ROOTCLING__) || defined(G__ROOT)) -extern template class GPUReconstructionKernels; -#endif - class GPUReconstructionDeviceBase : public GPUReconstructionCPU { public: ~GPUReconstructionDeviceBase() override; - const GPUParam* DeviceParam() const { return &mDeviceConstantMem->param; } + const GPUParam* DeviceParam() const; struct deviceConstantMemRegistration { deviceConstantMemRegistration(void* (*reg)()) { @@ -51,8 +47,6 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU int32_t unregisterMemoryForGPU_internal(const void* ptr) override; void unregisterRemainingRegisteredMemory(); - virtual const GPUTPCTracker* CPUTracker(int32_t iSector) { return &processors()->tpcTrackers[iSector]; } - int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override = 0; size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst) override; size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override = 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionIO.h b/GPU/GPUTracking/Base/GPUReconstructionIO.h index 2208c15846e09..810ebfffe1703 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionIO.h +++ 
b/GPU/GPUTracking/Base/GPUReconstructionIO.h @@ -16,10 +16,40 @@ #define GPURECONSTRUCTIONIO_H #include "GPUReconstruction.h" +#include "GPUSettings.h" namespace o2::gpu { +template +inline T* GPUReconstruction::AllocateIOMemoryHelper(size_t n, const T*& ptr, std::unique_ptr& u) +{ + if (n == 0) { + u.reset(nullptr); + return nullptr; + } + T* retVal; + if (mInputControl.useExternal()) { + u.reset(nullptr); + mInputControl.checkCurrent(); + GPUProcessor::computePointerWithAlignment(mInputControl.ptrCurrent, retVal, n); + if ((size_t)((char*)mInputControl.ptrCurrent - (char*)mInputControl.ptrBase) > mInputControl.size) { + throw std::bad_alloc(); + } + } else { + u.reset(new T[n]); + retVal = u.get(); + if (GetProcessingSettings().registerStandaloneInputMemory) { + if (registerMemoryForGPU(u.get(), n * sizeof(T))) { + GPUError("Error registering memory for GPU: %p - %ld bytes\n", (void*)u.get(), (int64_t)(n * sizeof(T))); + throw std::bad_alloc(); + } + } + } + ptr = retVal; + return retVal; +} + template inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type) { @@ -38,7 +68,7 @@ inline uint32_t GPUReconstruction::DumpData(FILE* fp, const T* const* entries, c fwrite(entries[i], sizeof(*entries[i]), num[i], fp); } } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Dumped %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); } return numTotal; @@ -72,7 +102,7 @@ inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, s numTotal += num[i]; } (void)r; - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld %s", (int64_t)numTotal, IOTYPENAMES[type]); } return numTotal; @@ -112,7 +142,7 @@ inline std::unique_ptr GPUReconstruction::ReadFlatObjectFromFile(const char* r = fread((void*)retVal.get(), 1, size[0], fp); r = fread(buf, 1, size[1], fp); fclose(fp); - if 
(mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld bytes from %s", (int64_t)r, file); } retVal->clearInternalBufferPtr(); @@ -151,7 +181,7 @@ inline std::unique_ptr GPUReconstruction::ReadStructFromFile(const char* file std::unique_ptr newObj(new T); r = fread(newObj.get(), 1, size, fp); fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld bytes from %s", (int64_t)r, file); } return newObj; @@ -172,7 +202,7 @@ inline int32_t GPUReconstruction::ReadStructFromFile(const char* file, T* obj) } r = fread(obj, 1, size, fp); fclose(fp); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Read %ld bytes from %s", (int64_t)r, file); } return 0; diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index b3f6c6ec817fd..2b16dfb32fe14 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -65,7 +65,7 @@ // GPU Host wrappers for kernel #define GPUCA_KRNL_HOST(x_class, ...) \ GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::backendInternal { \ + template <> class GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::backendInternal { \ public: \ template \ static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernels.h b/GPU/GPUTracking/Base/GPUReconstructionKernels.h deleted file mode 100644 index 7f500d471de1f..0000000000000 --- a/GPU/GPUTracking/Base/GPUReconstructionKernels.h +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. 
-// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUReconstructionKernels.h -/// \author David Rohr - -#ifndef GPURECONSTRUCTIONKERNELS_H -#define GPURECONSTRUCTIONKERNELS_H - -#include "GPUReconstruction.h" - -namespace o2::gpu -{ - -namespace gpu_reconstruction_kernels -{ - -template -struct classArgument { - using t = T; - static constexpr int32_t i = I; -}; - -struct krnlExec { - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} - constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} - uint32_t nBlocks; - uint32_t nThreads; - int32_t stream; - GPUReconstruction::krnlDeviceType device; - GPUDataTypes::RecoStep step; -}; -struct krnlRunRange { - constexpr krnlRunRange() = default; - constexpr krnlRunRange(uint32_t v) : index(v) {} - uint32_t index = 0; -}; -struct krnlEvent { - constexpr krnlEvent(deviceEvent* e = nullptr, deviceEvent* el = nullptr, int32_t n = 1) : ev(e), evList(el), nEvents(n) {} - deviceEvent* ev; - deviceEvent* evList; - int32_t nEvents; -}; - -struct krnlProperties { - krnlProperties(int32_t t = 0, int32_t b = 1, int32_t b2 = 0) : nThreads(t), minBlocks(b), forceBlocks(b2) {} - uint32_t nThreads; - uint32_t minBlocks; - uint32_t forceBlocks; - uint32_t 
total() { return forceBlocks ? forceBlocks : (nThreads * minBlocks); } -}; - -struct krnlSetup { - krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} - krnlExec x; - krnlRunRange y; - krnlEvent z; -}; - -struct krnlSetupTime : public krnlSetup { - double& t; -}; - -template -struct krnlSetupArgs : public gpu_reconstruction_kernels::classArgument { - krnlSetupArgs(const krnlExec& xx, const krnlRunRange& yy, const krnlEvent& zz, double& tt, const Args&... args) : s{{xx, yy, zz}, tt}, v(args...) {} - const krnlSetupTime s; - std::tuple sizeof(void*)), const Args&, const Args>::type...> v; -}; - -} // namespace gpu_reconstruction_kernels - -template -class GPUReconstructionKernels : public T -{ - public: - GPUReconstructionKernels(const GPUSettingsDeviceBackend& cfg) : T(cfg) {} - - protected: - using deviceEvent = gpu_reconstruction_kernels::deviceEvent; - using krnlExec = gpu_reconstruction_kernels::krnlExec; - using krnlRunRange = gpu_reconstruction_kernels::krnlRunRange; - using krnlEvent = gpu_reconstruction_kernels::krnlEvent; - using krnlSetup = gpu_reconstruction_kernels::krnlSetup; - using krnlSetupTime = gpu_reconstruction_kernels::krnlSetupTime; - template - using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs; - - virtual void runKernelImpl(const int num, const void* args) - { - switch (num) { // clang-format off -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ - case x_num: { \ - const auto& args2 = *(const krnlSetupArgs*)args; \ - T::template runKernelBackend(args2); \ - break; \ - } -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL - } // clang-format on - } -}; - -} // namespace o2::gpu - -#endif diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index aa01d26446b56..89517c612403b 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ 
b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -24,6 +24,7 @@ #include "GPUReconstruction.h" #include "GPUReconstructionAvailableBackends.h" +#include "GPUSettings.h" #include "utils/qlibload.h" diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx index d02309f66c762..a511102a492ef 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx @@ -15,6 +15,9 @@ #include "GPUReconstructionProcessing.h" #include "GPUReconstructionThreading.h" #include "GPUDefParametersLoad.inc" +#include "GPUReconstructionKernelIncludes.h" +#include "GPUSettings.h" +#include "GPULogging.h" using namespace o2::gpu; @@ -41,7 +44,7 @@ GPUReconstructionProcessing::~GPUReconstructionProcessing() int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) { int32_t nThreads = 0; - if (mProcessingSettings.inKernelParallel == 2 && mNActiveThreadsOuterLoop) { + if (GetProcessingSettings().inKernelParallel == 2 && mNActiveThreadsOuterLoop) { if (splitCores) { nThreads = mMaxHostThreads / mNActiveThreadsOuterLoop; nThreads += (uint32_t)getHostThreadIndex() < mMaxHostThreads % mNActiveThreadsOuterLoop; @@ -50,7 +53,7 @@ int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores) } nThreads = std::max(1, nThreads); } else { - nThreads = mProcessingSettings.inKernelParallel ? mMaxHostThreads : 1; + nThreads = GetProcessingSettings().inKernelParallel ? mMaxHostThreads : 1; } return nThreads; } @@ -59,7 +62,7 @@ void GPUReconstructionProcessing::SetNActiveThreads(int32_t n) { mActiveHostKernelThreads = std::max(1, n < 0 ? 
mMaxHostThreads : std::min(n, mMaxHostThreads)); mThreading->activeThreads = std::make_unique(mActiveHostKernelThreads); - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Set number of active parallel kernels threads on host to %d (%d requested)", mActiveHostKernelThreads, n); } } @@ -80,12 +83,12 @@ void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThr uint32_t GPUReconstructionProcessing::SetAndGetNActiveThreadsOuterLoop(bool condition, uint32_t max) { - if (condition && mProcessingSettings.inKernelParallel != 1) { - mNActiveThreadsOuterLoop = mProcessingSettings.inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; + if (condition && GetProcessingSettings().inKernelParallel != 1) { + mNActiveThreadsOuterLoop = GetProcessingSettings().inKernelParallel == 2 ? std::min(max, mMaxHostThreads) : mMaxHostThreads; } else { mNActiveThreadsOuterLoop = 1; } - if (mProcessingSettings.debugLevel >= 5) { + if (GetProcessingSettings().debugLevel >= 5) { printf("Running %d threads in outer loop\n", mNActiveThreadsOuterLoop); } return mNActiveThreadsOuterLoop; @@ -132,9 +135,9 @@ uint32_t GPUReconstructionProcessing::getNextTimerId() return id.fetch_add(1); } -std::unique_ptr GPUReconstructionProcessing::GetThreadContext() +std::unique_ptr GPUReconstructionProcessing::GetThreadContext() { - return std::make_unique(); + return std::make_unique(); } gpu_reconstruction_kernels::threadContext::threadContext() = default; diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h index 4ce8bc1b42743..9e611e57148c6 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessing.h +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessing.h @@ -16,7 +16,6 @@ #define GPURECONSTRUCTIONPROCESSING_H #include "GPUReconstruction.h" -#include "GPUReconstructionKernelIncludes.h" #include "utils/timer.h" #include @@ -32,7 +31,7 @@ namespace 
o2::gpu struct GPUDefParameters; -namespace gpu_reconstruction_kernels +namespace gpu_reconstruction_kernels // TODO: Get rid of this namespace { struct deviceEvent { constexpr deviceEvent() = default; @@ -72,6 +71,86 @@ class GPUReconstructionProcessing : public GPUReconstruction public: ~GPUReconstructionProcessing() override; + using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + using threadContext = gpu_reconstruction_kernels::threadContext; + + struct RecoStepTimerMeta { + HighResTimer timerToGPU; + HighResTimer timerToHost; + HighResTimer timerTotal; + double timerCPU = 0.; + size_t bytesToGPU = 0; + size_t bytesToHost = 0; + uint32_t countToGPU = 0; + uint32_t countToHost = 0; + }; + + template + struct kernelInterfaceArguments { + using t = T; + static constexpr int32_t i = I; + }; + + struct krnlExec { + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {} + constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {} + uint32_t nBlocks; + uint32_t nThreads; + int32_t stream; + GPUReconstruction::krnlDeviceType device; + GPUDataTypes::RecoStep step; + }; + struct krnlRunRange { + constexpr krnlRunRange() = default; + constexpr krnlRunRange(uint32_t v) : index(v) {} + uint32_t index = 0; + }; + struct krnlEvent { + constexpr krnlEvent(deviceEvent* e = nullptr, deviceEvent* el = nullptr, int32_t n = 1) : ev(e), evList(el), nEvents(n) {} + deviceEvent* ev; + deviceEvent* evList; + int32_t nEvents; + }; + + struct krnlProperties { + krnlProperties(int32_t t = 0, int32_t b = 1, int32_t 
b2 = 0) : nThreads(t), minBlocks(b), forceBlocks(b2) {} + uint32_t nThreads; + uint32_t minBlocks; + uint32_t forceBlocks; + uint32_t total() { return forceBlocks ? forceBlocks : (nThreads * minBlocks); } + }; + + struct krnlSetup { + krnlSetup(const krnlExec& xx, const krnlRunRange& yy = {0}, const krnlEvent& zz = {nullptr, nullptr, 0}) : x(xx), y(yy), z(zz) {} + krnlExec x; + krnlRunRange y; + krnlEvent z; + }; + + struct krnlSetupTime : public krnlSetup { + double& t; + }; + + template + struct krnlSetupArgs : public kernelInterfaceArguments { + const krnlSetupTime s; + std::tuple sizeof(void*)), const Args&, const Args>::type...> v; + }; + + template + class KernelInterface : public S + { + public: + template + KernelInterface(const Args&... args) : S(args...) + { + } + + protected: + virtual void runKernelVirtual(const int num, const void* args); + }; + // Threading int32_t getNKernelHostThreads(bool splitCores); uint32_t getNActiveThreadsOuterLoop() const { return mNActiveThreadsOuterLoop; } @@ -94,23 +173,12 @@ class GPUReconstructionProcessing : public GPUReconstruction template void AddGPUEvents(T*& events); - virtual std::unique_ptr GetThreadContext() override; + virtual std::unique_ptr GetThreadContext() override; - struct RecoStepTimerMeta { - HighResTimer timerToGPU; - HighResTimer timerToHost; - HighResTimer timerTotal; - double timerCPU = 0.; - size_t bytesToGPU = 0; - size_t bytesToHost = 0; - uint32_t countToGPU = 0; - uint32_t countToHost = 0; - }; const GPUDefParameters& getGPUParameters(bool doGPU) const override { return *(doGPU ? 
mParDevice : mParCPU); } protected: GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg); - using deviceEvent = gpu_reconstruction_kernels::deviceEvent; static const std::vector mKernelNames; @@ -181,7 +249,7 @@ HighResTimer& GPUReconstructionProcessing::getTimer(const char* name, int32_t nu static int32_t id = getNextTimerId(); timerMeta* timer = getTimerById(id); if (timer == nullptr) { - int32_t max = std::max({mMaxHostThreads, mProcessingSettings.nStreams}); + int32_t max = std::max({mMaxHostThreads, GPUCA_MAX_STREAMS}); timer = insertTimer(id, name, J, max, 1, RecoStep::NoRecoStep); } if (num == -1) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc new file mode 100644 index 0000000000000..49d02515372b8 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc @@ -0,0 +1,41 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionProcessingKernels.h +/// \author David Rohr + +#ifndef GPURECONSTRUCTIONPROCESSINGKERNELS_H +#define GPURECONSTRUCTIONPROCESSINGKERNELS_H + +#include "GPUReconstructionProcessing.h" +#include "GPUReconstructionKernelIncludes.h" + +namespace o2::gpu +{ + +template +void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args) +{ + switch (num) { // clang-format off +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ + case x_num: { \ + const auto& args2 = *(const krnlSetupArgs*)args; \ + ((T*)this)->template runKernelBackend(args2); \ + break; \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + } // clang-format on +} + +} // namespace o2::gpu + +#endif // GPURECONSTRUCTIONPROCESSINGKERNELS_H diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index 4693a1eff24f2..b25b93e957b15 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -14,6 +14,8 @@ #include "GPUReconstructionTimeframe.h" #include "GPUReconstruction.h" +#include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "display/GPUDisplayInterface.h" #include "GPUQA.h" #include "AliHLTTPCClusterMCData.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index fe2906caace80..970b331ea99fb 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -24,6 +24,7 @@ #include "GPUParamRTC.h" #include "GPUReconstructionCUDAHelpers.inc" #include "GPUDefParametersLoad.inc" +#include "GPUReconstructionProcessingKernels.inc" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" @@ -53,16 +54,23 @@ __global__ void dummyInitKernel(void*) {} #include 
"GPUReconstructionIncludesITS.h" -GPUReconstructionCUDABackend::GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) +GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg, sizeof(GPUReconstructionDeviceBase)) { if (mMaster == nullptr) { mInternals = new GPUReconstructionCUDAInternals; *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } - mDeviceBackendSettings.deviceType = DeviceType::CUDA; + mDeviceBackendSettings->deviceType = DeviceType::CUDA; +#ifndef __HIPCC__ // CUDA + mRtcSrcExtension = ".cu"; + mRtcBinExtension = ".fatbin"; +#else // HIP + mRtcSrcExtension = ".hip"; + mRtcBinExtension = ".o"; +#endif } -GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() +GPUReconstructionCUDA::~GPUReconstructionCUDA() { Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA if (mMaster == nullptr) { @@ -71,23 +79,11 @@ GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend() } static_assert(sizeof(cudaError_t) <= sizeof(int64_t) && cudaSuccess == 0); -int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionCUDA::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return internal::GPUReconstructionCUDAChkErr(error, file, line); } -GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) -{ -#ifndef __HIPCC__ // CUDA - mRtcSrcExtension = ".cu"; - mRtcBinExtension = ".fatbin"; -#else // HIP - mRtcSrcExtension = ".hip"; - mRtcBinExtension = ".o"; -#endif -} -GPUReconstructionCUDA::~GPUReconstructionCUDA() = default; - GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionCUDA(cfg); } void 
GPUReconstructionCUDA::GetITSTraits(std::unique_ptr* trackerTraits, std::unique_ptr* vertexerTraits, std::unique_ptr* timeFrame) @@ -109,7 +105,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() constexpr int32_t reqVerMaj = 2; constexpr int32_t reqVerMin = 0; #endif - if (mProcessingSettings.rtc.enable && mProcessingSettings.rtctech.runTest == 2) { + if (GetProcessingSettings().rtc.enable && GetProcessingSettings().rtctech.runTest == 2) { mWarpSize = GPUCA_WARP_SIZE; genAndLoadRTC(); exit(0); @@ -123,14 +119,14 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUError("Error getting CUDA Device Count"); return (1); } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Available CUDA devices:"); } std::vector devicesOK(count, false); std::vector devMemory(count, 0); bool contextCreated = false; for (int32_t i = 0; i < count; i++) { - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Examining device %d", i); } size_t free, total; @@ -139,14 +135,14 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() #else // HIP if (GPUChkErrI(hipSetDevice(i))) { #endif - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); } continue; } contextCreated = true; if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! 
Skipping it.", i); } GPUChkErr(cudaDeviceReset()); @@ -156,13 +152,13 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErr(cudaDeviceReset()); contextCreated = false; } - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } if (GPUChkErrI(cudaGetDeviceProperties(&deviceProp, i))) { continue; } - if (mProcessingSettings.debugLevel >= 4) { + if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained device properties for device %d", i); } int32_t deviceOK = true; @@ -179,7 +175,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } deviceSpeed = (double)deviceProp.multiProcessorCount * (double)deviceProp.clockRate * (double)deviceProp.warpSize * (double)free * (double)deviceProp.major * (double)deviceProp.major; - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUImportant("Device %s%2d: %s (Rev: %d.%d - Mem Avail %lu / %lu)%s %s", deviceOK ? " " : "[", i, deviceProp.name, deviceProp.major, deviceProp.minor, free, (size_t)deviceProp.totalGlobalMem, deviceOK ? " " : " ]", deviceOK ? 
"" : deviceFailure); } if (!deviceOK) { @@ -191,7 +187,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() bestDevice = i; bestDeviceSpeed = deviceSpeed; } else { - if (mProcessingSettings.debugLevel >= 2 && mProcessingSettings.deviceNum < 0) { + if (GetProcessingSettings().debugLevel >= 2 && GetProcessingSettings().deviceNum < 0) { GPUInfo("Skipping: Speed %f < %f\n", deviceSpeed, bestDeviceSpeed); } } @@ -204,15 +200,15 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUImportant("Requiring Revision %d.%d, Mem: %lu", reqVerMaj, reqVerMin, std::max(mDeviceMemorySize, REQUIRE_MIN_MEMORY)); #endif noDevice = true; - } else if (mProcessingSettings.deviceNum > -1) { - if (mProcessingSettings.deviceNum >= (signed)count) { - GPUError("Requested device ID %d does not exist", mProcessingSettings.deviceNum); + } else if (GetProcessingSettings().deviceNum > -1) { + if (GetProcessingSettings().deviceNum >= (signed)count) { + GPUError("Requested device ID %d does not exist", GetProcessingSettings().deviceNum); noDevice = true; - } else if (!devicesOK[mProcessingSettings.deviceNum]) { - GPUError("Unsupported device requested (%d)", mProcessingSettings.deviceNum); + } else if (!devicesOK[GetProcessingSettings().deviceNum]) { + GPUError("Unsupported device requested (%d)", GetProcessingSettings().deviceNum); noDevice = true; } else { - bestDevice = mProcessingSettings.deviceNum; + bestDevice = GetProcessingSettings().deviceNum; } } if (noDevice) { @@ -225,7 +221,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Using CUDA Device %s with Properties:", deviceProp.name); GPUInfo("\ttotalGlobalMem = %ld", (uint64_t)deviceProp.totalGlobalMem); GPUInfo("\tsharedMemPerBlock = %ld", (uint64_t)deviceProp.sharedMemPerBlock); @@ -244,7 +240,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() 
GPUInfo("\ttextureAlignment = %ld", (uint64_t)deviceProp.textureAlignment); GPUInfo(" "); } - if (deviceProp.warpSize != GPUCA_WARP_SIZE && !mProcessingSettings.rtc.enable) { + if (deviceProp.warpSize != GPUCA_WARP_SIZE && !GetProcessingSettings().rtc.enable) { throw std::runtime_error("Invalid warp size on GPU"); } mWarpSize = deviceProp.warpSize; @@ -280,7 +276,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaDeviceReset()); return (1); } - if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, mProcessingSettings.deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { + if (GPUChkErrI(cudaDeviceSetLimit(cudaLimitMallocHeapSize, GetProcessingSettings().deterministicGPUReconstruction ? std::max(1024 * 1024 * 1024, GPUCA_GPU_HEAP_SIZE) : GPUCA_GPU_HEAP_SIZE))) { GPUError("Error setting CUDA stack size"); GPUChkErrI(cudaDeviceReset()); return (1); @@ -302,7 +298,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() mDeviceMemorySize = mDeviceMemorySize * 2 / 3; // Leave 1/3 of GPU memory for event display } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Allocating memory on GPU"); } if (mDeviceMemorySize > deviceProp.totalGlobalMem || GPUChkErrI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) { @@ -312,7 +308,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaDeviceReset()); return (1); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Allocating memory on Host"); } if (GPUChkErrI(cudaMallocHost(&mHostMemoryBase, mHostMemorySize))) { @@ -320,7 +316,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() GPUChkErrI(cudaDeviceReset()); return (1); } - if (mProcessingSettings.debugLevel >= 1) { + if (GetProcessingSettings().debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, 
mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); if (GPUChkErrI(cudaMemset(mDeviceMemoryBase, 0xDD, mDeviceMemorySize))) { @@ -344,7 +340,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() hipLaunchKernelGGL(HIP_KERNEL_NAME(dummyInitKernel), dim3(mBlockCount), dim3(256), 0, 0, mDeviceMemoryBase); #endif - if (mProcessingSettings.rtc.enable) { + if (GetProcessingSettings().rtc.enable) { genAndLoadRTC(); } #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 @@ -419,17 +415,17 @@ void GPUReconstructionCUDA::genAndLoadRTC() throw std::runtime_error("Runtime compilation failed"); } for (uint32_t i = 0; i < nCompile; i++) { - if (mProcessingSettings.rtctech.runTest != 2) { + if (GetProcessingSettings().rtctech.runTest != 2) { mInternals->kernelModules.emplace_back(std::make_unique()); GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); } - if (mProcessingSettings.rtctech.runTest == 2) { + if (GetProcessingSettings().rtctech.runTest == 2) { return; } - loadKernelModules(mProcessingSettings.rtc.compilePerKernel); + loadKernelModules(GetProcessingSettings().rtc.compilePerKernel); } int32_t GPUReconstructionCUDA::ExitDevice_Runtime() @@ -472,7 +468,7 @@ int32_t GPUReconstructionCUDA::ExitDevice_Runtime() size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { stream = -1; } if (stream == -1) { @@ -490,7 +486,7 @@ size_t GPUReconstructionCUDA::GPUMemCpy(void* dst, const void* src, size_t size, if (ev) { GPUChkErr(cudaEventRecord(ev->get(), 
mInternals->Streams[stream == -1 ? 0 : stream])); } - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); } return size; @@ -512,7 +508,7 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s if (ev && stream != -1) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; @@ -521,7 +517,7 @@ size_t GPUReconstructionCUDA::WriteToConstantMemory(size_t offset, const void* s void GPUReconstructionCUDA::ReleaseEvent(deviceEvent ev) {} void GPUReconstructionCUDA::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(cudaEventRecord(ev->get(), mInternals->Streams[stream])); } -std::unique_ptr GPUReconstructionCUDA::GetThreadContext() +std::unique_ptr GPUReconstructionCUDA::GetThreadContext() { GPUChkErr(cudaSetDevice(mDeviceId)); return GPUReconstructionProcessing::GetThreadContext(); @@ -565,14 +561,14 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool GPUError("CUDA Error %s while running (%s) (Stream %d)", cudaGetErrorString(cuErr), state, stream); return (1); } - if (!force && mProcessingSettings.debugLevel <= 0) { + if (!force && GetProcessingSettings().debugLevel <= 0) { return (0); } if (GPUChkErrI(stream == -1 ? 
cudaDeviceSynchronize() : cudaStreamSynchronize(mInternals->Streams[stream]))) { GPUError("CUDA Error while synchronizing (%s) (Stream %d)", state, stream); return (1); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("GPU Sync Done"); } return (0); @@ -580,7 +576,7 @@ int32_t GPUReconstructionCUDA::GPUDebug(const char* state, int32_t stream, bool int32_t GPUReconstructionCUDA::registerMemoryForGPU_internal(const void* ptr, size_t size) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Registering %zu bytes of memory for GPU", size); } return GPUChkErrI(cudaHostRegister((void*)ptr, size, cudaHostRegisterDefault)); @@ -591,7 +587,7 @@ int32_t GPUReconstructionCUDA::unregisterMemoryForGPU_internal(const void* ptr) return GPUChkErrI(cudaHostUnregister((void*)ptr)); } -void GPUReconstructionCUDABackend::PrintKernelOccupancies() +void GPUReconstructionCUDA::PrintKernelOccupancies() { int32_t maxBlocks = 0, threads = 0, suggestedBlocks = 0, nRegs = 0, sMem = 0; GPUChkErr(cudaSetDevice(mDeviceId)); @@ -612,7 +608,7 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) GPUFatal("kernel numbers out of sync"); \ } \ mInternals->kernelFunctions.emplace_back(new CUfunction); \ - if (mProcessingSettings.debugLevel >= 3) { \ + if (GetProcessingSettings().debugLevel >= 3) { \ GPUInfo("Loading kernel %s (j = %u)", GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), j); \ } \ GPUChkErr(cuModuleGetFunction(mInternals->kernelFunctions.back().get(), *mInternals->kernelModules[perKernel ? 
j : 0], GPUCA_M_STR(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))))); \ @@ -680,8 +676,3 @@ void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options #endif // ORT_ROCM_BUILD } #endif // __HIPCC__ - -namespace o2::gpu -{ -template class GPUReconstructionKernels; -} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 2fc4d14bba491..ed75100dfe351 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -16,6 +16,7 @@ #define GPURECONSTRUCTIONCUDA_H #include "GPUReconstructionDeviceBase.h" +#include "GPUCommonAlgorithm.h" #include #include @@ -34,38 +35,31 @@ namespace o2::gpu { struct GPUReconstructionCUDAInternals; -class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase +class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterface { public: - ~GPUReconstructionCUDABackend() override; - - protected: - GPUReconstructionCUDABackend(const GPUSettingsDeviceBackend& cfg); + GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg); + ~GPUReconstructionCUDA() override; void PrintKernelOccupancies() override; virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template void runKernelBackend(const krnlSetupArgs& args); - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); + + protected: GPUReconstructionCUDAInternals* mInternals; -}; -class GPUReconstructionCUDA : public GPUReconstructionKernels -{ - public: - ~GPUReconstructionCUDA() override; - GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg); + template + void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); - protected: int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; - std::unique_ptr GetThreadContext() override; + std::unique_ptr GetThreadContext() override; void SynchronizeGPU() override; int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false) override; void SynchronizeStream(int32_t stream) override; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index abcd47ca01c90..5706f32e73e96 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -37,8 +37,8 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_no_fast_math); int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + - std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + - GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr); + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; } @@ -53,12 +53,12 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) kernelsall += kernels[i] + "\n"; } - std::string baseCommand = (mProcessingSettings.rtctech.prependCommand != "" ? (mProcessingSettings.rtctech.prependCommand + " ") : ""); + std::string baseCommand = (GetProcessingSettings().rtctech.prependCommand != "" ? (GetProcessingSettings().rtctech.prependCommand + " ") : ""); baseCommand += (getenv("O2_GPU_RTC_OVERRIDE_CMD") ? 
std::string(getenv("O2_GPU_RTC_OVERRIDE_CMD")) : std::string(_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len)); - baseCommand += std::string(" ") + (mProcessingSettings.rtctech.overrideArchitecture != "" ? mProcessingSettings.rtctech.overrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); + baseCommand += std::string(" ") + (GetProcessingSettings().rtctech.overrideArchitecture != "" ? GetProcessingSettings().rtctech.overrideArchitecture : std::string(_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len)); - if (mProcessingSettings.rtctech.loadLaunchBoundsFromFile.size()) { - FILE* fp = fopen(mProcessingSettings.rtctech.loadLaunchBoundsFromFile.c_str(), "rb"); + if (GetProcessingSettings().rtctech.loadLaunchBoundsFromFile.size()) { + FILE* fp = fopen(GetProcessingSettings().rtctech.loadLaunchBoundsFromFile.c_str(), "rb"); if (fp == nullptr) { throw std::runtime_error("Cannot open launch bounds parameter module file"); } @@ -75,12 +75,12 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) + "#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n"; - if (mProcessingSettings.rtctech.printLaunchBounds || mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().rtctech.printLaunchBounds || GetProcessingSettings().debugLevel >= 3) { GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str()); } char shasource[21], shaparam[21], shacmd[21], shakernels[21], shabounds[21]; - if (mProcessingSettings.rtc.cacheOutput) { + if (GetProcessingSettings().rtc.cacheOutput) { o2::framework::internal::SHA1(shasource, _binary_GPUReconstructionCUDArtc_src_start, _binary_GPUReconstructionCUDArtc_src_len); o2::framework::internal::SHA1(shaparam, rtcparam.c_str(), 
rtcparam.size()); o2::framework::internal::SHA1(shacmd, baseCommand.c_str(), baseCommand.size()); @@ -88,16 +88,16 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) o2::framework::internal::SHA1(shabounds, launchBounds.c_str(), launchBounds.size()); } - nCompile = mProcessingSettings.rtc.compilePerKernel ? kernels.size() : 1; + nCompile = GetProcessingSettings().rtc.compilePerKernel ? kernels.size() : 1; bool cacheLoaded = false; int32_t fd = 0; - if (mProcessingSettings.rtc.cacheOutput) { - if (mProcessingSettings.rtctech.cacheFolder != ".") { - std::filesystem::create_directories(mProcessingSettings.rtctech.cacheFolder); + if (GetProcessingSettings().rtc.cacheOutput) { + if (GetProcessingSettings().rtctech.cacheFolder != ".") { + std::filesystem::create_directories(GetProcessingSettings().rtctech.cacheFolder); } - if (mProcessingSettings.rtctech.cacheMutex) { + if (GetProcessingSettings().rtctech.cacheMutex) { mode_t mask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; - fd = open((mProcessingSettings.rtctech.cacheFolder + "/cache.lock").c_str(), O_RDWR | O_CREAT | O_CLOEXEC, mask); + fd = open((GetProcessingSettings().rtctech.cacheFolder + "/cache.lock").c_str(), O_RDWR | O_CREAT | O_CLOEXEC, mask); if (fd == -1) { throw std::runtime_error("Error opening rtc cache mutex lock file"); } @@ -107,7 +107,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } } - FILE* fp = fopen((mProcessingSettings.rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "rb"); + FILE* fp = fopen((GetProcessingSettings().rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "rb"); char sharead[20]; if (fp) { size_t len; @@ -116,7 +116,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) if (fread(sharead, 1, 20, fp) != 20) { throw std::runtime_error("Cache file corrupt"); } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { char 
shaprint1[41], shaprint2[41]; for (uint32_t i = 0; i < 20; i++) { sprintf(shaprint1 + 2 * i, "%02X ", shacmp[i]); @@ -124,7 +124,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } GPUInfo("SHA for %s: expected %s, read %s", name, shaprint1, shaprint2); } - if (!mProcessingSettings.rtctech.ignoreCacheValid && memcmp(sharead, shacmp, 20)) { + if (!GetProcessingSettings().rtctech.ignoreCacheValid && memcmp(sharead, shacmp, 20)) { GPUInfo("Cache file content outdated (%s)", name); return 1; } @@ -142,7 +142,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) if (fread(&cachedSettings, sizeof(cachedSettings), 1, fp) != 1) { throw std::runtime_error("Cache file corrupt"); } - if (!mProcessingSettings.rtctech.ignoreCacheValid && !(cachedSettings == mProcessingSettings.rtc)) { + if (!GetProcessingSettings().rtctech.ignoreCacheValid && !(cachedSettings == GetProcessingSettings().rtc)) { GPUInfo("Cache file content outdated (rtc parameters)"); break; } @@ -172,13 +172,13 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } } if (!cacheLoaded) { - if (mProcessingSettings.debugLevel >= 0) { + if (GetProcessingSettings().debugLevel >= 0) { GPUInfo("Starting CUDA RTC Compilation"); } HighResTimer rtcTimer; rtcTimer.ResetStart(); tbb::parallel_for(0, nCompile, [&](auto i) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { printf("Compiling %s\n", (filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); } FILE* fp = fopen((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str(), "w+b"); @@ -187,10 +187,10 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) } std::string kernel = "extern \"C\" {"; - kernel += mProcessingSettings.rtc.compilePerKernel ? kernels[i] : kernelsall; + kernel += GetProcessingSettings().rtc.compilePerKernel ? 
kernels[i] : kernelsall; kernel += "}"; - bool deterministic = mProcessingSettings.rtc.deterministic || (mProcessingSettings.rtc.compilePerKernel && o2::gpu::internal::noFastMathKernels.find(GetKernelName(i)) != o2::gpu::internal::noFastMathKernels.end()); + bool deterministic = GetProcessingSettings().rtc.deterministic || (GetProcessingSettings().rtc.compilePerKernel && o2::gpu::internal::noFastMathKernels.find(GetKernelName(i)) != o2::gpu::internal::noFastMathKernels.end()); const std::string deterministicStr = std::string(deterministic ? "#define GPUCA_DETERMINISTIC_CODE(det, indet) det\n" : "#define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n"); if (fwrite(deterministicStr.c_str(), 1, deterministicStr.size(), fp) != deterministicStr.size() || @@ -206,26 +206,26 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) command += std::string(" ") + std::string(_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len); } command += " -c " + filename + "_" + std::to_string(i) + mRtcSrcExtension + " -o " + filename + "_" + std::to_string(i) + mRtcBinExtension; - if (mProcessingSettings.debugLevel < 0) { + if (GetProcessingSettings().debugLevel < 0) { command += " &> /dev/null"; - } else if (mProcessingSettings.debugLevel < 2) { + } else if (GetProcessingSettings().debugLevel < 2) { command += " > /dev/null"; } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { printf("Running command %s\n", command.c_str()); } if (system(command.c_str())) { - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { printf("Source code file: %s", filename.c_str()); } throw std::runtime_error("Error during CUDA compilation"); } // clang-format off }, tbb::simple_partitioner()); // clang-format on - if (mProcessingSettings.debugLevel >= 0) { + if (GetProcessingSettings().debugLevel >= 0) { GPUInfo("RTC Compilation 
finished (%f seconds)", rtcTimer.GetCurrentElapsedTime()); } - if (mProcessingSettings.rtc.cacheOutput) { - FILE* fp = fopen((mProcessingSettings.rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "w+b"); + if (GetProcessingSettings().rtc.cacheOutput) { + FILE* fp = fopen((GetProcessingSettings().rtctech.cacheFolder + "/rtc.cuda.cache").c_str(), "w+b"); if (fp == nullptr) { throw std::runtime_error("Cannot open cache file for writing"); } @@ -236,7 +236,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) fwrite(shacmd, 1, 20, fp) != 20 || fwrite(shakernels, 1, 20, fp) != 20 || fwrite(shabounds, 1, 20, fp) != 20 || - fwrite(&mProcessingSettings.rtc, sizeof(mProcessingSettings.rtc), 1, fp) != 1) { + fwrite(&GetProcessingSettings().rtc, sizeof(GetProcessingSettings().rtc), 1, fp) != 1) { throw std::runtime_error("Error writing cache file"); } @@ -263,7 +263,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) fclose(fp); } } - if (mProcessingSettings.rtc.cacheOutput && mProcessingSettings.rtctech.cacheMutex) { + if (GetProcessingSettings().rtc.cacheOutput && GetProcessingSettings().rtctech.cacheMutex) { if (lockf(fd, F_ULOCK, 0)) { throw std::runtime_error("Error unlocking RTC cache mutex file"); } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index f3fc21243ef0e..0813c9d22ea09 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -44,7 +44,7 @@ struct GPUReconstructionCUDAInternals { class GPUDebugTiming { public: - GPUDebugTiming(bool d, gpu_reconstruction_kernels::deviceEvent* t, cudaStream_t* s, const gpu_reconstruction_kernels::krnlSetupTime& x, GPUReconstructionCUDABackend* r) : mDeviceTimers(t), mStreams(s), mXYZ(x), mRec(r), mDo(d) + GPUDebugTiming(bool d, GPUReconstructionProcessing::deviceEvent* t, cudaStream_t* s, const 
GPUReconstructionProcessing::krnlSetupTime& x, GPUReconstructionCUDA* r) : mDeviceTimers(t), mStreams(s), mXYZ(x), mRec(r), mDo(d) { if (mDo) { if (mDeviceTimers) { @@ -71,10 +71,10 @@ class GPUDebugTiming } private: - gpu_reconstruction_kernels::deviceEvent* mDeviceTimers; + GPUReconstructionProcessing::deviceEvent* mDeviceTimers; cudaStream_t* mStreams; - const gpu_reconstruction_kernels::krnlSetupTime& mXYZ; - GPUReconstructionCUDABackend* mRec; + const GPUReconstructionProcessing::krnlSetupTime& mXYZ; + GPUReconstructionCUDA* mRec; HighResTimer mTimer; bool mDo; }; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index d53f7cbd81ca9..758ab1b0e36c3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -27,16 +27,16 @@ __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUC #endif template <> -inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } template -inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args) { #if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1 - if (!mProcessingSettings.rtc.enable) { + if (!GetProcessingSettings().rtc.enable) { backendInternal::runKernelBackendMacro(_xyz, this, args...); } else #endif @@ -56,7 +56,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet } template -void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args) { auto& x = args.s.x; auto& z = args.s.z; @@ -66,7 +66,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); + GPUDebugTiming timer(GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); std::apply([this, &args](auto&... vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); } GPUChkErr(cudaGetLastError()); @@ -79,7 +79,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); #else // ---------- COMPILE_MODE = onefile | rdc ---------- #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc @@ -87,7 +87,7 @@ void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs(const krnlSetupArgs& args); + template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); #ifndef __HIPCC__ // CUDA version #define GPUCA_KRNL_CALL(x_class, ...) 
\ diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index e276f83413bbc..28c809dd4a09a 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUReconstructionOCLIncludesHost.h" +#include "GPUReconstructionProcessingKernels.inc" #include "GPUDefParametersLoad.inc" #include @@ -33,16 +34,16 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionOCLCode_spirv); GPUReconstruction* GPUReconstruction_Create_OCL(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL(cfg); } -GPUReconstructionOCLBackend::GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof(GPUReconstructionDeviceBase)) +GPUReconstructionOCL::GPUReconstructionOCL(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg, sizeof(GPUReconstructionDeviceBase)) { if (mMaster == nullptr) { mInternals = new GPUReconstructionOCLInternals; *mParDevice = o2::gpu::internal::GPUDefParametersLoad(); } - mDeviceBackendSettings.deviceType = DeviceType::OCL; + mDeviceBackendSettings->deviceType = DeviceType::OCL; } -GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() +GPUReconstructionOCL::~GPUReconstructionOCL() { Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit if (mMaster == nullptr) { @@ -51,7 +52,7 @@ GPUReconstructionOCLBackend::~GPUReconstructionOCLBackend() } static_assert(sizeof(cl_int) <= sizeof(int64_t) && CL_SUCCESS == 0); -int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const +int32_t GPUReconstructionOCL::GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { // Check for OPENCL Error and in the case of an error display the corresponding error string if (error != CL_SUCCESS) { @@ -60,7 +61,7 @@ int32_t 
GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, cons return error != CL_SUCCESS; } -int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() +int32_t GPUReconstructionOCL::InitDevice_Runtime() { if (mMaster == nullptr) { cl_int ocl_error; @@ -71,7 +72,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() if (num_platforms == 0) { GPUErrorReturn("No OpenCL Platform found"); } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("%d OpenCL Platforms found", num_platforms); } @@ -118,17 +119,17 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() cl_uint deviceCount, bestDevice = (cl_uint)-1, bestPlatform = (cl_uint)-1; for (uint32_t iPlatform = 0; iPlatform < num_platforms; iPlatform++) { - if (mProcessingSettings.oclPlatformNum >= 0) { - if (mProcessingSettings.oclPlatformNum >= (int32_t)num_platforms) { + if (GetProcessingSettings().oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= (int32_t)num_platforms) { GPUErrorReturn("Invalid platform specified"); } - iPlatform = mProcessingSettings.oclPlatformNum; + iPlatform = GetProcessingSettings().oclPlatformNum; } std::string platformUsageInfo; bool platformCompatible = false; queryPlatform(platforms[iPlatform]); if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, 0, nullptr, &deviceCount) != CL_SUCCESS) { - if (mProcessingSettings.oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= 0) { GPUErrorReturn("No device in requested platform or error obtaining device count"); } platformUsageInfo += " - no devices"; @@ -139,32 +140,32 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } } - if (mProcessingSettings.oclPlatformNum >= 0 || mProcessingSettings.debugLevel >= 2) { - GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", mProcessingSettings.oclPlatformNum >= 0 ? 
"Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", mProcessingSettings.debugLevel >= 2 ? platformUsageInfo.c_str() : ""); + if (GetProcessingSettings().oclPlatformNum >= 0 || GetProcessingSettings().debugLevel >= 2) { + GPUInfo("%s Platform %d: (%s %s) %s %s (Compatible: %s)%s", GetProcessingSettings().oclPlatformNum >= 0 ? "Enforced" : "Available", iPlatform, platform_profile.c_str(), platform_version.c_str(), platform_vendor.c_str(), platform_name.c_str(), platformCompatible ? "yes" : "no", GetProcessingSettings().debugLevel >= 2 ? platformUsageInfo.c_str() : ""); } - if (platformCompatible || mProcessingSettings.oclPlatformNum >= 0 || (mProcessingSettings.oclPlatformNum == -2 && deviceCount)) { + if (platformCompatible || GetProcessingSettings().oclPlatformNum >= 0 || (GetProcessingSettings().oclPlatformNum == -2 && deviceCount)) { if (deviceCount > devices.size()) { devices.resize(deviceCount); } if (clGetDeviceIDs(platforms[iPlatform], CL_DEVICE_TYPE_ALL, deviceCount, devices.data(), nullptr) != CL_SUCCESS) { - if (mProcessingSettings.oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= 0) { GPUErrorReturn("Error getting OpenCL devices"); } continue; } for (uint32_t i = 0; i < deviceCount; i++) { - if (mProcessingSettings.deviceNum >= 0) { - if (mProcessingSettings.deviceNum >= (signed)deviceCount) { - GPUErrorReturn("Requested device ID %d does not exist", mProcessingSettings.deviceNum); + if (GetProcessingSettings().deviceNum >= 0) { + if (GetProcessingSettings().deviceNum >= (signed)deviceCount) { + GPUErrorReturn("Requested device ID %d does not exist", GetProcessingSettings().deviceNum); } - i = mProcessingSettings.deviceNum; + i = GetProcessingSettings().deviceNum; } bool deviceOK = true; queryDevice(devices[i]); std::string deviceFailure; - if (mProcessingSettings.gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) 
|| !(device_type & CL_DEVICE_TYPE_GPU))) { + if (GetProcessingSettings().gpuDeviceOnly && ((device_type & CL_DEVICE_TYPE_CPU) || !(device_type & CL_DEVICE_TYPE_GPU))) { deviceOK = false; deviceFailure += " - No GPU device"; } @@ -193,12 +194,12 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } double bestDeviceSpeed = -1, deviceSpeed = (double)device_freq * (double)device_shaders; - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo(" Device %s%2d: %s %s (Frequency %d, Shaders %d, %d bit) (Speed Value: %ld)%s %s", deviceOK ? " " : "[", i, device_vendor.c_str(), device_name.c_str(), (int32_t)device_freq, (int32_t)device_shaders, (int32_t)device_nbits, (int64_t)deviceSpeed, deviceOK ? " " : " ]", deviceOK ? "" : deviceFailure.c_str()); } if (!deviceOK) { - if (mProcessingSettings.deviceNum >= 0) { - GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, mProcessingSettings.deviceNum); + if (GetProcessingSettings().deviceNum >= 0) { + GPUInfo("Unsupported device requested on platform %d: (%d)", iPlatform, GetProcessingSettings().deviceNum); break; } continue; @@ -209,12 +210,12 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() bestDeviceSpeed = deviceSpeed; mOclVersion = platform_version_f; } - if (mProcessingSettings.deviceNum >= 0) { + if (GetProcessingSettings().deviceNum >= 0) { break; } } } - if (mProcessingSettings.oclPlatformNum >= 0) { + if (GetProcessingSettings().oclPlatformNum >= 0) { break; } } @@ -238,7 +239,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() deviceVersion = query(clGetDeviceInfo, mInternals->device, CL_DEVICE_VERSION); int versionMajor, versionMinor; sscanf(deviceVersion.c_str(), "OpenCL %d.%d", &versionMajor, &versionMinor); - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Using OpenCL platform %d / device %d: %s %s with properties:", bestPlatform, bestDevice, device_vendor.c_str(), 
device_name.c_str()); GPUInfo("\tVersion = %s", deviceVersion); GPUInfo("\tFrequency = %d", (int32_t)device_freq); @@ -271,7 +272,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() return 1; } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("OpenCL program and kernels loaded successfully"); } @@ -289,21 +290,21 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() } if (device_type & CL_DEVICE_TYPE_CPU) { - if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel >= 2) { GPUInfo("Disabling device timers for CPU device"); } - mProcessingSettings.deviceTimers = 0; + mProcessingSettings->deviceTimers = 0; } for (int32_t i = 0; i < mNStreams; i++) { #ifdef CL_VERSION_2_0 cl_queue_properties prop = 0; - if (versionMajor >= 2 && IsGPU() && mProcessingSettings.deviceTimers) { + if (versionMajor >= 2 && IsGPU() && GetProcessingSettings().deviceTimers) { prop |= CL_QUEUE_PROFILING_ENABLE; } mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error); - if (mProcessingSettings.deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) { + if (GetProcessingSettings().deviceTimers && ocl_error == CL_INVALID_QUEUE_PROPERTIES) { GPUError("GPU device timers not supported by OpenCL platform, disabling"); - mProcessingSettings.deviceTimers = 0; + mProcessingSettings->deviceTimers = 0; prop = 0; mInternals->command_queue[i] = clCreateCommandQueueWithProperties(mInternals->context, mInternals->device, &prop, &ocl_error); } @@ -351,7 +352,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() GPUErrorReturn("Error obtaining device memory ptr"); } - if (mProcessingSettings.debugLevel >= 2) { + if (GetProcessingSettings().debugLevel >= 2) { GPUInfo("Mapping hostmemory"); } mHostMemoryBase = clEnqueueMapBuffer(mInternals->command_queue[0], 
mInternals->mem_host, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, mHostMemorySize, 0, nullptr, nullptr, &ocl_error); @@ -362,7 +363,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() mDeviceMemoryBase = ((void**)mHostMemoryBase)[0]; mDeviceConstantMem = (GPUConstantMem*)((void**)mHostMemoryBase)[1]; - if (mProcessingSettings.debugLevel >= 1) { + if (GetProcessingSettings().debugLevel >= 1) { GPUInfo("Memory ptrs: GPU (%ld bytes): %p - Host (%ld bytes): %p", (int64_t)mDeviceMemorySize, mDeviceMemoryBase, (int64_t)mHostMemorySize, mHostMemoryBase); memset(mHostMemoryBase, 0xDD, mHostMemorySize); } @@ -386,7 +387,7 @@ int32_t GPUReconstructionOCLBackend::InitDevice_Runtime() return (0); } -int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime() +int32_t GPUReconstructionOCL::ExitDevice_Runtime() { // Uninitialize OPENCL SynchronizeGPU(); @@ -418,12 +419,12 @@ int32_t GPUReconstructionOCLBackend::ExitDevice_Runtime() return (0); } -size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) +size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents) { if (evList == nullptr) { nEvents = 0; } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { stream = -1; } if (stream == -1) { @@ -440,33 +441,33 @@ size_t GPUReconstructionOCLBackend::GPUMemCpy(void* dst, const void* src, size_t } else { GPUChkErr(clEnqueueReadBuffer(mInternals->command_queue[stream == -1 ? 
0 : stream], mInternals->mem_gpu, stream == -1, (char*)src - (char*)mDeviceMemoryBase, size, dst, nEvents, evList->getEventList(), ev->getEventList())); } - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug(("GPUMemCpy " + std::to_string(toGPU)).c_str(), stream, true); } return size; } -size_t GPUReconstructionOCLBackend::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev) +size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream, deviceEvent* ev) { if (stream == -1) { SynchronizeGPU(); } GPUChkErr(clEnqueueWriteBuffer(mInternals->command_queue[stream == -1 ? 0 : stream], mInternals->mem_constant, stream == -1, offset, size, src, 0, nullptr, ev->getEventList())); - if (mProcessingSettings.serializeGPU & 2) { + if (GetProcessingSettings().serializeGPU & 2) { GPUDebug("WriteToConstantMemory", stream, true); } return size; } -void GPUReconstructionOCLBackend::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } +void GPUReconstructionOCL::ReleaseEvent(deviceEvent ev) { GPUChkErr(clReleaseEvent(ev.get())); } -void GPUReconstructionOCLBackend::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } +void GPUReconstructionOCL::RecordMarker(deviceEvent* ev, int32_t stream) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], 0, nullptr, ev->getEventList())); } -int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEvent event) +int32_t GPUReconstructionOCL::DoStuckProtection(int32_t stream, deviceEvent event) { - if (mProcessingSettings.stuckProtection) { + if (GetProcessingSettings().stuckProtection) { cl_int tmp = 0; - for (int32_t i = 0; i <= mProcessingSettings.stuckProtection / 50; i++) { + for (int32_t i = 0; i <= GetProcessingSettings().stuckProtection 
/ 50; i++) { usleep(50); clGetEventInfo(event.get(), CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(tmp), &tmp, nullptr); if (tmp == CL_COMPLETE) { @@ -483,25 +484,25 @@ int32_t GPUReconstructionOCLBackend::DoStuckProtection(int32_t stream, deviceEve return 0; } -void GPUReconstructionOCLBackend::SynchronizeGPU() +void GPUReconstructionOCL::SynchronizeGPU() { for (int32_t i = 0; i < mNStreams; i++) { GPUChkErr(clFinish(mInternals->command_queue[i])); } } -void GPUReconstructionOCLBackend::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } +void GPUReconstructionOCL::SynchronizeStream(int32_t stream) { GPUChkErr(clFinish(mInternals->command_queue[stream])); } -void GPUReconstructionOCLBackend::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } +void GPUReconstructionOCL::SynchronizeEvents(deviceEvent* evList, int32_t nEvents) { GPUChkErr(clWaitForEvents(nEvents, evList->getEventList())); } -void GPUReconstructionOCLBackend::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) +void GPUReconstructionOCL::StreamWaitForEvents(int32_t stream, deviceEvent* evList, int32_t nEvents) { if (nEvents) { GPUChkErr(clEnqueueMarkerWithWaitList(mInternals->command_queue[stream], nEvents, evList->getEventList(), nullptr)); } } -bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEvents) +bool GPUReconstructionOCL::IsEventDone(deviceEvent* evList, int32_t nEvents) { cl_int eventdone; for (int32_t i = 0; i < nEvents; i++) { @@ -513,10 +514,10 @@ bool GPUReconstructionOCLBackend::IsEventDone(deviceEvent* evList, int32_t nEven return true; } -int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, bool force) +int32_t GPUReconstructionOCL::GPUDebug(const char* state, int32_t stream, bool force) { // Wait for OPENCL-Kernel to finish and check for OPENCL errors afterwards, in case of debugmode - if (!force && 
mProcessingSettings.debugLevel <= 0) { + if (!force && GetProcessingSettings().debugLevel <= 0) { return (0); } for (int32_t i = 0; i < mNStreams; i++) { @@ -524,13 +525,13 @@ int32_t GPUReconstructionOCLBackend::GPUDebug(const char* state, int32_t stream, GPUError("OpenCL Error while synchronizing (%s) (Stream %d/%d)", state, stream, i); } } - if (mProcessingSettings.debugLevel >= 3) { + if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("GPU Sync Done"); } return (0); } -int32_t GPUReconstructionOCLBackend::GetOCLPrograms() +int32_t GPUReconstructionOCL::GetOCLPrograms() { cl_int ocl_error; @@ -571,7 +572,7 @@ int32_t GPUReconstructionOCLBackend::GetOCLPrograms() return AddKernels(); } -const char* GPUReconstructionOCLBackend::convertErrorToString(int32_t errorcode) +const char* GPUReconstructionOCL::convertErrorToString(int32_t errorcode) { static const std::map error_map = { {CL_SUCCESS, "CL_SUCCESS"}, diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index abde42f01f073..091bc0409630d 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -27,14 +27,16 @@ namespace o2::gpu { struct GPUReconstructionOCLInternals; -class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase +class GPUReconstructionOCL : public GPUReconstructionProcessing::KernelInterface { public: - ~GPUReconstructionOCLBackend() override; + GPUReconstructionOCL(const GPUSettingsDeviceBackend& cfg); + ~GPUReconstructionOCL() override; - protected: - GPUReconstructionOCLBackend(const GPUSettingsDeviceBackend& cfg); + template + void runKernelBackend(const krnlSetupArgs& args); + protected: int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; @@ -61,8 +63,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase GPUReconstructionOCLInternals* mInternals; float mOclVersion; - template - void 
runKernelBackend(const krnlSetupArgs& args); template S& getKernelObject(); @@ -78,7 +78,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase int32_t AddKernels(); }; -using GPUReconstructionOCL = GPUReconstructionKernels; } // namespace o2::gpu #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h index 0bb2f25093789..919791948d6c3 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLIncludesHost.h @@ -54,7 +54,7 @@ struct GPUReconstructionOCLInternals { } // namespace o2::gpu template -inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters) +inline int64_t GPUReconstructionOCL::OCLsetKernelParameters_helper(cl_kernel& kernel, int32_t i, const T& firstParameter, const Args&... restOfParameters) { int64_t retVal = clSetKernelArg(kernel, i, sizeof(T), &firstParameter); if (retVal) { @@ -67,12 +67,12 @@ inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters_helper(cl_ker } template -inline int64_t GPUReconstructionOCLBackend::OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) +inline int64_t GPUReconstructionOCL::OCLsetKernelParameters(cl_kernel& kernel, const Args&... args) { return OCLsetKernelParameters_helper(kernel, 0, args...); } -inline int64_t GPUReconstructionOCLBackend::clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait, cl_int nWaitEvents) +inline int64_t GPUReconstructionOCL::clExecuteKernelA(cl_command_queue queue, cl_kernel krnl, size_t local_size, size_t global_size, cl_event* pEvent, cl_event* wait, cl_int nWaitEvents) { return clEnqueueNDRangeKernel(queue, krnl, 1, nullptr, &global_size, &local_size, wait == nullptr ? 
0 : nWaitEvents, wait, pEvent); } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index cca634fba65fc..29b71017e9f73 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -13,16 +13,17 @@ /// \author David Rohr #include "GPUReconstructionOCLIncludesHost.h" +#include "GPUReconstructionKernelIncludes.h" template <> -inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); } template -inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args) { cl_kernel k = getKernelObject(); auto& x = _xyz.x; @@ -33,14 +34,14 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu cl_event ev; cl_event* evr; bool tmpEvent = false; - if (z.ev == nullptr && mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + if (z.ev == nullptr && GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0) { evr = &ev; tmpEvent = true; } else { evr = (cl_event*)z.ev; } GPUChkErr(clExecuteKernelA(mInternals->command_queue[x.stream], k, x.nThreads, x.nThreads * x.nBlocks, evr, (cl_event*)z.evList, z.nEvents)); - if (mProcessingSettings.deviceTimers && mProcessingSettings.debugLevel > 0) { + if (GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0) { cl_ulong time_start, time_end; GPUChkErr(clWaitForEvents(1, evr)); GPUChkErr(clGetEventProfilingInfo(*evr, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, nullptr)); @@ -53,13 +54,13 @@ inline void GPUReconstructionOCLBackend::runKernelBackendInternal(const krnlSetu } template -void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args) +void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args) { std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); } template -int32_t GPUReconstructionOCLBackend::AddKernel() +int32_t GPUReconstructionOCL::AddKernel() { std::string name(GetKernelName()); std::string kname("krnl_" + name); @@ -75,12 +76,12 @@ int32_t GPUReconstructionOCLBackend::AddKernel() } template -S& GPUReconstructionOCLBackend::getKernelObject() +S& GPUReconstructionOCL::getKernelObject() { return mInternals->kernels[GetKernelNum()]; } -int32_t GPUReconstructionOCLBackend::AddKernels() +int32_t GPUReconstructionOCL::AddKernels() { #define GPUCA_KRNL(x_class, ...) 
\ if (AddKernel()) { \ @@ -91,6 +92,6 @@ int32_t GPUReconstructionOCLBackend::AddKernels() return 0; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionOCLBackend::runKernelBackend(const krnlSetupArgs& args); +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index e82799b9e59c3..0cd302cc0be94 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -113,7 +113,7 @@ set(HDRS_INSTALL Base/GPUReconstructionIO.h Base/GPUReconstructionIncludesITS.h Base/GPUReconstructionKernelMacros.h - Base/GPUReconstructionKernels.h + Base/GPUReconstructionCPUKernels.h DataCompression/GPUTPCClusterRejection.h DataCompression/GPUTPCCompressionKernels.inc DataCompression/TPCClusterDecompressionCore.inc @@ -127,6 +127,7 @@ set(HDRS_INSTALL DataTypes/GPUTRDDef.h DataTypes/GPUTRDInterfaceO2Track.h DataTypes/GPUTriggerOutputs.h + DataTypes/GPUKernelClassesFwd.h Debug/GPUROOTDump.h Definitions/GPUDefConstantsAndSettings.h Definitions/GPUDefParametersWrapper.h @@ -137,6 +138,7 @@ set(HDRS_INSTALL Definitions/GPULogging.h Definitions/GPUSettingsList.h Global/GPUChainTrackingDefs.h + Global/GPUChainTrackingGetters.inc Global/GPUErrorCodes.h Merger/GPUTPCGMBorderTrack.h Merger/GPUTPCGMMergedTrack.h @@ -217,11 +219,11 @@ set(SRCS_NO_H ${SRCS_NO_H} set(HDRS_INSTALL ${HDRS_INSTALL} ITS/GPUITSTrack.h - TPCClusterFinder/Array2D.h + TPCClusterFinder/CfArray2D.h TPCClusterFinder/CfConsts.h TPCClusterFinder/CfFragment.h TPCClusterFinder/CfUtils.h - TPCClusterFinder/ChargePos.h + TPCClusterFinder/CfChargePos.h Definitions/clusterFinderDefs.h TPCClusterFinder/PackedCharge.h TPCClusterFinder/GPUTPCCFChainContext.h) diff --git 
a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 8a22545314252..2a0c5b58d8a83 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -17,6 +17,7 @@ #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" #include "GPUDefParametersRuntime.h" +#include "GPUConstantMem.h" using namespace o2::gpu; @@ -123,7 +124,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters = io.clustersNative->nClustersTotal; mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; - mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); + mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include mMaxTracks = mRec->GetConstantMem().tpcMerger.NOutputTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); diff --git a/GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h b/GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h new file mode 100644 index 0000000000000..405eb339dea3b --- /dev/null +++ b/GPU/GPUTracking/DataTypes/GPUKernelClassesFwd.h @@ -0,0 +1,40 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUKernelClassesFwd.h +/// \author David Rohr + +#ifndef GPUKERNELCLASSESFWDN_H +#define GPUKERNELCLASSESFWDN_H + +#include "GPUTRDDef.h" + +namespace o2::gpu +{ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) class GPUCA_M_FIRST(GPUCA_M_STRIP(x_class)); +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL + +struct GPUTPCClusterOccupancyMapBin; +namespace gputpcgmmergertypes +{ +struct GPUTPCGMBorderRange; +} +struct GPUTPCLinearLabels; +struct CfChargePos; +} // namespace o2::gpu + +namespace o2::tpc +{ +struct ClusterNative; +} // namespace o2::tpc + +#endif diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 1e99e3b73736f..5df324fcba648 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -16,11 +16,14 @@ #define GPUCHAIN_H #include "GPUReconstructionCPU.h" +#include "GPUReconstructionCPUKernels.h" +#include "GPUKernelClassesFwd.h" #include namespace o2::gpu { + class GPUChain { friend class GPUReconstruction; @@ -30,10 +33,10 @@ class GPUChain using GeneralStep = GPUReconstruction::GeneralStep; using InOutPointerType = GPUReconstruction::InOutPointerType; using GeometryType = GPUReconstruction::GeometryType; - using krnlRunRange = gpu_reconstruction_kernels::krnlRunRange; - using krnlExec = gpu_reconstruction_kernels::krnlExec; - using krnlEvent = gpu_reconstruction_kernels::krnlEvent; - using deviceEvent = gpu_reconstruction_kernels::deviceEvent; + using krnlRunRange = GPUReconstructionProcessing::krnlRunRange; + using krnlExec = GPUReconstructionProcessing::krnlExec; + using krnlEvent = GPUReconstructionProcessing::krnlEvent; + using deviceEvent = GPUReconstructionProcessing::deviceEvent; static constexpr krnlRunRange krnlRunRangeNone{0}; static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0}; @@ -56,20 +59,20 @@ class GPUChain virtual void DumpSettings(const char* dir = "") {} virtual void ReadSettings(const 
char* dir = "") {} - const GPUParam& GetParam() const { return mRec->mHostConstantMem->param; } - const GPUSettingsGRP& GetGRPSettings() const { return mRec->mGRPSettings; } - const GPUCalibObjectsConst& calib() const { return processors()->calibObjects; } + const GPUParam& GetParam() const { return mRec->GetParam(); } + const GPUSettingsGRP& GetGRPSettings() const { return mRec->GetGRPSettings(); } + const GPUCalibObjectsConst& GetCalib() const { return mRec->GetCalib(); } GPUReconstruction* rec() { return mRec; } const GPUReconstruction* rec() const { return mRec; } - inline const GPUConstantMem* GetProcessors() { return mRec->processors(); } + inline const GPUConstantMem* GetProcessors() const { return mRec->processors(); } // Make functions from GPUReconstruction*** available GPUReconstruction::RecoStepField GetRecoSteps() const { return mRec->GetRecoSteps(); } GPUReconstruction::RecoStepField GetRecoStepsGPU() const { return mRec->GetRecoStepsGPU(); } GPUReconstruction::InOutTypeField GetRecoStepsInputs() const { return mRec->GetRecoStepsInputs(); } GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const { return mRec->GetRecoStepsOutputs(); } - inline const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->mDeviceBackendSettings; } - inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->mProcessingSettings; } + inline const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->GetDeviceBackendSettings(); } + inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->GetProcessingSettings(); } protected: GPUReconstructionCPU* mRec; @@ -102,7 +105,7 @@ class GPUChain } inline bool IsEventDone(deviceEvent* evList, int32_t nEvents = 1) { return mRec->IsEventDone(evList, nEvents); } inline void RecordMarker(deviceEvent* ev, int32_t stream) { mRec->RecordMarker(ev, stream); } - virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } + 
virtual inline std::unique_ptr GetThreadContext() { return mRec->GetThreadContext(); } inline void SynchronizeGPU() { mRec->SynchronizeGPU(); } inline void ReleaseEvent(deviceEvent ev, bool doGPU = true) { @@ -171,13 +174,16 @@ class GPUChain { mRec->ReadStructFromFile(file, obj); } + template - inline void runKernel(gpu_reconstruction_kernels::krnlSetup&& setup, Args&&... args) + requires(sizeof(S) >= 0) // Yields better incomplete type errors than calling runKernelCallInterface directly + inline void runKernel(GPUReconstructionProcessing::krnlSetup&& setup, Args const&... args) { - return mRec->runKernel(std::forward(setup), std::forward(args)...); + runKernelCallInterface(std::forward(setup), args...); } + template - gpu_reconstruction_kernels::krnlProperties getKernelProperties() + GPUReconstructionProcessing::krnlProperties getKernelProperties() { return mRec->getKernelProperties(); } @@ -233,6 +239,16 @@ class GPUChain private: template void timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... args); + +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + template \ + requires(std::is_same_v && I == S::GPUCA_M_FIRST(GPUCA_M_SHIFT(GPUCA_M_STRIP(x_class), defaultKernel))) \ + inline void runKernelCallInterface(GPUReconstructionProcessing::krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \ + { \ + mRec->runKernelInterface(std::forward(setup) GPUCA_M_STRIP(x_forward)); \ + } +#include "GPUReconstructionKernelList.h" +#undef GPUCA_KRNL }; template @@ -243,7 +259,7 @@ inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... 
} HighResTimer* timer = nullptr; size_t* bytes = nullptr; - if (mRec->mProcessingSettings.debugLevel >= 1 && toGPU >= 0) { // Todo: time special cases toGPU < 0 + if (mRec->GetProcessingSettings().debugLevel >= 1 && toGPU >= 0) { // Todo: time special cases toGPU < 0 int32_t id = mRec->getRecoStepNum(step, false); if (id != -1) { auto& tmp = mRec->mTimersRecoSteps[id]; diff --git a/GPU/GPUTracking/Global/GPUChainITS.cxx b/GPU/GPUTracking/Global/GPUChainITS.cxx index 640b92a0eb0f4..eeead79b1840b 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.cxx +++ b/GPU/GPUTracking/Global/GPUChainITS.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainITS.h" +#include "GPUConstantMem.h" #include "DataFormatsITS/TrackITS.h" #include "ITStracking/ExternalAllocator.h" #include "GPUReconstructionIncludesITS.h" diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index 43fa49ff74817..a3f9b996e070d 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -18,6 +18,8 @@ #include #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" +#include "GPUReconstructionIO.h" #include "GPUChainTrackingDefs.h" #include "GPUTPCClusterData.h" #include "GPUTPCSectorOutCluster.h" @@ -755,7 +757,7 @@ int32_t GPUChainTracking::RunChain() } } - if (GetProcessingSettings().trdTrackModelO2 ? 
runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking) : runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking)) { + if (runRecoStep(RecoStep::TRDTracking, &GPUChainTracking::RunTRDTracking)) { return 1; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 5779cec31130c..8664652b549e3 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -24,6 +24,12 @@ #include #include +namespace o2::dataformats +{ +template +class ConstMCTruthContainer; +} // namespace o2::dataformats + namespace o2::trd { class GeometryFlat; @@ -39,6 +45,9 @@ class CalibdEdxContainer; namespace o2::base { class MatLayerCylSet; +template +class PropagatorImpl; +using Propagator = PropagatorImpl; } // namespace o2::base namespace o2::gpu @@ -55,6 +64,8 @@ struct GPUChainTrackingFinalContext; struct GPUTPCCFChainContext; struct GPUNewCalibValues; struct GPUTriggerOutputs; +struct CfFragment; +class GPUTPCClusterFinder; class GPUChainTracking : public GPUChain { @@ -137,11 +148,6 @@ class GPUChainTracking : public GPUChain void ConvertZSFilter(bool zs12bit); // Getters for external usage of tracker classes - GPUTRDTrackerGPU* GetTRDTrackerGPU() { return &processors()->trdTrackerGPU; } - GPUTPCTracker* GetTPCSectorTrackers() { return processors()->tpcTrackers; } - const GPUTPCTracker* GetTPCSectorTrackers() const { return processors()->tpcTrackers; } - const GPUTPCGMMerger& GetTPCMerger() const { return processors()->tpcMerger; } - GPUTPCGMMerger& GetTPCMerger() { return processors()->tpcMerger; } GPUDisplayInterface* GetEventDisplay() { return mEventDisplay.get(); } const GPUQA* GetQA() const { return mQAFromForeignChain ? mQAFromForeignChain->mQA.get() : mQA.get(); } GPUQA* GetQA() { return mQAFromForeignChain ? 
mQAFromForeignChain->mQA.get() : mQA.get(); } @@ -155,7 +161,6 @@ class GPUChainTracking : public GPUChain int32_t ForwardTPCDigits(); int32_t RunTPCTrackingSectors(); int32_t RunTPCTrackingMerger(bool synchronizeOutput = true); - template int32_t RunTRDTracking(); template int32_t DoTRDGPUTracking(T* externalInstance = nullptr); @@ -164,22 +169,22 @@ class GPUChainTracking : public GPUChain int32_t RunRefit(); // Getters / setters for parameters - const CorrectionMapsHelper* GetTPCTransformHelper() const { return processors()->calibObjects.fastTransformHelper; } - const TPCPadGainCalib* GetTPCPadGainCalib() const { return processors()->calibObjects.tpcPadGain; } - const TPCZSLinkMapping* GetTPCZSLinkMapping() const { return processors()->calibObjects.tpcZSLinkMapping; } - const o2::tpc::CalibdEdxContainer* GetdEdxCalibContainer() const { return processors()->calibObjects.dEdxCalibContainer; } - const o2::base::MatLayerCylSet* GetMatLUT() const { return processors()->calibObjects.matLUT; } - const GPUTRDGeometry* GetTRDGeometry() const { return (GPUTRDGeometry*)processors()->calibObjects.trdGeometry; } - const o2::base::Propagator* GetO2Propagator() const { return processors()->calibObjects.o2Propagator; } + const CorrectionMapsHelper* GetTPCTransformHelper() const; + const TPCPadGainCalib* GetTPCPadGainCalib() const; + const TPCZSLinkMapping* GetTPCZSLinkMapping() const; + const o2::tpc::CalibdEdxContainer* GetdEdxCalibContainer() const; + const o2::base::MatLayerCylSet* GetMatLUT() const; + const GPUTRDGeometry* GetTRDGeometry() const; + const o2::base::Propagator* GetO2Propagator() const; const o2::base::Propagator* GetDeviceO2Propagator(); void SetTPCFastTransform(std::unique_ptr&& tpcFastTransform, std::unique_ptr&& tpcTransformHelper); void SetMatLUT(std::unique_ptr&& lut); void SetTRDGeometry(std::unique_ptr&& geo); - void SetMatLUT(const o2::base::MatLayerCylSet* lut) { processors()->calibObjects.matLUT = lut; } - void SetTRDGeometry(const 
o2::trd::GeometryFlat* geo) { processors()->calibObjects.trdGeometry = geo; } + void SetMatLUT(const o2::base::MatLayerCylSet* lut); + void SetTRDGeometry(const o2::trd::GeometryFlat* geo); void SetO2Propagator(const o2::base::Propagator* prop); - void SetCalibObjects(const GPUCalibObjectsConst& obj) { processors()->calibObjects = obj; } - void SetCalibObjects(const GPUCalibObjects& obj) { memcpy((void*)&processors()->calibObjects, (const void*)&obj, sizeof(obj)); } + void SetCalibObjects(const GPUCalibObjectsConst& obj); + void SetCalibObjects(const GPUCalibObjects& obj); void SetUpdateCalibObjects(const GPUCalibObjectsConst& obj, const GPUNewCalibValues& vals); void SetSubOutputControl(int32_t i, GPUOutputControl* v) { mSubOutputControls[i] = v; } void SetFinalInputCallback(std::function v) { mWaitForFinalInputs = v; } @@ -298,6 +303,8 @@ class GPUChainTracking : public GPUChain void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType); void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function allocator, bool applyClusterCuts); bool NeedTPCClustersOnGPU(); + template + int32_t RunTRDTrackingInternal(); uint32_t StreamForSector(uint32_t sector) const; std::mutex mMutexUpdateCalib; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 7db0ba66305e9..981d565852d28 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -19,11 +19,20 @@ #include "GPUMemorySizeScalers.h" #include "GPUTrackingInputProvider.h" #include "GPUNewCalibValues.h" -#include - -#ifdef GPUCA_O2_LIB -#include "CommonDataFormat/InteractionRecord.h" -#endif +#include "GPUConstantMem.h" +#include "CfChargePos.h" +#include "CfArray2D.h" +#include "GPUGeneralKernels.h" +#include "GPUTPCCFStreamCompaction.h" +#include "GPUTPCCFChargeMapFiller.h" +#include 
"GPUTPCCFDecodeZS.h" +#include "GPUTPCCFCheckPadBaseline.h" +#include "GPUTPCCFPeakFinder.h" +#include "GPUTPCCFNoiseSuppression.h" +#include "GPUTPCCFDeconvolution.h" +#include "GPUTPCCFClusterizer.h" +#include "GPUTPCCFGather.h" +#include "GPUTPCCFMCLabelFlattener.h" #include "GPUTriggerOutputs.h" #include "GPUHostDataTypes.h" #include "GPUTPCCFChainContext.h" @@ -32,18 +41,24 @@ #include "DataFormatsTPC/Digit.h" #include "DataFormatsTPC/Constants.h" #include "TPCBase/RDHUtils.h" +#include "GPULogging.h" + +#ifdef GPUCA_HAS_ONNX +#include "GPUTPCNNClusterizerKernels.h" +#include "GPUTPCNNClusterizerHost.h" +#endif + +#ifdef GPUCA_O2_LIB +#include "CommonDataFormat/InteractionRecord.h" +#endif #include "utils/strtag.h" +#include #ifndef GPUCA_NO_VC #include #endif -#ifdef GPUCA_HAS_ONNX -#include "GPUTPCNNClusterizerKernels.h" -#include "GPUTPCNNClusterizerHost.h" -#endif - using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; @@ -791,7 +806,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) using ChargeMapType = decltype(*clustererShadow.mPchargeMap); using PeakMapType = decltype(*clustererShadow.mPpeakMap); - runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); // TODO: Not working in OpenCL2!!! 
+ runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(ChargeMapType)); runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap, TPCMapMemoryLayout::items(GetProcessingSettings().overrideClusterizerFragmentLen) * sizeof(PeakMapType)); if (fragment.index == 0) { runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 8fb6fc4771658..fc07a91004c5f 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -19,6 +19,9 @@ #include "GPUTPCCFChainContext.h" #include "TPCClusterDecompressor.h" #include "GPUDefParametersRuntime.h" +#include "GPUConstantMem.h" // TODO: Try to get rid of as many GPUConstantMem includes as possible! 
+#include "GPUTPCCompressionKernels.h" +#include "GPUTPCDecompressionKernels.h" #include "utils/strtag.h" #include diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index c42d9622f5332..5d05cd6a97776 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -15,6 +15,8 @@ #include "GPUChainTracking.h" #include "GPUTrackingInputProvider.h" #include "GPUMemorySizeScalers.h" +#include "GPUConstantMem.h" +#include "GPUTPCClusterFilter.h" #include #include #include @@ -23,8 +25,6 @@ #include "bitmapfile.h" #endif -#include "GPUTPCClusterFilter.h" - #define PROFILE_MAX_SIZE (100 * 1024 * 1024) using namespace o2::gpu; @@ -209,7 +209,7 @@ void GPUChainTracking::PrintDebugOutput() void GPUChainTracking::PrintOutputStat() { int32_t nTracks = 0, nAttachedClusters = 0, nAttachedClustersFitted = 0, nAdjacentClusters = 0; - uint32_t nCls = GetProcessingSettings().doublePipeline ? mIOPtrs.clustersNative->nClustersTotal : GetTPCMerger().NMaxClusters(); + uint32_t nCls = GetProcessingSettings().doublePipeline ? mIOPtrs.clustersNative->nClustersTotal : processors()->tpcMerger.NMaxClusters(); if (GetProcessingSettings().createO2Output > 1) { nTracks = mIOPtrs.nOutputTracksTPCO2; nAttachedClusters = mIOPtrs.nMergedTrackHits; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h index dc1a665e6052c..e02419955001a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDefs.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDefs.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUChainTracking.h +/// \file GPUChainTrackingDefs.h /// \author David Rohr #ifndef GPUCHAINTRACKINGDEFS_H diff --git a/GPU/GPUTracking/Global/GPUChainTrackingGetters.inc b/GPU/GPUTracking/Global/GPUChainTrackingGetters.inc new file mode 100644 index 0000000000000..5b72a8f23c242 --- /dev/null +++ b/GPU/GPUTracking/Global/GPUChainTrackingGetters.inc @@ -0,0 +1,36 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUChainTrackingGetters.inc +/// \author David Rohr + +#ifndef GPUCHAINTRACKINGGETTERS_INC_H +#define GPUCHAINTRACKINGGETTERS_INC_H + +#include "GPUChainTracking.h" +#include "GPUConstantMem.h" + +namespace o2::gpu +{ +inline const CorrectionMapsHelper* GPUChainTracking::GetTPCTransformHelper() const { return processors()->calibObjects.fastTransformHelper; } +inline const TPCPadGainCalib* GPUChainTracking::GetTPCPadGainCalib() const { return processors()->calibObjects.tpcPadGain; } +inline const TPCZSLinkMapping* GPUChainTracking::GetTPCZSLinkMapping() const { return processors()->calibObjects.tpcZSLinkMapping; } +inline const o2::tpc::CalibdEdxContainer* GPUChainTracking::GetdEdxCalibContainer() const { return processors()->calibObjects.dEdxCalibContainer; } +inline const o2::base::MatLayerCylSet* GPUChainTracking::GetMatLUT() const { return processors()->calibObjects.matLUT; } +inline const GPUTRDGeometry* GPUChainTracking::GetTRDGeometry() const { return (GPUTRDGeometry*)processors()->calibObjects.trdGeometry; } +inline const 
o2::base::Propagator* GPUChainTracking::GetO2Propagator() const { return processors()->calibObjects.o2Propagator; } +inline void GPUChainTracking::SetMatLUT(const o2::base::MatLayerCylSet* lut) { processors()->calibObjects.matLUT = lut; } +inline void GPUChainTracking::SetTRDGeometry(const o2::trd::GeometryFlat* geo) { processors()->calibObjects.trdGeometry = geo; } +inline void GPUChainTracking::SetCalibObjects(const GPUCalibObjectsConst& obj) { processors()->calibObjects = obj; } +inline void GPUChainTracking::SetCalibObjects(const GPUCalibObjects& obj) { memcpy((void*)&processors()->calibObjects, (const void*)&obj, sizeof(obj)); } +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 4f7846b852b98..5e7672022b3ff 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -34,6 +34,7 @@ #include "GPUTrackingInputProvider.h" #include "TPCZSLinkMapping.h" #include "GPUTriggerOutputs.h" +#include "GPUConstantMem.h" #include "SimulationDataFormat/MCCompLabel.h" #include "SimulationDataFormat/MCTruthContainer.h" diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index a647c213660c9..163f08634ef86 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -17,6 +17,11 @@ #include "GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" #include "GPUQA.h" +#include "GPUTPCGMMerger.h" +#include "GPUConstantMem.h" +#include "GPUTPCGMMergerGPU.h" +#include "GPUTPCGMO2Output.h" +#include "GPUTPCGlobalDebugSortKernels.h" #include "utils/strtag.h" #include diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 8d1efd7011227..4662b5464f710 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ 
b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -13,8 +13,12 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPULogging.h" #include "GPUO2DataTypes.h" +#include "GPUTrackingRefit.h" +#include "GPUConstantMem.h" +#include "GPUTrackingRefitKernel.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 962b0922eeecc..635641c00ae14 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -20,6 +20,16 @@ #include "GPUTrackingInputProvider.h" #include "GPUTPCClusterOccupancyMap.h" #include "GPUDefParametersRuntime.h" +#include "GPUTPCExtrapolationTracking.h" +#include "GPUTPCCreateOccupancyMap.h" +#include "GPUTPCCreateTrackingData.h" +#include "GPUTPCNeighboursFinder.h" +#include "GPUTPCNeighboursCleaner.h" +#include "GPUTPCStartHitsFinder.h" +#include "GPUTPCStartHitsSorter.h" +#include "GPUTPCTrackletConstructor.h" +#include "GPUTPCTrackletSelector.h" +#include "GPUTPCSectorDebugSortKernels.h" #include "utils/strtag.h" #include diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx index 0f17bbcc26842..f9011131803e3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTRD.cxx @@ -21,13 +21,19 @@ #include "GPUTRDTracker.h" #include "GPUTrackingInputProvider.h" #include "GPUTRDTrackerKernels.h" +#include "GPUConstantMem.h" #include "utils/strtag.h" using namespace o2::gpu; using namespace o2::trd; -template int32_t GPUChainTracking::RunTRDTracking() +{ + return GetProcessingSettings().trdTrackModelO2 ? 
RunTRDTrackingInternal() : RunTRDTrackingInternal(); +} + +template +int32_t GPUChainTracking::RunTRDTrackingInternal() { auto& Tracker = processors()->getTRDTracker(); if (!Tracker.IsInitialized()) { @@ -189,9 +195,7 @@ int32_t GPUChainTracking::DoTRDGPUTracking(T* externalInstance) return (0); } -template int32_t GPUChainTracking::RunTRDTracking(); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTrackerGPU*); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTracker*); -template int32_t GPUChainTracking::RunTRDTracking(); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTracker*); template int32_t GPUChainTracking::DoTRDGPUTracking(GPUTRDTrackerGPU*); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx index db5e5ae3aeb75..c9d4d269f070c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingTransformation.cxx @@ -19,9 +19,13 @@ #include "GPUTPCClusterData.h" #include "GPUReconstructionConvert.h" #include "GPUMemorySizeScalers.h" +#include "GPUTPCConvert.h" #include "AliHLTTPCRawCluster.h" +#include "GPUConstantMem.h" +#include "GPUTPCConvertKernel.h" #include "DataFormatsTPC/ClusterNative.h" +#include "DataFormatsTPC/ZeroSuppression.h" #include "CommonDataFormat/InteractionRecord.h" #include "utils/strtag.h" diff --git a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx index a5457bf3f2f23..7ef9baa903fbe 100644 --- a/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx +++ b/GPU/GPUTracking/Global/GPUTrackingInputProvider.cxx @@ -18,6 +18,9 @@ #include "GPUReconstruction.h" #include "GPUTPCClusterOccupancyMap.h" #include "GPUErrors.h" +#include "GPUParam.h" +#include "DataFormatsTPC/ClusterNative.h" +#include "GPUTRDSpacePoint.h" using namespace o2::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx 
b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 4dac56afed671..81eb2c285192b 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -15,6 +15,7 @@ #include "GPUO2Interface.h" #include "GPUReconstruction.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPUChainITS.h" #include "GPUMemorySizeScalers.h" #include "GPUOutputControl.h" @@ -23,6 +24,7 @@ #include "GPUParam.inc" #include "GPUQA.h" #include "GPUOutputControl.h" +#include "DetectorsBase/Propagator.h" #include #include #include diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 82b21e2045b8e..4f960a8e1ec76 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -757,7 +757,7 @@ struct MergeBorderTracks_compMin { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { if (cmpMax) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); @@ -1839,13 +1839,13 @@ struct GPUTPCGMMergerSortTracksQPt_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> 
-inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } @@ -2065,7 +2065,7 @@ struct GPUTPCGMMergerMergeLoopers_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 1e4cc633eb4ca..2f8fbecadce5f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -36,6 +36,7 @@ #include "GPUTPCClusterOccupancyMap.h" #include "GPUTrackingRefit.h" #include "CorrectionMapsHelper.h" +#include "GPUConstantMem.h" using namespace o2::gpu; using namespace gputpcgmmergertypes; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 13b34a0a64a84..6640b556c3011 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -102,7 +102,7 @@ struct GPUTPCGMO2OutputSort_comp { }; template <> -inline void GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const 
krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx index 7d83ff9abd91c..e66ad71783dbd 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrackerDump.cxx @@ -16,6 +16,7 @@ #include "GPUReconstruction.h" #include "GPUTPCHitId.h" #include "GPUTPCTrack.h" +#include "GPULogging.h" #include #include diff --git a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx index b32db2bfebf11..d4c83f92a2157 100644 --- a/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx +++ b/GPU/GPUTracking/Standalone/Benchmark/standalone.cxx @@ -17,11 +17,24 @@ #include "GPUReconstructionTimeframe.h" #include "GPUReconstructionConvert.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPUTPCDef.h" #include "GPUQA.h" +#include "GPUParam.h" #include "display/GPUDisplayInterface.h" #include "genEvents.h" +#include "TPCFastTransform.h" +#include "CorrectionMapsHelper.h" +#include "GPUTPCGMMergedTrack.h" +#include "GPUSettings.h" +#include "GPUConstantMem.h" + +#include "GPUO2DataTypes.h" +#include "GPUChainITS.h" + +#include "DataFormatsTPC/CompressedClusters.h" + #include #include #include @@ -32,6 +45,7 @@ #include #include #include +#include #ifndef _WIN32 #include @@ -48,15 +62,6 @@ #include "utils/qmaths_helpers.h" #include "utils/vecpod.h" -#include "TPCFastTransform.h" -#include "CorrectionMapsHelper.h" -#include "GPUTPCGMMergedTrack.h" -#include "GPUSettings.h" -#include - -#include "GPUO2DataTypes.h" -#include "GPUChainITS.h" - using namespace o2::gpu; // #define BROKEN_EVENTS @@ -915,7 +920,7 @@ int32_t main(int argc, char** argv) nEventsProcessed++; if (configStandalone.timeFrameTime) { 
- double nClusters = chainTracking->GetTPCMerger().NMaxClusters(); + double nClusters = chainTracking->GetProcessors()->tpcMerger.NMaxClusters(); if (nClusters > 0) { const int32_t nOrbits = 32; const double colRate = 50000; diff --git a/GPU/GPUTracking/Standalone/tools/createGeo.C b/GPU/GPUTracking/Standalone/tools/createGeo.C index 307d687f716d1..c454978177ad6 100644 --- a/GPU/GPUTracking/Standalone/tools/createGeo.C +++ b/GPU/GPUTracking/Standalone/tools/createGeo.C @@ -20,6 +20,7 @@ #include "GPUO2Interface.h" #include "GPUReconstruction.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" using namespace o2::gpu; diff --git a/GPU/GPUTracking/Standalone/tools/createLUT.C b/GPU/GPUTracking/Standalone/tools/createLUT.C index 7bb4edbf89f18..b4a053aa46c66 100644 --- a/GPU/GPUTracking/Standalone/tools/createLUT.C +++ b/GPU/GPUTracking/Standalone/tools/createLUT.C @@ -18,6 +18,7 @@ #include "GPUO2Interface.h" #include "GPUReconstruction.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" using namespace o2::gpu; diff --git a/GPU/GPUTracking/TPCClusterFinder/Array2D.h b/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h similarity index 81% rename from GPU/GPUTracking/TPCClusterFinder/Array2D.h rename to GPU/GPUTracking/TPCClusterFinder/CfArray2D.h index b62176fdc4365..3c8bcf94da4b3 100644 --- a/GPU/GPUTracking/TPCClusterFinder/Array2D.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h @@ -16,22 +16,22 @@ #define O2_GPU_ARRAY2D_H #include "clusterFinderDefs.h" -#include "ChargePos.h" +#include "CfChargePos.h" namespace o2::gpu { template -class AbstractArray2D +class AbstractCfArray2D { public: - GPUdi() explicit AbstractArray2D(T* d) : data(d) {} + GPUdi() explicit AbstractCfArray2D(T* d) : data(d) {} - GPUdi() T& operator[](const ChargePos& p) { return data[Layout::idx(p)]; } - GPUdi() const T& operator[](const ChargePos& p) const { return data[Layout::idx(p)]; } + GPUdi() T& operator[](const CfChargePos& p) { return 
data[Layout::idx(p)]; } + GPUdi() const T& operator[](const CfChargePos& p) const { return data[Layout::idx(p)]; } - GPUdi() void safeWrite(const ChargePos& p, const T& v) + GPUdi() void safeWrite(const CfChargePos& p, const T& v) { if (data != nullptr) { (*this)[p] = v; @@ -52,7 +52,7 @@ class TilingLayout WidthInTiles = (TPC_NUM_OF_PADS + Width - 1) / Width, }; - GPUdi() static tpccf::SizeT idx(const ChargePos& p) + GPUdi() static tpccf::SizeT idx(const CfChargePos& p) { const tpccf::SizeT tilePad = p.gpad / Width; const tpccf::SizeT tileTime = p.timePadded / Height; @@ -72,7 +72,7 @@ class TilingLayout class LinearLayout { public: - GPUdi() static tpccf::SizeT idx(const ChargePos& p) + GPUdi() static tpccf::SizeT idx(const CfChargePos& p) { return TPC_NUM_OF_PADS * p.timePadded + p.gpad; } @@ -119,7 +119,7 @@ using TPCMapMemoryLayout = LinearLayout; #endif template -using Array2D = AbstractArray2D>; +using CfArray2D = AbstractCfArray2D>; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h b/GPU/GPUTracking/TPCClusterFinder/CfChargePos.h similarity index 80% rename from GPU/GPUTracking/TPCClusterFinder/ChargePos.h rename to GPU/GPUTracking/TPCClusterFinder/CfChargePos.h index cdd489e0ef938..bf6ce2fc804ba 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ChargePos.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfChargePos.h @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file ChargePos.h +/// \file CfChargePos.h /// \author Felix Weiglhofer #ifndef O2_GPU_CHARGE_POS_H @@ -22,20 +22,20 @@ namespace o2::gpu #define INVALID_TIME_BIN (-GPUCF_PADDING_TIME - 1) -struct ChargePos { +struct CfChargePos { tpccf::GlobalPad gpad; tpccf::TPCFragmentTime timePadded; - GPUdDefault() ChargePos() = default; + GPUdDefault() CfChargePos() = default; - constexpr GPUhdi() ChargePos(tpccf::Row row, tpccf::Pad pad, tpccf::TPCFragmentTime t) + constexpr GPUhdi() CfChargePos(tpccf::Row row, tpccf::Pad pad, tpccf::TPCFragmentTime t) : gpad(tpcGlobalPadIdx(row, pad)), timePadded(t + GPUCF_PADDING_TIME) { } - GPUdi() ChargePos(const tpccf::GlobalPad& p, const tpccf::TPCFragmentTime& t) : gpad(p), timePadded(t) {} + GPUdi() CfChargePos(const tpccf::GlobalPad& p, const tpccf::TPCFragmentTime& t) : gpad(p), timePadded(t) {} - GPUdi() ChargePos delta(const tpccf::Delta2& d) const + GPUdi() CfChargePos delta(const tpccf::Delta2& d) const { return {tpccf::GlobalPad(gpad + d.x), tpccf::TPCFragmentTime(timePadded + d.y)}; } @@ -56,7 +56,7 @@ struct ChargePos { } }; -inline constexpr ChargePos INVALID_CHARGE_POS{255, 255, INVALID_TIME_BIN}; +inline constexpr CfChargePos INVALID_CHARGE_POS{255, 255, INVALID_TIME_BIN}; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h index 75dcc166abd9b..96f4893c74af3 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h @@ -17,7 +17,7 @@ #include "clusterFinderDefs.h" #include "GPUCommonAlgorithm.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "CfConsts.h" namespace o2::gpu @@ -169,14 +169,14 @@ class CfUtils template static GPUdi() void blockLoad( - const Array2D& map, + const CfArray2D& map, uint32_t wgSize, uint32_t elems, uint16_t ll, uint32_t offset, uint32_t N, GPUconstexprref() const tpccf::Delta2* neighbors, - const ChargePos* posBcast, + const CfChargePos* posBcast, GPUgeneric() T* buf) 
{ #if defined(GPUCA_GPUCODE) @@ -186,7 +186,7 @@ class CfUtils tpccf::Delta2 d = neighbors[x + offset]; for (uint32_t i = y; i < wgSize; i += (elems / N)) { - ChargePos readFrom = posBcast[i]; + CfChargePos readFrom = posBcast[i]; uint32_t writeTo = N * i + x; buf[writeTo] = map[readFrom.delta(d)]; } @@ -196,7 +196,7 @@ class CfUtils return; } - ChargePos readFrom = posBcast[ll]; + CfChargePos readFrom = posBcast[ll]; GPUbarrier(); @@ -213,14 +213,14 @@ class CfUtils template static GPUdi() void condBlockLoad( - const Array2D& map, + const CfArray2D& map, uint16_t wgSize, uint16_t elems, uint16_t ll, uint16_t offset, uint16_t N, GPUconstexprref() const tpccf::Delta2* neighbors, - const ChargePos* posBcast, + const CfChargePos* posBcast, const uint8_t* aboveThreshold, GPUgeneric() T* buf) { @@ -230,7 +230,7 @@ class CfUtils uint16_t x = ll % N; tpccf::Delta2 d = neighbors[x + offset]; for (uint32_t i = y; i < wgSize; i += (elems / N)) { - ChargePos readFrom = posBcast[i]; + CfChargePos readFrom = posBcast[i]; uint8_t above = aboveThreshold[i]; uint32_t writeTo = N * i + x; T v(0); @@ -247,7 +247,7 @@ class CfUtils return; } - ChargePos readFrom = posBcast[ll]; + CfChargePos readFrom = posBcast[ll]; uint8_t above = aboveThreshold[ll]; GPUbarrier(); diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx index 622da856af805..a80283b91c940 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx @@ -58,7 +58,7 @@ GPUd() Charge ClusterAccumulator::updateOuter(PackedCharge charge, Delta2 d) return q; } -GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, const Charge q, TPCTime timeOffset) +GPUd() void ClusterAccumulator::finalize(const CfChargePos& pos, const Charge q, TPCTime timeOffset) { mQtot += q; @@ -75,7 +75,7 @@ GPUd() void ClusterAccumulator::finalize(const ChargePos& pos, const Charge q, T mTimeMean += 
timeOffset + pos.time(); } -GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, const Charge q, tpc::ClusterNative& cn, const GPUParam& param, const Array2D& chargeMap) +GPUd() bool ClusterAccumulator::toNative(const CfChargePos& pos, const Charge q, tpc::ClusterNative& cn, const GPUParam& param, const CfArray2D& chargeMap) { Pad pad = pos.pad(); diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h index 90d977372b201..fb208ca0150d4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h @@ -17,7 +17,7 @@ #include "clusterFinderDefs.h" #include "PackedCharge.h" -#include "Array2D.h" +#include "CfArray2D.h" namespace o2 { @@ -30,7 +30,7 @@ struct ClusterNative; namespace gpu { -struct ChargePos; +struct CfChargePos; struct GPUParam; class GPUTPCGeometry; @@ -52,8 +52,8 @@ class ClusterAccumulator mSplitInTime = splitInTime; } - GPUd() void finalize(const ChargePos&, const tpccf::Charge, tpccf::TPCTime); - GPUd() bool toNative(const ChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const Array2D&); + GPUd() void finalize(const CfChargePos&, const tpccf::Charge, tpccf::TPCTime); + GPUd() bool toNative(const CfChargePos&, const tpccf::Charge, tpc::ClusterNative&, const GPUParam&, const CfArray2D&); private: float mQtot = 0; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx index 8dbc5804f8fb8..d2ca3d419c138 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx @@ -13,7 +13,7 @@ /// \author Felix Weiglhofer #include "GPUTPCCFChargeMapFiller.h" -#include "ChargePos.h" +#include "CfChargePos.h" #include "DataFormatsTPC/Digit.h" #include "TPCPadGainCalib.h" @@ -23,14 +23,14 @@ using namespace o2::gpu::tpccf; template <> GPUdii() 
void GPUTPCCFChargeMapFiller::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D indexMap(clusterer.mPindexMap); + CfArray2D indexMap(clusterer.mPindexMap); fillIndexMapImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPmemory->fragment, clusterer.mPdigits, indexMap, clusterer.mPmemory->counters.nDigitsInFragment); } GPUd() void GPUTPCCFChargeMapFiller::fillIndexMapImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, const CfFragment& fragment, const tpc::Digit* digits, - Array2D& indexMap, + CfArray2D& indexMap, size_t maxDigit) { size_t idx = get_global_id(0); @@ -39,21 +39,21 @@ GPUd() void GPUTPCCFChargeMapFiller::fillIndexMapImpl(int32_t nBlocks, int32_t n } CPU_ONLY(idx += fragment.digitsStart); CPU_ONLY(tpc::Digit digit = digits[idx]); - CPU_ONLY(ChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp()))); + CPU_ONLY(CfChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp()))); CPU_ONLY(indexMap.safeWrite(pos, idx)); } template <> GPUdii() void GPUTPCCFChargeMapFiller::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); fillFromDigitsImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, clusterer.mPmemory->counters.nPositions, clusterer.mPdigits, clusterer.mPpositions, chargeMap); } GPUd() void GPUTPCCFChargeMapFiller::fillFromDigitsImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, processorType& clusterer, const CfFragment& fragment, size_t digitNum, const tpc::Digit* digits, - ChargePos* positions, - Array2D& chargeMap) + CfChargePos* positions, + CfArray2D& chargeMap) { size_t 
idx = get_global_id(0); if (idx >= digitNum) { @@ -61,7 +61,7 @@ GPUd() void GPUTPCCFChargeMapFiller::fillFromDigitsImpl(int32_t nBlocks, int32_t } tpc::Digit digit = digits[fragment.digitsStart + idx]; - ChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp())); + CfChargePos pos(digit.getRow(), digit.getPad(), fragment.toLocal(digit.getTimeStamp())); positions[idx] = pos; float q = digit.getChargeFloat(); q *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(clusterer.mISector, digit.getRow(), digit.getPad()); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h index f7aab78c33bd1..800ba786c2105 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::tpc @@ -30,7 +30,7 @@ class Digit; namespace o2::gpu { -struct ChargePos; +struct CfChargePos; class GPUTPCCFChargeMapFiller : public GPUKernelTemplate { @@ -55,9 +55,9 @@ class GPUTPCCFChargeMapFiller : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); - static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, Array2D&, size_t); + static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, CfArray2D&, size_t); - static GPUd() void fillFromDigitsImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, size_t, const tpc::Digit*, ChargePos*, Array2D&); + static GPUd() void fillFromDigitsImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, size_t, const tpc::Digit*, CfChargePos*, CfArray2D&); private: static GPUd() size_t findTransition(int32_t, const tpc::Digit*, size_t, size_t); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx index 1e76860331de6..ec084c308312e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.cxx @@ -13,7 +13,7 @@ /// \author Felix Weiglhofer #include "GPUTPCCFCheckPadBaseline.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" #include "GPUTPCGeometry.h" #include "clusterFinderDefs.h" @@ -33,10 +33,10 @@ template <> GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { const CfFragment& fragment = clusterer.mPmemory->fragment; - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); int32_t basePad = iBlock * PadsPerCacheline; - ChargePos basePos = padToChargePos(basePad, clusterer); + CfChargePos basePos = padToCfChargePos(basePad, clusterer); if (not basePos.valid()) { return; @@ -55,7 +55,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThread bool handlePad = localTimeBin == 0; for (tpccf::TPCFragmentTime t = 
fragment.firstNonOverlapTimeBin(); t < fragment.lastNonOverlapTimeBin(); t += NumOfCachedTimebins) { - const ChargePos pos = basePos.delta({localPadId, int16_t(t + localTimeBin)}); + const CfChargePos pos = basePos.delta({localPadId, int16_t(t + localTimeBin)}); smem.charges[localPadId][localTimeBin] = (pos.valid()) ? chargeMap[pos].unpack() : 0; GPUbarrier(); if (handlePad) { @@ -150,7 +150,7 @@ GPUd() void GPUTPCCFCheckPadBaseline::Thread<0>(int32_t nBlocks, int32_t nThread #endif } -GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer) +GPUd() CfChargePos GPUTPCCFCheckPadBaseline::padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder& clusterer) { constexpr GPUTPCGeometry geo; @@ -161,12 +161,12 @@ GPUd() ChargePos GPUTPCCFCheckPadBaseline::padToChargePos(int32_t& pad, const GP if (0 <= padInRow && padInRow < CAMath::nextMultipleOf(npads)) { int32_t cachelineOffset = padInRow % PadsPerCacheline; pad -= cachelineOffset; - return ChargePos{r, Pad(padInRow - cachelineOffset), 0}; + return CfChargePos{r, Pad(padInRow - cachelineOffset), 0}; } padOffset += npads; } - return ChargePos{0, 0, INVALID_TIME_BIN}; + return CfChargePos{0, 0, INVALID_TIME_BIN}; } GPUd() void GPUTPCCFCheckPadBaseline::updatePadBaseline(int32_t pad, const GPUTPCClusterFinder& clusterer, int32_t totalCharges, int32_t consecCharges, Charge maxCharge) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h index d6daa6803ca39..2403aa6d29ecd 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h @@ -52,7 +52,7 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer); private: - GPUd() static ChargePos padToChargePos(int32_t& pad, const 
GPUTPCClusterFinder&); + GPUd() static CfChargePos padToCfChargePos(int32_t& pad, const GPUTPCClusterFinder&); GPUd() static void updatePadBaseline(int32_t pad, const GPUTPCClusterFinder&, int32_t totalCharges, int32_t consecCharges, tpccf::Charge maxCharge); }; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx index 2131347decec6..c9c6b157499f2 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx @@ -30,7 +30,7 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); tpc::ClusterNative* clusterOut = (onlyMC) ? 
nullptr : clusterer.mPclusterByRow; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h index 79f3325ed9ad2..466d13d3254de 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::tpc @@ -38,7 +38,7 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate public: static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFClusterizer); struct GPUSharedMemory { - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_BUILD_N]; uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE]; }; @@ -57,16 +57,16 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate template GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t); - static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const Array2D&, const ChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*); + static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const CfArray2D&, const CfChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*); - static GPUd() void buildCluster(const GPUSettingsRec&, const Array2D&, ChargePos, ChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); + static GPUd() void buildCluster(const GPUSettingsRec&, const CfArray2D&, 
CfChargePos, CfChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*); static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*); private: - static GPUd() void updateClusterInner(const GPUSettingsRec&, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*); + static GPUd() void updateClusterInner(const GPUSettingsRec&, uint16_t, uint16_t, const PackedCharge*, const CfChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*); - static GPUd() void updateClusterOuter(uint16_t, uint16_t, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*); + static GPUd() void updateClusterOuter(uint16_t, uint16_t, uint16_t, uint16_t, const PackedCharge*, const CfChargePos&, ClusterAccumulator*, MCLabelAccumulator*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc index 8a6b73be8bd8d..e32abbf37584f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc @@ -19,8 +19,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t processorType& clusterer, const CfFragment& fragment, GPUSharedMemory& smem, - const Array2D& chargeMap, - const ChargePos* filteredPeakPositions, + const CfArray2D& chargeMap, + const CfChargePos* filteredPeakPositions, const GPUSettingsRec& calib, MCLabelAccumulator* labelAcc, uint32_t clusternum, @@ -34,7 +34,7 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t // For certain configurations dummy work items are added, so the total // number of work items is dividable by 64. // These dummy items also compute the last cluster but discard the result. 
- ChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; + CfChargePos pos = filteredPeakPositions[CAMath::Min(idx, clusternum - 1)]; Charge charge = chargeMap[pos].unpack(); ClusterAccumulator pc; @@ -94,7 +94,7 @@ GPUdii() void GPUTPCCFClusterizer::updateClusterInner( uint16_t lid, uint16_t N, const PackedCharge* buf, - const ChargePos& pos, + const CfChargePos& pos, ClusterAccumulator* cluster, MCLabelAccumulator* labelAcc, uint8_t* innerAboveThreshold) @@ -125,7 +125,7 @@ GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( uint16_t M, uint16_t offset, const PackedCharge* buf, - const ChargePos& pos, + const CfChargePos& pos, ClusterAccumulator* cluster, MCLabelAccumulator* labelAcc) { @@ -144,9 +144,9 @@ GPUdii() void GPUTPCCFClusterizer::updateClusterOuter( GPUdii() void GPUTPCCFClusterizer::buildCluster( const GPUSettingsRec& calib, - const Array2D& chargeMap, - ChargePos pos, - ChargePos* posBcast, + const CfArray2D& chargeMap, + CfChargePos pos, + CfChargePos* posBcast, PackedCharge* buf, uint8_t* innerAboveThreshold, ClusterAccumulator* myCluster, diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx index 6662b93eccb78..312085d2947ab 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx @@ -15,7 +15,7 @@ #include "GPUTPCCFDecodeZS.h" #include "GPUCommonMath.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" #include "CfUtils.h" #include "CommonConstants/LHCConstants.h" @@ -53,8 +53,8 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared if (zs.count[endpoint] == 0) { return; } - ChargePos* positions = clusterer.mPpositions; - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfChargePos* positions = clusterer.mPpositions; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); const size_t 
nDigits = clusterer.mPzsOffsets[iBlock].offset; if (iThread == 0) { const int32_t region = endpoint / 2; @@ -175,7 +175,7 @@ GPUdii() void GPUTPCCFDecodeZS::decode(GPUTPCClusterFinder& clusterer, GPUShared TPCTime globalTime = timeBin + l; bool inFragment = fragment.contains(globalTime); Row row = rowOffset + m; - ChargePos pos(row, Pad(pad), inFragment ? fragment.toLocal(globalTime) : INVALID_TIME_BIN); + CfChargePos pos(row, Pad(pad), inFragment ? fragment.toLocal(globalTime) : INVALID_TIME_BIN); positions[nDigitsTmp++] = pos; if (inFragment) { @@ -552,7 +552,7 @@ GPUd() o2::tpc::PadPos GPUTPCCFDecodeZSLinkBase::GetPadAndRowFromFEC(processorTy GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, float charge, PadPos padAndRow, TPCFragmentTime localTime, size_t positionOffset) { const uint32_t sector = clusterer.mISector; - ChargePos* positions = clusterer.mPpositions; + CfChargePos* positions = clusterer.mPpositions; #ifdef GPUCA_CHECK_TPCZS_CORRUPTION if (padAndRow.getRow() >= GPUCA_ROW_COUNT) { positions[positionOffset] = INVALID_CHARGE_POS; @@ -560,9 +560,9 @@ GPUd() void GPUTPCCFDecodeZSLinkBase::WriteCharge(processorType& clusterer, floa return; } #endif - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - ChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime); + CfChargePos pos(padAndRow.getRow(), padAndRow.getPad(), localTime); positions[positionOffset] = pos; charge *= clusterer.GetConstantMem()->calibObjects.tpcPadGain->getGainCorrection(sector, padAndRow.getRow(), padAndRow.getPad()); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx index dab8123698abf..429d51685e504 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx @@ -15,7 +15,7 @@ #include "GPUTPCCFDeconvolution.h" #include 
"CfConsts.h" #include "CfUtils.h" -#include "ChargePos.h" +#include "CfChargePos.h" #include "GPUDefMacros.h" using namespace o2::gpu; @@ -24,15 +24,15 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFDeconvolution::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); GPUTPCCFDeconvolution::deconvolutionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, isPeakMap, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions); } GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, - const Array2D& peakMap, - Array2D& chargeMap, - const ChargePos* positions, + const CfArray2D& peakMap, + CfArray2D& chargeMap, + const CfChargePos* positions, const uint32_t digitnum) { SizeT idx = get_global_id(0); @@ -40,7 +40,7 @@ GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t bool iamDummy = (idx >= digitnum); idx = iamDummy ? 
digitnum - 1 : idx; - ChargePos pos = positions[idx]; + CfChargePos pos = positions[idx]; bool iamPeak = CfUtils::isPeak(peakMap[pos]); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h index 78fcc8ba1785a..e971a042e95a4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h @@ -20,7 +20,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::gpu @@ -31,7 +31,7 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate public: static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFDeconvolution); struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { - ChargePos posBcast1[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast1[SCRATCH_PAD_WORK_GROUP_SIZE]; uint8_t aboveThresholdBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; uint8_t buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_COUNT_N]; }; @@ -51,7 +51,7 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); private: - static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const Array2D&, Array2D&, const ChargePos*, const uint32_t); + static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D&, CfArray2D&, const CfChargePos*, const uint32_t); static GPUdi() uint8_t countPeaksInner(uint16_t, const uint8_t*, uint8_t*); static GPUdi() uint8_t countPeaksOuter(uint16_t, uint8_t, const uint8_t*); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx index f3a914cbfcaee..4dfa50d9439e4 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.cxx @@ -13,10 +13,10 @@ /// \author Felix Weiglhofer #include "GPUTPCCFNoiseSuppression.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "CfConsts.h" #include "CfUtils.h" -#include "ChargePos.h" +#include "CfChargePos.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -24,29 +24,29 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFNoiseSuppression::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); noiseSuppressionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, clusterer.Param().rec, chargeMap, isPeakMap, clusterer.mPpeakPositions, clusterer.mPmemory->counters.nPeaks, clusterer.mPisPeak); } template <> GPUdii() void GPUTPCCFNoiseSuppression::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer) { - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D 
isPeakMap(clusterer.mPpeakMap); updatePeaksImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPpeakPositions, clusterer.mPisPeak, clusterer.mPmemory->counters.nPeaks, isPeakMap); } GPUdii() void GPUTPCCFNoiseSuppression::noiseSuppressionImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, const GPUSettingsRec& calibration, - const Array2D& chargeMap, - const Array2D& peakMap, - const ChargePos* peakPositions, + const CfArray2D& chargeMap, + const CfArray2D& peakMap, + const CfChargePos* peakPositions, const uint32_t peaknum, uint8_t* isPeakPredicate) { SizeT idx = get_global_id(0); - ChargePos pos = peakPositions[CAMath::Min(idx, (SizeT)(peaknum - 1))]; + CfChargePos pos = peakPositions[CAMath::Min(idx, (SizeT)(peaknum - 1))]; Charge charge = chargeMap[pos].unpack(); uint64_t minimas, bigger, peaksAround; @@ -75,10 +75,10 @@ GPUdii() void GPUTPCCFNoiseSuppression::noiseSuppressionImpl(int32_t nBlocks, in } GPUd() void GPUTPCCFNoiseSuppression::updatePeaksImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, - const ChargePos* peakPositions, + const CfChargePos* peakPositions, const uint8_t* isPeak, const uint32_t peakNum, - Array2D& peakMap) + CfArray2D& peakMap) { SizeT idx = get_global_id(0); @@ -86,7 +86,7 @@ GPUd() void GPUTPCCFNoiseSuppression::updatePeaksImpl(int32_t nBlocks, int32_t n return; } - ChargePos pos = peakPositions[idx]; + CfChargePos pos = peakPositions[idx]; uint8_t peak = isPeak[idx]; @@ -164,12 +164,12 @@ GPUdi() bool GPUTPCCFNoiseSuppression::keepPeak( } GPUd() void GPUTPCCFNoiseSuppression::findMinimaAndPeaks( - const Array2D& chargeMap, - const Array2D& peakMap, + const CfArray2D& chargeMap, + const CfArray2D& peakMap, const GPUSettingsRec& calibration, float q, - const ChargePos& pos, - ChargePos* posBcast, + const CfChargePos& pos, + CfChargePos* posBcast, PackedCharge* buf, uint64_t* minimas, uint64_t* bigger, diff --git 
a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h index 71236bc317443..59196da11079b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h @@ -19,13 +19,13 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::gpu { -struct ChargePos; +struct CfChargePos; class GPUTPCCFNoiseSuppression : public GPUKernelTemplate { @@ -38,7 +38,7 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks)); struct GPUSharedMemory { - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_NOISE_N]; }; @@ -57,9 +57,9 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); private: - static GPUd() void noiseSuppressionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const GPUSettingsRec&, const Array2D&, const Array2D&, const ChargePos*, const uint32_t, uint8_t*); + static GPUd() void noiseSuppressionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const GPUSettingsRec&, const CfArray2D&, const CfArray2D&, const CfChargePos*, const uint32_t, uint8_t*); - static GPUd() void updatePeaksImpl(int32_t, int32_t, int32_t, int32_t, const ChargePos*, const uint8_t*, const uint32_t, Array2D&); + static GPUd() void updatePeaksImpl(int32_t, int32_t, int32_t, int32_t, const CfChargePos*, const uint8_t*, const uint32_t, CfArray2D&); static GPUdi() void checkForMinima(const float, const float, const float, PackedCharge, int32_t, uint64_t*, uint64_t*); @@ -69,7 +69,7 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate static GPUdi() bool keepPeak(uint64_t, uint64_t); - static GPUd() void findMinimaAndPeaks(const Array2D&, const Array2D&, const GPUSettingsRec&, float, const ChargePos&, ChargePos*, PackedCharge*, uint64_t*, uint64_t*, uint64_t*); + static GPUd() void findMinimaAndPeaks(const CfArray2D&, const CfArray2D&, const GPUSettingsRec&, float, const CfChargePos&, CfChargePos*, PackedCharge*, uint64_t*, uint64_t*, uint64_t*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx index 1de922f716c14..6749ab8e8485e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.cxx @@ -14,7 +14,7 @@ #include "GPUTPCCFPeakFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "CfUtils.h" #include "PackedCharge.h" #include "TPCPadGainCalib.h" @@ -25,19 +25,19 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFPeakFinder::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, 
processorType& clusterer) { - Array2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); findPeaksImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, chargeMap, clusterer.mPpadIsNoisy, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions, clusterer.Param().rec, *clusterer.GetConstantMem()->calibObjects.tpcPadGain, clusterer.mPisPeak, isPeakMap); } GPUdii() bool GPUTPCCFPeakFinder::isPeak( GPUSharedMemory& smem, Charge q, - const ChargePos& pos, + const CfChargePos& pos, uint16_t N, - const Array2D& chargeMap, + const CfArray2D& chargeMap, const GPUSettingsRec& calib, - ChargePos* posBcast, + CfChargePos* posBcast, PackedCharge* buf) { uint16_t ll = get_local_id(0); @@ -91,21 +91,21 @@ GPUdii() bool GPUTPCCFPeakFinder::isPeak( } GPUd() void GPUTPCCFPeakFinder::findPeaksImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, - const Array2D& chargeMap, + const CfArray2D& chargeMap, const uint8_t* padHasLostBaseline, - const ChargePos* positions, + const CfChargePos* positions, SizeT digitnum, const GPUSettingsRec& calib, const TPCPadGainCalib& gainCorrection, // Only used for globalPad() function uint8_t* isPeakPredicate, - Array2D& peakMap) + CfArray2D& peakMap) { SizeT idx = get_global_id(0); // For certain configurations dummy work items are added, so the total // number of work items is dividable by 64. // These dummy items also compute the last digit but discard the result. - ChargePos pos = positions[CAMath::Min(idx, (SizeT)(digitnum - 1))]; + CfChargePos pos = positions[CAMath::Min(idx, (SizeT)(digitnum - 1))]; Charge charge = pos.valid() ? 
chargeMap[pos].unpack() : Charge(0); bool hasLostBaseline = padHasLostBaseline[gainCorrection.globalPad(pos.row(), pos.pad())]; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h index ec17d98322239..e480518ddc9dd 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFPeakFinder.h @@ -19,20 +19,20 @@ #include "GPUConstantMem.h" #include "clusterFinderDefs.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" namespace o2::gpu { -struct ChargePos; +struct CfChargePos; class GPUTPCCFPeakFinder : public GPUKernelTemplate { public: static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFPeakFinder); struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_SEARCH_N]; }; @@ -51,9 +51,9 @@ class GPUTPCCFPeakFinder : public GPUKernelTemplate GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... 
args); private: - static GPUd() void findPeaksImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const Array2D&, const uint8_t*, const ChargePos*, tpccf::SizeT, const GPUSettingsRec&, const TPCPadGainCalib&, uint8_t*, Array2D&); + static GPUd() void findPeaksImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D&, const uint8_t*, const CfChargePos*, tpccf::SizeT, const GPUSettingsRec&, const TPCPadGainCalib&, uint8_t*, CfArray2D&); - static GPUd() bool isPeak(GPUSharedMemory&, tpccf::Charge, const ChargePos&, uint16_t, const Array2D&, const GPUSettingsRec&, ChargePos*, PackedCharge*); + static GPUd() bool isPeak(GPUSharedMemory&, tpccf::Charge, const CfChargePos&, uint16_t, const CfArray2D&, const GPUSettingsRec&, CfChargePos*, PackedCharge*); }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx index efed3643800b6..1da5a1158a8c2 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx @@ -15,7 +15,7 @@ #include "GPUTPCCFStreamCompaction.h" #include "GPUCommonAlgorithm.h" -#include "ChargePos.h" +#include "CfChargePos.h" #include "CfUtils.h" using namespace o2::gpu; @@ -92,7 +92,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread -GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, ChargePos* in, ChargePos* out) +GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, CfChargePos* in, CfChargePos* out) { uint32_t nElems = CompactionElems(clusterer, stage); SizeT bufferSize = (stage) ? 
clusterer.mNMaxClusters : clusterer.mNMaxPeaks; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index 613c4ad9e5fa6..051391f12cc6d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -16,12 +16,14 @@ #include "GPUReconstruction.h" #include "GPUMemorySizeScalers.h" #include "GPUHostDataTypes.h" +#include "GPUSettings.h" +#include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/ZeroSuppression.h" #include "DataFormatsTPC/Digit.h" -#include "ChargePos.h" -#include "Array2D.h" +#include "CfChargePos.h" +#include "CfArray2D.h" using namespace o2::gpu; using namespace o2::tpc; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 3e9ea2c6f608b..96efe08be6dc6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -46,7 +46,7 @@ namespace o2::gpu struct GPUTPCClusterMCInterimArray; struct TPCPadGainCalib; -struct ChargePos; +struct CfChargePos; class GPUTPCClusterFinder : public GPUProcessor { @@ -98,9 +98,9 @@ class GPUTPCClusterFinder : public GPUProcessor MinMaxCN* mMinMaxCN = nullptr; uint8_t* mPpadIsNoisy = nullptr; tpc::Digit* mPdigits = nullptr; // input digits, only set if ZS is skipped - ChargePos* mPpositions = nullptr; - ChargePos* mPpeakPositions = nullptr; - ChargePos* mPfilteredPeakPositions = nullptr; + CfChargePos* mPpositions = nullptr; + CfChargePos* mPpeakPositions = nullptr; + CfChargePos* mPfilteredPeakPositions = nullptr; uint8_t* mPisPeak = nullptr; uint32_t* mPclusterPosInRow = nullptr; // store the index where the corresponding cluster is stored in a bucket. // Required when MC are enabled to write the mc data to the correct position. 
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index a9fbc1b5f40e0..da30375149b7c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -14,8 +14,10 @@ #include "GPUTPCClusterFinder.h" #include "GPUReconstruction.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "DataFormatsTPC/Digit.h" +#include "DataFormatsTPC/ClusterNative.h" +#include "GPUSettings.h" using namespace o2::gpu; using namespace o2::gpu::tpccf; @@ -37,7 +39,7 @@ void GPUTPCClusterFinder::DumpDigits(std::ostream& out) void GPUTPCClusterFinder::DumpChargeMap(std::ostream& out, std::string_view title) { out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; - Array2D map(mPchargeMap); + CfArray2D map(mPchargeMap); out << std::hex; @@ -71,7 +73,7 @@ void GPUTPCClusterFinder::DumpPeakMap(std::ostream& out, std::string_view title) { out << "\nClusterer - " << title << " - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; - Array2D map(mPpeakMap); + CfArray2D map(mPpeakMap); out << std::hex; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx index fe3202fe7b439..092af2ea393c5 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -15,6 +15,7 @@ #include "GPUReconstruction.h" #include "ML/3rdparty/GPUORTFloat16.h" #include "GPUTPCNNClusterizer.h" +#include "GPUSettings.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h index da490b0f94d58..022642f9f142e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h 
@@ -15,7 +15,7 @@ #ifndef O2_GPUTPCNNCLUSTERIZER_H #define O2_GPUTPCNNCLUSTERIZER_H -#include "ChargePos.h" +#include "CfChargePos.h" #include "GPUProcessor.h" namespace o2::OrtDataType diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 2cf9ab2037007..512bc1d3bb09b 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -43,7 +43,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CPU_ONLY(MCLabelAccumulator labelAcc(clusterer)); tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow; o2::gpu::GPUTPCCFClusterizer::GPUSharedMemory smem_new; @@ -58,9 +58,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); - ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); + CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); @@ -75,7 +75,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - Array2D isPeakMap(clusterer.mPpeakMap); - ChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfArray2D isPeakMap(clusterer.mPpeakMap); + CfChargePos peak = 
clusterer.mPfilteredPeakPositions[base_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()); if (clustererNN.nnClusterizerAddIndexData && transient_index == (clustererNN.nnClusterizerElementSize - 1)) { uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; - ChargePos tmp_pos = peak.delta(d); + CfChargePos tmp_pos = peak.delta(d); clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; } @@ -161,7 +161,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(chargeMap[peak].unpack()); int t = (rest_1 % (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputTime; - ChargePos tmp_pos(row + r, pad + p, time + t); + CfChargePos tmp_pos(row + r, pad + p, time + t); if (dtype == 0) { clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); } else if (dtype == 1) { @@ -227,8 +227,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); @@ -322,8 +322,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - ChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); + CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; float central_charge = 
static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index 27cfba2487aed..dc7f537c6c1e8 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -19,7 +19,7 @@ #include "GPUGeneralKernels.h" #include "GPUConstantMem.h" #include "GPUTPCClusterFinder.h" -#include "Array2D.h" +#include "CfArray2D.h" #include "PackedCharge.h" #include "GPUTPCNNClusterizer.h" @@ -47,7 +47,7 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer); struct GPUSharedMemory { // Regular cluster finder - ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; + CfChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE]; PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_BUILD_N]; uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE]; }; diff --git a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx index 19ef7aa9ecd0d..e58edae208115 100644 --- a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.cxx @@ -26,7 +26,7 @@ MCLabelAccumulator::MCLabelAccumulator(GPUTPCClusterFinder& clusterer) { } -void MCLabelAccumulator::collect(const ChargePos& pos, Charge q) +void MCLabelAccumulator::collect(const CfChargePos& pos, Charge q) { if (q == 0 || !engaged()) { return; diff --git a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h index 176fbea02befe..35c24bfeb5f18 100644 --- a/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h +++ b/GPU/GPUTracking/TPCClusterFinder/MCLabelAccumulator.h @@ -16,7 +16,7 @@ #define 
O2_GPU_MC_LABEL_ACCUMULATOR_H #include "clusterFinderDefs.h" -#include "Array2D.h" +#include "CfArray2D.h" #include #include @@ -44,14 +44,14 @@ class MCLabelAccumulator public: MCLabelAccumulator(GPUTPCClusterFinder&); - void collect(const ChargePos&, tpccf::Charge); + void collect(const CfChargePos&, tpccf::Charge); bool engaged() const { return mLabels != nullptr && mOutput != nullptr; } void commit(tpccf::Row, uint32_t, uint32_t); private: - Array2D mIndexMap; + CfArray2D mIndexMap; const o2::dataformats::ConstMCLabelContainerView* mLabels = nullptr; GPUTPCClusterMCInterimArray* mOutput = nullptr; diff --git a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx index 3d6b45c372ea0..899149d320bda 100644 --- a/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx +++ b/GPU/GPUTracking/TPCConvert/GPUTPCConvert.cxx @@ -17,6 +17,7 @@ #include "GPUTPCClusterData.h" #include "GPUReconstruction.h" #include "GPUO2DataTypes.h" +#include "GPUParam.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx index c633f10adae38..2f754d2416bc1 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTracker.cxx @@ -22,6 +22,7 @@ #include "GPUTRDTrackerDebug.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" +#include "GPUConstantMem.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index c2b74489e6250..5b0960919da15 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" #include "GPUTPCDef.h" @@ -204,7 +205,7 @@ int32_t GPUDisplay::DrawGLScene() int32_t retVal = 0; if (mChain) { mIOPtrs = &mChain->mIOPtrs; - mCalib = &mChain->calib(); + mCalib = &mChain->GetCalib(); } if (!mIOPtrs) { mNCollissions = 0; 
diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index bb270cda23565..dbd90020698b2 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -15,12 +15,10 @@ #ifndef GPUDISPLAY_H #define GPUDISPLAY_H -#include "GPUSettings.h" #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" #include "GPUDisplayInterface.h" -#include "GPUChainTracking.h" #include "../utils/vecpod.h" #include "../utils/qsem.h" @@ -34,6 +32,7 @@ namespace o2::gpu class GPUTPCTracker; struct GPUParam; class GPUQA; +class GPUTRDGeometry; class GPUDisplay : public GPUDisplayInterface { @@ -77,7 +76,7 @@ class GPUDisplay : public GPUDisplayInterface int32_t& drawTextFontSize() { return mDrawTextFontSize; } private: - static constexpr int32_t NSECTORS = GPUChainTracking::NSECTORS; + static constexpr int32_t NSECTORS = GPUCA_NSECTORS; static constexpr float GL_SCALE_FACTOR = (1.f / 100.f); static constexpr const int32_t N_POINTS_TYPE = 15; @@ -157,16 +156,7 @@ class GPUDisplay : public GPUDisplayInterface void insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last); void insertVertexList(int32_t iSector, size_t first, size_t last); template - void SetInfo(Args... args) - { -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wformat-security" -#pragma GCC diagnostic ignored "-Wformat-truncation" - snprintf(mInfoText2, 1024, args...); -#pragma GCC diagnostic pop - GPUInfo("%s", mInfoText2); - mInfoText2Timer.ResetStart(); - } + void SetInfo(Args... 
args); void PrintGLHelpText(float colorValue); void calcXYZ(const float*); void mAnimationCloseAngle(float& newangle, float lastAngle); diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx index ded8803801fb7..98d2593c27950 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackend.cxx @@ -34,6 +34,7 @@ #endif #include "GPUDisplay.h" +#include "GPULogging.h" #include using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx index 3ee3384c8e118..25ae5e1f8055f 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendOpenGL.cxx @@ -27,6 +27,8 @@ #include "GPUDisplayBackendOpenGL.h" #include "shaders/GPUDisplayShaders.h" #include "GPUDisplay.h" +#include "GPULogging.h" +#include "GPUParam.h" #define OPENGL_EMULATE_MULTI_DRAW 0 diff --git a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx index 2324c194d04b9..93c19356ac062 100644 --- a/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx +++ b/GPU/GPUTracking/display/backend/GPUDisplayBackendVulkan.cxx @@ -19,6 +19,8 @@ VULKAN_HPP_DEFAULT_DISPATCH_LOADER_DYNAMIC_STORAGE #include "GPUCommonDef.h" #include "GPUDisplayBackendVulkan.h" #include "GPUDisplay.h" +#include "GPULogging.h" +#include "GPUParam.h" #include diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx index 590d8648eb5bb..22970c3228815 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontend.cxx @@ -30,6 +30,9 @@ #include "GPUDisplayFrontendWayland.h" #endif +#include "GPULogging.h" +#include + #ifdef GPUCA_BUILD_EVENT_DISPLAY_QT 
#include "GPUDisplayGUIWrapper.h" #else diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx index ad3b620ba8f55..d0aae2ffaad02 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayFrontendWayland.cxx @@ -18,6 +18,7 @@ #include "GPUDisplayGUIWrapper.h" #include "GPUDisplay.h" #include "GPULogging.h" +#include "GPUParam.h" #include #include #include diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc b/GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc new file mode 100644 index 0000000000000..b6ac78b31f315 --- /dev/null +++ b/GPU/GPUTracking/display/frontend/GPUDisplayInfo.inc @@ -0,0 +1,36 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUDisplayInfo.inc +/// \author David Rohr + +#ifndef GPUDISPLAYINFO_INC_H +#define GPUDISPLAYINFO_INC_H + +#include "GPUDisplay.h" +#include "GPULogging.h" + +namespace o2::gpu +{ +template +void GPUDisplay::SetInfo(Args... 
args) +{ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-security" +#pragma GCC diagnostic ignored "-Wformat-truncation" + snprintf(mInfoText2, 1024, args...); +#pragma GCC diagnostic pop + GPUInfo("%s", mInfoText2); + mInfoText2Timer.ResetStart(); +} +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx index 32ff6c73e110c..54258857a244c 100644 --- a/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx +++ b/GPU/GPUTracking/display/frontend/GPUDisplayKeys.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx index 6c0595b073cd0..cc9ec2e766c4d 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayAnimation.cxx @@ -13,6 +13,8 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" +#include "GPUCommonMath.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx index b04c93ab8496e..ef94628baeb38 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayBackendOpenGLMagneticField.cxx @@ -31,6 +31,7 @@ #include "backend/GPUDisplayBackendOpenGL.h" #include "shaders/GPUDisplayShaders.h" #include "GPUDisplay.h" +#include "GPULogging.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx index ca9fd6be01703..9d188d03c7b69 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayHelpers.cxx @@ -13,6 
+13,9 @@ /// \author David Rohr #include "GPUDisplay.h" +#include "GPUChainTracking.h" +#include "GPULogging.h" +#include "GPUParam.h" #ifndef _WIN32 #include "bitmapfile.h" diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx index 644995929acb7..4dacaec2fbca5 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayInterpolation.cxx @@ -14,6 +14,7 @@ #include #include "GPUDisplay.h" +#include "GPULogging.h" using namespace o2::gpu; diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx index ee50f32e3c1ac..d31ee206f35e2 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayLoader.cxx @@ -16,6 +16,7 @@ #include "frontend/GPUDisplayFrontend.h" #include "GPUDisplayInterface.h" +#include #include #include diff --git a/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx b/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx index dbeefc7bf9b07..07a05e585d422 100644 --- a/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx +++ b/GPU/GPUTracking/display/helpers/GPUDisplayROOT.cxx @@ -17,6 +17,9 @@ #endif #include "GPUDisplay.h" +#include "GPULogging.h" +#include "GPUConstantMem.h" +#include "GPUChainTracking.h" using namespace o2::gpu; #ifndef GPUCA_NO_ROOT diff --git a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx index 24668c576d795..e1d63ea1a21e4 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayDraw.cxx @@ -28,6 +28,7 @@ #include "GPUTPCMCInfo.h" #include "GPUParam.inc" #include "GPUCommonMath.h" +#include "GPUChainTracking.h" #include @@ -43,7 +44,7 @@ using namespace o2::gpu; #define GET_CID(sector, i) (mParam->par.earlyTpcTransform ? 
mIOPtrs->clusterData[sector][i].id : (mIOPtrs->clustersNative->clusterOffset[sector][0] + i)) const GPUTRDGeometry* GPUDisplay::trdGeometry() { return (GPUTRDGeometry*)mCalib->trdGeometry; } -const GPUTPCTracker& GPUDisplay::sectorTracker(int32_t iSector) { return mChain->GetTPCSectorTrackers()[iSector]; } +const GPUTPCTracker& GPUDisplay::sectorTracker(int32_t iSector) { return mChain->GetProcessors()->tpcTrackers[iSector]; } inline void GPUDisplay::insertVertexList(std::pair*, vecpod*>& vBuf, size_t first, size_t last) { diff --git a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx index 0a780732273db..ab4c0abd7b60e 100644 --- a/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx +++ b/GPU/GPUTracking/display/render/GPUDisplayImportEvent.cxx @@ -17,6 +17,7 @@ #endif #include "GPUDisplay.h" +#include "frontend/GPUDisplayInfo.inc" #include "GPUO2DataTypes.h" #include "GPUTPCClusterData.h" #include "GPUTPCConvertImpl.h" diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 7e3ddf868af2a..202ea47d1f3bf 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -128,7 +128,7 @@ o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUS o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf "uint32_t" offset int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage ChargePos* in ChargePos* out) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out) o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) 
o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) diff --git a/GPU/GPUTracking/qa/GPUQA.cxx b/GPU/GPUTracking/qa/GPUQA.cxx index ba7aeb3800a5e..6a2623fb6e09d 100644 --- a/GPU/GPUTracking/qa/GPUQA.cxx +++ b/GPU/GPUTracking/qa/GPUQA.cxx @@ -38,6 +38,7 @@ #include "GPUTPCDef.h" #include "GPUTPCTrackingData.h" #include "GPUChainTracking.h" +#include "GPUChainTrackingGetters.inc" #include "GPUTPCTrack.h" #include "GPUTPCTracker.h" #include "GPUTPCGMMergedTrack.h" @@ -1702,7 +1703,7 @@ void GPUQA::RunQA(bool matchOnly, const std::vector* tracksEx } } - uint32_t nCl = clNative ? clNative->nClustersTotal : mTracking->GetTPCMerger().NMaxClusters(); + uint32_t nCl = clNative ? clNative->nClustersTotal : mTracking->GetProcessors()->tpcMerger.NMaxClusters(); mClusterCounts.nTotal += nCl; if (mQATasks & taskClusterCounts) { for (uint32_t i = 0; i < nCl; i++) { diff --git a/GPU/GPUTracking/qa/genEvents.cxx b/GPU/GPUTracking/qa/genEvents.cxx index 2e1bc1c5c64b2..9c2220f9ef748 100644 --- a/GPU/GPUTracking/qa/genEvents.cxx +++ b/GPU/GPUTracking/qa/genEvents.cxx @@ -37,6 +37,7 @@ #include "GPUTPCGMPropagator.h" #include "GPUTPCGMMerger.h" #include "GPUChainTracking.h" +#include "GPUConstantMem.h" #include "../utils/qconfig.h" @@ -169,7 +170,7 @@ int32_t genEvents::GenerateEvent(const GPUParam& param, char* filename) GPUTPCGMPropagator prop; { prop.SetToyMCEventsFlag(kTRUE); - const GPUTPCGMMerger& merger = mRec->GetTPCMerger(); + const GPUTPCGMMerger& merger = mRec->GetProcessors()->tpcMerger; prop.SetPolynomialField(&merger.Param().polynomialField); } From 73a093527f4973ed92d43e7a8c893cbd8909af63 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 20 Apr 2025 20:57:39 +0200 Subject: [PATCH 0422/1914] GPU: Move kernel specializations to dedicated file --- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 8 +- ...GPUReconstructionCUDAKernelsSpecialize.inc | 138 
++++++++++++++++++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- .../opencl/GPUReconstructionOCLKernels.cxx | 7 +- .../GPUReconstructionOCLKernelsSpecialize.inc | 20 +++ GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 124 +--------------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 15 -- 7 files changed, 164 insertions(+), 150 deletions(-) create mode 100644 GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc create mode 100644 GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 758ab1b0e36c3..11a62bcec2318 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -22,16 +22,12 @@ using namespace o2::gpu; #include "GPUReconstructionIncludesDeviceAll.h" +#include "GPUReconstructionCUDAKernelsSpecialize.inc" + #if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUConstantMemBuffer; } #endif -template <> -inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) -{ - GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); -} - template inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc new file mode 100644 index 0000000000000..899c2e240cd94 --- /dev/null +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -0,0 +1,138 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionCUDAKernelsSpecialize.inc +/// \author David Rohr + +#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) + +namespace o2::gpu::internal +{ +namespace // anonymous +{ +struct MergeBorderTracks_compMax { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); + } +}; +struct MergeBorderTracks_compMin { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); + } +}; + +struct GPUTPCGMMergerSortTracks_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + if (a.CCE() != b.CCE()) { + return a.CCE() > b.CCE(); + } + if (a.Legs() != b.Legs()) { + return a.Legs() > b.Legs(); + } + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE + return a.NClusters() > b.NClusters(); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerSortTracksQPt_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerMergeLoopers_comp { + GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) + { + return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); + } +}; + +struct GPUTPCGMO2OutputSort_comp { + GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) + { + return (a.y > b.y); + } +}; + +} // anonymous namespace +} // namespace o2::gpu::internal + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +{ + if (cmpMax) { + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); + } else { + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin()); + } +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); +} + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +{ + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, 
mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); +} +#endif // GPUCA_SPECIALIZE_THRUST_SORTS + +template <> +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +{ + GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); +} diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index d7adb222d547b..d29a6afb60899 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}") if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify) file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR}) - set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu) + set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDAKernelsSpecialize.inc GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu) set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesSystem.h) set(HIP_SOURCES "") foreach(file ${GPUCA_HIP_FILE_LIST}) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 
29b71017e9f73..72c68428149dd 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -15,12 +15,7 @@ #include "GPUReconstructionOCLIncludesHost.h" #include "GPUReconstructionKernelIncludes.h" -template <> -inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) -{ - cl_int4 val0 = {0, 0, 0, 0}; - GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); -} +#include "GPUReconstructionOCLKernelsSpecialize.inc" template inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc new file mode 100644 index 0000000000000..1b860e47a4243 --- /dev/null +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc @@ -0,0 +1,20 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file GPUReconstructionOCLKernelsSpecialize.inc +/// \author David Rohr + +template <> +inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +{ + cl_int4 val0 = {0, 0, 0, 0}; + GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); +} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 4f960a8e1ec76..b6241ad36b5de 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -736,46 +736,15 @@ GPUd() void GPUTPCGMMerger::MergeBorderTracks<1>(int32_t nBlocks, int32_t nThrea #endif } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize MergeBorderTracks<3> -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct MergeBorderTracks_compMax { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); - } -}; -struct MergeBorderTracks_compMin { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); - } -}; -} // anonymous namespace -} // namespace o2::gpu::internal - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) -{ - if (cmpMax) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); - } else { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMin()); - } -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize MergeBorderTracks<3> - template <> GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0) { if (cmpMax) { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMax < b.fMax; }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); } else { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return a.fMin < b.fMin; }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); } } #endif @@ -1783,74 +1752,6 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr } } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct GPUTPCGMMergerSortTracks_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerSortTracksQPt_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { 
- return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - } -}; -} // anonymous namespace -} // namespace o2::gpu::internal - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); -} - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt - GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS @@ -2050,27 +1951,6 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, #endif } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct GPUTPCGMMergerMergeLoopers_comp { - GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) - { - return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); - } -}; -} // anonymous namespace -} // namespace o2::gpu::internal - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - 
GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMMergerSortTracks and GPUTPCGMMergerSortTracksQPt - GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const MergeLooperParam* params = mLooperCandidates; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 6640b556c3011..ea219a02a1887 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -93,21 +93,6 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, #endif } -#if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) // Specialize GPUTPCGMO2Output::Thread -struct GPUTPCGMO2OutputSort_comp { - GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) - { - return (a.y > b.y); - } -}; - -template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) -{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); -} -#endif // GPUCA_SPECIALIZE_THRUST_SORTS - Specialize GPUTPCGMO2Output::Thread - template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { From 29e2526be7b62961793f538b1c00d7f7f1618ccb Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 21 Apr 2025 10:30:29 +0200 Subject: [PATCH 0423/1914] GPU: Add protections not to do invalid memory allocations while volatile memory is allocated --- GPU/GPUTracking/Base/GPUMemoryResource.h | 34 +++++------ 
GPU/GPUTracking/Base/GPUReconstruction.cxx | 56 ++++++++++++------- GPU/GPUTracking/Base/GPUReconstruction.h | 18 +++--- GPU/GPUTracking/Global/GPUChainITS.cxx | 4 +- .../Global/GPUChainTrackingCompression.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 2 +- .../Global/GPUChainTrackingMerger.cxx | 2 +- 7 files changed, 69 insertions(+), 49 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUMemoryResource.h b/GPU/GPUTracking/Base/GPUMemoryResource.h index 06e350db0bfc7..947bcac504733 100644 --- a/GPU/GPUTracking/Base/GPUMemoryResource.h +++ b/GPU/GPUTracking/Base/GPUMemoryResource.h @@ -56,24 +56,24 @@ class GPUMemoryResource public: enum MemoryType { - MEMORY_HOST = 1, - MEMORY_GPU = 2, - MEMORY_INPUT_FLAG = 4, - MEMORY_INPUT = 7, - MEMORY_OUTPUT_FLAG = 8, - MEMORY_OUTPUT = 11, - MEMORY_INOUT = 15, - MEMORY_SCRATCH = 16, - MEMORY_SCRATCH_HOST = 17, - MEMORY_EXTERNAL = 32, - MEMORY_PERMANENT = 64, - MEMORY_CUSTOM = 128, - MEMORY_CUSTOM_TRANSFER = 256, - MEMORY_STACK = 512 + MEMORY_HOST = 1, // Memory allocated on host (irrespective of other flags) + MEMORY_GPU = 2, // Memory allocated on GPU (irrespective of other flags) + MEMORY_INPUT_FLAG = 4, // Flag to signal this memory is copied to GPU with TransferMemoryResourcesToGPU, and alike + MEMORY_INPUT = 7, // Input data for GPU has the MEMORY_INPUT_FLAG flag and is allocated on host and GPU + MEMORY_OUTPUT_FLAG = 8, // Flag to signal this memory is copied to Host with TransferMemoryResourcesToHost, and alike + MEMORY_OUTPUT = 11, // Output data for GPU has the MEMORY_OUTPUT_FLAG flag and is allocated on host and GPU + MEMORY_INOUT = 15, // Combination of MEMORY_INPUT and MEMORY_OUTPUT + MEMORY_SCRATCH = 16, // Scratch memory, is allocated only on GPU by default if running on GPU, only on host otherwise, if MEMORY_HOST and MEMORY_GPU flags not set. 
+ MEMORY_SCRATCH_HOST = 17, // Scratch memory only on host + MEMORY_EXTERNAL = 32, // Special flag to signal that memory on host shall not be allocated, but will be provided externally and manually + MEMORY_PERMANENT = 64, // Permanent memory, registered once with AllocateRegisteredPermanentMemory, not per time frame. Only for small sizes! + MEMORY_CUSTOM = 128, // Memory is not allocated automatically with AllocateRegisteredMemory(GPUProcessor), but must be allocated manually via AllocateRegisteredMemory(memoryId) + MEMORY_CUSTOM_TRANSFER = 256, // Memory is not transferred automatically with TransferMemoryResourcesTo, but must be transferred manually with TransferMemoryTo...(memoryId) + MEMORY_STACK = 512 // Use memory from non-persistent stack at the end of the global memory region. Not persistent for full TF. Use PushNonPersistentMemory and PopNonPersistentMemory to release memory from the stack }; - enum AllocationType { ALLOCATION_AUTO = 0, - ALLOCATION_INDIVIDUAL = 1, - ALLOCATION_GLOBAL = 2 }; + enum AllocationType { ALLOCATION_AUTO = 0, // --> GLOBAL if GPU is used, INDIVIDUAL otherwise + ALLOCATION_INDIVIDUAL = 1, // Individual memory allocations with malloc (host only) + ALLOCATION_GLOBAL = 2 }; // Allocate memory blocks from large preallocated memory range with internal allocator (host and GPU) GPUMemoryResource(GPUProcessor* proc, void* (GPUProcessor::*setPtr)(void*), MemoryType type, const char* name = "") : mProcessor(proc), mPtr(nullptr), mPtrDevice(nullptr), mSetPointers(setPtr), mName(name), mSize(0), mOverrideSize(0), mReuse(-1), mType(type) { diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index c79c743e96ce5..ab2210e5dd555 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -538,6 +538,10 @@ size_t GPUReconstruction::AllocateRegisteredPermanentMemory() if (GetProcessingSettings().debugLevel >= 5) { GPUInfo("Allocating Permanent Memory"); 
} + if (mVolatileMemoryStart) { + GPUError("Must not allocate permanent memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } int32_t total = 0; for (uint32_t i = 0; i < mMemoryResources.size(); i++) { if ((mMemoryResources[i].mType & GPUMemoryResource::MEMORY_PERMANENT) && mMemoryResources[i].mPtr == nullptr) { @@ -669,6 +673,10 @@ void GPUReconstruction::AllocateRegisteredMemoryInternal(GPUMemoryResource* res, GPUError("Device Processor not set (%s)", res->mName); throw std::bad_alloc(); } + if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && !(res->mType & GPUMemoryResource::MEMORY_STACK)) { + GPUError("Must not allocate non-stacked device memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } size_t size = AllocateRegisteredMemoryHelper(res, res->mPtrDevice, recPool->mDeviceMemoryPool, recPool->mDeviceMemoryBase, recPool->mDeviceMemorySize, &GPUMemoryResource::SetDevicePointers, recPool->mDeviceMemoryPoolEnd, " gpu"); if (!(res->mType & GPUMemoryResource::MEMORY_HOST) || (res->mType & GPUMemoryResource::MEMORY_EXTERNAL)) { @@ -702,7 +710,7 @@ size_t GPUReconstruction::AllocateRegisteredMemory(int16_t ires, GPUOutputContro return res->mReuse >= 0 ? 
0 : res->mSize; } -void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) +void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) { if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) { throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); @@ -711,6 +719,10 @@ void* GPUReconstruction::AllocateUnmanagedMemory(size_t size, int32_t type) mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); return GPUProcessor::alignPointer(mUnmanagedChunks.back().get()); } else { + if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { + GPUError("Must not allocate direct memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } void*& pool = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPool : mHostMemoryPool; void*& poolend = type == GPUMemoryResource::MEMORY_GPU ? 
mDeviceMemoryPoolEnd : mHostMemoryPoolEnd; char* retVal; @@ -745,7 +757,6 @@ void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size) if (GetProcessingSettings().allocDebugLevel >= 2) { std::cout << "Allocated (volatile GPU): " << size << " - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; } - return retVal; } @@ -758,6 +769,30 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device) return GPUProcessor::alignPointer(mVolatileChunks.back().get()); } +void GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile() +{ + mDeviceMemoryAsVolatile = true; + AllocateVolatileDeviceMemory(0); +} + +void GPUReconstruction::ReturnVolatileDeviceMemory() +{ + mDeviceMemoryAsVolatile = false; + if (mVolatileMemoryStart) { + mDeviceMemoryPool = mVolatileMemoryStart; + mVolatileMemoryStart = nullptr; + } + if (GetProcessingSettings().allocDebugLevel >= 2) { + std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; + } +} + +void GPUReconstruction::ReturnVolatileMemory() +{ + ReturnVolatileDeviceMemory(); + mVolatileChunks.clear(); +} + void GPUReconstruction::ResetRegisteredMemoryPointers(GPUProcessor* proc) { for (uint32_t i = 0; i < mMemoryResources.size(); i++) { @@ -814,23 +849,6 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) res->mPtrDevice = nullptr; } -void GPUReconstruction::ReturnVolatileDeviceMemory() -{ - if (mVolatileMemoryStart) { - mDeviceMemoryPool = mVolatileMemoryStart; - mVolatileMemoryStart = nullptr; - } - if (GetProcessingSettings().allocDebugLevel >= 2) { - std::cout << "Freed (volatile GPU) - available: " << ptrDiff(mDeviceMemoryPoolEnd, mDeviceMemoryPool) << "\n"; - } -} - -void GPUReconstruction::ReturnVolatileMemory() -{ - ReturnVolatileDeviceMemory(); - mVolatileChunks.clear(); -} - void GPUReconstruction::PushNonPersistentMemory(uint64_t tag) { mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, 
mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), tag); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index b6256f7f8ad82..396a007761fb7 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -166,9 +166,10 @@ class GPUReconstruction size_t AllocateRegisteredMemory(int16_t res, GPUOutputControl* control = nullptr); void AllocateRegisteredForeignMemory(int16_t res, GPUReconstruction* rec, GPUOutputControl* control = nullptr); - void* AllocateUnmanagedMemory(size_t size, int32_t type); + void* AllocateDirectMemory(size_t size, int32_t type); void* AllocateVolatileDeviceMemory(size_t size); void* AllocateVolatileMemory(size_t size, bool device); + void MakeFutureDeviceMemoryAllocationsVolatile(); void FreeRegisteredMemory(GPUProcessor* proc, bool freeCustom = false, bool freePermanent = false); void FreeRegisteredMemory(int16_t res); void ClearAllocatedMemory(bool clearOutputs = true); @@ -326,14 +327,15 @@ class GPUReconstruction void* mHostMemoryPoolBlocked = nullptr; // Ptr to end of pool size_t mHostMemorySize = 0; // Size of host memory buffer size_t mHostMemoryUsedMax = 0; // Maximum host memory size used over time - void* mDeviceMemoryBase = nullptr; // - void* mDeviceMemoryPermanent = nullptr; // - void* mDeviceMemoryPool = nullptr; // - void* mDeviceMemoryPoolEnd = nullptr; // - void* mDeviceMemoryPoolBlocked = nullptr; // - size_t mDeviceMemorySize = 0; // + void* mDeviceMemoryBase = nullptr; // Same for device ... + void* mDeviceMemoryPermanent = nullptr; // ... + void* mDeviceMemoryPool = nullptr; // ... + void* mDeviceMemoryPoolEnd = nullptr; // ... + void* mDeviceMemoryPoolBlocked = nullptr; // ... + size_t mDeviceMemorySize = 0; // ... + size_t mDeviceMemoryUsedMax = 0; // ... 
void* mVolatileMemoryStart = nullptr; // Ptr to beginning of temporary volatile memory allocation, nullptr if uninitialized - size_t mDeviceMemoryUsedMax = 0; // + bool mDeviceMemoryAsVolatile = false; // Make device memory allocations volatile std::unordered_set mRegisteredMemoryPtrs; // List of pointers registered for GPU diff --git a/GPU/GPUTracking/Global/GPUChainITS.cxx b/GPU/GPUTracking/Global/GPUChainITS.cxx index eeead79b1840b..5d36dc63ca85d 100644 --- a/GPU/GPUTracking/Global/GPUChainITS.cxx +++ b/GPU/GPUTracking/Global/GPUChainITS.cxx @@ -28,7 +28,7 @@ class GPUFrameworkExternalAllocator final : public o2::its::ExternalAllocator public: void* allocate(size_t size) override { - return mFWReco->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU); + return mFWReco->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); } void setReconstructionFramework(o2::gpu::GPUReconstruction* fwr) { mFWReco = fwr; } @@ -86,7 +86,7 @@ o2::its::TimeFrame* GPUChainITS::GetITSTimeframe() } #if !defined(GPUCA_STANDALONE) if (mITSTimeFrame->mIsGPU) { - auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateUnmanagedMemory(size, GPUMemoryResource::MEMORY_GPU); }; + auto doFWExtAlloc = [this](size_t size) -> void* { return rec()->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU); }; mFrameworkAllocator.reset(new o2::its::GPUFrameworkExternalAllocator); mFrameworkAllocator->setReconstructionFramework(rec()); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index fc07a91004c5f..24c74a661f18e 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -43,7 +43,7 @@ int32_t GPUChainTracking::RunTPCCompression() } if (gatherMode == 3) { - mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile + mRec->MakeFutureDeviceMemoryAllocationsVolatile(); } 
SetupGPUProcessor(&Compressor, true); new (Compressor.mMemory) GPUTPCCompression::memory; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 5d05cd6a97776..53bdfbadd4b25 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -34,7 +34,7 @@ static inline uint32_t RGB(uint8_t r, uint8_t g, uint8_t b) { return (uint32_t)r int32_t GPUChainTracking::PrepareProfile() { #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE - char* tmpMem = (char*)mRec->AllocateUnmanagedMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU); + char* tmpMem = (char*)mRec->AllocateDirectMemory(PROFILE_MAX_SIZE, GPUMemoryResource::MEMORY_GPU); processorsShadow()->tpcTrackers[0].mStageAtSync = tmpMem; runKernel({{BlockCount(), ThreadCount(), -1}}, tmpMem, PROFILE_MAX_SIZE); #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 163f08634ef86..84835a1695071 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -297,7 +297,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) SynchronizeEventAndRelease(mEvents->single, doGPU); if (GetProcessingSettings().clearO2OutputFromGPU) { - mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile + mRec->MakeFutureDeviceMemoryAllocationsVolatile(); } AllocateRegisteredMemory(Merger.MemoryResOutputO2(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2)]); AllocateRegisteredMemory(Merger.MemoryResOutputO2Clus(), mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcTracksO2ClusRefs)]); From 52c23287f6abc479a29f9aa2fc1acebf64e58f29 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 20 Apr 2025 23:01:33 +0200 Subject: [PATCH 0424/1914] GPU: Get rid 
of backendInternal additional wrapper --- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 18 +----- GPU/GPUTracking/Base/GPUReconstructionCPU.h | 5 +- .../GPUReconstructionProcessingKernels.inc | 9 ++- .../Base/cuda/GPUReconstructionCUDA.cu | 3 +- .../Base/cuda/GPUReconstructionCUDA.h | 7 +-- .../Base/cuda/GPUReconstructionCUDAKernels.cu | 60 +++++++++---------- ...GPUReconstructionCUDAKernelsSpecialize.inc | 12 ++-- .../Base/opencl/GPUReconstructionOCL.cxx | 2 +- .../Base/opencl/GPUReconstructionOCL.h | 4 +- .../opencl/GPUReconstructionOCLKernels.cxx | 14 +---- .../GPUReconstructionOCLKernelsSpecialize.inc | 2 +- 11 files changed, 59 insertions(+), 77 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 2453ce4a2328f..5f80a56e9e64e 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -54,7 +54,7 @@ GPUReconstructionCPU::~GPUReconstructionCPU() } template -inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCPU::runKernelBackend(const krnlSetupTime& _xyz, const Args&... 
args) { auto& x = _xyz.x; auto& y = _xyz.y; @@ -88,7 +88,7 @@ inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& } template <> -inline void GPUReconstructionCPU::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionCPU::runKernelBackend(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { int32_t nThreads = std::max(1, std::min(size / (16 * 1024 * 1024), getNKernelHostThreads(true))); if (nThreads > 1) { @@ -108,17 +108,6 @@ inline void GPUReconstructionCPU::runKernelBackendInternal(con } } -template -void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args) -{ -#pragma GCC diagnostic push -#if defined(__clang__) -#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway captured below -#endif - std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); -#pragma GCC diagnostic push -} - template GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu) { @@ -137,8 +126,7 @@ GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelPrope return ret; } -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - template void GPUReconstructionCPU::runKernelBackend(const krnlSetupArgs& args); \ +#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) 
\ template GPUReconstructionProcessing::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu); #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.h b/GPU/GPUTracking/Base/GPUReconstructionCPU.h index d0d8b05c4af0e..d93d1335d45c5 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.h +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.h @@ -40,7 +40,7 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface template krnlProperties getKernelProperties(int gpu = -1); template - void runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false); int32_t GPUStuck() { return mGPUStuck; } @@ -59,9 +59,6 @@ class GPUReconstructionCPU : public GPUReconstructionProcessing::KernelInterface GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing::KernelInterface(cfg) {} - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... 
args); - int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override { return 0; } int32_t unregisterMemoryForGPU_internal(const void* ptr) override { return 0; } diff --git a/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc index 49d02515372b8..b303cb7c8d39c 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc +++ b/GPU/GPUTracking/Base/GPUReconstructionProcessingKernels.inc @@ -21,6 +21,11 @@ namespace o2::gpu { +#pragma GCC diagnostic push +#if defined(__clang__) +#pragma GCC diagnostic ignored "-Wunused-lambda-capture" // this is not alway captured below +#endif + template void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args) { @@ -28,7 +33,7 @@ void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \ case x_num: { \ const auto& args2 = *(const krnlSetupArgs*)args; \ - ((T*)this)->template runKernelBackend(args2); \ + std::apply([this, &args2](auto&... 
vals) { ((T*)this)->template runKernelBackend(args2.s, vals...); }, args2.v); \ break; \ } #include "GPUReconstructionKernelList.h" @@ -36,6 +41,8 @@ void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const } // clang-format on } +#pragma GCC diagnostic push + } // namespace o2::gpu #endif // GPURECONSTRUCTIONPROCESSINGKERNELS_H diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 970b331ea99fb..c40c607396f3f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -24,7 +24,8 @@ #include "GPUParamRTC.h" #include "GPUReconstructionCUDAHelpers.inc" #include "GPUDefParametersLoad.inc" -#include "GPUReconstructionProcessingKernels.inc" +#include "GPUReconstructionKernelIncludes.h" +#include "GPUConstantMem.h" #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 #include "utils/qGetLdBinarySymbols.h" diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index ed75100dfe351..36dcdffb1c6d6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -45,7 +45,9 @@ class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterfac virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override; template - void runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); + template + void runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... 
args); template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); @@ -53,9 +55,6 @@ class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterfac protected: GPUReconstructionCUDAInternals* mInternals; - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); - int32_t InitDevice_Runtime() override; int32_t ExitDevice_Runtime() override; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 11a62bcec2318..3267e1d5c67f6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -23,13 +23,15 @@ using namespace o2::gpu; #include "GPUReconstructionIncludesDeviceAll.h" #include "GPUReconstructionCUDAKernelsSpecialize.inc" +#include "GPUReconstructionProcessingKernels.inc" +template void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args); #if defined(__HIPCC__) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) __global__ void gGPUConstantMemBuffer_dummy(int32_t* p) { *p = *(int32_t*)&gGPUConstantMemBuffer; } #endif template -inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionCUDA::runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... args) { #if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1 if (!GetProcessingSettings().rtc.enable) { @@ -52,18 +54,18 @@ inline void GPUReconstructionCUDA::runKernelBackendInternal(const krnlSetupTime& } template -void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args) +inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, const Args&... 
args) { - auto& x = args.s.x; - auto& z = args.s.z; + auto& x = _xyz.x; + auto& z = _xyz.z; if (z.evList) { for (int32_t k = 0; k < z.nEvents; k++) { GPUChkErr(cudaStreamWaitEvent(mInternals->Streams[x.stream], ((cudaEvent_t*)z.evList)[k], 0)); } } { - GPUDebugTiming timer(GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, args.s, this); - std::apply([this, &args](auto&... vals) { this->runKernelBackendInternal(args.s, vals...); }, args.v); + GPUDebugTiming timer(GetProcessingSettings().deviceTimers && GetProcessingSettings().debugLevel > 0, (deviceEvent*)mDebugEvents, mInternals->Streams, _xyz, this); + runKernelBackendTimed(_xyz, args...); } GPUChkErr(cudaGetLastError()); if (z.ev) { @@ -74,31 +76,29 @@ void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& #undef GPUCA_KRNL_REG #define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args))) -#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1 // ---------- COMPILE_MODE = perkernel ---------- -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); -#else // ---------- COMPILE_MODE = onefile | rdc ---------- -#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 -#define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc -#endif - -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ - template void GPUReconstructionCUDA::runKernelBackend(const krnlSetupArgs& args); - -#ifndef __HIPCC__ // CUDA version -#define GPUCA_KRNL_CALL(x_class, ...) 
\ - GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); -#else // HIP version -#undef GPUCA_KRNL_CUSTOM -#define GPUCA_KRNL_CUSTOM(args) GPUCA_M_STRIP(args) -#define GPUCA_KRNL_CALL(x_class, ...) \ - hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); -#endif // __HIPCC__ - +// clang-format off +#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE != 1 // ---------- COMPILE_MODE = perkernel ---------- + #if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2 + #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc + #endif + + #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ + GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) + + #ifndef __HIPCC__ // CUDA version + #define GPUCA_KRNL_CALL(x_class, ...) \ + GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); + #else // HIP version + #undef GPUCA_KRNL_CUSTOM + #define GPUCA_KRNL_CUSTOM(args) GPUCA_M_STRIP(args) + #define GPUCA_KRNL_CALL(x_class, ...) 
\ + hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); + #endif // __HIPCC__ + + #include "GPUReconstructionKernelList.h" + #undef GPUCA_KRNL #endif // ---------- COMPILE_MODE = onefile | rdc ---------- - -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL +// clang-format on #ifndef GPUCA_NO_CONSTANT_MEMORY static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstSymbol([]() { diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 899c2e240cd94..8796f063abdc5 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -97,7 +97,7 @@ struct GPUTPCGMO2OutputSort_comp { } // namespace o2::gpu::internal template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { if (cmpMax) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, range, N, MergeBorderTracks_compMax()); @@ -107,32 +107,32 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInter } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), 
GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.LooperCandidates(), processors()->tpcMerger.Memory()->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSortO2(), processors()->tpcMerger.NOutputTracksTPCO2(), GPUTPCGMO2OutputSort_comp()); } #endif // GPUCA_SPECIALIZE_THRUST_SORTS template <> -inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { GPUChkErr(cudaMemsetAsync(ptr, 0, size, mInternals->Streams[_xyz.x.stream])); } diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 28c809dd4a09a..949dd6195b262 100644 --- 
a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -13,8 +13,8 @@ /// \author David Rohr #include "GPUReconstructionOCLIncludesHost.h" -#include "GPUReconstructionProcessingKernels.inc" #include "GPUDefParametersLoad.inc" +#include "GPUConstantMem.h" #include diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h index 091bc0409630d..958d5186bf41a 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h @@ -34,7 +34,7 @@ class GPUReconstructionOCL : public GPUReconstructionProcessing::KernelInterface ~GPUReconstructionOCL() override; template - void runKernelBackend(const krnlSetupArgs& args); + void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); protected: int32_t InitDevice_Runtime() override; @@ -57,8 +57,6 @@ class GPUReconstructionOCL : public GPUReconstructionProcessing::KernelInterface template int32_t AddKernel(); - template - void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args); GPUReconstructionOCLInternals* mInternals; float mOclVersion; diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx index 72c68428149dd..655df5404276b 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernels.cxx @@ -16,9 +16,11 @@ #include "GPUReconstructionKernelIncludes.h" #include "GPUReconstructionOCLKernelsSpecialize.inc" +#include "GPUReconstructionProcessingKernels.inc" +template void GPUReconstructionProcessing::KernelInterface::runKernelVirtual(const int num, const void* args); template -inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args) +inline void GPUReconstructionOCL::runKernelBackend(const krnlSetupTime& _xyz, const Args&... 
args) { cl_kernel k = getKernelObject(); auto& x = _xyz.x; @@ -48,12 +50,6 @@ inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& } } -template -void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args) -{ - std::apply([this, &args](auto&... vals) { runKernelBackendInternal(args.s, vals...); }, args.v); -} - template int32_t GPUReconstructionOCL::AddKernel() { @@ -86,7 +82,3 @@ int32_t GPUReconstructionOCL::AddKernels() #undef GPUCA_KRNL return 0; } - -#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) template void GPUReconstructionOCL::runKernelBackend(const krnlSetupArgs& args); -#include "GPUReconstructionKernelList.h" -#undef GPUCA_KRNL diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc index 1b860e47a4243..d5b0338aecbd9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCLKernelsSpecialize.inc @@ -13,7 +13,7 @@ /// \author David Rohr template <> -inline void GPUReconstructionOCL::runKernelBackendInternal(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) +inline void GPUReconstructionOCL::runKernelBackend(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size) { cl_int4 val0 = {0, 0, 0, 0}; GPUChkErr(clEnqueueFillBuffer(mInternals->command_queue[_xyz.x.stream], mInternals->mem_gpu, &val0, sizeof(val0), (char*)ptr - (char*)mDeviceMemoryBase, (size + sizeof(val0) - 1) & ~(sizeof(val0) - 1), _xyz.z.evList == nullptr ? 
0 : _xyz.z.nEvents, _xyz.z.evList->getEventList(), _xyz.z.ev->getEventList())); From 255597cf52515da16637a9a37bb3b70de1806d80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Sun, 20 Apr 2025 23:47:46 +0200 Subject: [PATCH 0425/1914] GPU: Fix onefile compile mode after all the refactoring --- .../Base/GPUReconstructionKernelMacros.h | 14 -------------- .../Base/cuda/GPUReconstructionCUDA.h | 2 ++ .../Base/cuda/GPUReconstructionCUDAKernels.cu | 17 +++++++++++++---- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h index 2b16dfb32fe14..a03d9de13ef8f 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h +++ b/GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h @@ -62,20 +62,6 @@ } #endif -// GPU Host wrappers for kernel -#define GPUCA_KRNL_HOST(x_class, ...) \ - GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ - template <> class GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::backendInternal { \ - public: \ - template \ - static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ - { \ - auto& x = _xyz.x; \ - auto& y = _xyz.y; \ - GPUCA_KRNL_CALL(x_class, __VA_ARGS__) \ - } \ - }; - #endif // GPUCA_GPUCODE #define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h index 36dcdffb1c6d6..6c126d153d8ae 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h @@ -48,6 +48,8 @@ class GPUReconstructionCUDA : public GPUReconstructionProcessing::KernelInterfac void runKernelBackend(const krnlSetupTime& _xyz, const Args&... args); template void runKernelBackendTimed(const krnlSetupTime& _xyz, const Args&... 
args); + template + struct kernelBackendMacro; template friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp); diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu index 3267e1d5c67f6..e6ed94bba2cec 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu @@ -35,7 +35,7 @@ inline void GPUReconstructionCUDA::runKernelBackendTimed(const krnlSetupTime& _x { #if !defined(GPUCA_KERNEL_COMPILE_MODE) || GPUCA_KERNEL_COMPILE_MODE != 1 if (!GetProcessingSettings().rtc.enable) { - backendInternal::runKernelBackendMacro(_xyz, this, args...); + kernelBackendMacro::run(_xyz, this, args...); } else #endif { @@ -82,9 +82,6 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c #define GPUCA_KRNL_DEFONLY // COMPILE_MODE = rdc #endif - #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \ - GPUCA_KRNL_HOST(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) - #ifndef __HIPCC__ // CUDA version #define GPUCA_KRNL_CALL(x_class, ...) \ GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.index, args...); @@ -95,6 +92,18 @@ inline void GPUReconstructionCUDA::runKernelBackend(const krnlSetupTime& _xyz, c hipLaunchKernelGGL(HIP_KERNEL_NAME(GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))), dim3(x.nBlocks), dim3(x.nThreads), 0, me->mInternals->Streams[x.stream], GPUCA_CONSMEM_CALL y.index, args...); #endif // __HIPCC__ + #define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) 
\ + GPUCA_KRNLGPU(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ + template <> struct GPUReconstructionCUDA::kernelBackendMacro { \ + template \ + static inline void run(const GPUReconstructionProcessing::krnlSetupTime& _xyz, auto* me, const Args&... args) \ + { \ + auto& x = _xyz.x; \ + auto& y = _xyz.y; \ + GPUCA_KRNL_CALL(x_class, x_attributes, x_arguments, x_forward, x_types, __VA_ARGS__) \ + } \ + }; + #include "GPUReconstructionKernelList.h" #undef GPUCA_KRNL #endif // ---------- COMPILE_MODE = onefile | rdc ---------- From 062fbe612c19a9bb23d77fff43c42d74d7e54b6e Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Tue, 22 Apr 2025 10:33:57 +0200 Subject: [PATCH 0426/1914] ITS: fix integer comparison warning in NoiseMap --- .../ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h b/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h index 49e6f531eeb76..25b7f451b6452 100644 --- a/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h +++ b/DataFormats/Detectors/ITSMFT/common/include/DataFormatsITSMFT/NoiseMap.h @@ -205,7 +205,7 @@ class NoiseMap NoiseMap merge(const NoiseMap* prev) { int incre = 0; - for (size_t i = 0; i < (int)mNoisyPixels.size(); ++i) { + for (size_t i = 0; i < mNoisyPixels.size(); ++i) { for (const auto& prev_np : prev->mNoisyPixels[i]) { // only enters this for loop if the "i" chip exists. 
if (mNoisyPixels[i].find(prev_np.first) == mNoisyPixels[i].end()) { mNoisyPixels[i][prev_np.first] = prev_np.second; From b05a704889e0d5004c8029f25161af4112d27a90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?BiaoZhang=20=28=E5=BC=A0=E5=BD=AA=29?= <52267892+zhangbiao-phy@users.noreply.github.com> Date: Tue, 22 Apr 2025 11:36:55 +0200 Subject: [PATCH 0427/1914] Add PDG code and mass of Bc meson (#14175) * Add PDG code and mass of Bc * Update make_pdg_header.py with Bc meson * Update PhysicsConstants.h * Update PhysicsConstants.h --- Common/Constants/include/CommonConstants/PhysicsConstants.h | 2 ++ Common/Constants/include/CommonConstants/make_pdg_header.py | 1 + 2 files changed, 3 insertions(+) diff --git a/Common/Constants/include/CommonConstants/PhysicsConstants.h b/Common/Constants/include/CommonConstants/PhysicsConstants.h index c6fa3cddcdad5..f3b983b966faf 100644 --- a/Common/Constants/include/CommonConstants/PhysicsConstants.h +++ b/Common/Constants/include/CommonConstants/PhysicsConstants.h @@ -34,6 +34,7 @@ enum Pdg { kB0 = 511, kB0Bar = -511, kBPlus = 521, + kBCPlus = 541, kBS = 531, kBSBar = -531, kD0 = 421, @@ -84,6 +85,7 @@ enum Pdg { constexpr double MassB0 = 5.27966; constexpr double MassB0Bar = 5.27966; constexpr double MassBPlus = 5.27934; +constexpr double MassBCPlus = 6.27447; constexpr double MassBS = 5.36692; constexpr double MassBSBar = 5.36692; constexpr double MassD0 = 1.86484; diff --git a/Common/Constants/include/CommonConstants/make_pdg_header.py b/Common/Constants/include/CommonConstants/make_pdg_header.py index e4f92e6e8b62d..89e3f0e35070d 100755 --- a/Common/Constants/include/CommonConstants/make_pdg_header.py +++ b/Common/Constants/include/CommonConstants/make_pdg_header.py @@ -89,6 +89,7 @@ class Pdg(Enum): kB0 = 511 kB0Bar = -511 kBPlus = 521 + kBCPlus = 541 kBS = 531 kBSBar = -531 kD0 = 421 From 3412dff968bad3565c97df879a5974c6366610ca Mon Sep 17 00:00:00 2001 From: Mattia Faggin Date: Tue, 22 Apr 2025 11:37:16 +0200 Subject: [PATCH 
0428/1914] Add SigmaC(2520). (#14187) * Add SigmaC(2520). * Remove comments * Update python script. --------- Co-authored-by: Mattia Faggin --- Common/Constants/include/CommonConstants/PhysicsConstants.h | 4 ++++ Common/Constants/include/CommonConstants/make_pdg_header.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/Common/Constants/include/CommonConstants/PhysicsConstants.h b/Common/Constants/include/CommonConstants/PhysicsConstants.h index f3b983b966faf..5f169f799eb5b 100644 --- a/Common/Constants/include/CommonConstants/PhysicsConstants.h +++ b/Common/Constants/include/CommonConstants/PhysicsConstants.h @@ -64,6 +64,8 @@ enum Pdg { kPhi = 333, kSigmaC0 = 4112, kSigmaCPlusPlus = 4222, + kSigmaCStar0 = 4114, + kSigmaCStarPlusPlus = 4224, kX3872 = 9920443, kXi0 = 3322, kXiB0 = 5232, @@ -115,6 +117,8 @@ constexpr double MassKPlusStar892 = 0.89167; constexpr double MassPhi = 1.019461; constexpr double MassSigmaC0 = 2.45375; constexpr double MassSigmaCPlusPlus = 2.45397; +constexpr double MassSigmaCStar0 = 2.51848; +constexpr double MassSigmaCStarPlusPlus = 2.51841; constexpr double MassX3872 = 3.87165; constexpr double MassXi0 = 1.31486; constexpr double MassXiB0 = 5.7919; diff --git a/Common/Constants/include/CommonConstants/make_pdg_header.py b/Common/Constants/include/CommonConstants/make_pdg_header.py index 89e3f0e35070d..4fdfd052ff613 100755 --- a/Common/Constants/include/CommonConstants/make_pdg_header.py +++ b/Common/Constants/include/CommonConstants/make_pdg_header.py @@ -119,6 +119,8 @@ class Pdg(Enum): kPhi = 333 kSigmaC0 = 4112 kSigmaCPlusPlus = 4222 + kSigmaCStar0 = 4114 + kSigmaCStarPlusPlus = 4224 kX3872 = 9920443 kXi0 = 3322 kXiB0 = 5232 From 37d0ba15113ee8666208a00b0e6ef7c070df8f3f Mon Sep 17 00:00:00 2001 From: Matthias Kleiner Date: Thu, 10 Apr 2025 08:06:34 +0200 Subject: [PATCH 0429/1914] TPC: change default setting for pad status map --- .../TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h index aff7c279cf5a8..667386e6481ca 100644 --- a/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h +++ b/Detectors/TPC/workflow/include/TPCWorkflow/TPCFactorizeIDCSpec.h @@ -506,7 +506,7 @@ DataProcessorSpec getTPCFactorizeIDCSpec(const int lane, const std::vector Date: Tue, 22 Apr 2025 09:28:39 +0200 Subject: [PATCH 0430/1914] GPU CMake: Set GPU architecture before checking the language, to suppress warnings about architecture detection failure --- dependencies/FindO2GPU.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index d50705d106bf3..0c5313c16af68 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -103,6 +103,9 @@ endif() # ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) + if(CUDA_COMPUTETARGET) + set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE) + endif() set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) include(CheckLanguage) @@ -227,6 +230,9 @@ endif() # ---------------------------------- HIP ---------------------------------- if(ENABLE_HIP) + if(HIP_AMDGPUTARGET) + set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "" FORCE) + endif() if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") endif() @@ -300,7 +306,7 @@ if(ENABLE_HIP) endif() string(REGEX REPLACE "(gfx1[0-9]+;?)" "" CMAKE_HIP_ARCHITECTURES "${CMAKE_HIP_ARCHITECTURES}") # ROCm currently doesn’t support integrated graphics if(HIP_AMDGPUTARGET) - set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") # If GPU build is enforced we override autodetection + set(CMAKE_HIP_ARCHITECTURES 
"${HIP_AMDGPUTARGET}") endif() else() set(HIP_ENABLED OFF) From 33f93083e7524497014e31740f87a5a74c399ca1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 09:34:49 +0200 Subject: [PATCH 0431/1914] ONNXRuntime CMake: Use standard ONNXRuntime lowercase library syntax, remove obsolete wrapper, move detection from toplevel CMake to dependencies --- CMakeLists.txt | 2 -- Common/ML/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- GPU/GPUTracking/CMakeLists.txt | 2 +- dependencies/FindONNXRuntime.cmake | 23 ----------------------- dependencies/O2Dependencies.cmake | 3 +++ 7 files changed, 7 insertions(+), 29 deletions(-) delete mode 100644 dependencies/FindONNXRuntime.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index d28f191021fdf..b71d05175e9e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,8 +41,6 @@ o2_build_sanity_checks() set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED TRUE) -include(dependencies/FindONNXRuntime.cmake) - include(O2CheckCXXFeatures) o2_check_cxx_features() diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index 2db91fc4f4320..a5b336bf7e820 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -12,7 +12,7 @@ o2_add_library(ML SOURCES src/OrtInterface.cxx TARGETVARNAME targetName - PRIVATE_LINK_LIBRARIES O2::Framework ONNXRuntime::ONNXRuntime) + PRIVATE_LINK_LIBRARIES O2::Framework onnxruntime::onnxruntime) # Pass ORT variables as a preprocessor definition target_compile_definitions(${targetName} PRIVATE diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index f595fb051db54..8dd430d00a5c0 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -114,7 +114,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingCUDA - PRIVATE_LINK_LIBRARIES 
ONNXRuntime::ONNXRuntime + PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index d29a6afb60899..315a6c2fa3080 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -162,7 +162,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingHIP - PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime + PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 0cd302cc0be94..c0648b3274108 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -343,7 +343,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML - PRIVATE_LINK_LIBRARIES ONNXRuntime::ONNXRuntime + PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) diff --git a/dependencies/FindONNXRuntime.cmake b/dependencies/FindONNXRuntime.cmake deleted file mode 100644 index b783c2e1c7bf3..0000000000000 --- a/dependencies/FindONNXRuntime.cmake +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. 
- -find_package(ONNXRuntime::ONNXRuntime CONFIG) -if (ONNXRuntime::ONNXRuntime_FOUND) - set(onnxruntime_FOUND 1) - add_library(onnxruntime::onnxruntime ALIAS ONNXRuntime::ONNXRuntime) -endif() - -if (NOT ONNXRuntime::ONNXRuntime_FOUND) - find_package(onnxruntime CONFIG) - if (onnxruntime_FOUND) - add_library(ONNXRuntime::ONNXRuntime ALIAS onnxruntime::onnxruntime) - endif() -endif() diff --git a/dependencies/O2Dependencies.cmake b/dependencies/O2Dependencies.cmake index c5c22b3a79097..9020f99b4877b 100644 --- a/dependencies/O2Dependencies.cmake +++ b/dependencies/O2Dependencies.cmake @@ -69,6 +69,9 @@ if (NOT TARGET Gandiva::gandiva_shared) add_library(Gandiva::gandiva_shared ALIAS gandiva_shared) endif() +find_package(onnxruntime CONFIG) +set_package_properties(onnxruntime PROPERTIES TYPE REQUIRED) + find_package(Vc) set_package_properties(Vc PROPERTIES TYPE REQUIRED) From 121ec682d4868106df67cf923699e82523afd23c Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 10:22:49 +0200 Subject: [PATCH 0432/1914] GPU ONNX: Fix compiler warning and simplify code --- .../Base/cuda/GPUReconstructionCUDA.cu | 53 +++++++++---------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index c40c607396f3f..d5b01bfa34833 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -621,21 +621,10 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) } } -#ifndef __HIPCC__ // CUDA -void GPUReconstructionCUDA::startGPUProfiling() -{ - GPUChkErr(cudaProfilerStart()); -} - -void GPUReconstructionCUDA::endGPUProfiling() -{ - GPUChkErr(cudaProfilerStop()); -} - void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) { -#ifdef ORT_CUDA_BUILD - cudaGetDevice(deviceId); + GPUChkErr(cudaGetDevice(deviceId)); +#if 
!defined(__HIPCC__) && defined(ORT_CUDA_BUILD) OrtCUDAProviderOptionsV2* cuda_options = nullptr; CreateCUDAProviderOptions(&cuda_options); @@ -650,22 +639,7 @@ void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_option // Finally, don't forget to release the provider options ReleaseCUDAProviderOptions(cuda_options); -#endif // ORT_CUDA_BUILD -} - -#else // HIP -void* GPUReconstructionHIP::getGPUPointer(void* ptr) -{ - void* retVal = nullptr; - GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); - return retVal; -} - -void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) -{ -#ifdef ORT_ROCM_BUILD - // Create ROCm provider options - cudaGetDevice(deviceId); +#elif defined(ORT_ROCM_BUILD) // const auto& api = Ort::GetApi(); // api.GetCurrentGpuDeviceId(deviceId); OrtROCMProviderOptions rocm_options; @@ -676,4 +650,25 @@ void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions& session_options session_options.AppendExecutionProvider_ROCM(rocm_options); #endif // ORT_ROCM_BUILD } + +#ifndef __HIPCC__ // CUDA + +void GPUReconstructionCUDA::startGPUProfiling() +{ + GPUChkErr(cudaProfilerStart()); +} + +void GPUReconstructionCUDA::endGPUProfiling() +{ + GPUChkErr(cudaProfilerStop()); +} + +#else // HIP +void* GPUReconstructionHIP::getGPUPointer(void* ptr) +{ + void* retVal = nullptr; + GPUChkErr(hipHostGetDevicePointer(&retVal, ptr, 0)); + return retVal; +} + #endif // __HIPCC__ From 72eed35cf94511efea88384d35dba3b9ad9e9bae Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Tue, 22 Apr 2025 14:23:33 +0200 Subject: [PATCH 0433/1914] Bug fix for incorrect setting --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 1106f96ed1cb2..2d8c2184e3b02 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ 
b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -272,7 +272,7 @@ AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regress AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).") AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. c1:r1 (classification class 1, regression class 1)") // CCDB -AddOption(nnLoadFromCCDB, int, 1, "", 0, "If 1 networks are fetched from ccdb, else locally") +AddOption(nnLoadFromCCDB, int, 0, "", 0, "If 1 networks are fetched from ccdb, else locally") AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched") AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched") AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks") From 83c6a8a500f4f4a01e83475fbd608cbde674af76 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 11:06:27 +0200 Subject: [PATCH 0434/1914] ML: Fix compiler warnings --- Common/ML/include/ML/3rdparty/GPUORTFloat16.h | 10 +++++----- Common/ML/include/ML/OrtInterface.h | 1 + Common/ML/src/OrtInterface.cxx | 4 ++-- .../Global/GPUChainTrackingClusterizer.cxx | 4 ++-- .../TPCClusterFinder/GPUTPCNNClusterizerHost.cxx | 16 +++++++++------- .../GPUTPCNNClusterizerKernels.cxx | 6 +++--- 6 files changed, 22 insertions(+), 19 deletions(-) diff --git a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h index 9516ba5dad573..3bf2f465b2a35 100644 --- a/Common/ML/include/ML/3rdparty/GPUORTFloat16.h +++ b/Common/ML/include/ML/3rdparty/GPUORTFloat16.h @@ -535,9 +535,9 @@ GPUdi() uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept result = 
kPositiveQNaNBits; } else { auto get_msb_half = [](float fl) { - uint16_t result; + uint16_t res; #ifdef GPUCA_GPUCODE - o2::gpu::CAMath::memcpy(&result, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); + o2::gpu::CAMath::memcpy(&res, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); #else #ifdef __cpp_if_constexpr if constexpr (detail::endian::native == detail::endian::little) @@ -545,12 +545,12 @@ GPUdi() uint16_t BFloat16Impl::ToUint16Impl(float v) noexcept if (detail::endian::native == detail::endian::little) #endif { - std::memcpy(&result, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); + std::memcpy(&res, reinterpret_cast(&fl) + sizeof(uint16_t), sizeof(uint16_t)); } else { - std::memcpy(&result, &fl, sizeof(uint16_t)); + std::memcpy(&res, &fl, sizeof(uint16_t)); } #endif - return result; + return res; }; uint16_t upper_bits = get_msb_half(v); diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index e37b6a69b6036..791f6813c2d24 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -22,6 +22,7 @@ #include #include #include +#include // O2 includes #include "Framework/Logger.h" diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index 24a2fbffb252c..a8a20b11f9e64 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -19,6 +19,8 @@ // ONNX includes #include +#include + namespace o2 { @@ -139,7 +141,6 @@ void OrtModel::initSession() void OrtModel::memoryOnDevice(int32_t deviceIndex) { -#if (defined(ORT_ROCM_BUILD) || defined(ORT_MIGRAPHX_BUILD) || defined(ORT_CUDA_BUILD) || defined(ORT_TENSORRT_BUILD)) if (deviceIndex >= 0) { (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, 
https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h @@ -161,7 +162,6 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt; } } -#endif } void OrtModel::resetSession() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 981d565852d28..37c12b2a3b3f4 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -980,12 +980,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } - float time_clusterizer = 0, time_fill = 0, time_networks = 0; + // float time_clusterizer = 0, time_fill = 0, time_networks = 0; for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) { uint batchStart = batch * clustererNNShadow.nnClusterizerBatchedMode; size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); - auto start0 = std::chrono::high_resolution_clock::now(); + // auto start0 = std::chrono::high_resolution_clock::now(); runKernel({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data // auto stop0 = std::chrono::high_resolution_clock::now(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index db2f05711f537..31b71fd8f1ebe 100644 --- 
a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -29,7 +29,7 @@ using namespace o2::gpu; void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& settings) { std::string class_model_path = settings.nnClassificationPath, reg_model_path = settings.nnRegressionPath; - std::vector reg_model_paths; + std::vector reg_model_paths_local; std::vector evalMode = o2::utils::Str::tokenize(settings.nnEvalMode, ':'); if (settings.nnLoadFromCCDB) { @@ -60,20 +60,20 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set model_class.initOptions(OrtOptions); modelsUsed[0] = true; - reg_model_paths = o2::utils::Str::tokenize(reg_model_path, ':'); + reg_model_paths_local = o2::utils::Str::tokenize(reg_model_path, ':'); if (!settings.nnClusterizerUseCfRegression) { - if (reg_model_paths.size() == 1) { - OrtOptions["model-path"] = reg_model_paths[0]; + if (reg_model_paths_local.size() == 1) { + OrtOptions["model-path"] = reg_model_paths_local[0]; OrtOptions["onnx-environment-name"] = "r1"; model_reg_1.initOptions(OrtOptions); modelsUsed[1] = true; } else { - OrtOptions["model-path"] = reg_model_paths[0]; + OrtOptions["model-path"] = reg_model_paths_local[0]; OrtOptions["onnx-environment-name"] = "r1"; model_reg_1.initOptions(OrtOptions); modelsUsed[1] = true; - OrtOptions["model-path"] = reg_model_paths[1]; + OrtOptions["model-path"] = reg_model_paths_local[1]; OrtOptions["onnx-environment-name"] = "r2"; model_reg_2.initOptions(OrtOptions); modelsUsed[2] = true; @@ -154,6 +154,7 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info MockedOrtAllocator::~MockedOrtAllocator() { // Ort::GetApi().ReleaseMemoryInfo(memory_info); + (void)0; // Suppress warning for empty destructor } void* MockedOrtAllocator::Alloc(size_t size) @@ -191,8 +192,9 @@ size_t MockedOrtAllocator::NumReserveAllocations() const void 
MockedOrtAllocator::LeakCheck() { - if (memory_inuse.load()) + if (memory_inuse.load()) { LOG(warning) << "memory leak!!!"; + } } void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 512bc1d3bb09b..413293502d3c6 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -124,7 +124,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.row()), pad = static_cast(peak.pad()); - if (clustererNN.nnClusterizerAddIndexData && transient_index == (clustererNN.nnClusterizerElementSize - 1)) { + if (clustererNN.nnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.nnClusterizerElementSize - 1)) { uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; @@ -141,7 +141,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(pad) / GPUTPCGeometry::NPads(row); } - } else if (transient_index < (clustererNN.nnClusterizerElementSize - 3)) { + } else if ((int32_t)transient_index < (clustererNN.nnClusterizerElementSize - 3)) { int time = static_cast(peak.time()); int r = CAMath::Floor(transient_index / ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1))) - clustererNN.nnClusterizerSizeInputRow; bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); @@ -197,7 +197,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(clustererNN.modelProbabilities_16[pIdx]); From 427e840295ead75ab7256b6c85fa1ed2cb4f0ec0 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 11:06:40 +0200 Subject: [PATCH 0435/1914] GPU Standalone: support build with ONNXRuntime --- 
Common/ML/CMakeLists.txt | 2 +- Common/ML/include/ML/OrtInterface.h | 2 +- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 12 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 11 +- GPU/GPUTracking/CMakeLists.txt | 10 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 23 +- GPU/GPUTracking/Standalone/cmake/config.cmake | 1 + GPU/GPUTracking/Standalone/cmake/prepare.sh | 2 +- GPU/GPUTracking/cmake/kernel_helpers.cmake | 2 - GPU/GPUTracking/kernels.cmake | 214 +++++++++--------- 10 files changed, 151 insertions(+), 128 deletions(-) diff --git a/Common/ML/CMakeLists.txt b/Common/ML/CMakeLists.txt index a5b336bf7e820..0ed52e1a23e20 100644 --- a/Common/ML/CMakeLists.txt +++ b/Common/ML/CMakeLists.txt @@ -12,7 +12,7 @@ o2_add_library(ML SOURCES src/OrtInterface.cxx TARGETVARNAME targetName - PRIVATE_LINK_LIBRARIES O2::Framework onnxruntime::onnxruntime) + PRIVATE_LINK_LIBRARIES O2::GPUCommon onnxruntime::onnxruntime) # Pass ORT variables as a preprocessor definition target_compile_definitions(${targetName} PRIVATE diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index 791f6813c2d24..ea70e28c0421c 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -25,7 +25,7 @@ #include // O2 includes -#include "Framework/Logger.h" +#include "GPUCommonLogger.h" namespace Ort { diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 8dd430d00a5c0..36162bcaa2f13 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -121,12 +121,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_SOURCE_DIR}/DataFormats/Reconstruction/src ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) - - target_compile_definitions(${targetName} PRIVATE - GPUCA_HAS_ONNX=1 - $<$:ORT_CUDA_BUILD> - $<$:ORT_TENSORRT_BUILD>) - install(FILES ${HDRS} DESTINATION include/GPU) endif() @@ -141,6 +135,12 @@ endif() target_compile_definitions(${targetName} PRIVATE $) +if 
(onnxruntime_FOUND) + target_compile_definitions(${targetName} PRIVATE + $<$:ORT_CUDA_BUILD> + $<$:ORT_TENSORRT_BUILD>) +endif() + # Setting target architecture and adding GPU libraries target_link_libraries(${targetName} PRIVATE cuda cudart nvrtc) set_target_cuda_arch(${targetName}) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 315a6c2fa3080..9398ffdd5b9f1 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -170,11 +170,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) - target_compile_definitions(${targetName} PRIVATE - GPUCA_HAS_ONNX=1 - $<$:ORT_ROCM_BUILD> - $<$:ORT_MIGRAPHX_BUILD>) - install(FILES ${HDRS} DESTINATION include/GPU) # o2_add_test(GPUsortHIP NAME test_GPUsortHIP @@ -195,6 +190,12 @@ endif() target_compile_definitions(${targetName} PRIVATE $) +if (onnxruntime_FOUND) + target_compile_definitions(${targetName} PRIVATE + $<$:ORT_ROCM_BUILD> + $<$:ORT_MIGRAPHX_BUILD>) +endif() + add_library(${MODULE}_CXX OBJECT ${SRCS_CXX}) # Adding a C++ library for the .cxx code of the HIP library, such that it does not link to HIP libraries, and CMake HIP Language doesn't add HIP compile flags. 
target_compile_definitions(${MODULE}_CXX PRIVATE $) target_include_directories(${MODULE}_CXX PRIVATE $) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index c0648b3274108..4c1de17025627 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -200,7 +200,7 @@ set(SRCS_NO_CINT ${SRCS_NO_CINT} Refit/GPUTrackingRefitKernel.cxx Merger/GPUTPCGMO2Output.cxx) -if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") +if(onnxruntime_FOUND) list(APPEND SRCS_NO_CINT TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx TPCClusterFinder/GPUTPCNNClusterizer.cxx TPCClusterFinder/GPUTPCNNClusterizerHost.cxx) endif() @@ -343,7 +343,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") O2::DetectorsRaw O2::Steer O2::ML - PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PUBLIC_INCLUDE_DIRECTORIES ${INCDIRS} SOURCES ${SRCS} ${SRCS_NO_CINT} ${SRCS_NO_H}) @@ -351,7 +350,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${targetName} PRIVATE $) - target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2 GPUCA_HAS_ONNX=1) + target_compile_definitions(${targetName} PRIVATE GPUCA_O2_LIB GPUCA_TPC_GEOMETRY_O2) o2_target_root_dictionary(${MODULE} HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL} @@ -421,6 +420,11 @@ target_link_libraries(${targetName} PRIVATE TBB::tbb) target_compile_options(${targetName} PRIVATE -Wno-instantiation-after-specialization) +if (onnxruntime_FOUND) + target_compile_definitions(${targetName} PRIVATE GPUCA_HAS_ONNX=1) + target_link_libraries(${targetName} PRIVATE onnxruntime::onnxruntime) +endif() + # Add CMake recipes for GPU Tracking librararies if(CUDA_ENABLED OR OPENCL_ENABLED OR HIP_ENABLED) if(CMAKE_SYSTEM_NAME MATCHES Darwin) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index fbc256d5d7f91..a17c58ad1ba03 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -121,11 +121,25 @@ else() endif() # Detect GPU 
Backends -find_package(O2GPU) +find_package(O2GPU REQUIRED) + +if(GPUCA_CONFIG_ONNX) + find_package(onnxruntime REQUIRED) + if(CUDA_ENABLED AND NOT DEFINED ORT_CUDA_BUILD) + set(ORT_CUDA_BUILD ON) + elseif(HIP_ENABLED AND NOT DEFINED ORT_ROCM_BUILD) + set(ORT_ROCM_BUILD ON) + endif() +else() + set(onnxruntime_FOUND OFF) +endif() # Create main targets add_subdirectory(../../ GPU) -add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx +add_library(standalone_support SHARED + ${O2_DIR}/Common/Field/src/MagFieldFast.cxx + ${O2_DIR}/Common/ML/src/OrtInterface.cxx + ${O2_DIR}/Common/Utils/src/StringUtils.cxx ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx @@ -150,6 +164,7 @@ target_include_directories(standalone_support PUBLIC ${O2_DIR}/Common/Constants/include ${O2_DIR}/Common/MathUtils/include ${O2_DIR}/Common/Utils/include + ${O2_DIR}/Common/ML/include ${O2_DIR}/DataFormats/common/include ${O2_DIR}/DataFormats/Detectors/Common/include ${O2_DIR}/DataFormats/Detectors/ITSMFT/common/include @@ -210,6 +225,10 @@ if(GPUCA_CONFIG_ROOT) ROOT::Tree) endif() +if(GPUCA_CONFIG_ONNX) + target_link_libraries(standalone_support PRIVATE onnxruntime::onnxruntime) +endif() + if (GPUCA_BUILD_DEBUG_SANITIZE AND CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") execute_process(COMMAND ${CMAKE_CXX_COMPILER} -print-file-name=libclang_rt.asan-x86_64.so OUTPUT_VARIABLE CLANG_ASAN_SO_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) get_filename_component(CLANG_ASAN_SO_PATH "${CLANG_ASAN_SO_PATH}" DIRECTORY) diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index af7c96bb96fbb..1de0cfa27d7ee 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -18,6 +18,7 @@ set(ENABLE_OPENCL AUTO) set(GPUCA_CONFIG_VC 1) 
set(GPUCA_CONFIG_FMT 1) set(GPUCA_CONFIG_ROOT 1) +set(GPUCA_CONFIG_ONNX 0) set(GPUCA_BUILD_EVENT_DISPLAY 1) set(GPUCA_BUILD_EVENT_DISPLAY_FREETYPE 1) set(GPUCA_BUILD_EVENT_DISPLAY_VULKAN 1) diff --git a/GPU/GPUTracking/Standalone/cmake/prepare.sh b/GPU/GPUTracking/Standalone/cmake/prepare.sh index 17474b5fc6956..121245e23dc65 100755 --- a/GPU/GPUTracking/Standalone/cmake/prepare.sh +++ b/GPU/GPUTracking/Standalone/cmake/prepare.sh @@ -11,6 +11,6 @@ else fi eval "`alienv shell-helper`" # alienv load O2/latest -for i in Vc boost fmt CMake ms_gsl Clang ninja TBB ROOT; do +for i in Vc boost fmt CMake ms_gsl Clang ninja TBB ROOT ONNXRuntime; do source sw/$ALIARCH/$i/latest/etc/profile.d/init.sh done diff --git a/GPU/GPUTracking/cmake/kernel_helpers.cmake b/GPU/GPUTracking/cmake/kernel_helpers.cmake index 35f2915d9486a..e36cb4e2f3149 100644 --- a/GPU/GPUTracking/cmake/kernel_helpers.cmake +++ b/GPU/GPUTracking/cmake/kernel_helpers.cmake @@ -174,7 +174,6 @@ function(o2_gpu_kernel_add_parameter) list(LENGTH ARGV n) math(EXPR n "${n} - 1") foreach(i RANGE 0 ${n}) - message(STATUS "Adding ${ARGV${i}}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_PARAMS "${ARGV${i}}") endforeach() endfunction() @@ -182,7 +181,6 @@ function(o2_gpu_kernel_add_string_parameter) list(LENGTH ARGV n) math(EXPR n "${n} - 1") foreach(i RANGE 0 ${n}) - message(STATUS "Adding ${ARGV${i}}") set_property(TARGET O2_GPU_KERNELS APPEND PROPERTY O2_GPU_KERNEL_STRING_PARAMS "${ARGV${i}}") endforeach() endfunction() diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 202ea47d1f3bf..937a92fef33df 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -24,117 +24,117 @@ o2_gpu_kernel_file_list(O2PROPAGATOR TrackParametrization.cxx TrackParametrizati o2_gpu_kernel_file_list(TPCCOMPRESSION GPUTPCCompressionTrackModel.cxx) o2_gpu_kernel_file_list(TPCDECOMPRESSION GPUTPCCompressionTrackModel.cxx ERRORS) 
o2_gpu_kernel_file_list(TPCCLUSTERFINDER ERRORS ClusterAccumulator.cxx) -if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") -o2_gpu_kernel_file_list(TPCNNCLUSTERFINDER ERRORS ClusterAccumulator.cxx GPUTPCNNClusterizerKernels.cxx) -endif() o2_gpu_kernel_file_list(TRDTRACKER GPUTRDTrack.cxx GPUTRDTracker.cxx GPUTRDTrackletWord.cxx GeometryBase.cxx) o2_gpu_kernel_file_list(GLOBALREFIT TPCMERGER O2PROPAGATOR MATLUT GPUTrackingRefit.cxx) +if(onnxruntime_FOUND) +o2_gpu_kernel_file_list(TPCNNCLUSTERFINDER ERRORS ClusterAccumulator.cxx GPUTPCNNClusterizerKernels.cxx) +endif() -o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletConstructor" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) -o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO int32_t* ptr "uint64_t" size) -o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking TPCTRACKER" NO int32_t n) -o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) -o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER") -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER") -o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER") -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" 
"= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map) -o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map "uint32_t*" output) -o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode) -o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) -o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t id) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t useOrigTrackParam int8_t mergeAll) -o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t output) -o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t border0 int32_t border1 int8_t useOrigTrackParam) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" 
NO int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) -o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) -o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER") -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER") -o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER") -o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB) -o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER") -o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB 
GPUTRDTrackerGPU* externalInstance) -o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER MATLUT O2PROPAGATOR" LB GPUTRDTracker* externalInstance) -o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB) -o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB) -o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB int32_t trackStart int32_t trackEnd) -o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB int32_t sectorStart int32_t nSectors) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB) -o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB int8_t setPositions) -o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUSTERFINDER" LB) 
-o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) -o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) -if(NOT ALIGPU_BUILD_TYPE STREQUAL "Standalone") -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) -o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNeighboursFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCNeighboursCleaner" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsFinder" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCStartHitsSorter" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletConstructor" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUTPCTrackletSelector" "= TPCTRACKER" LB) +o2_gpu_add_kernel("GPUMemClean16" "GPUGeneralKernels" NO void* ptr uint64_t size) +o2_gpu_add_kernel("GPUitoa" "GPUGeneralKernels" NO int32_t* ptr uint64_t size) +o2_gpu_add_kernel("GPUTPCExtrapolationTrackingCopyNumbers" "GPUTPCExtrapolationTracking 
TPCTRACKER" NO int32_t n) +o2_gpu_add_kernel("GPUTPCExtrapolationTracking" "= TPCTRACKER TPCTRACKLETCONS" LB) +o2_gpu_add_kernel("GPUTPCCreateTrackingData" "= TPCTRACKER TPCSECTORDATA" LB) +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, hitData" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, startHits" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCSectorDebugSortKernels, sectorTracks" "= TPCTRACKER") +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, clearIds" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, sectorTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks1" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, extrapolatedTracks2" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO int8_t parameter) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map) +o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB GPUTPCClusterOccupancyMapBin* map uint32_t* output) +o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB int32_t mode) +o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerSectorRefit" "GPUTPCGMMergerGPU TPCMERGER MATLUT" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackGlobal" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector) +o2_gpu_add_kernel("GPUTPCGMMergerUnpackSaveNumber" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t id) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step2" 
"GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step3" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerResolve, step4" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t useOrigTrackParam int8_t mergeAll) +o2_gpu_add_kernel("GPUTPCGMMergerClearLinks" "GPUTPCGMMergerGPU TPCMERGER" LB int8_t output) +o2_gpu_add_kernel("GPUTPCGMMergerMergeWithinPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeSectorsPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t border0 int32_t border1 int8_t useOrigTrackParam) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step0" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step1" "GPUTPCGMMergerGPU TPCMERGER" NO int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, step2" "GPUTPCGMMergerGPU TPCMERGER" LB int32_t iSector int8_t withinSector int8_t mergeMode) +o2_gpu_add_kernel("GPUTPCGMMergerMergeBorders, variant" "GPUTPCGMMergerGPU TPCMERGER" NO gputpcgmmergertypes::GPUTPCGMBorderRange* range int32_t N int32_t cmpMax) +o2_gpu_add_kernel("GPUTPCGMMergerMergeCE" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerLinkExtrapolatedTracks" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerCollect" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerSortTracks" "GPUTPCGMMergerGPU TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksQPt" "GPUTPCGMMergerGPU TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMMergerSortTracksPrepare" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerPrepareClusters, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step0" "GPUTPCGMMergerGPU 
TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerFinalize, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step0" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step1" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMMergerMergeLoopers, step2" "GPUTPCGMMergerGPU TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, prepare" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, sort" "= TPCMERGER") +o2_gpu_add_kernel("GPUTPCGMO2Output, output" "= TPCMERGER" LB) +o2_gpu_add_kernel("GPUTPCGMO2Output, mc" "= TPCMERGER") +o2_gpu_add_kernel("GPUTRDTrackerKernels, gpuVersion" "= TRDTRACKER MATLUT TPCMERGER" LB GPUTRDTrackerGPU* externalInstance) +o2_gpu_add_kernel("GPUTRDTrackerKernels, o2Version" "= TRDTRACKER MATLUT O2PROPAGATOR" LB GPUTRDTracker* externalInstance) +o2_gpu_add_kernel("GPUITSFitterKernels" "= TPCMERGER MATLUT" LB) +o2_gpu_add_kernel("GPUTPCConvertKernel" "=" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step0attached" "= TPCCOMPRESSION" LB) +o2_gpu_add_kernel("GPUTPCCompressionKernels, step1unattached" "= ERRORS" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, unbuffered" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered32" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered64" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, buffered128" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCompressionGatherKernels, multiBlock" "GPUTPCCompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step0attached" "= TPCDECOMPRESSION" LB int32_t trackStart int32_t trackEnd) +o2_gpu_add_kernel("GPUTPCDecompressionKernels, step1unattached" "= TPCDECOMPRESSION" LB int32_t sectorStart int32_t nSectors) 
+o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, sortPerSectorRow" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, countFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCDecompressionUtilKernels, storeFilteredClusters" "GPUTPCDecompressionKernels" LB) +o2_gpu_add_kernel("GPUTPCCFCheckPadBaseline" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillIndexMap" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, fillFromDigits" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFChargeMapFiller, findFragmentStart" "= TPCCLUSTERFINDER" LB int8_t setPositions) +o2_gpu_add_kernel("GPUTPCCFPeakFinder" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, noiseSuppression" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFNoiseSuppression, updatePeaks" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFDeconvolution" "= TPCCLUSTERFINDER" LB) +o2_gpu_add_kernel("GPUTPCCFClusterizer" "= TPCCLUSTERFINDER" LB int8_t onlyMC) +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") +o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf uint32_t offset int32_t nElems) +o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out) +o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) 
+o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) +o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) +o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) +if(onnxruntime_FOUND) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, runCfClusterizer" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNN" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, fillInputNNSingleElement" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass1Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, determineClass2Labels" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass1Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) +o2_gpu_add_kernel("GPUTPCNNClusterizerKernels, publishClass2Regression" "= TPCNNCLUSTERFINDER" LB uint8_t sector int8_t dtype int8_t onlyMC uint batchStart) endif() -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, setRowOffsets" "= TPCCLUSTERFINDER") -o2_gpu_add_kernel("GPUTPCCFMCLabelFlattener, flatten" "= TPCCLUSTERFINDER" NO GPUTPCLinearLabels* out) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanStart" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanUp" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanTop" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, 
scanDown" "= TPCCLUSTERFINDER" LB int32_t iBuf "uint32_t" offset int32_t nElems) -o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out) -o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF) -o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB) -o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB) o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP NEIGHBOURS_FINDER_UNROLL_GLOBAL From e9b2d160946a3e929f6309c4af97171dd9cf0617 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 18:34:36 +0200 Subject: [PATCH 0436/1914] CUDA ORT: Must use api struct to call functions --- .../Base/cuda/GPUReconstructionCUDA.cu | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d5b01bfa34833..d4f9faaf203c9 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -621,24 +621,34 @@ void GPUReconstructionCUDA::loadKernelModules(bool perKernel) } } +#define ORTCHK(command) \ + { \ + OrtStatus* status = command; \ + if (status != nullptr) { \ + const char* msg = api->GetErrorMessage(status); \ + GPUFatal("ONNXRuntime Error: %s", msg); \ + } \ + } + void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_options, int32_t stream, int32_t* deviceId) { GPUChkErr(cudaGetDevice(deviceId)); #if !defined(__HIPCC__) && defined(ORT_CUDA_BUILD) + const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION); OrtCUDAProviderOptionsV2* cuda_options = 
nullptr; - CreateCUDAProviderOptions(&cuda_options); + ORTCHK(api->CreateCUDAProviderOptions(&cuda_options)); // std::vector keys{"device_id", "gpu_mem_limit", "arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"}; // std::vector values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1", "1"}; // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size()); // this implicitly sets "has_user_compute_stream" - cuda_options.has_user_compute_stream = 1; - UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream]); + cuda_options->has_user_compute_stream = 1; + ORTCHK(api->UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream])); session_options.AppendExecutionProvider_CUDA_V2(cuda_options); // Finally, don't forget to release the provider options - ReleaseCUDAProviderOptions(cuda_options); + api->ReleaseCUDAProviderOptions(cuda_options); #elif defined(ORT_ROCM_BUILD) // const auto& api = Ort::GetApi(); // api.GetCurrentGpuDeviceId(deviceId); From 17132044b680bcbfe5d979fcc0f107b3d5ccdc2e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 22:35:20 +0200 Subject: [PATCH 0437/1914] GPU CUDA ORT: Fix usage of OrtCUDAProviderOptionsV2 --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 3 +-- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index 36162bcaa2f13..dbdf6b606df18 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -114,7 +114,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingCUDA - PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime 
PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -139,6 +138,7 @@ if (onnxruntime_FOUND) target_compile_definitions(${targetName} PRIVATE $<$:ORT_CUDA_BUILD> $<$:ORT_TENSORRT_BUILD>) + target_link_libraries(${targetName} PRIVATE onnxruntime::onnxruntime) endif() # Setting target architecture and adding GPU libraries diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index d4f9faaf203c9..c8e5420a8bcf3 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -643,9 +643,8 @@ void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions& session_option // UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size()); // this implicitly sets "has_user_compute_stream" - cuda_options->has_user_compute_stream = 1; ORTCHK(api->UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", mInternals->Streams[stream])); - session_options.AppendExecutionProvider_CUDA_V2(cuda_options); + ORTCHK(api->SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options)); // Finally, don't forget to release the provider options api->ReleaseCUDAProviderOptions(cuda_options); diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 9398ffdd5b9f1..4689fee02d31e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -162,7 +162,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${MODULE} SOURCES ${SRCS} PUBLIC_LINK_LIBRARIES O2::GPUTracking O2::ITStrackingHIP - PRIVATE_LINK_LIBRARIES onnxruntime::onnxruntime PRIVATE_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/Detectors/Base/src ${CMAKE_SOURCE_DIR}/Detectors/TRD/base/src @@ -194,6 +193,7 @@ if (onnxruntime_FOUND) target_compile_definitions(${targetName} PRIVATE $<$:ORT_ROCM_BUILD> $<$:ORT_MIGRAPHX_BUILD>) + 
target_link_libraries(${targetName} PRIVATE onnxruntime::onnxruntime) endif() add_library(${MODULE}_CXX OBJECT ${SRCS_CXX}) # Adding a C++ library for the .cxx code of the HIP library, such that it does not link to HIP libraries, and CMake HIP Language doesn't add HIP compile flags. From 6a0656424b366aa04fd338ac3f27f26b40df948d Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 21 Apr 2025 21:23:11 +0200 Subject: [PATCH 0438/1914] GPU: Use aligned new/delete for some host allocations --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 27 ++++++++++++++-------- GPU/GPUTracking/Base/GPUReconstruction.h | 10 +++++--- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index ab2210e5dd555..7a8d73e689b84 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -716,8 +716,13 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); } if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - mUnmanagedChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); - return GPUProcessor::alignPointer(mUnmanagedChunks.back().get()); + char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size]; + if ((type & GPUMemoryResource::MEMORY_STACK)) { + mNonPersistentIndividualDirectAllocations.emplace_back(retVal, alignedDeleter()); + } else { + mDirectMemoryChunks.emplace_back(retVal, alignedDeleter()); + } + return retVal; } else { if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { GPUError("Must not allocate direct memory while volatile chunks are allocated"); @@ -765,8 +770,9 @@ void* GPUReconstruction::AllocateVolatileMemory(size_t size, bool device) if (device) { return 
AllocateVolatileDeviceMemory(size); } - mVolatileChunks.emplace_back(new char[size + GPUCA_BUFFER_ALIGNMENT]); - return GPUProcessor::alignPointer(mVolatileChunks.back().get()); + char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size]; + mVolatileChunks.emplace_back(retVal, alignedDeleter()); + return retVal; } void GPUReconstruction::MakeFutureDeviceMemoryAllocationsVolatile() @@ -851,7 +857,7 @@ void GPUReconstruction::FreeRegisteredMemory(GPUMemoryResource* res) void GPUReconstruction::PushNonPersistentMemory(uint64_t tag) { - mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), tag); + mNonPersistentMemoryStack.emplace_back(mHostMemoryPoolEnd, mDeviceMemoryPoolEnd, mNonPersistentIndividualAllocations.size(), mNonPersistentIndividualDirectAllocations.size(), tag); } void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) @@ -862,11 +868,11 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) if (mNonPersistentMemoryStack.size() == 0) { GPUFatal("Trying to pop memory state from empty stack"); } - if (tag != 0 && std::get<3>(mNonPersistentMemoryStack.back()) != tag) { - GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str()); + if (tag != 0 && std::get<4>(mNonPersistentMemoryStack.back()) != tag) { + GPUFatal("Tag mismatch when popping non persistent memory from stack : pop %s vs on stack %s", qTag2Str(tag).c_str(), qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str()); } if ((GetProcessingSettings().debugLevel >= 3 || GetProcessingSettings().allocDebugLevel) && (IsGPU() || GetProcessingSettings().forceHostMemoryPoolSize)) { - printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], 
qTag2Str(std::get<3>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); + printf("Allocated memory after %30s (%8s) (Stack %zu): ", GPUDataTypes::RECO_STEP_NAMES[getRecoStepNum(step, true)], qTag2Str(std::get<4>(mNonPersistentMemoryStack.back())).c_str(), mNonPersistentMemoryStack.size()); PrintMemoryOverview(); printf("%76s", ""); PrintMemoryMax(); @@ -882,6 +888,7 @@ void GPUReconstruction::PopNonPersistentMemory(RecoStep step, uint64_t tag) res->mPtrDevice = nullptr; } mNonPersistentIndividualAllocations.resize(std::get<2>(mNonPersistentMemoryStack.back())); + mNonPersistentIndividualDirectAllocations.resize(std::get<3>(mNonPersistentMemoryStack.back())); mNonPersistentMemoryStack.pop_back(); } @@ -917,9 +924,11 @@ void GPUReconstruction::ClearAllocatedMemory(bool clearOutputs) FreeRegisteredMemory(i); } } - mUnmanagedChunks.clear(); mNonPersistentMemoryStack.clear(); mNonPersistentIndividualAllocations.clear(); + mDirectMemoryChunks.clear(); + mNonPersistentIndividualDirectAllocations.clear(); + mVolatileChunks.clear(); mVolatileMemoryStart = nullptr; if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_GLOBAL) { mHostMemoryPool = GPUProcessor::alignPointer(mHostMemoryPermanent); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index 396a007761fb7..f5b39cb370b9e 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -69,8 +69,6 @@ class GPUReconstruction class LibraryLoader; // These must be the first members to ensure correct destructor order! 
std::shared_ptr mMyLib = nullptr; std::vector mMemoryResources; - std::vector> mUnmanagedChunks; - std::vector> mVolatileChunks; std::vector> mChains; public: @@ -373,9 +371,15 @@ class GPUReconstruction GPUProcessor* proc = nullptr; std::vector res; }; + struct alignedDeleter { + void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; + }; std::unordered_map mMemoryReuse1to1; - std::vector> mNonPersistentMemoryStack; + std::vector> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCound, tag std::vector mNonPersistentIndividualAllocations; + std::vector> mNonPersistentIndividualDirectAllocations; + std::vector> mDirectMemoryChunks; + std::vector> mVolatileChunks; std::unique_ptr mPipelineContext; From fef77bcb601992f654c44035c593b1c60c41bbfa Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 21 Apr 2025 21:40:18 +0200 Subject: [PATCH 0439/1914] GPU: Direct memory allocation supports stacked memory --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 45 +++++++++++++--------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 7a8d73e689b84..ad2ee2e840d00 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -712,9 +712,6 @@ size_t GPUReconstruction::AllocateRegisteredMemory(int16_t ires, GPUOutputContro void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) { - if (type != GPUMemoryResource::MEMORY_HOST && (!IsGPU() || type != GPUMemoryResource::MEMORY_GPU)) { - throw std::runtime_error("Requested invalid memory typo for unmanaged allocation"); - } if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { char* retVal = new (std::align_val_t(GPUCA_BUFFER_ALIGNMENT)) char[size]; if ((type & GPUMemoryResource::MEMORY_STACK)) { @@ -723,25 
+720,35 @@ void* GPUReconstruction::AllocateDirectMemory(size_t size, int32_t type) mDirectMemoryChunks.emplace_back(retVal, alignedDeleter()); } return retVal; + } + + if ((type & ~(GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK)) || ((type & GPUMemoryResource::MEMORY_HOST) && (type & GPUMemoryResource::MEMORY_GPU))) { + throw std::runtime_error("Requested invalid memory typo for direct allocation"); + } + if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { + GPUError("Must not allocate direct memory while volatile chunks are allocated"); + throw std::bad_alloc(); + } + + void*& pool = (type & GPUMemoryResource::MEMORY_GPU) ? mDeviceMemoryPool : mHostMemoryPool; + void*& poolend = (type & GPUMemoryResource::MEMORY_GPU) ? mDeviceMemoryPoolEnd : mHostMemoryPoolEnd; + char* retVal; + if ((type & GPUMemoryResource::MEMORY_STACK)) { + poolend = (char*)poolend - size; + poolend = (char*)poolend - GPUProcessor::getAlignmentMod(poolend); + retVal = (char*)poolend; } else { - if (mVolatileMemoryStart && !mDeviceMemoryAsVolatile && (type & GPUMemoryResource::MEMORY_GPU) && !(type & GPUMemoryResource::MEMORY_STACK)) { - GPUError("Must not allocate direct memory while volatile chunks are allocated"); - throw std::bad_alloc(); - } - void*& pool = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPool : mHostMemoryPool; - void*& poolend = type == GPUMemoryResource::MEMORY_GPU ? mDeviceMemoryPoolEnd : mHostMemoryPoolEnd; - char* retVal; GPUProcessor::computePointerWithAlignment(pool, retVal, size); - if (pool > poolend) { - GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend)); - throw std::bad_alloc(); - } - UpdateMaxMemoryUsed(); - if (GetProcessingSettings().allocDebugLevel >= 2) { - std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? 
"gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n"; - } - return retVal; } + if (pool > poolend) { + GPUError("Insufficient unmanaged memory: missing %ld bytes", ptrDiff(pool, poolend)); + throw std::bad_alloc(); + } + UpdateMaxMemoryUsed(); + if (GetProcessingSettings().allocDebugLevel >= 2) { + std::cout << "Allocated (unmanaged " << (type == GPUMemoryResource::MEMORY_GPU ? "gpu" : "host") << "): " << size << " - available: " << ptrDiff(poolend, pool) << "\n"; + } + return retVal; } void* GPUReconstruction::AllocateVolatileDeviceMemory(size_t size) From 1f6767ce039ea2ec6cc72da136368f1dad9677e3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 22 Apr 2025 18:07:01 +0200 Subject: [PATCH 0440/1914] GPU CMake: Another attempt to silence the CMake warnings about architectures --- GPU/GPUTracking/Standalone/CMakeLists.txt | 6 +++--- dependencies/FindO2GPU.cmake | 17 +++++------------ 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index a17c58ad1ba03..0859223187f00 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -125,10 +125,10 @@ find_package(O2GPU REQUIRED) if(GPUCA_CONFIG_ONNX) find_package(onnxruntime REQUIRED) - if(CUDA_ENABLED AND NOT DEFINED ORT_CUDA_BUILD) - set(ORT_CUDA_BUILD ON) - elseif(HIP_ENABLED AND NOT DEFINED ORT_ROCM_BUILD) + if(HIP_ENABLED AND NOT DEFINED ORT_ROCM_BUILD) set(ORT_ROCM_BUILD ON) + elseif(CUDA_ENABLED AND NOT DEFINED ORT_CUDA_BUILD) + set(ORT_CUDA_BUILD ON) endif() else() set(onnxruntime_FOUND OFF) diff --git a/dependencies/FindO2GPU.cmake b/dependencies/FindO2GPU.cmake index 0c5313c16af68..33925e8cf1341 100644 --- a/dependencies/FindO2GPU.cmake +++ b/dependencies/FindO2GPU.cmake @@ -104,7 +104,9 @@ endif() # ---------------------------------- CUDA ---------------------------------- if(ENABLE_CUDA) if(CUDA_COMPUTETARGET) - 
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE) + set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET}) + else() + set(CMAKE_CUDA_ARCHITECTURES 61-virtual) endif() set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) @@ -121,11 +123,6 @@ if(ENABLE_CUDA) message(STATUS "Using as CUDA GCC version: ${GPUCA_CUDA_GCCBIN}") set(CMAKE_CUDA_HOST_COMPILER "${GPUCA_CUDA_GCCBIN}") endif() - if(CUDA_COMPUTETARGET) - set(CMAKE_CUDA_ARCHITECTURES ${CUDA_COMPUTETARGET} CACHE STRING "" FORCE) - else() - set(CMAKE_CUDA_ARCHITECTURES 61-virtual CACHE STRING "" FORCE) - endif() enable_language(CUDA) get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) if (ENABLE_CUDA STREQUAL "AUTO") @@ -231,7 +228,8 @@ endif() # ---------------------------------- HIP ---------------------------------- if(ENABLE_HIP) if(HIP_AMDGPUTARGET) - set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "" FORCE) + set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}") + set(AMDGPU_TARGETS "${HIP_AMDGPUTARGET}") endif() if(NOT "$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" AND NOT CMAKE_PREFIX_PATH MATCHES "rocm" AND EXISTS "/opt/rocm/lib/cmake/") list(APPEND CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake") @@ -239,11 +237,6 @@ if(ENABLE_HIP) if("$ENV{CMAKE_PREFIX_PATH}" MATCHES "rocm" OR CMAKE_PREFIX_PATH MATCHES "rocm") set(CMAKE_HIP_STANDARD ${CMAKE_CXX_STANDARD}) set(CMAKE_HIP_STANDARD_REQUIRED TRUE) - if(HIP_AMDGPUTARGET) - set(AMDGPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) - set(GPU_TARGETS "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) - set(CMAKE_HIP_ARCHITECTURES "${HIP_AMDGPUTARGET}" CACHE STRING "AMD GPU targets to compile for" FORCE) - endif() set(TMP_ROCM_DIR_LIST "${CMAKE_PREFIX_PATH}:$ENV{CMAKE_PREFIX_PATH}") string(REPLACE ":" ";" TMP_ROCM_DIR_LIST "${TMP_ROCM_DIR_LIST}") list(FILTER TMP_ROCM_DIR_LIST INCLUDE REGEX rocm) From cde32e7eee5ac9ff2668621115d0f839b95fb11c Mon Sep 
17 00:00:00 2001 From: Felix Schlepper Date: Thu, 17 Apr 2025 20:22:59 +0200 Subject: [PATCH 0441/1914] Common: ConfKey align prov + print hash Signed-off-by: Felix Schlepper --- .../include/CommonUtils/ConfigurableParam.h | 2 +- .../CommonUtils/ConfigurableParamHelper.h | 14 +++--- Common/Utils/src/ConfigurableParamHelper.cxx | 49 ++++++++++++++----- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/Common/Utils/include/CommonUtils/ConfigurableParam.h b/Common/Utils/include/CommonUtils/ConfigurableParam.h index f44d9efcaea76..39b24bbbbd57c 100644 --- a/Common/Utils/include/CommonUtils/ConfigurableParam.h +++ b/Common/Utils/include/CommonUtils/ConfigurableParam.h @@ -162,7 +162,7 @@ class ConfigurableParam virtual std::string getName() const = 0; // print the current keys and values to screen (optionally with provenance information) - virtual void printKeyValues(bool showprov = true, bool useLogger = false) const = 0; + virtual void printKeyValues(bool showprov = true, bool useLogger = false, bool withPadding = false, bool showHash = false) const = 0; // get a single size_t hash_value of this parameter (can be used as a checksum to see // if object changed or different) diff --git a/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h b/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h index 7d9cb78bb9968..6e69fae03e6c3 100644 --- a/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h +++ b/Common/Utils/include/CommonUtils/ConfigurableParamHelper.h @@ -34,7 +34,7 @@ struct ParamDataMember { std::string value; std::string provenance; - std::string toString(std::string const& prefix, bool showProv) const; + std::string toString(std::string const& prefix, bool showProv, size_t padding = 0) const; }; // ---------------------------------------------------------------- @@ -58,8 +58,8 @@ class _ParamHelper static void syncCCDBandRegistry(std::string const& mainkey, TClass* cl, void* to, void* from, std::map* provmap, size_t offset); - 
static void outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger); - static void printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger); + static void outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding = false, bool showHash = false); + static void printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding, bool showHash); static size_t getHashImpl(std::string const& mainkey, std::vector const* members); @@ -100,13 +100,13 @@ class ConfigurableParamHelper : virtual public ConfigurableParam // ---------------------------------------------------------------- // one of the key methods, using introspection to print itself - void printKeyValues(bool showProv = true, bool useLogger = false) const final + void printKeyValues(bool showProv = true, bool useLogger = false, bool withPadding = true, bool showHash = true) const final { if (!isInitialized()) { initialize(); } auto members = getDataMembers(); - _ParamHelper::printMembersImpl(getName(), members, showProv, useLogger); + _ParamHelper::printMembersImpl(getName(), members, showProv, useLogger, withPadding, showHash); } // @@ -237,13 +237,13 @@ class ConfigurableParamPromoter : public Base, virtual public ConfigurableParam // ---------------------------------------------------------------- // one of the key methods, using introspection to print itself - void printKeyValues(bool showProv = true, bool useLogger = false) const final + void printKeyValues(bool showProv = true, bool useLogger = false, bool withPadding = true, bool showHash = true) const final { if (!isInitialized()) { initialize(); } auto members = getDataMembers(); - _ParamHelper::printMembersImpl(getName(), members, showProv, useLogger); + _ParamHelper::printMembersImpl(getName(), members, 
showProv, useLogger, withPadding, showHash); } // diff --git a/Common/Utils/src/ConfigurableParamHelper.cxx b/Common/Utils/src/ConfigurableParamHelper.cxx index f217d402bcb45..161735b3a5ce4 100644 --- a/Common/Utils/src/ConfigurableParamHelper.cxx +++ b/Common/Utils/src/ConfigurableParamHelper.cxx @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef NDEBUG #undef NDEBUG #endif @@ -35,16 +36,25 @@ using namespace o2::conf; // ---------------------------------------------------------------------- -std::string ParamDataMember::toString(std::string const& prefix, bool showProv) const +std::string ParamDataMember::toString(std::string const& prefix, bool showProv, size_t padding) const { - std::string nil = ""; - + const std::string label = prefix + "." + name + " : " + value; std::ostringstream out; - out << prefix << "." << name << " : " << value; + out << label; if (showProv) { - std::string prov = (provenance.compare("") == 0 ? nil : provenance); - out << "\t\t[ " + prov + " ]"; + std::string prov = (provenance.compare("") == 0 ? 
"" : provenance); + if (padding) { + size_t len = label.size() - prefix.size() - 5; // 4 four the extra chars + 1 for the maxpad + if (len < padding) { + out << std::string(padding - len, ' '); + } else { + out << ' '; + } + out << "[ " + prov + " ]"; + } else { + out << "\t\t[ " + prov + " ]"; + } } return out.str(); } @@ -308,23 +318,40 @@ void _ParamHelper::fillKeyValuesImpl(std::string const& mainkey, TClass* cl, voi // ---------------------------------------------------------------------- -void _ParamHelper::printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger) +void _ParamHelper::printMembersImpl(std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding, bool showHash) { - _ParamHelper::outputMembersImpl(std::cout, mainkey, members, showProv, useLogger); + _ParamHelper::outputMembersImpl(std::cout, mainkey, members, showProv, useLogger, withPadding, showHash); } -void _ParamHelper::outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger) +void _ParamHelper::outputMembersImpl(std::ostream& out, std::string const& mainkey, std::vector const* members, bool showProv, bool useLogger, bool withPadding, bool showHash) { if (members == nullptr) { return; } + size_t maxpad{0}; + if (withPadding) { + for (auto& member : *members) { + maxpad = std::max(maxpad, member.name.size() + member.value.size()); + } + } + + if (showHash) { + std::string shash = std::format("{:07x}", getHashImpl(mainkey, members)); + shash = shash.substr(0, 7); + if (useLogger) { + LOG(info) << mainkey << " [Hash#" << shash << "]"; + } else { + out << mainkey << " [Hash#" << shash << "]\n"; + } + } + for (auto& member : *members) { if (useLogger) { - LOG(info) << member.toString(mainkey, showProv); + LOG(info) << member.toString(mainkey, showProv, maxpad); } else { - out << member.toString(mainkey, showProv) << "\n"; + out << 
member.toString(mainkey, showProv, maxpad) << "\n"; } } } From aae7f0a9d4201dd1efff98d47d3ea51ebd7e9c19 Mon Sep 17 00:00:00 2001 From: Francesco Mazzaschi <43742195+fmazzasc@users.noreply.github.com> Date: Wed, 23 Apr 2025 15:19:36 +0200 Subject: [PATCH 0442/1914] [StrangenessTracker] Fix chi2 calculation and attachment structure (#14186) * [StrangenessTracker] Fix chi2 calculation and attachment structure * Please consider the following formatting changes --------- Co-authored-by: Francesco Mazzaschi Co-authored-by: ALICE Action Bot --- .../StrangenessTracking/StrangenessTracker.h | 7 ++----- .../StrangenessTracking/src/StrangenessTracker.cxx | 14 ++++++-------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h index 11feac64c59ae..b30be4dd081e3 100644 --- a/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h +++ b/Detectors/Vertexing/StrangenessTracking/include/StrangenessTracking/StrangenessTracker.h @@ -56,7 +56,7 @@ enum DauType : int { struct ClusAttachments { - std::array arr; + std::array arr; }; class StrangenessTracker @@ -357,10 +357,7 @@ class StrangenessTracker std::vector mFitter4Body; // optional DCA Fitter for final 4 Body refit (per thread) o2::base::PropagatorImpl::MatCorrType mCorrType = o2::base::PropagatorImpl::MatCorrType::USEMatCorrNONE; // use mat correction - - std::vector> mDaughterTracks; // vector of daughter tracks (per thread) - ClusAttachments mStructClus; // # of attached tracks, 1 for mother, 2 for daughter - + std::vector> mDaughterTracks; // vector of daughter tracks (per thread) ClassDefNV(StrangenessTracker, 1); }; diff --git a/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx b/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx index c8a62fa76e3ed..acbaa9d6a08fd 100644 
--- a/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx +++ b/Detectors/Vertexing/StrangenessTracking/src/StrangenessTracker.cxx @@ -245,7 +245,7 @@ void StrangenessTracker::processCascade(int iCasc, const Cascade& casc, const Ca strangeTrack.mDecayRef = iCasc; strangeTrack.mITSRef = mSortedITSindexes[iTrack]; mStrangeTrackVec[iThread].push_back(strangeTrack); - mClusAttachments[iThread].push_back(mStructClus); + mClusAttachments[iThread].push_back(structClus); if (mMCTruthON) { auto lab = getStrangeTrackLabel(itsTrack, strangeTrack, structClus); mStrangeTrackLabels[iThread].push_back(lab); @@ -350,7 +350,7 @@ bool StrangenessTracker::matchDecayToITStrack(float decayR, StrangeTrack& strang auto nMinClusMother = trackClusters.size() < 4 ? 2 : mStrParams->mMinMotherClus; std::vector motherClusters; - std::array nAttachments; + std::array nAttachments; nAttachments.fill(-1); // fill arr with -1 int nUpdates = 0; @@ -412,13 +412,13 @@ bool StrangenessTracker::matchDecayToITStrack(float decayR, StrangeTrack& strang std::reverse(motherClusters.begin(), motherClusters.end()); - mGlobalChi2 = -1; + mGlobalChi2 = 0; for (auto& clus : motherClusters) { if (!updateTrack(clus, motherTrackClone)) { break; } } - strangeTrack.mMatchChi2 = mGlobalChi2; + strangeTrack.mMatchChi2 = mGlobalChi2 / motherClusters.size(); LOG(debug) << "Inward-outward refit finished, starting final topology refit"; // final Topology refit @@ -481,7 +481,6 @@ bool StrangenessTracker::matchDecayToITStrack(float decayR, StrangeTrack& strang strangeTrack.mTopoChi2 = mFitter3Body[iThread].getChi2AtPCACandidate(); } structClus.arr = nAttachments; - return true; } @@ -508,9 +507,8 @@ bool StrangenessTracker::updateTrack(const ITSCluster& clus, o2::track::TrackPar return false; } } - auto chi2 = std::abs(track.getPredictedChi2Quiet(clus)); // abs to be understood - LOG(debug) << "Chi2: " << chi2; - if (chi2 > mStrParams->mMaxChi2 || chi2 < 0) { + auto chi2 = track.getPredictedChi2Quiet(clus); 
// abs to be understood + if (std::abs(chi2) > mStrParams->mMaxChi2) { return false; } From 04baff0b28a95c388a139f2554a77eeab97588dc Mon Sep 17 00:00:00 2001 From: abmodak <67369858+abmodak@users.noreply.github.com> Date: Wed, 23 Apr 2025 18:50:42 +0200 Subject: [PATCH 0443/1914] Add PMD info to AO2Ds (#13998) --- .../include/Framework/AnalysisDataModel.h | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Framework/Core/include/Framework/AnalysisDataModel.h b/Framework/Core/include/Framework/AnalysisDataModel.h index d90952f38ac9f..3216a24ed73a2 100644 --- a/Framework/Core/include/Framework/AnalysisDataModel.h +++ b/Framework/Core/include/Framework/AnalysisDataModel.h @@ -1778,6 +1778,23 @@ DECLARE_SOA_COLUMN(DCAr, dcaR, float); //! DCA DECLARE_SOA_COLUMN(DCAz, dcaZ, float); //! DCA in z direction DECLARE_SOA_COLUMN(Mass, mass, float); //! mass of the conversion. Do NOT use for cut! } // namespace oftv0 +namespace pmd +{ +DECLARE_SOA_INDEX_COLUMN(BC, bc); //! BC index +DECLARE_SOA_COLUMN(X, pmdclsx, float); //! cluster x position +DECLARE_SOA_COLUMN(Y, pmdclsy, float); //! cluster y position +DECLARE_SOA_COLUMN(Z, pmdclsz, float); //! cluster z position +DECLARE_SOA_COLUMN(CluADC, pmdclsadc, float); //! cluster energy in ADC +DECLARE_SOA_COLUMN(CluPID, pmdclspid, float); //! cluster probability, 1: photon, 0:hadron +DECLARE_SOA_COLUMN(Det, pmddet, uint8_t); //! Detector, 0:PRE, 1:CPV +DECLARE_SOA_COLUMN(Ncell, pmdncell, uint8_t); //! cluster cells +DECLARE_SOA_COLUMN(Smn, pmdmodule, int32_t); //! module number +DECLARE_SOA_COLUMN(TrackNo, pmdtrackno, int32_t); //! Track number assigned to clus from simulation +DECLARE_SOA_COLUMN(TrackPid, pmdtrackpid, int32_t); //! Track PID assigned to clus from simulation +DECLARE_SOA_COLUMN(SigX, pmdsigx, float); //! Cluster x-width +DECLARE_SOA_COLUMN(SigY, pmdsigy, float); //! Cluster y-width +DECLARE_SOA_COLUMN(ClMatching, pmdclmatching, int32_t); //! 
Cluster of PRE matching with CPV +} // namespace pmd } // namespace run2 DECLARE_SOA_TABLE(Run2BCInfos_000, "AOD", "RUN2BCINFO", run2::EventCuts, //! Legacy information for Run 2 event selection @@ -1811,6 +1828,14 @@ DECLARE_SOA_TABLE(Run2OTFV0s, "AOD", "Run2OTFV0", //! Run 2 V0 on the fly table using Run2OTFV0 = Run2OTFV0s::iterator; +DECLARE_SOA_TABLE(Pmds, "AOD", "PMD", //! Photon information from PMD detector + o2::soa::Index<>, run2::pmd::BCId, run2::pmd::X, run2::pmd::Y, + run2::pmd::Z, run2::pmd::CluADC, run2::pmd::CluPID, run2::pmd::Det, + run2::pmd::Ncell, run2::pmd::Smn, run2::pmd::TrackNo, run2::pmd::TrackPid, + run2::pmd::SigX, run2::pmd::SigY, run2::pmd::ClMatching); + +using Pmd = Pmds::iterator; + // ---- MC tables ---- namespace mccollision { From 72b50c63aeeb24144bc186d2c4efabc56503cf55 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Thu, 24 Apr 2025 07:22:33 +0200 Subject: [PATCH 0444/1914] ITS: Fix GPU deterministic mode + refactoring (#14200) --- .../GPU/ITStrackingGPU/TimeFrameGPU.h | 14 +- .../GPU/ITStrackingGPU/TrackerTraitsGPU.h | 29 +- .../GPU/ITStrackingGPU/TrackingKernels.h | 3 +- .../tracking/GPU/cuda/TrackerTraitsGPU.cxx | 87 ++-- .../ITS/tracking/GPU/cuda/TrackingKernels.cu | 445 ++++++++---------- .../tracking/include/ITStracking/Tracker.h | 31 +- .../include/ITStracking/TrackerTraits.h | 30 +- .../include/ITStracking/TrackingInterface.h | 5 +- Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx | 188 ++------ .../ITSMFT/ITS/tracking/src/TrackerTraits.cxx | 24 +- .../ITS/tracking/src/TrackingInterface.cxx | 19 +- .../ITSMFT/ITS/workflow/src/TrackerSpec.cxx | 2 + 12 files changed, 317 insertions(+), 560 deletions(-) diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h index 29d2404e98681..88666cdfdb7fb 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h 
@@ -15,18 +15,13 @@ #include "ITStracking/TimeFrame.h" #include "ITStracking/Configuration.h" - -#include "ITStrackingGPU/ClusterLinesGPU.h" #include "ITStrackingGPU/Utils.h" #include -namespace o2 -{ -namespace its -{ -namespace gpu +namespace o2::its::gpu { + class Stream; class DefaultGPUAllocator : public ExternalAllocator @@ -228,7 +223,6 @@ inline int TimeFrameGPU::getNumberOfCells() const return std::accumulate(mNCells.begin(), mNCells.end(), 0); } -} // namespace gpu -} // namespace its -} // namespace o2 +} // namespace o2::its::gpu + #endif diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h index f9583d97ca030..c765307473749 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h @@ -13,8 +13,6 @@ #ifndef ITSTRACKINGGPU_TRACKERTRAITSGPU_H_ #define ITSTRACKINGGPU_TRACKERTRAITSGPU_H_ -#include "ITStracking/Configuration.h" -#include "ITStracking/Definitions.h" #include "ITStracking/TrackerTraits.h" #include "ITStrackingGPU/TimeFrameGPU.h" @@ -24,28 +22,27 @@ namespace its { template -class TrackerTraitsGPU : public TrackerTraits +class TrackerTraitsGPU final : public TrackerTraits { public: TrackerTraitsGPU() = default; ~TrackerTraitsGPU() override = default; - // void computeLayerCells() final; - void adoptTimeFrame(TimeFrame* tf) override; - void initialiseTimeFrame(const int iteration) override; + void adoptTimeFrame(TimeFrame* tf) final; + void initialiseTimeFrame(const int iteration) final; + void computeLayerTracklets(const int iteration, int, int) final; - void computeLayerCells(const int iteration) override; - void setBz(float) override; - void findCellsNeighbours(const int iteration) override; - void findRoads(const int iteration) override; + void computeLayerCells(const int iteration) final; + void findCellsNeighbours(const int iteration) final; + void 
findRoads(const int iteration) final; + + bool supportsExtendTracks() const noexcept final { return false; } + bool supportsFindShortPrimaries() const noexcept final { return false; } - // Methods to get CPU execution from traits - void initialiseTimeFrameHybrid(const int iteration) override { initialiseTimeFrame(iteration); }; - void computeTrackletsHybrid(const int iteration, int, int) override; - void computeCellsHybrid(const int iteration) override; - void findCellsNeighboursHybrid(const int iteration) override; + void setBz(float) final; - void extendTracks(const int iteration) override; + const char* getName() const noexcept final { return "GPU"; } + bool isGPU() const noexcept final { return true; } // TimeFrameGPU information forwarding int getTFNumberOfClusters() const override; diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h index 720867ddaba29..21b14fd9292d2 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h @@ -176,8 +176,7 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads); -int filterCellNeighboursHandler(std::vector&, - gpuPair*, +int filterCellNeighboursHandler(gpuPair*, int*, unsigned int); diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx index 3c65faddcff71..f3b62ec8a6108 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx @@ -11,10 +11,7 @@ /// #include -#include -#include #include -#include #include "DataFormatsITS/TrackITS.h" @@ -41,54 +38,7 @@ void TrackerTraitsGPU::initialiseTimeFrame(const int iteration) } template -void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int, int) -{ -} - -template -void 
TrackerTraitsGPU::computeLayerCells(const int iteration) -{ -} - -template -void TrackerTraitsGPU::findCellsNeighbours(const int iteration) -{ -} - -template -void TrackerTraitsGPU::extendTracks(const int iteration) -{ -} - -template -void TrackerTraitsGPU::setBz(float bz) -{ - mBz = bz; - mTimeFrameGPU->setBz(bz); -} - -template -int TrackerTraitsGPU::getTFNumberOfClusters() const -{ - return mTimeFrameGPU->getNumberOfClusters(); -} - -template -int TrackerTraitsGPU::getTFNumberOfTracklets() const -{ - return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0); -} - -template -int TrackerTraitsGPU::getTFNumberOfCells() const -{ - return mTimeFrameGPU->getNumberOfCells(); -} - -//////////////////////////////////////////////////////////////////////////////// -// Hybrid tracking -template -void TrackerTraitsGPU::computeTrackletsHybrid(const int iteration, int iROFslice, int iVertex) +void TrackerTraitsGPU::computeLayerTracklets(const int iteration, int iROFslice, int iVertex) { auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); mTimeFrameGPU->createTrackletsLUTDevice(iteration); @@ -161,7 +111,7 @@ void TrackerTraitsGPU::computeTrackletsHybrid(const int iteration, int } template -void TrackerTraitsGPU::computeCellsHybrid(const int iteration) +void TrackerTraitsGPU::computeLayerCells(const int iteration) { mTimeFrameGPU->createCellsLUTDevice(); auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); @@ -208,7 +158,7 @@ void TrackerTraitsGPU::computeCellsHybrid(const int iteration) } template -void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) +void TrackerTraitsGPU::findCellsNeighbours(const int iteration) { mTimeFrameGPU->createNeighboursIndexTablesDevice(); auto& conf = o2::its::ITSGpuTrackingParamConfig::Instance(); @@ -250,8 +200,7 @@ void TrackerTraitsGPU::findCellsNeighboursHybrid(const int iteration) conf.nBlocks, conf.nThreads); - 
filterCellNeighboursHandler(mTimeFrameGPU->getCellsNeighbours()[iLayer], - mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), + filterCellNeighboursHandler(mTimeFrameGPU->getDeviceNeighbourPairs(iLayer), mTimeFrameGPU->getDeviceNeighbours(iLayer), nNeigh); } @@ -270,9 +219,6 @@ void TrackerTraitsGPU::findRoads(const int iteration) if ((mTrkParams[iteration].StartLayerMask & (1 << (startLayer + 2))) == 0) { continue; } - std::vector lastCellId, updatedCellId; - std::vector lastCellSeed, updatedCellSeed; - processNeighboursHandler(startLayer, startLevel, mTimeFrameGPU->getDeviceArrayCells(), @@ -366,5 +312,30 @@ void TrackerTraitsGPU::findRoads(const int iteration) } }; +template +int TrackerTraitsGPU::getTFNumberOfClusters() const +{ + return mTimeFrameGPU->getNumberOfClusters(); +} + +template +int TrackerTraitsGPU::getTFNumberOfTracklets() const +{ + return std::accumulate(mTimeFrameGPU->getNTracklets().begin(), mTimeFrameGPU->getNTracklets().end(), 0); +} + +template +int TrackerTraitsGPU::getTFNumberOfCells() const +{ + return mTimeFrameGPU->getNumberOfCells(); +} + +template +void TrackerTraitsGPU::setBz(float bz) +{ + mBz = bz; + mTimeFrameGPU->setBz(bz); +} + template class TrackerTraitsGPU<7>; } // namespace o2::its diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu index 805e66675e1b9..bb39e9e70341b 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu @@ -12,11 +12,9 @@ #include #include -#include -#include #include -#include +#include #include #include #include @@ -28,7 +26,6 @@ #include #include "ITStracking/Constants.h" -#include "ITStracking/Configuration.h" #include "ITStracking/IndexTableUtils.h" #include "ITStracking/MathUtils.h" #include "DataFormatsITS/TrackITS.h" @@ -59,7 +56,7 @@ namespace o2::its using namespace constants::its2; using Vertex = o2::dataformats::Vertex>; -GPUd() float 
Sq(float v) +GPUdii() float Sq(float v) { return v * v; } @@ -76,15 +73,15 @@ GPUd() const int4 getBinsRect(const Cluster& currentCluster, const int layerInde const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz; const float phiRangeMax = (maxdeltaphi > constants::math::Pi) ? constants::math::TwoPi : currentCluster.phi + maxdeltaphi; - if (zRangeMax < -LayersZCoordinate()[layerIndex + 1] || - zRangeMin > LayersZCoordinate()[layerIndex + 1] || zRangeMin > zRangeMax) { + if (zRangeMax < -utils.getLayerZ(layerIndex) || + zRangeMin > utils.getLayerZ(layerIndex) || zRangeMin > zRangeMax) { return getEmptyBinsRect(); } - return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex + 1, zRangeMin)), + return int4{o2::gpu::CAMath::Max(0, utils.getZBinIndex(layerIndex, zRangeMin)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)), - o2::gpu::CAMath::Min(ZBins - 1, utils.getZBinIndex(layerIndex + 1, zRangeMax)), + o2::gpu::CAMath::Min(utils.getNzBins() - 1, utils.getZBinIndex(layerIndex, zRangeMax)), utils.getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))}; } @@ -184,6 +181,11 @@ struct equal_tracklets { GPUhd() bool operator()(const Tracklet& a, const Tracklet& b) { return a.firstClusterIndex == b.firstClusterIndex && a.secondClusterIndex == b.secondClusterIndex; } }; +template +struct sort_by_second { + GPUhd() bool operator()(const gpuPair& a, const gpuPair& b) const { return a.second < b.second; } +}; + template struct pair_to_first { GPUhd() int operator()(const gpuPair& a) const @@ -522,7 +524,7 @@ GPUg() void computeLayerTrackletsMultiROFKernel( const float zAtRmax{tanLambda * (maxR - currentCluster.radius) + currentCluster.zCoordinate}; const float sqInverseDeltaZ0{1.f / (Sq(currentCluster.zCoordinate - primaryVertex.getZ()) + 2.e-8f)}; /// protecting from overflows adding the detector resolution const float sigmaZ{o2::gpu::CAMath::Sqrt(Sq(resolution) * Sq(tanLambda) * ((Sq(inverseR0) + sqInverseDeltaZ0) * Sq(meanDeltaR) + 1.f) 
+ Sq(meanDeltaR * MSAngle))}; - const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; + const int4 selectedBinsRect{getBinsRect(currentCluster, layerIndex + 1, *utils, zAtRmin, zAtRmax, sigmaZ * NSigmaCut, phiCut)}; if (selectedBinsRect.x == 0 && selectedBinsRect.y == 0 && selectedBinsRect.z == 0 && selectedBinsRect.w == 0) { continue; } @@ -800,6 +802,44 @@ GPUg() void printCellSeeds(CellSeed* seed, int nCells, const unsigned int tId = } } } + +template +GPUhi() void cubExclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubExclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubExclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} + +template +GPUhi() void cubInclusiveScanInPlace(T* in_out, int num_items, cudaStream_t stream = nullptr) +{ + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, in_out, + in_out, num_items, stream)); + GPUChkErrS(cudaFree(d_temp_storage)); +} + +template +GPUhi() void cubInclusiveScanInPlace(Vector& in_out, int num_items, cudaStream_t stream = nullptr) +{ + cubInclusiveScanInPlace(thrust::raw_pointer_cast(in_out.data()), num_items, stream); +} } // namespace gpu template @@ 
-833,7 +873,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>( + gpu::computeLayerTrackletsMultiROFKernel<<>>( utils, multMask, iLayer, @@ -860,22 +901,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils, resolutions[iLayer], radii[iLayer + 1] - radii[iLayer], mulScatAng[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } @@ -913,55 +939,42 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils, const int nThreads) { for (int iLayer = 0; iLayer < nLayers - 1; ++iLayer) { - gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, - multMask, - iLayer, - startROF, - endROF, - maxROF, - deltaROF, - vertices, - rofPV, - nVertices, - vertexId, - clusters, - ROFClusters, - usedClusters, - clustersIndexTables, - tracklets, - trackletsLUTs, - iteration, - NSigmaCut, - phiCuts[iLayer], - resolutionPV, - minRs[iLayer + 1], - maxRs[iLayer + 1], - resolutions[iLayer], - radii[iLayer + 1] - radii[iLayer], - mulScatAng[iLayer]); + gpu::computeLayerTrackletsMultiROFKernel<<>>(utils, + multMask, + iLayer, + startROF, + endROF, 
+ maxROF, + deltaROF, + vertices, + rofPV, + nVertices, + vertexId, + clusters, + ROFClusters, + usedClusters, + clustersIndexTables, + tracklets, + trackletsLUTs, + iteration, + NSigmaCut, + phiCuts[iLayer], + resolutionPV, + minRs[iLayer + 1], + maxRs[iLayer + 1], + resolutions[iLayer], + radii[iLayer + 1] - radii[iLayer], + mulScatAng[iLayer]); thrust::device_ptr tracklets_ptr(spanTracklets[iLayer]); thrust::sort(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::sort_tracklets()); auto unique_end = thrust::unique(thrust::device, tracklets_ptr, tracklets_ptr + nTracklets[iLayer], gpu::equal_tracklets()); nTracklets[iLayer] = unique_end - tracklets_ptr; if (iLayer > 0) { GPUChkErrS(cudaMemset(trackletsLUTsHost[iLayer], 0, nClusters[iLayer] * sizeof(int))); - gpu::compileTrackletsLookupTableKernel<<>>(spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - trackletsLUTsHost[iLayer], // d_in - trackletsLUTsHost[iLayer], // d_out - nClusters[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::compileTrackletsLookupTableKernel<<>>( + spanTracklets[iLayer], trackletsLUTsHost[iLayer], nTracklets[iLayer]); + gpu::cubExclusiveScanInPlace(trackletsLUTsHost[iLayer], nClusters[iLayer] + 1); } } } @@ -984,7 +997,8 @@ void countCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + 
gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -998,22 +1012,7 @@ void countCellsHandler( maxChi2ClusterAttachment, // const float cellDeltaTanLambdaSigma, // const float nSigmaCut); // const float - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - cellsLUTsHost, // d_in - cellsLUTsHost, // d_out - nTracklets + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::cubExclusiveScanInPlace(cellsLUTsHost, nTracklets + 1); } void computeCellsHandler( @@ -1034,7 +1033,8 @@ void computeCellsHandler( const int nBlocks, const int nThreads) { - gpu::computeLayerCellsKernel<<>>( + gpu::computeLayerCellsKernel<<>>( sortedClusters, // const Cluster** unsortedClusters, // const Cluster** tfInfo, // const TrackingFrameInfo** @@ -1064,7 +1064,8 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, const int nBlocks, const int nThreads) { - gpu::computeLayerCellNeighboursKernel<<>>( + gpu::computeLayerCellNeighboursKernel<<>>( cellsLayersDevice, neighboursLUT, neighboursIndexTable, @@ -1076,39 +1077,10 @@ unsigned int countCellNeighboursHandler(CellSeed** cellsLayersDevice, nCells, maxCellNeighbours); - void *d_temp_storage = nullptr, *d_temp_storage_2 = nullptr; - size_t temp_storage_bytes = 0, temp_storage_bytes_2 = 0; - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // 
temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::InclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - neighboursLUT, // d_in - neighboursLUT, // d_out - nCellsNext)); // num_items - - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - GPUChkErrS(cudaMalloc(&d_temp_storage_2, temp_storage_bytes_2)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage_2, // d_temp_storage - temp_storage_bytes_2, // temp_storage_bytes - neighboursIndexTable, // d_in - neighboursIndexTable, // d_out - nCells + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + gpu::cubInclusiveScanInPlace(neighboursLUT, nCellsNext); + gpu::cubExclusiveScanInPlace(neighboursIndexTable, nCells + 1); unsigned int nNeighbours; GPUChkErrS(cudaMemcpy(&nNeighbours, &neighboursLUT[nCellsNext - 1], sizeof(unsigned int), cudaMemcpyDeviceToHost)); - GPUChkErrS(cudaFree(d_temp_storage)); - GPUChkErrS(cudaFree(d_temp_storage_2)); return nNeighbours; } @@ -1143,32 +1115,18 @@ void computeCellNeighboursHandler(CellSeed** cellsLayersDevice, GPUChkErrS(cudaDeviceSynchronize()); } -int filterCellNeighboursHandler(std::vector& neighHost, // TODO: eventually remove this! - gpuPair* cellNeighbourPairs, +int filterCellNeighboursHandler(gpuPair* cellNeighbourPairs, int* cellNeighbours, unsigned int nNeigh) { thrust::device_ptr> neighVectorPairs(cellNeighbourPairs); thrust::device_ptr validNeighs(cellNeighbours); - thrust::device_vector keys(nNeigh); // TODO: externally allocate. - thrust::device_vector vals(nNeigh); // TODO: externally allocate. 
- thrust::copy(thrust::make_transform_iterator(neighVectorPairs, gpu::pair_to_second()), - thrust::make_transform_iterator(neighVectorPairs + nNeigh, gpu::pair_to_second()), - keys.begin()); - thrust::sequence(vals.begin(), vals.end()); - thrust::sort_by_key(keys.begin(), keys.end(), vals.begin()); - thrust::device_vector> sortedNeigh(nNeigh); - thrust::copy(thrust::make_permutation_iterator(neighVectorPairs, vals.begin()), - thrust::make_permutation_iterator(neighVectorPairs, vals.end()), - sortedNeigh.begin()); - GPUChkErrS(cudaDeviceSynchronize()); - auto trimmedBegin = thrust::find_if(sortedNeigh.begin(), sortedNeigh.end(), gpu::is_valid_pair()); // trim leading -1s - auto trimmedSize = sortedNeigh.end() - trimmedBegin; - neighHost.resize(trimmedSize); - thrust::transform(trimmedBegin, sortedNeigh.end(), validNeighs, gpu::pair_to_first()); - GPUChkErrS(cudaMemcpy(neighHost.data(), cellNeighbours, trimmedSize * sizeof(int), cudaMemcpyDeviceToHost)); + auto updatedEnd = thrust::remove_if(neighVectorPairs, neighVectorPairs + nNeigh, gpu::is_invalid_pair()); + size_t newSize = updatedEnd - neighVectorPairs; + thrust::stable_sort(neighVectorPairs, neighVectorPairs + newSize, gpu::sort_by_second()); + thrust::transform(neighVectorPairs, neighVectorPairs + newSize, validNeighs, gpu::pair_to_first()); - return trimmedSize; + return newSize; } template @@ -1190,137 +1148,117 @@ void processNeighboursHandler(const int startLayer, const int nBlocks, const int nThreads) { - thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. TODO: fix this. 
- // thrust::device_vector lastCellIds(lastCellIdHost); - // thrust::device_vector lastCellSeed(lastCellSeedHost); - thrust::device_vector lastCellId, updatedCellId; - thrust::device_vector lastCellSeed, updatedCellSeed; - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - void* d_temp_storage = nullptr; - size_t temp_storage_bytes = 0; - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[startLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - - updatedCellId.resize(foundSeedsTable.back()); - updatedCellSeed.resize(foundSeedsTable.back()); - - gpu::processNeighboursKernel<<>>(startLayer, - startLevel, - allCellSeeds, - currentCellSeeds, - nullptr, - nCells[startLayer], - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[startLayer - 1], - neighboursDeviceLUTs[startLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - auto t1 = updatedCellSeed.size(); - 
GPUChkErrS(cudaFree(d_temp_storage)); + thrust::device_vector foundSeedsTable(nCells[startLayer] + 1); // Shortcut: device_vector skips central memory management, we are relying on the contingency. + // TODO: fix this. + + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, nCells[startLayer] + 1); + + thrust::device_vector updatedCellId(foundSeedsTable.back()); + thrust::device_vector updatedCellSeed(foundSeedsTable.back()); + gpu::processNeighboursKernel<<>>( + startLayer, + startLevel, + allCellSeeds, + currentCellSeeds, + nullptr, + nCells[startLayer], + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[startLayer - 1], + neighboursDeviceLUTs[startLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + int level = startLevel; + thrust::device_vector lastCellId; + thrust::device_vector lastCellSeed; for (int iLayer{startLayer - 1}; iLayer > 0 && level > 2; --iLayer) { - temp_storage_bytes = 0; lastCellSeed.swap(updatedCellSeed); lastCellId.swap(updatedCellId); thrust::device_vector().swap(updatedCellSeed); thrust::device_vector().swap(updatedCellId); auto lastCellSeedSize{lastCellSeed.size()}; - foundSeedsTable.resize(nCells[iLayer] + 1); + foundSeedsTable.resize(lastCellSeedSize + 1); thrust::fill(foundSeedsTable.begin(), foundSeedsTable.end(), 0); - --level; - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - 
lastCellSeedSize, - nullptr, - nullptr, - thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(nullptr, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer - GPUChkErrS(cudaMalloc(&d_temp_storage, temp_storage_bytes)); - GPUChkErrS(cub::DeviceScan::ExclusiveSum(d_temp_storage, // d_temp_storage - temp_storage_bytes, // temp_storage_bytes - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_in - thrust::raw_pointer_cast(&foundSeedsTable[0]), // d_out - nCells[iLayer] + 1, // num_items - 0)); // NOLINT: this is the offset of the sum, not a pointer + + gpu::processNeighboursKernel<<>>( + iLayer, + --level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + nullptr, + nullptr, + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); + gpu::cubExclusiveScanInPlace(foundSeedsTable, foundSeedsTable.size()); + auto foundSeeds{foundSeedsTable.back()}; updatedCellId.resize(foundSeeds); thrust::fill(updatedCellId.begin(), updatedCellId.end(), 0); updatedCellSeed.resize(foundSeeds); thrust::fill(updatedCellSeed.begin(), updatedCellSeed.end(), CellSeed()); - gpu::processNeighboursKernel<<>>(iLayer, - level, - allCellSeeds, - thrust::raw_pointer_cast(&lastCellSeed[0]), - thrust::raw_pointer_cast(&lastCellId[0]), - lastCellSeedSize, - thrust::raw_pointer_cast(&updatedCellSeed[0]), - thrust::raw_pointer_cast(&updatedCellId[0]), 
- thrust::raw_pointer_cast(&foundSeedsTable[0]), - usedClusters, - neighbours[iLayer - 1], - neighboursDeviceLUTs[iLayer - 1], - foundTrackingFrameInfo, - bz, - maxChi2ClusterAttachment, - propagator, - matCorrType); - GPUChkErrS(cudaFree(d_temp_storage)); + gpu::processNeighboursKernel<<>>( + iLayer, + level, + allCellSeeds, + thrust::raw_pointer_cast(&lastCellSeed[0]), + thrust::raw_pointer_cast(&lastCellId[0]), + lastCellSeedSize, + thrust::raw_pointer_cast(&updatedCellSeed[0]), + thrust::raw_pointer_cast(&updatedCellId[0]), + thrust::raw_pointer_cast(&foundSeedsTable[0]), + usedClusters, + neighbours[iLayer - 1], + neighboursDeviceLUTs[iLayer - 1], + foundTrackingFrameInfo, + bz, + maxChi2ClusterAttachment, + propagator, + matCorrType); } + thrust::device_vector outSeeds(updatedCellSeed.size()); auto end = thrust::copy_if(updatedCellSeed.begin(), updatedCellSeed.end(), outSeeds.begin(), gpu::seed_selector(1.e3, maxChi2NDF * ((startLevel + 2) * 2 - 5))); auto s{end - outSeeds.begin()}; std::vector outSeedsHost(s); - thrust::copy(updatedCellSeed.begin(), updatedCellSeed.begin() + s, outSeedsHost.begin()); + thrust::copy(outSeeds.begin(), outSeeds.begin() + s, outSeedsHost.begin()); seedsHost.insert(seedsHost.end(), outSeedsHost.begin(), outSeedsHost.end()); } @@ -1339,7 +1277,8 @@ void trackSeedHandler(CellSeed* trackSeeds, const int nThreads) { thrust::device_vector minPts(minPtsHost); - gpu::fitTrackSeedsKernel<<>>( + gpu::fitTrackSeedsKernel<<>>( trackSeeds, // CellSeed* foundTrackingFrameInfo, // TrackingFrameInfo** tracks, // TrackITSExt* diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h index 58483e4aa9f6f..8f0a471b40c59 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Tracker.h @@ -50,21 +50,14 @@ class TrackerTraits; class Tracker { + using LogFunc = std::function; public: 
Tracker(TrackerTraits* traits); - Tracker(const Tracker&) = delete; - Tracker& operator=(const Tracker&) = delete; - ~Tracker(); - void adoptTimeFrame(TimeFrame& tf); - void clustersToTracks( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); - void clustersToTracksHybrid( - std::function = [](std::string s) { std::cout << s << std::endl; }, std::function = [](std::string s) { std::cerr << s << std::endl; }); - std::vector& getTracks(); + void clustersToTracks(LogFunc = [](std::string s) { std::cout << s << std::endl; }, LogFunc = [](std::string s) { std::cerr << s << std::endl; }); void setParameters(const std::vector&); std::vector& getParameters() { return mTrkParams; } @@ -74,7 +67,7 @@ class Tracker bool isMatLUT() const; void setNThreads(int n); int getNThreads() const; - std::uint32_t mTimeFrameCounter = 0; + void printSummary() const; private: void initialiseTimeFrame(int& iteration); @@ -82,16 +75,7 @@ class Tracker void computeCells(int& iteration); void findCellsNeighbours(int& iteration); void findRoads(int& iteration); - - void initialiseTimeFrameHybrid(int& iteration); - void computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex); - void computeCellsHybrid(int& iteration); - void findCellsNeighboursHybrid(int& iteration); - void findRoadsHybrid(int& iteration); - void findTracksHybrid(int& iteration); - void findShortPrimaries(); - void findTracks(); void extendTracks(int& iteration); // MC interaction @@ -100,7 +84,7 @@ class Tracker void rectifyClusterIndices(); template - float evaluateTask(void (Tracker::*)(T...), const char*, std::function logger, T&&... args); + float evaluateTask(void (Tracker::*)(T...), const char*, LogFunc logger, T&&... 
args); TrackerTraits* mTraits = nullptr; /// Observer pointer, not owned by this class TimeFrame* mTimeFrame = nullptr; /// Observer pointer, not owned by this class @@ -108,7 +92,9 @@ class Tracker std::vector mTrkParams; o2::gpu::GPUChainITS* mRecoChain = nullptr; - unsigned int mNumberOfRuns{0}; + unsigned int mNumberOfDroppedTFs{0}; + unsigned int mTimeFrameCounter{0}; + double mTotalTime{0}; }; inline void Tracker::setParameters(const std::vector& trkPars) @@ -117,8 +103,7 @@ inline void Tracker::setParameters(const std::vector& trkPar } template -float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, std::function logger, - T&&... args) +float Tracker::evaluateTask(void (Tracker::*task)(T...), const char* taskName, LogFunc logger, T&&... args) { float diff{0.f}; diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h index 46499db92d4d5..6b514c6e8d000 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackerTraits.h @@ -16,23 +16,12 @@ #ifndef TRACKINGITSU_INCLUDE_TRACKERTRAITS_H_ #define TRACKINGITSU_INCLUDE_TRACKERTRAITS_H_ -#include -#include #include -#include -#include -#include -#include -#include -#include #include "DetectorsBase/Propagator.h" -#include "DetectorsBase/MatLayerCylSet.h" #include "ITStracking/Configuration.h" -#include "ITStracking/Definitions.h" #include "ITStracking/MathUtils.h" #include "ITStracking/TimeFrame.h" -#include "ITStracking/Road.h" // #define OPTIMISATION_OUTPUT @@ -52,30 +41,29 @@ class TrackerTraits virtual ~TrackerTraits() = default; virtual void adoptTimeFrame(TimeFrame* tf); virtual void initialiseTimeFrame(const int iteration); + virtual void computeLayerTracklets(const int iteration, int iROFslice, int iVertex); virtual void computeLayerCells(const int iteration); virtual void findCellsNeighbours(const int iteration); 
virtual void findRoads(const int iteration); - virtual void initialiseTimeFrameHybrid(const int iteration) { LOGP(error, "initialiseTimeFrameHybrid: this method should never be called with CPU traits"); } - virtual void computeTrackletsHybrid(const int iteration, int, int) { LOGP(error, "computeTrackletsHybrid: this method should never be called with CPU traits"); } - virtual void computeCellsHybrid(const int iteration) { LOGP(error, "computeCellsHybrid: this method should never be called with CPU traits"); } - virtual void findCellsNeighboursHybrid(const int iteration) { LOGP(error, "findCellsNeighboursHybrid: this method should never be called with CPU traits"); } - virtual void findRoadsHybrid(const int iteration) { LOGP(error, "findRoadsHybrid: this method should never be called with CPU traits"); } - virtual void findTracksHybrid(const int iteration) { LOGP(error, "findTracksHybrid: this method should never be called with CPU traits"); } - virtual void findTracks() { LOGP(error, "findTracks: this method is deprecated."); } + + virtual bool supportsExtendTracks() const noexcept { return true; } virtual void extendTracks(const int iteration); + virtual bool supportsFindShortPrimaries() const noexcept { return true; } virtual void findShortPrimaries(); - virtual void setBz(float bz); + virtual bool trackFollowing(TrackITSExt* track, int rof, bool outward, const int iteration); virtual void processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeed, std::vector& updatedCellId); void UpdateTrackingParameters(const std::vector& trkPars); TimeFrame* getTimeFrame() { return mTimeFrame; } - void setIsGPU(const unsigned char isgpu) { mIsGPU = isgpu; }; + virtual void setBz(float bz); float getBz() const; void setCorrType(const o2::base::PropagatorImpl::MatCorrType type) { mCorrType = type; } bool isMatLUT() const; + virtual const char* getName() const noexcept { return "CPU"; } + virtual 
bool isGPU() const noexcept { return false; } // Others GPUhd() static consteval int4 getEmptyBinsRect() { return int4{0, 0, 0, 0}; } @@ -109,13 +97,11 @@ class TrackerTraits o2::gpu::GPUChainITS* mChain = nullptr; TimeFrame* mTimeFrame; std::vector mTrkParams; - bool mIsGPU = false; }; inline void TrackerTraits::initialiseTimeFrame(const int iteration) { mTimeFrame->initialise(iteration, mTrkParams[iteration], mTrkParams[iteration].NLayers); - setIsGPU(false); } inline float TrackerTraits::getBz() const diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h index b584bf6b8008b..6eacb94ebb1ea 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackingInterface.h @@ -37,9 +37,7 @@ class ITSTrackingInterface const bool overrBeamEst) : mIsMC{isMC}, mUseTriggers{trgType}, - mOverrideBeamEstimation{overrBeamEst} - { - } + mOverrideBeamEstimation{overrBeamEst} {} void setClusterDictionary(const o2::itsmft::TopologyDictionary* d) { mDict = d; } void setMeanVertex(const o2::dataformats::MeanVertexObject* v) @@ -56,6 +54,7 @@ class ITSTrackingInterface void initialise(); template void run(framework::ProcessingContext& pc); + void printSummary() const; virtual void updateTimeDependentParams(framework::ProcessingContext& pc); virtual void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj); diff --git a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx index 50dc1f5dfd039..c23ba0576c625 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx @@ -25,8 +25,7 @@ #include "ReconstructionDataFormats/Track.h" #include -#include -#include +#include #include #include #include @@ -37,17 +36,16 @@ namespace its { using o2::its::constants::GB; -Tracker::Tracker(o2::its::TrackerTraits* traits) 
+Tracker::Tracker(o2::its::TrackerTraits* traits) : mTraits(traits) { /// Initialise standard configuration with 1 iteration mTrkParams.resize(1); - mTraits = traits; } -Tracker::~Tracker() = default; - -void Tracker::clustersToTracks(std::function logger, std::function error) +void Tracker::clustersToTracks(LogFunc logger, LogFunc error) { + LogFunc evalLog = [](const std::string&) {}; + double total{0}; mTraits->UpdateTrackingParameters(mTrkParams); int maxNvertices{-1}; @@ -62,22 +60,20 @@ void Tracker::clustersToTracks(std::function logger, std::f if (iteration == 3 && mTrkParams[0].DoUPCIteration) { mTimeFrame->swapMasks(); } - logger(fmt::format("ITS Tracking iteration {} summary:", iteration)); double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); int nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? 
mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; int iVertex{std::min(maxNvertices, 0)}; + logger(std::format("==== ITS {} Tracking iteration {} summary ====", mTraits->getName(), iteration)); + total += evaluateTask(&Tracker::initialiseTimeFrame, "Timeframe initialisation", logger, iteration); do { for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); + timeTracklets += evaluateTask(&Tracker::computeTracklets, "Tracklet finding", evalLog, iteration, iROFs, iVertex); nTracklets += mTraits->getTFNumberOfTracklets(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -86,17 +82,16 @@ void Tracker::clustersToTracks(std::function logger, std::f } float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. 
Current limit is {}", + error(std::format("Too many tracklets per cluster ({}) in iteration {} in ROF span {}-{}:, check the detector status and/or the selections. Current limit is {}", trackletsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].TrackletsPerClusterLimit)); break; } - timeCells += evaluateTask( - &Tracker::computeCells, "Cell finding", [](std::string) {}, iteration); + timeCells += evaluateTask(&Tracker::computeCells, "Cell finding", evalLog, iteration); nCells += mTraits->getTFNumberOfCells(); if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations); - error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", + error(std::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.", iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB)); if (mTrkParams[iteration].DropTFUponFailure) { dropTF = true; @@ -105,131 +100,53 @@ void Tracker::clustersToTracks(std::function logger, std::f } float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. Current limit is {}", + error(std::format("Too many cells per cluster ({}) in iteration {} in ROF span {}-{}, check the detector status and/or the selections. 
Current limit is {}", cellsPerCluster, iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTrkParams[iteration].CellsPerClusterLimit)); break; } - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighbours, "Neighbour finding", [](std::string) {}, iteration); + timeNeighbours += evaluateTask(&Tracker::findCellsNeighbours, "Neighbour finding", evalLog, iteration); nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); + timeRoads += evaluateTask(&Tracker::findRoads, "Road finding", evalLog, iteration); } iVertex++; } while (iVertex < maxNvertices && !dropTF); - logger(fmt::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); + logger(std::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); + logger(std::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); + logger(std::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); + logger(std::format(" - Track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); total += timeTracklets + timeCells + timeNeighbours + timeRoads; - if (mTrkParams[iteration].UseTrackFollower) { + if (mTraits->supportsExtendTracks() && mTrkParams[iteration].UseTrackFollower && !dropTF) { int nExtendedTracks{-mTimeFrame->mNExtendedTracks}, nExtendedClusters{-mTimeFrame->mNExtendedUsedClusters}; auto timeExtending = evaluateTask(&Tracker::extendTracks, "Extending tracks", [](const std::string&) {}, iteration); total += 
timeExtending; - logger(fmt::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); + logger(std::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending)); } if (dropTF) { - error(fmt::format("...Dropping Timeframe...")); + error("...Dropping Timeframe..."); mTimeFrame->dropTracks(); - break; // breaking out the iterations loop + ++mNumberOfDroppedTFs; + return; } } - total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); - - std::stringstream sstream; - if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; + if (mTraits->supportsFindShortPrimaries() && mTrkParams[0].FindShortTracks) { + auto nTracksB = mTimeFrame->getNumberOfTracks(); + total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger); + auto nTracksA = mTimeFrame->getNumberOfTracks(); + logger(std::format(" `-> found {} additional tracks", nTracksA - nTracksB)); } - logger(sstream.str()); - if (mTimeFrame->hasMCinformation()) { - computeTracksMClabels(); - } - rectifyClusterIndices(); - mNumberOfRuns++; -} - -void Tracker::clustersToTracksHybrid(std::function logger, std::function error) -{ - double total{0.}; - mTraits->UpdateTrackingParameters(mTrkParams); - int maxNvertices{-1}; - if (mTrkParams[0].PerPrimaryVertexProcessing) { - for (int iROF{0}; iROF < mTimeFrame->getNrof(); ++iROF) { - maxNvertices = std::max(maxNvertices, (int)mTimeFrame->getPrimaryVertices(iROF).size()); - } - } - - for (int iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) { - int 
nROFsIterations = mTrkParams[iteration].nROFsPerIterations > 0 ? mTimeFrame->getNrof() / mTrkParams[iteration].nROFsPerIterations + bool(mTimeFrame->getNrof() % mTrkParams[iteration].nROFsPerIterations) : 1; - logger(fmt::format("=========== ITS Hybrid Tracking iteration {} summary ===========", iteration, nROFsIterations, maxNvertices)); - double timeTracklets{0.}, timeCells{0.}, timeNeighbours{0.}, timeRoads{0.}; - int nTracklets{0}, nCells{0}, nNeighbours{0}, nTracks{-static_cast(mTimeFrame->getNumberOfTracks())}; - - total += evaluateTask(&Tracker::initialiseTimeFrameHybrid, "Hybrid Timeframe initialisation", logger, iteration); - int iVertex{std::min(maxNvertices, 0)}; - - do { - for (int iROFs{0}; iROFs < nROFsIterations; ++iROFs) { - timeTracklets += evaluateTask( - &Tracker::computeTrackletsHybrid, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex); - nTracklets += mTraits->getTFNumberOfTracklets(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during trackleting in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f; - if (trackletsPerCluster > mTrkParams[iteration].TrackletsPerClusterLimit) { - error(fmt::format("Too many tracklets per cluster ({}) in iteration {}, check the detector status and/or the selections. 
Current limit is {}", trackletsPerCluster, iteration, mTrkParams[iteration].TrackletsPerClusterLimit)); - break; - } - - timeCells += evaluateTask( - &Tracker::computeCellsHybrid, "Cell finding", [](std::string) {}, iteration); - nCells += mTraits->getTFNumberOfCells(); - if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) { - error(fmt::format("Too much memory used during cell finding in iteration {}, check the detector status and/or the selections.", iteration)); - break; - } - float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f; - if (cellsPerCluster > mTrkParams[iteration].CellsPerClusterLimit) { - error(fmt::format("Too many cells per cluster ({}) in iteration {}, check the detector status and/or the selections. Current limit is {}", cellsPerCluster, iteration, mTrkParams[iteration].CellsPerClusterLimit)); - break; - } - - timeNeighbours += evaluateTask( - &Tracker::findCellsNeighboursHybrid, "Neighbour finding", [](std::string) {}, iteration); - nNeighbours += mTimeFrame->getNumberOfNeighbours(); - timeRoads += evaluateTask( - &Tracker::findRoads, "Road finding", [](std::string) {}, iteration); - } - iVertex++; - } while (iVertex < maxNvertices); - logger(fmt::format(" - Hybrid tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets)); - logger(fmt::format(" - Hybrid cell finding: {} cells found in {:.2f} ms", nCells, timeCells)); - logger(fmt::format(" - Hybrid neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours)); - logger(fmt::format(" - Hybrid track finding: {} tracks found in {:.2f} ms", nTracks + mTimeFrame->getNumberOfTracks(), timeRoads)); - total += timeTracklets + timeCells + timeNeighbours + timeRoads; - // total += evaluateTask(&Tracker::extendTracks, "Hybrid extending tracks", logger, iteration); - } - - // total += evaluateTask(&Tracker::findShortPrimaries, "Hybrid short primaries finding", 
logger); - - std::stringstream sstream; if constexpr (constants::DoTimeBenchmarks) { - sstream << std::setw(2) << " - " - << "Timeframe " << mTimeFrameCounter++ << " processing completed in: " << total << "ms using " << mTraits->getNThreads() << " threads."; + logger(std::format("=== TimeFrame {} processing completed in: {:.2f} ms using {} thread(s) ===", mTimeFrameCounter, total, mTraits->getNThreads())); } - logger(sstream.str()); if (mTimeFrame->hasMCinformation()) { computeTracksMClabels(); } rectifyClusterIndices(); - mNumberOfRuns++; + ++mTimeFrameCounter; + mTotalTime += total; } void Tracker::initialiseTimeFrame(int& iteration) @@ -257,41 +174,6 @@ void Tracker::findRoads(int& iteration) mTraits->findRoads(iteration); } -void Tracker::initialiseTimeFrameHybrid(int& iteration) -{ - mTraits->initialiseTimeFrameHybrid(iteration); -} - -void Tracker::computeTrackletsHybrid(int& iteration, int& iROFslice, int& iVertex) -{ - mTraits->computeTrackletsHybrid(iteration, iROFslice, iVertex); // placeholder for the proper ROF/vertex slicing -} - -void Tracker::computeCellsHybrid(int& iteration) -{ - mTraits->computeCellsHybrid(iteration); -} - -void Tracker::findCellsNeighboursHybrid(int& iteration) -{ - mTraits->findCellsNeighboursHybrid(iteration); -} - -void Tracker::findRoadsHybrid(int& iteration) -{ - mTraits->findRoadsHybrid(iteration); -} - -void Tracker::findTracksHybrid(int& iteration) -{ - mTraits->findTracksHybrid(iteration); -} - -void Tracker::findTracks() -{ - mTraits->findTracks(); -} - void Tracker::extendTracks(int& iteration) { mTraits->extendTracks(iteration); @@ -575,5 +457,11 @@ int Tracker::getNThreads() const { return mTraits->getNThreads(); } + +void Tracker::printSummary() const +{ + LOGP(info, "Tracker summary: Processed {} TFs (dropped {}) in TOT={:.2f} s, AVG/TF={:.2f} s", mTimeFrameCounter, mNumberOfDroppedTFs, mTotalTime * 1.e-3, mTotalTime * 1.e-3 / ((mTimeFrameCounter > 0) ? 
(double)mTimeFrameCounter : -1.0)); +} + } // namespace its } // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx index 8dcb7bfd315c1..987e8e3128fb4 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackerTraits.cxx @@ -19,7 +19,9 @@ #include #include -#include +#ifdef OPTIMISATION_OUTPUT +#include +#endif #include "CommonConstants/MathConstants.h" #include "DetectorsBase/Propagator.h" @@ -38,7 +40,7 @@ using o2::base::PropagatorF; namespace { -float Sq(float q) +inline float Sq(float q) { return q * q; } @@ -57,7 +59,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in #ifdef OPTIMISATION_OUTPUT static int iter{0}; - std::ofstream off(fmt::format("tracklets{}.txt", iter++)); + std::ofstream off(std::format("tracklets{}.txt", iter++)); #endif for (int iLayer = 0; iLayer < mTrkParams[iteration].TrackletsPerRoad(); ++iLayer) { @@ -173,7 +175,7 @@ void TrackerTraits::computeLayerTracklets(const int iteration, int iROFslice, in break; } } - off << fmt::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, label.isValid(), (tanLambda * (nextCluster.radius - currentCluster.radius) + currentCluster.zCoordinate - nextCluster.zCoordinate) / sigmaZ, tanLambda, resolution, sigmaZ) << std::endl; #endif if (deltaZ / sigmaZ < mTrkParams[iteration].NSigmaCut && @@ -270,7 +272,7 @@ void TrackerTraits::computeLayerCells(const int iteration) { #ifdef OPTIMISATION_OUTPUT static int iter{0}; - std::ofstream off(fmt::format("cells{}.txt", iter++)); + std::ofstream off(std::format("cells{}.txt", iter++)); #endif for (int iLayer = 0; iLayer < mTrkParams[iteration].CellsPerRoad(); ++iLayer) { 
@@ -318,7 +320,7 @@ void TrackerTraits::computeLayerCells(const int iteration) #ifdef OPTIMISATION_OUTPUT bool good{tf->getTrackletsLabel(iLayer)[iTracklet] == tf->getTrackletsLabel(iLayer + 1)[iNextTracklet]}; float signedDelta{currentTracklet.tanLambda - nextTracklet.tanLambda}; - off << fmt::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; + off << std::format("{}\t{:d}\t{}\t{}\t{}\t{}", iLayer, good, signedDelta, signedDelta / (mTrkParams[iteration].CellDeltaTanLambdaSigma), tanLambda, resolution) << std::endl; #endif if (deltaTanLambda / mTrkParams[iteration].CellDeltaTanLambdaSigma < mTrkParams[iteration].NSigmaCut) { @@ -402,7 +404,7 @@ void TrackerTraits::computeLayerCells(const int iteration) void TrackerTraits::findCellsNeighbours(const int iteration) { #ifdef OPTIMISATION_OUTPUT - std::ofstream off(fmt::format("cellneighs{}.txt", iteration)); + std::ofstream off(std::format("cellneighs{}.txt", iteration)); #endif for (int iLayer{0}; iLayer < mTrkParams[iteration].CellsPerRoad() - 1; ++iLayer) { const int nextLayerCellsNum{static_cast(mTimeFrame->getCells()[iLayer + 1].size())}; @@ -439,7 +441,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) #ifdef OPTIMISATION_OUTPUT bool good{mTimeFrame->getCellsLabel(iLayer)[iCell] == mTimeFrame->getCellsLabel(iLayer + 1)[iNextCell]}; - off << fmt::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl; + off << std::format("{}\t{:d}\t{}", iLayer, good, chi2) << std::endl; #endif if (chi2 > mTrkParams[0].MaxChi2ClusterAttachment) { @@ -469,6 +471,7 @@ void TrackerTraits::findCellsNeighbours(const int iteration) void TrackerTraits::processNeighbours(int iLayer, int iLevel, const std::vector& currentCellSeed, const std::vector& currentCellId, std::vector& updatedCellSeeds, std::vector& updatedCellsIds) { + bool print = iLayer == 3 && iLevel == 2; if (iLevel < 2 || iLayer < 1) { std::cout << 
"Error: layer " << iLayer << " or level " << iLevel << " cannot be processed by processNeighbours" << std::endl; exit(1); @@ -723,10 +726,7 @@ void TrackerTraits::extendTracks(const int iteration) void TrackerTraits::findShortPrimaries() { - if (!mTrkParams[0].FindShortTracks) { - return; - } - auto propagator = o2::base::Propagator::Instance(); + const auto propagator = o2::base::Propagator::Instance(); mTimeFrame->fillPrimaryVerticesXandAlpha(); for (auto& cell : mTimeFrame->getCells()[0]) { diff --git a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx index 613402ce56e97..f0dad2722a301 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx @@ -310,18 +310,10 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc) mTimeFrame->setMultiplicityCutMask(processingMask); mTimeFrame->setROFMask(processUPCMask); // Run CA tracker - if constexpr (isGPU) { - if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { - mTracker->clustersToTracksHybrid(logger, fatalLogger); - } else { - mTracker->clustersToTracksHybrid(logger, errorLogger); - } + if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { + mTracker->clustersToTracks(logger, fatalLogger); } else { - if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) { - mTracker->clustersToTracks(logger, fatalLogger); - } else { - mTracker->clustersToTracks(logger, errorLogger); - } + mTracker->clustersToTracks(logger, errorLogger); } size_t totTracks{mTimeFrame->getNumberOfTracks()}, totClusIDs{mTimeFrame->getNumberOfUsedClusters()}; allTracks.reserve(totTracks); @@ -438,6 +430,11 @@ void ITSTrackingInterface::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) } } +void ITSTrackingInterface::printSummary() const +{ + 
mTracker->printSummary(); +} + void ITSTrackingInterface::setTraitsFromProvider(VertexerTraits* vertexerTraits, TrackerTraits* trackerTraits, TimeFrame* frame) diff --git a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx index 9e4c98ad6e9a1..abbb88aea42fa 100644 --- a/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx +++ b/Detectors/ITSMFT/ITS/workflow/src/TrackerSpec.cxx @@ -48,6 +48,7 @@ void TrackerDPL::init(InitContext& ic) void TrackerDPL::stop() { + mITSTrackingInterface.printSummary(); LOGF(info, "CPU Reconstruction total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); } @@ -69,6 +70,7 @@ void TrackerDPL::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) void TrackerDPL::endOfStream(EndOfStreamContext& ec) { + mITSTrackingInterface.printSummary(); LOGF(info, "ITS CA-Tracker total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); } From 7175de4628fc409f267ae9eb3ebd2ccbe163b569 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Thu, 24 Apr 2025 09:15:53 +0200 Subject: [PATCH 0445/1914] Fixing member variable naming (#14217) * Fixing member variable naming * Please consider the following formatting changes * Changing to mPImplOrt --------- Co-authored-by: ALICE Action Bot --- Common/ML/include/ML/OrtInterface.h | 30 +-- Common/ML/src/OrtInterface.cxx | 250 +++++++++--------- .../Global/GPUChainTrackingClusterizer.cxx | 172 ++++++------ .../TPCClusterFinder/GPUTPCNNClusterizer.cxx | 78 +++--- .../TPCClusterFinder/GPUTPCNNClusterizer.h | 56 ++-- .../GPUTPCNNClusterizerHost.cxx | 82 +++--- .../GPUTPCNNClusterizerHost.h | 12 +- .../GPUTPCNNClusterizerKernels.cxx | 210 +++++++-------- 8 files changed, 444 insertions(+), 446 deletions(-) diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index ea70e28c0421c..b4f40f3f5c694 100644 --- 
a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -70,23 +70,23 @@ class OrtModel Ort::SessionOptions* getSessionOptions(); Ort::MemoryInfo* getMemoryInfo(); Ort::Env* getEnv(); - int32_t getIntraOpNumThreads() const { return intraOpNumThreads; } - int32_t getInterOpNumThreads() const { return interOpNumThreads; } + int32_t getIntraOpNumThreads() const { return mIntraOpNumThreads; } + int32_t getInterOpNumThreads() const { return mInterOpNumThreads; } // Setters - void setDeviceId(int32_t id) { deviceId = id; } + void setDeviceId(int32_t id) { mDeviceId = id; } void setIO(); - void setActiveThreads(int threads) { intraOpNumThreads = threads; } + void setActiveThreads(int threads) { mIntraOpNumThreads = threads; } void setIntraOpNumThreads(int threads) { - if (deviceType == "CPU") { - intraOpNumThreads = threads; + if (mDeviceType == "CPU") { + mIntraOpNumThreads = threads; } } void setInterOpNumThreads(int threads) { - if (deviceType == "CPU") { - interOpNumThreads = threads; + if (mDeviceType == "CPU") { + mInterOpNumThreads = threads; } } void setEnv(Ort::Env*); @@ -113,19 +113,19 @@ class OrtModel private: // ORT variables -> need to be hidden as pImpl struct OrtVariables; - OrtVariables* pImplOrt; + OrtVariables* mPImplOrt; // Input & Output specifications of the loaded network - std::vector inputNamesChar, outputNamesChar; + std::vector mInputNamesChar, mOutputNamesChar; std::vector mInputNames, mOutputNames; - std::vector> mInputShapes, mOutputShapes, inputShapesCopy, outputShapesCopy; // Input shapes - std::vector inputSizePerNode, outputSizePerNode; // Output shapes - int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs + std::vector> mInputShapes, mOutputShapes, mInputShapesCopy, mOutputShapesCopy; // Input shapes + std::vector mInputSizePerNode, mOutputSizePerNode; // Output shapes + int32_t mInputsTotal = 0, mOutputsTotal = 0; // Total number of inputs and outputs // Environment settings bool 
mInitialized = false; - std::string modelPath, envName = "", deviceType = "CPU", thread_affinity = ""; // device options should be cpu, rocm, migraphx, cuda - int32_t intraOpNumThreads = 1, interOpNumThreads = 1, deviceId = -1, enableProfiling = 0, loggingLevel = 0, allocateDeviceMemory = 0, enableOptimizations = 0; + std::string mModelPath, mEnvName = "", mDeviceType = "CPU", mThreadAffinity = ""; // device options should be cpu, rocm, migraphx, cuda + int32_t mIntraOpNumThreads = 1, mInterOpNumThreads = 1, mDeviceId = -1, mEnableProfiling = 0, mLoggingLevel = 0, mAllocateDeviceMemory = 0, mEnableOptimizations = 0; std::string printShape(const std::vector&); std::string printShape(const std::vector>&, std::vector&); diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index a8a20b11f9e64..df7f0a2deba82 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -41,7 +41,7 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c // General purpose void OrtModel::initOptions(std::unordered_map optionsMap) { - pImplOrt = new OrtVariables(); + mPImplOrt = new OrtVariables(); // Load from options map if (!optionsMap.contains("model-path")) { @@ -49,49 +49,49 @@ void OrtModel::initOptions(std::unordered_map optionsM } if (!optionsMap["model-path"].empty()) { - modelPath = optionsMap["model-path"]; - deviceType = (optionsMap.contains("device-type") ? optionsMap["device-type"] : "CPU"); - deviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : -1); - allocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); - intraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); - interOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); - loggingLevel = (optionsMap.contains("logging-level") ? 
std::stoi(optionsMap["logging-level"]) : 0); - enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); - enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); - envName = (optionsMap.contains("onnx-environment-name") ? optionsMap["onnx-environment-name"] : "onnx_model_inference"); - - if (deviceType == "CPU") { - (pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads); - (pImplOrt->sessionOptions).SetInterOpNumThreads(interOpNumThreads); - if (intraOpNumThreads > 1 || interOpNumThreads > 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); - } else if (intraOpNumThreads == 1) { - (pImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); + mModelPath = optionsMap["model-path"]; + mDeviceType = (optionsMap.contains("device-type") ? optionsMap["device-type"] : "CPU"); + mDeviceId = (optionsMap.contains("device-id") ? std::stoi(optionsMap["device-id"]) : -1); + mAllocateDeviceMemory = (optionsMap.contains("allocate-device-memory") ? std::stoi(optionsMap["allocate-device-memory"]) : 0); + mIntraOpNumThreads = (optionsMap.contains("intra-op-num-threads") ? std::stoi(optionsMap["intra-op-num-threads"]) : 0); + mInterOpNumThreads = (optionsMap.contains("inter-op-num-threads") ? std::stoi(optionsMap["inter-op-num-threads"]) : 0); + mLoggingLevel = (optionsMap.contains("logging-level") ? std::stoi(optionsMap["logging-level"]) : 0); + mEnableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0); + mEnableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0); + mEnvName = (optionsMap.contains("onnx-environment-name") ? 
optionsMap["onnx-environment-name"] : "onnx_model_inference"); + + if (mDeviceType == "CPU") { + (mPImplOrt->sessionOptions).SetIntraOpNumThreads(mIntraOpNumThreads); + (mPImplOrt->sessionOptions).SetInterOpNumThreads(mInterOpNumThreads); + if (mIntraOpNumThreads > 1 || mInterOpNumThreads > 1) { + (mPImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_PARALLEL); + } else if (mIntraOpNumThreads == 1) { + (mPImplOrt->sessionOptions).SetExecutionMode(ExecutionMode::ORT_SEQUENTIAL); } - if (loggingLevel < 2) { - LOG(info) << "(ORT) CPU execution provider set with " << intraOpNumThreads << " (intraOpNumThreads) and " << interOpNumThreads << " (interOpNumThreads) threads"; + if (mLoggingLevel < 2) { + LOG(info) << "(ORT) CPU execution provider set with " << mIntraOpNumThreads << " (mIntraOpNumThreads) and " << mInterOpNumThreads << " (mInterOpNumThreads) threads"; } } // OrtROCMProviderOptions rocm_options{}; - // (pImplOrt->sessionOptions).AppendExecutionProvider_ROCM(rocm_options); + // (mPImplOrt->sessionOptions).AppendExecutionProvider_ROCM(rocm_options); - (pImplOrt->sessionOptions).DisableMemPattern(); - (pImplOrt->sessionOptions).DisableCpuMemArena(); + (mPImplOrt->sessionOptions).DisableMemPattern(); + (mPImplOrt->sessionOptions).DisableCpuMemArena(); - if (enableProfiling) { + if (mEnableProfiling) { if (optionsMap.contains("profiling-output-path")) { - (pImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); + (mPImplOrt->sessionOptions).EnableProfiling((optionsMap["profiling-output-path"] + "/ORT_LOG_").c_str()); } else { LOG(warning) << "(ORT) If profiling is enabled, optionsMap[\"profiling-output-path\"] should be set. 
Disabling profiling for now."; - (pImplOrt->sessionOptions).DisableProfiling(); + (mPImplOrt->sessionOptions).DisableProfiling(); } } else { - (pImplOrt->sessionOptions).DisableProfiling(); + (mPImplOrt->sessionOptions).DisableProfiling(); } - (pImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(enableOptimizations)); - (pImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(loggingLevel)); + (mPImplOrt->sessionOptions).SetGraphOptimizationLevel(GraphOptimizationLevel(mEnableOptimizations)); + (mPImplOrt->sessionOptions).SetLogSeverityLevel(OrtLoggingLevel(mLoggingLevel)); mInitialized = true; } else { @@ -101,9 +101,9 @@ void OrtModel::initOptions(std::unordered_map optionsM void OrtModel::initEnvironment() { - pImplOrt->env = std::make_shared( - OrtLoggingLevel(loggingLevel), - (envName.empty() ? "ORT" : envName.c_str()), + mPImplOrt->env = std::make_shared( + OrtLoggingLevel(mLoggingLevel), + (mEnvName.empty() ? "ORT" : mEnvName.c_str()), // Integrate ORT logging into Fairlogger [](void* param, OrtLoggingLevel severity, const char* category, const char* logid, const char* code_location, const char* message) { if (severity == ORT_LOGGING_LEVEL_VERBOSE) { @@ -121,20 +121,20 @@ void OrtModel::initEnvironment() } }, (void*)3); - (pImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events + (mPImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events } void OrtModel::initSession() { - if (allocateDeviceMemory) { - memoryOnDevice(deviceId); + if (mAllocateDeviceMemory) { + memoryOnDevice(mDeviceId); } - pImplOrt->session = std::make_shared(*pImplOrt->env, modelPath.c_str(), pImplOrt->sessionOptions); - pImplOrt->ioBinding = std::make_unique(*pImplOrt->session); + mPImplOrt->session = std::make_shared(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->ioBinding = std::make_unique(*mPImplOrt->session); setIO(); - if (loggingLevel < 2) { + if (mLoggingLevel < 2) { LOG(info) << "(ORT) Model 
loaded successfully! (inputs: " << printShape(mInputShapes, mInputNames) << ", outputs: " << printShape(mOutputShapes, mInputNames) << ")"; } } @@ -142,47 +142,47 @@ void OrtModel::initSession() void OrtModel::memoryOnDevice(int32_t deviceIndex) { if (deviceIndex >= 0) { - (pImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); - (pImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h - (pImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time - (pImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + (mPImplOrt->runOptions).AddConfigEntry("disable_synchronize_execution_providers", "1"); + (mPImplOrt->sessionOptions).AddConfigEntry("session.use_device_allocator_for_initializers", "1"); // See kOrtSessionOptionsUseDeviceAllocatorForInitializers, https://github.com/microsoft/onnxruntime/blob/main/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h + (mPImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time + (mPImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile 
memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time // Arena memory shrinkage comes at performance cost /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; - // (pImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 + // (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 std::string dev_mem_str = ""; - if (deviceType == "ROCM") { + if (mDeviceType == "ROCM") { dev_mem_str = "Hip"; } - if (deviceType == "CUDA") { + if (mDeviceType == "CUDA") { dev_mem_str = "Cuda"; } - pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault); - if (loggingLevel < 2) { - LOG(info) << "(ORT) Memory info set to on-device memory for device type " << deviceType << " with ID " << deviceIndex << " and pImplOrt pointer " << pImplOrt; + mPImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, deviceIndex, OrtMemType::OrtMemTypeDefault); + if (mLoggingLevel < 2) { + LOG(info) << "(ORT) Memory info set to on-device memory for device type " << mDeviceType << " with ID " << deviceIndex << " and mPImplOrt pointer " << mPImplOrt; } } } void OrtModel::resetSession() { - 
pImplOrt->session = std::make_shared(*(pImplOrt->env), modelPath.c_str(), pImplOrt->sessionOptions); + mPImplOrt->session = std::make_shared(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); } // Getters Ort::SessionOptions* OrtModel::getSessionOptions() { - return &pImplOrt->sessionOptions; + return &mPImplOrt->sessionOptions; } Ort::MemoryInfo* OrtModel::getMemoryInfo() { - return &pImplOrt->memoryInfo; + return &mPImplOrt->memoryInfo; } Ort::Env* OrtModel::getEnv() { - return (pImplOrt->env).get(); + return (mPImplOrt->env).get(); } template @@ -202,37 +202,37 @@ std::vector OrtModel::v2v(std::vector& input, bool clearInput) void OrtModel::setIO() { - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputNames.push_back((pImplOrt->session)->GetInputNameAllocated(i, pImplOrt->allocator).get()); + for (size_t i = 0; i < (mPImplOrt->session)->GetInputCount(); ++i) { + mInputNames.push_back((mPImplOrt->session)->GetInputNameAllocated(i, mPImplOrt->allocator).get()); } - for (size_t i = 0; i < (pImplOrt->session)->GetInputCount(); ++i) { - mInputShapes.emplace_back((pImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + for (size_t i = 0; i < (mPImplOrt->session)->GetInputCount(); ++i) { + mInputShapes.emplace_back((mPImplOrt->session)->GetInputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputNames.push_back((pImplOrt->session)->GetOutputNameAllocated(i, pImplOrt->allocator).get()); + for (size_t i = 0; i < (mPImplOrt->session)->GetOutputCount(); ++i) { + mOutputNames.push_back((mPImplOrt->session)->GetOutputNameAllocated(i, mPImplOrt->allocator).get()); } - for (size_t i = 0; i < (pImplOrt->session)->GetOutputCount(); ++i) { - mOutputShapes.emplace_back((pImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); + for (size_t i = 0; i < (mPImplOrt->session)->GetOutputCount(); ++i) { + 
mOutputShapes.emplace_back((mPImplOrt->session)->GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape()); } - inputNamesChar.resize(mInputNames.size(), nullptr); - std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(inputNamesChar), + mInputNamesChar.resize(mInputNames.size(), nullptr); + std::transform(std::begin(mInputNames), std::end(mInputNames), std::begin(mInputNamesChar), [&](const std::string& str) { return str.c_str(); }); - outputNamesChar.resize(mOutputNames.size(), nullptr); - std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(outputNamesChar), + mOutputNamesChar.resize(mOutputNames.size(), nullptr); + std::transform(std::begin(mOutputNames), std::end(mOutputNames), std::begin(mOutputNamesChar), [&](const std::string& str) { return str.c_str(); }); - inputShapesCopy = mInputShapes; - outputShapesCopy = mOutputShapes; - inputSizePerNode.resize(mInputShapes.size(), 1); - outputSizePerNode.resize(mOutputShapes.size(), 1); + mInputShapesCopy = mInputShapes; + mOutputShapesCopy = mOutputShapes; + mInputSizePerNode.resize(mInputShapes.size(), 1); + mOutputSizePerNode.resize(mOutputShapes.size(), 1); mInputsTotal = 1; for (size_t i = 0; i < mInputShapes.size(); ++i) { if (mInputShapes[i].size() > 0) { for (size_t j = 1; j < mInputShapes[i].size(); ++j) { if (mInputShapes[i][j] > 0) { mInputsTotal *= mInputShapes[i][j]; - inputSizePerNode[i] *= mInputShapes[i][j]; + mInputSizePerNode[i] *= mInputShapes[i][j]; } } } @@ -243,7 +243,7 @@ void OrtModel::setIO() for (size_t j = 1; j < mOutputShapes[i].size(); ++j) { if (mOutputShapes[i][j] > 0) { mOutputsTotal *= mOutputShapes[i][j]; - outputSizePerNode[i] *= mOutputShapes[i][j]; + mOutputSizePerNode[i] *= mOutputShapes[i][j]; } } } @@ -252,7 +252,7 @@ void OrtModel::setIO() void OrtModel::setEnv(Ort::Env* env) { - pImplOrt->env = std::shared_ptr(env); + mPImplOrt->env = std::shared_ptr(env); } // Inference @@ -266,12 +266,12 @@ std::vector 
OrtModel::inference(std::vector& input) } std::vector inputTensor; if constexpr (std::is_same_v) { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); + inputTensor.emplace_back(Ort::Value::CreateTensor(mPImplOrt->memoryInfo, reinterpret_cast(input.data()), input.size(), inputShape.data(), inputShape.size())); } else { - inputTensor.emplace_back(Ort::Value::CreateTensor(pImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); + inputTensor.emplace_back(Ort::Value::CreateTensor(mPImplOrt->memoryInfo, input.data(), input.size(), inputShape.data(), inputShape.size())); } // input.clear(); - auto outputTensors = (pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), inputTensor.data(), inputTensor.size(), outputNamesChar.data(), outputNamesChar.size()); + auto outputTensors = (mPImplOrt->session)->Run(mPImplOrt->runOptions, mInputNamesChar.data(), inputTensor.data(), inputTensor.size(), mOutputNamesChar.data(), mOutputNamesChar.size()); O* outputValues = outputTensors[0].template GetTensorMutableData(); std::vector outputValuesVec{outputValues, outputValues + inputShape[0] * mOutputShapes[0][1]}; outputTensors.clear(); @@ -292,22 +292,22 @@ void OrtModel::inference(I* input, int64_t input_size, O* output) std::vector inputShape{input_size, (int64_t)mInputShapes[0][1]}; Ort::Value inputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(input), input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); + inputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, reinterpret_cast(input), input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } else { - inputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, input, input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); + 
inputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, input, input_size * mInputShapes[0][1], inputShape.data(), inputShape.size()); } - (pImplOrt->ioBinding)->BindInput(mInputNames[0].c_str(), inputTensor); + (mPImplOrt->ioBinding)->BindInput(mInputNames[0].c_str(), inputTensor); std::vector outputShape{input_size, mOutputShapes[0][1]}; Ort::Value outputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { - outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, reinterpret_cast(output), input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); + outputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, reinterpret_cast(output), input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); } else { - outputTensor = Ort::Value::CreateTensor(pImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); + outputTensor = Ort::Value::CreateTensor(mPImplOrt->memoryInfo, output, input_size * mOutputShapes[0][1], outputShape.data(), outputShape.size()); } - (pImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); + (mPImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); - (pImplOrt->session)->Run(pImplOrt->runOptions, *pImplOrt->ioBinding); + (mPImplOrt->session)->Run(mPImplOrt->runOptions, *mPImplOrt->ioBinding); } template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, OrtDataType::Float16_t*); @@ -318,56 +318,56 @@ template void OrtModel::inference(float*, int64_t, float*); template void OrtModel::inference(I** input, int64_t input_size, O* output) { - std::vector inputTensors(inputShapesCopy.size()); + std::vector inputTensors(mInputShapesCopy.size()); - for (size_t i = 0; i < inputShapesCopy.size(); ++i) { + for (size_t i = 0; i < mInputShapesCopy.size(); ++i) { - inputShapesCopy[i][0] = input_size; // batch-size - outputShapesCopy[i][0] = input_size; // batch-size + mInputShapesCopy[i][0] = input_size; // batch-size + 
mOutputShapesCopy[i][0] = input_size; // batch-size if constexpr (std::is_same_v) { inputTensors[i] = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, reinterpret_cast(input[i]), - inputSizePerNode[i] * input_size, - inputShapesCopy[i].data(), - inputShapesCopy[i].size()); + mInputSizePerNode[i] * input_size, + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size()); } else { inputTensors[i] = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, input[i], - inputSizePerNode[i] * input_size, - inputShapesCopy[i].data(), - inputShapesCopy[i].size()); + mInputSizePerNode[i] * input_size, + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size()); } } Ort::Value outputTensor = Ort::Value(nullptr); if constexpr (std::is_same_v) { outputTensor = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, reinterpret_cast(output), - outputSizePerNode[0] * input_size, // assumes that there is only one output node - outputShapesCopy[0].data(), - outputShapesCopy[0].size()); + mOutputSizePerNode[0] * input_size, // assumes that there is only one output node + mOutputShapesCopy[0].data(), + mOutputShapesCopy[0].size()); } else { outputTensor = Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, output, - outputSizePerNode[0] * input_size, // assumes that there is only one output node - outputShapesCopy[0].data(), - outputShapesCopy[0].size()); + mOutputSizePerNode[0] * input_size, // assumes that there is only one output node + mOutputShapesCopy[0].data(), + mOutputShapesCopy[0].size()); } // === Run inference === - pImplOrt->session->Run( - pImplOrt->runOptions, - inputNamesChar.data(), + mPImplOrt->session->Run( + mPImplOrt->runOptions, + mInputNamesChar.data(), inputTensors.data(), - inputNamesChar.size(), - outputNamesChar.data(), + mInputNamesChar.size(), + mOutputNamesChar.data(), &outputTensor, - outputNamesChar.size()); + mOutputNamesChar.size()); } template void 
OrtModel::inference(OrtDataType::Float16_t**, int64_t, OrtDataType::Float16_t*); @@ -382,37 +382,37 @@ std::vector OrtModel::inference(std::vector>& inputs) for (size_t i = 0; i < inputs.size(); ++i) { - inputShapesCopy[i][0] = inputs[i].size() / inputSizePerNode[i]; // batch-size + mInputShapesCopy[i][0] = inputs[i].size() / mInputSizePerNode[i]; // batch-size if constexpr (std::is_same_v) { input_tensors.emplace_back( Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, reinterpret_cast(inputs[i].data()), - inputSizePerNode[i] * inputShapesCopy[i][0], - inputShapesCopy[i].data(), - inputShapesCopy[i].size())); + mInputSizePerNode[i] * mInputShapesCopy[i][0], + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size())); } else { input_tensors.emplace_back( Ort::Value::CreateTensor( - pImplOrt->memoryInfo, + mPImplOrt->memoryInfo, inputs[i].data(), - inputSizePerNode[i] * inputShapesCopy[i][0], - inputShapesCopy[i].data(), - inputShapesCopy[i].size())); + mInputSizePerNode[i] * mInputShapesCopy[i][0], + mInputShapesCopy[i].data(), + mInputShapesCopy[i].size())); } } - int32_t totalOutputSize = mOutputsTotal * inputShapesCopy[0][0]; + int32_t totalOutputSize = mOutputsTotal * mInputShapesCopy[0][0]; // === Run inference === - auto output_tensors = pImplOrt->session->Run( - pImplOrt->runOptions, - inputNamesChar.data(), + auto output_tensors = mPImplOrt->session->Run( + mPImplOrt->runOptions, + mInputNamesChar.data(), input_tensors.data(), input_tensors.size(), - outputNamesChar.data(), - outputNamesChar.size()); + mOutputNamesChar.data(), + mOutputNamesChar.size()); // === Extract output values === O* output_data = output_tensors[0].template GetTensorMutableData(); @@ -428,9 +428,9 @@ template std::vector OrtModel::inferencesession->EndProfiling(); + // mPImplOrt->session->EndProfiling(); // } - LOG(info) << "(ORT) Size of pImplOrt: " << sizeof(*pImplOrt) << " bytes"; + LOG(info) << "(ORT) Size of mPImplOrt: " << sizeof(*mPImplOrt) << " bytes"; 
} // private diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 37c12b2a3b3f4..630c2200e5900 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -645,41 +645,41 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) // bool recreateMemoryAllocator = false; mRec->runParallelOuterLoop(doGPU, numLanes, [&](uint32_t lane) { nnApplications[lane].init(nn_settings); - if (nnApplications[lane].modelsUsed[0]) { - SetONNXGPUStream(*(nnApplications[lane].model_class).getSessionOptions(), lane, &deviceId); - (nnApplications[lane].model_class).setDeviceId(deviceId); - if (nnApplications[lane].model_class.getIntraOpNumThreads() > maxThreads) { - nnApplications[lane].model_class.setIntraOpNumThreads(maxThreads); + if (nnApplications[lane].mModelsUsed[0]) { + SetONNXGPUStream(*(nnApplications[lane].mModelClass).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].mModelClass).setDeviceId(deviceId); + if (nnApplications[lane].mModelClass.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].mModelClass.setIntraOpNumThreads(maxThreads); } - (nnApplications[lane].model_class).initEnvironment(); + (nnApplications[lane].mModelClass).initEnvironment(); // Registering this once seems to be enough, even with different environmnents / models. ONNX apparently uses this per device and stores the OrtAllocator internally. All models will then use the volatile allocation. // But environment must be valid, so we init the model environment first and use it here afterwards. // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator. 
// TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); // recreateMemoryAllocator = true; - (nnApplications[lane].model_class).initSession(); + (nnApplications[lane].mModelClass).initSession(); } - if (nnApplications[lane].modelsUsed[1]) { - SetONNXGPUStream(*(nnApplications[lane].model_reg_1).getSessionOptions(), lane, &deviceId); - (nnApplications[lane].model_reg_1).setDeviceId(deviceId); - if (nnApplications[lane].model_reg_1.getIntraOpNumThreads() > maxThreads) { - nnApplications[lane].model_reg_1.setIntraOpNumThreads(maxThreads); + if (nnApplications[lane].mModelsUsed[1]) { + SetONNXGPUStream(*(nnApplications[lane].mModelReg1).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].mModelReg1).setDeviceId(deviceId); + if (nnApplications[lane].mModelReg1.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].mModelReg1.setIntraOpNumThreads(maxThreads); } - // (nnApplications[lane].model_reg_1).setEnv((nnApplications[lane].model_class).getEnv()); - (nnApplications[lane].model_reg_1).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_reg_1).getEnv(), (nnApplications[lane].model_reg_1).getMemoryInfo(), mRec, recreateMemoryAllocator); - (nnApplications[lane].model_reg_1).initSession(); + // (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv()); + (nnApplications[lane].mModelReg1).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); + 
(nnApplications[lane].mModelReg1).initSession(); } - if (nnApplications[lane].modelsUsed[2]) { - SetONNXGPUStream(*(nnApplications[lane].model_reg_2).getSessionOptions(), lane, &deviceId); - (nnApplications[lane].model_reg_2).setDeviceId(deviceId); - if (nnApplications[lane].model_reg_2.getIntraOpNumThreads() > maxThreads) { - nnApplications[lane].model_reg_2.setIntraOpNumThreads(maxThreads); + if (nnApplications[lane].mModelsUsed[2]) { + SetONNXGPUStream(*(nnApplications[lane].mModelReg2).getSessionOptions(), lane, &deviceId); + (nnApplications[lane].mModelReg2).setDeviceId(deviceId); + if (nnApplications[lane].mModelReg2.getIntraOpNumThreads() > maxThreads) { + nnApplications[lane].mModelReg2.setIntraOpNumThreads(maxThreads); } - (nnApplications[lane].model_reg_2).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].model_class).getEnv(), (nnApplications[lane].model_class).getMemoryInfo(), mRec, recreateMemoryAllocator); - (nnApplications[lane].model_reg_2).initSession(); + (nnApplications[lane].mModelReg2).initEnvironment(); + // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + (nnApplications[lane].mModelReg2).initSession(); } if (nn_settings.nnClusterizerVerbosity < 3) { LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId; @@ -689,14 +689,14 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector]; GPUTPCNNClusterizer& clustererNNShadow = doGPU ? 
processorsShadow()->tpcNNClusterer[sector] : clustererNN; int32_t lane = sector % numLanes; - clustererNN.deviceId = deviceId; + clustererNN.mDeviceId = deviceId; clustererNN.mISector = sector; - clustererNN.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + clustererNN.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; nnApplications[lane].initClusterizer(nn_settings, clustererNN); if (doGPU) { - clustererNNShadow.deviceId = deviceId; + clustererNNShadow.mDeviceId = deviceId; clustererNNShadow.mISector = sector; - clustererNNShadow.nnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; + clustererNNShadow.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters; nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow); } AllocateRegisteredMemory(clustererNN.mMemoryId); @@ -975,62 +975,62 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) int withMC = (doGPU && propagateMCLabels); - if (clustererNNShadow.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { + if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } // float time_clusterizer = 0, time_fill = 0, time_networks = 0; - for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / clustererNNShadow.nnClusterizerBatchedMode); batch++) { - uint batchStart = batch * clustererNNShadow.nnClusterizerBatchedMode; - size_t iSize = CAMath::Min((uint)clustererNNShadow.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); + for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / 
clustererNNShadow.mNnClusterizerBatchedMode); batch++) { + uint batchStart = batch * clustererNNShadow.mNnClusterizerBatchedMode; + size_t iSize = CAMath::Min((uint)clustererNNShadow.mNnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart)); // auto start0 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(iSize * clustererNNShadow.nnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, batchStart); // Filling the data + runKernel({GetGrid(iSize * clustererNNShadow.mNnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Filling the data // auto stop0 = std::chrono::high_resolution_clock::now(); // auto start1 = std::chrono::high_resolution_clock::now(); // NN evaluations - if (clustererNNShadow.nnInferenceInputDType == 0) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_class).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.modelProbabilities_32); + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32); } - } else if (clustererNNShadow.nnInferenceInputDType == 1) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - 
(nnApplication.model_class).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.modelProbabilities_32); + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32); } } - if (!clustererNNShadow.nnClusterizerUseCfRegression) { - if (clustererNNShadow.nnInferenceInputDType == 0) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg1_32); + if (!clustererNNShadow.mNnClusterizerUseCfRegression) { + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_32); } - } else if (clustererNNShadow.nnInferenceInputDType == 1) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_1).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg1_32); + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if 
(clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32); } } - if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { - if (clustererNNShadow.nnInferenceInputDType == 0) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_16, iSize, clustererNNShadow.outputDataReg2_32); + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { + if (clustererNNShadow.mNnInferenceInputDType == 0) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_32); } - } else if (clustererNNShadow.nnInferenceInputDType == 1) { - if (clustererNNShadow.nnInferenceOutputDType == 0) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_16); - } else if (clustererNNShadow.nnInferenceOutputDType == 1) { - (nnApplication.model_reg_2).inference(clustererNNShadow.inputData_32, iSize, clustererNNShadow.outputDataReg2_32); + } else if (clustererNNShadow.mNnInferenceInputDType == 1) { + if (clustererNNShadow.mNnInferenceOutputDType == 0) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, 
clustererNNShadow.mOutputDataReg2_16); + } else if (clustererNNShadow.mNnInferenceOutputDType == 1) { + (nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32); } } } @@ -1039,24 +1039,24 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) // auto stopNNs = std::chrono::high_resolution_clock::now(); // Publishing kernels - if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Assigning class labels } else { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Assigning class labels + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Assigning class labels } - if (!clustererNNShadow.nnClusterizerUseCfRegression) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 1 regression results - if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.model_reg_2.isInitialized()) { - runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results + if (!clustererNNShadow.mNnClusterizerUseCfRegression) { + runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Publishing class 1 regression results + if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) { + 
runKernel({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results } } // for(int i = 0; i < iSize; ++i) { - // if(clustererNNShadow.outputDataClass[i + batchStart] > 1) { - // LOG(info) << "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.modelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.outputDataClass[i + batchStart] << " thresh " << clustererNNShadow.nnClassThreshold << " instead of 0 or 1. Please check the model and the input data."; + // if(clustererNNShadow.mOutputDataClass[i + batchStart] > 1) { + // LOG(info) << "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.mModelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.mOutputDataClass[i + batchStart] << " thresh " << clustererNNShadow.mNnClassThreshold << " instead of 0 or 1. 
Please check the model and the input data."; // // std::string input = "["; - // // for(int j = 0; j < clustererNNShadow.nnClusterizerElementSize; j++){ - // // input += std::to_string(clustererNNShadow.inputData_16[i * clustererNNShadow.nnClusterizerElementSize + j].ToFloat()) + ", "; + // // for(int j = 0; j < clustererNNShadow.mNnClusterizerElementSize; j++){ + // // input += std::to_string(clustererNNShadow.mInputData_16[i * clustererNNShadow.mNnClusterizerElementSize + j].ToFloat()) + ", "; // // } // // input += "]"; // // LOG(info) << "Input is: " << input; @@ -1069,19 +1069,19 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; // time_fill += std::chrono::duration_cast(stop0 - start0).count() / 1e9; } - if (clustererNNShadow.nnClusterizerUseCfRegression) { + if (clustererNNShadow.mNnClusterizerUseCfRegression) { // auto start1 = std::chrono::high_resolution_clock::now(); - runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.nnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 + runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0 // auto stop1 = std::chrono::high_resolution_clock::now(); // time_clusterizer += std::chrono::duration_cast(stop1 - start1).count() / 1e9; } - // if (clustererNNShadow.nnClusterizerVerbosity < 3) { + // if (clustererNNShadow.mNnClusterizerVerbosity < 3) { // int acceptedClusters = 0; // for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) { - // if(clustererNNShadow.outputDataClass[i] > 1 || clustererNNShadow.outputDataClass[i] < 0) { - // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.outputDataClass[i] << " for index " << i << " / " 
<< clusterer.mPmemory->counters.nClusters; + // if(clustererNNShadow.mOutputDataClass[i] > 1 || clustererNNShadow.mOutputDataClass[i] < 0) { + // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.mOutputDataClass[i] << " for index " << i << " / " << clusterer.mPmemory->counters.nClusters; // } - // acceptedClusters += clustererNNShadow.outputDataClass[i]; + // acceptedClusters += clustererNNShadow.mOutputDataClass[i]; // } // LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; networks: " << time_networks << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << (int32_t)clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s"; // } @@ -1187,9 +1187,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { // if (GetProcessingSettings().nn.applyNNclusterizer) { // GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; - // nnApplication.model_class.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.model_reg_1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.model_reg_2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.mModelClass.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.mModelReg1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); + // nnApplication.mModelReg2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); // } if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx index 092af2ea393c5..da37c0771fe84 100644 --- 
a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx @@ -25,69 +25,69 @@ void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {} void* GPUTPCNNClusterizer::setIOPointers(void* mem) { - if (nnClusterizerBatchedMode > 0) { - if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData_16, nnClusterizerBatchedMode * nnClusterizerElementSize); - } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { - computePointerWithAlignment(mem, inputData_32, nnClusterizerBatchedMode * nnClusterizerElementSize); + if (mNnClusterizerBatchedMode > 0) { + if (mNnInferenceInputDType == 0 && mNnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, mInputData_16, mNnClusterizerBatchedMode * mNnClusterizerElementSize); + } else if (mNnInferenceInputDType == 1 && mNnClusterizerElementSize > 0) { + computePointerWithAlignment(mem, mInputData_32, mNnClusterizerBatchedMode * mNnClusterizerElementSize); } - computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode); + computePointerWithAlignment(mem, mClusterFlags, 2 * mNnClusterizerBatchedMode); - if (nnInferenceOutputDType == 0 && nnClusterizerElementSize > 0) { - if (nnClusterizerModelClassNumOutputNodes > 0) { - computePointerWithAlignment(mem, modelProbabilities_16, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + if (mNnInferenceOutputDType == 0 && mNnClusterizerElementSize > 0) { + if (mNnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, mModelProbabilities_16, mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes); } - if (!nnClusterizerUseCfRegression) { - if (nnClusterizerModelReg1NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg1_16, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + if (!mNnClusterizerUseCfRegression) { + if 
(mNnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg1_16, mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes); } - if (nnClusterizerModelReg2NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg2_16, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + if (mNnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg2_16, mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes); } } - } else if (nnInferenceOutputDType == 1 && nnClusterizerElementSize > 0) { - if (nnClusterizerModelClassNumOutputNodes > 0) { - computePointerWithAlignment(mem, modelProbabilities_32, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes); + } else if (mNnInferenceOutputDType == 1 && mNnClusterizerElementSize > 0) { + if (mNnClusterizerModelClassNumOutputNodes > 0) { + computePointerWithAlignment(mem, mModelProbabilities_32, mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes); } - if (!nnClusterizerUseCfRegression) { - if (nnClusterizerModelReg1NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg1_32, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes); + if (!mNnClusterizerUseCfRegression) { + if (mNnClusterizerModelReg1NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg1_32, mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes); } - if (nnClusterizerModelReg2NumOutputNodes > 0) { - computePointerWithAlignment(mem, outputDataReg2_32, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes); + if (mNnClusterizerModelReg2NumOutputNodes > 0) { + computePointerWithAlignment(mem, mOutputDataReg2_32, mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes); } } } } - if (nnClusterizerTotalClusters > 0) { - computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters); + if (mNnClusterizerTotalClusters > 0) { + 
computePointerWithAlignment(mem, mOutputDataClass, mNnClusterizerTotalClusters); } return mem; } // std::vector GPUTPCNNClusterizer::pointerSizes() { // std::vector sizes(7, -1); -// if (nnClusterizerBatchedMode > 0) { -// if (nnInferenceInputDType == 0 && nnClusterizerElementSize > 0) { -// sizes[0] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData16 -// } else if (nnInferenceInputDType == 1 && nnClusterizerElementSize > 0) { -// sizes[1] = nnClusterizerBatchedMode * nnClusterizerElementSize; // inputData32 +// if (mNnClusterizerBatchedMode > 0) { +// if (mNnInferenceInputDType == 0 && mNnClusterizerElementSize > 0) { +// sizes[0] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData16 +// } else if (mNnInferenceInputDType == 1 && mNnClusterizerElementSize > 0) { +// sizes[1] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData32 // } -// sizes[2] = 2 * nnClusterizerBatchedMode; // clusterFlags -// if (nnClusterizerModelClassNumOutputNodes > 0) { -// sizes[3] = nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes; // modelProbabilities +// sizes[2] = 2 * mNnClusterizerBatchedMode; // mClusterFlags +// if (mNnClusterizerModelClassNumOutputNodes > 0) { +// sizes[3] = mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes; // modelProbabilities // } -// if (!nnClusterizerUseCfRegression) { -// if (nnClusterizerModelReg1NumOutputNodes > 0) { -// sizes[4] = nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes; // outputDataReg1 +// if (!mNnClusterizerUseCfRegression) { +// if (mNnClusterizerModelReg1NumOutputNodes > 0) { +// sizes[4] = mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes; // outputDataReg1 // } -// if (nnClusterizerModelReg2NumOutputNodes > 0) { -// sizes[5] = nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes; // outputDataReg2 +// if (mNnClusterizerModelReg2NumOutputNodes > 0) { +// sizes[5] = mNnClusterizerBatchedMode * 
mNnClusterizerModelReg2NumOutputNodes; // outputDataReg2 // } // } // } -// if (nnClusterizerTotalClusters > 0) { -// sizes[6] = nnClusterizerTotalClusters; // outputDataClass +// if (mNnClusterizerTotalClusters > 0) { +// sizes[6] = mNnClusterizerTotalClusters; // mOutputDataClass // } // return sizes; // } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h index 022642f9f142e..f7c2d13407b0e 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h @@ -37,42 +37,42 @@ class GPUTPCNNClusterizer : public GPUProcessor // Neural network clusterization - int nnClusterizerSizeInputRow = 3; - int nnClusterizerSizeInputPad = 3; - int nnClusterizerSizeInputTime = 3; - int nnClusterizerElementSize = -1; - bool nnClusterizerAddIndexData = true; - float nnClassThreshold = 0.01; - bool nnSigmoidTrafoClassThreshold = 1; - int nnClusterizerUseCfRegression = 0; - int nnClusterizerBatchedMode = 1; - int nnClusterizerTotalClusters = 1; - int nnClusterizerVerbosity = 0; - int nnClusterizerBoundaryFillValue = -1; - int nnClusterizerModelClassNumOutputNodes = -1; - int nnClusterizerModelReg1NumOutputNodes = -1; - int nnClusterizerModelReg2NumOutputNodes = -1; - int nnInferenceInputDType = 0; // 0: float16, 1: float32 - int nnInferenceOutputDType = 0; // 0: float16, 1: float32 + int mNnClusterizerSizeInputRow = 3; + int mNnClusterizerSizeInputPad = 3; + int mNnClusterizerSizeInputTime = 3; + int mNnClusterizerElementSize = -1; + bool mNnClusterizerAddIndexData = true; + float mNnClassThreshold = 0.01; + bool mNnSigmoidTrafoClassThreshold = 1; + int mNnClusterizerUseCfRegression = 0; + int mNnClusterizerBatchedMode = 1; + int mNnClusterizerTotalClusters = 1; + int mNnClusterizerVerbosity = 0; + int mNnClusterizerBoundaryFillValue = -1; + int mNnClusterizerModelClassNumOutputNodes = -1; + int mNnClusterizerModelReg1NumOutputNodes = -1; + int 
mNnClusterizerModelReg2NumOutputNodes = -1; + int mNnInferenceInputDType = 0; // 0: float16, 1: float32 + int mNnInferenceOutputDType = 0; // 0: float16, 1: float32 int mISector = -1; - int deviceId = -1; + int mDeviceId = -1; // Memory allocation for neural network - bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptr - int* outputDataClass = nullptr; + bool* mClusterFlags = nullptr; // mSplitInTime, mSplitInPad. Techincally both flags are set in the same way -> ClusterAccumulator.cx=nullptr + int* mOutputDataClass = nullptr; // FP32 - float* inputData_32 = nullptr; - float* modelProbabilities_32 = nullptr; - float* outputDataReg1_32 = nullptr; - float* outputDataReg2_32 = nullptr; + float* mInputData_32 = nullptr; + float* mModelProbabilities_32 = nullptr; + float* mOutputDataReg1_32 = nullptr; + float* mOutputDataReg2_32 = nullptr; // FP16 - OrtDataType::Float16_t* inputData_16 = nullptr; - OrtDataType::Float16_t* modelProbabilities_16 = nullptr; - OrtDataType::Float16_t* outputDataReg1_16 = nullptr; - OrtDataType::Float16_t* outputDataReg2_16 = nullptr; + OrtDataType::Float16_t* mInputData_16 = nullptr; + OrtDataType::Float16_t* mModelProbabilities_16 = nullptr; + OrtDataType::Float16_t* mOutputDataReg1_16 = nullptr; + OrtDataType::Float16_t* mOutputDataReg2_16 = nullptr; int16_t mMemoryId = -1; }; // class GPUTPCNNClusterizer diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index 31b71fd8f1ebe..ca2deec60601c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -45,7 +45,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set } } - OrtOptions = { + mOrtOptions = { {"model-path", class_model_path}, {"device-type", settings.nnInferenceDevice}, {"allocate-device-memory", 
std::to_string(settings.nnInferenceAllocateDevMem)}, @@ -57,60 +57,60 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set {"logging-level", std::to_string(settings.nnInferenceVerbosity)}, {"onnx-environment-name", "c1"}}; - model_class.initOptions(OrtOptions); - modelsUsed[0] = true; + mModelClass.initOptions(mOrtOptions); + mModelsUsed[0] = true; reg_model_paths_local = o2::utils::Str::tokenize(reg_model_path, ':'); if (!settings.nnClusterizerUseCfRegression) { if (reg_model_paths_local.size() == 1) { - OrtOptions["model-path"] = reg_model_paths_local[0]; - OrtOptions["onnx-environment-name"] = "r1"; - model_reg_1.initOptions(OrtOptions); - modelsUsed[1] = true; + mOrtOptions["model-path"] = reg_model_paths_local[0]; + mOrtOptions["onnx-environment-name"] = "r1"; + mModelReg1.initOptions(mOrtOptions); + mModelsUsed[1] = true; } else { - OrtOptions["model-path"] = reg_model_paths_local[0]; - OrtOptions["onnx-environment-name"] = "r1"; - model_reg_1.initOptions(OrtOptions); - modelsUsed[1] = true; - OrtOptions["model-path"] = reg_model_paths_local[1]; - OrtOptions["onnx-environment-name"] = "r2"; - model_reg_2.initOptions(OrtOptions); - modelsUsed[2] = true; + mOrtOptions["model-path"] = reg_model_paths_local[0]; + mOrtOptions["onnx-environment-name"] = "r1"; + mModelReg1.initOptions(mOrtOptions); + mModelsUsed[1] = true; + mOrtOptions["model-path"] = reg_model_paths_local[1]; + mOrtOptions["onnx-environment-name"] = "r2"; + mModelReg2.initOptions(mOrtOptions); + mModelsUsed[2] = true; } } } void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN) { - clustererNN.nnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression; - clustererNN.nnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow; - clustererNN.nnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad; - clustererNN.nnClusterizerSizeInputTime = 
settings.nnClusterizerSizeInputTime; - clustererNN.nnClusterizerAddIndexData = settings.nnClusterizerAddIndexData; - clustererNN.nnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 3 : 0); - clustererNN.nnClusterizerBatchedMode = settings.nnClusterizerBatchedMode; - clustererNN.nnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue; - clustererNN.nnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold; - if (clustererNN.nnSigmoidTrafoClassThreshold) { - clustererNN.nnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold)); + clustererNN.mNnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression; + clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow; + clustererNN.mNnClusterizerSizeInputPad = settings.nnClusterizerSizeInputPad; + clustererNN.mNnClusterizerSizeInputTime = settings.nnClusterizerSizeInputTime; + clustererNN.mNnClusterizerAddIndexData = settings.nnClusterizerAddIndexData; + clustererNN.mNnClusterizerElementSize = ((2 * settings.nnClusterizerSizeInputRow + 1) * (2 * settings.nnClusterizerSizeInputPad + 1) * (2 * settings.nnClusterizerSizeInputTime + 1)) + (settings.nnClusterizerAddIndexData ? 
3 : 0); + clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode; + clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue; + clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold; + if (clustererNN.mNnSigmoidTrafoClassThreshold) { + clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold)); } else { - clustererNN.nnClassThreshold = settings.nnClassThreshold; + clustererNN.mNnClassThreshold = settings.nnClassThreshold; } if (settings.nnClusterizerVerbosity < 0) { - clustererNN.nnClusterizerVerbosity = settings.nnInferenceVerbosity; + clustererNN.mNnClusterizerVerbosity = settings.nnInferenceVerbosity; } else { - clustererNN.nnClusterizerVerbosity = settings.nnClusterizerVerbosity; + clustererNN.mNnClusterizerVerbosity = settings.nnClusterizerVerbosity; } - clustererNN.nnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos; - clustererNN.nnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos; - clustererNN.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1]; + clustererNN.mNnInferenceInputDType = settings.nnInferenceInputDType.find("32") != std::string::npos; + clustererNN.mNnInferenceOutputDType = settings.nnInferenceOutputDType.find("32") != std::string::npos; + clustererNN.mNnClusterizerModelClassNumOutputNodes = mModelClass.getNumOutputNodes()[0][1]; if (!settings.nnClusterizerUseCfRegression) { - if (model_class.getNumOutputNodes()[0][1] == 1 || !model_reg_2.isInitialized()) { - clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; + if (mModelClass.getNumOutputNodes()[0][1] == 1 || !mModelReg2.isInitialized()) { + clustererNN.mNnClusterizerModelReg1NumOutputNodes = mModelReg1.getNumOutputNodes()[0][1]; } else { - clustererNN.nnClusterizerModelReg1NumOutputNodes = model_reg_1.getNumOutputNodes()[0][1]; - 
clustererNN.nnClusterizerModelReg2NumOutputNodes = model_reg_2.getNumOutputNodes()[0][1]; + clustererNN.mNnClusterizerModelReg1NumOutputNodes = mModelReg1.getNumOutputNodes()[0][1]; + clustererNN.mNnClusterizerModelReg2NumOutputNodes = mModelReg2.getNumOutputNodes()[0][1]; } } } @@ -199,20 +199,20 @@ void MockedOrtAllocator::LeakCheck() void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) { - mockedAlloc = std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); + mMockedAlloc = std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); if (recreate) { Ort::ThrowOnError(Ort::GetApi().UnregisterAllocator((OrtEnv*)(*env), (OrtMemoryInfo*)(*memInfo))); } - Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mockedAlloc.get())); - memInfo = (Ort::MemoryInfo*)mockedAlloc->Info(); + Ort::ThrowOnError(Ort::GetApi().RegisterAllocator((OrtEnv*)(*env), mMockedAlloc.get())); + memInfo = (Ort::MemoryInfo*)mMockedAlloc->Info(); } const OrtMemoryInfo* GPUTPCNNClusterizerHost::getMockedMemoryInfo() { - return mockedAlloc->Info(); + return mMockedAlloc->Info(); } MockedOrtAllocator* GPUTPCNNClusterizerHost::getMockedAllocator() { - return mockedAlloc.get(); + return mMockedAlloc.get(); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h index 0379b83d0ae02..e659753f21d7d 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -57,13 +57,11 @@ class GPUTPCNNClusterizerHost MockedOrtAllocator* getMockedAllocator(); const OrtMemoryInfo* getMockedMemoryInfo(); - std::unordered_map OrtOptions; - o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters - std::vector modelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2 - int32_t deviceId = -1; - std::vector reg_model_paths; - - std::shared_ptr mockedAlloc = 
nullptr; + std::unordered_map mOrtOptions; + o2::ml::OrtModel mModelClass, mModelReg1, mModelReg2; // For splitting clusters + std::vector mModelsUsed = {false, false, false}; // 0: class, 1: reg_1, 2: reg_2 + int32_t mDeviceId = -1; + std::shared_ptr mMockedAlloc = nullptr; }; // class GPUTPCNNClusterizerHost } // namespace o2::gpu diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 413293502d3c6..47bc5e8da80ca 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -40,7 +40,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); @@ -56,56 +56,56 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); #ifndef GPUCA_GPUCODE GPUCA_UNROLL(U(), U()); #endif - for (int r = -clustererNN.nnClusterizerSizeInputRow; r <= clustererNN.nnClusterizerSizeInputRow; r++) { + for (int r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); int pad_offset = is_row_boundary ? 
0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); - for (int p = -clustererNN.nnClusterizerSizeInputPad + pad_offset; p <= clustererNN.nnClusterizerSizeInputPad + pad_offset; p++) { - bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); - for (int t = -clustererNN.nnClusterizerSizeInputTime; t <= clustererNN.nnClusterizerSizeInputTime; t++) { + for (int p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { + bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); + for (int t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { if (!is_boundary) { CfChargePos tmp_pos(row + r, pad + p, time + t); - if (r == 0 && !clustererNN.clusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization - clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); - clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; + if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization + clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx]; } if (dtype == 0) { - clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); } else if (dtype == 1) { - clustererNN.inputData_32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + 
clustererNN.mInputData_32[write_idx] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; } } else { // Filling boundary just to make sure that no values are left unintentionally if (dtype == 0) { - clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast(clustererNN.mNnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData_32[write_idx] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.mInputData_32[write_idx] = static_cast(clustererNN.mNnClusterizerBoundaryFillValue); } } write_idx++; } } } - if (clustererNN.nnClusterizerAddIndexData) { + if (clustererNN.mNnClusterizerAddIndexData) { if (dtype == 0) { - clustererNN.inputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f); - clustererNN.inputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); } else { - clustererNN.inputData_32[write_idx] = sector / 36.f; - clustererNN.inputData_32[write_idx + 1] = row / 152.f; - clustererNN.inputData_32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); + clustererNN.mInputData_32[write_idx] = sector / 36.f; + clustererNN.mInputData_32[write_idx + 1] = row / 152.f; + clustererNN.mInputData_32[write_idx + 2] = static_cast(pad) / GPUTPCGeometry::NPads(row); } } } @@ -116,62 +116,62 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); CfChargePos peak = 
clusterer.mPfilteredPeakPositions[base_idx + batchStart]; int row = static_cast(peak.row()), pad = static_cast(peak.pad()); - if (clustererNN.nnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.nnClusterizerElementSize - 1)) { - uint top_idx = (base_idx + 1) * clustererNN.nnClusterizerElementSize; + if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) { + uint top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; CfChargePos tmp_pos = peak.delta(d); - clustererNN.clusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); - clustererNN.clusterFlags[2 * glo_idx + 1] = clustererNN.clusterFlags[2 * glo_idx]; + clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]); + clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx]; } if (dtype == 0) { - clustererNN.inputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f); - clustererNN.inputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f); - clustererNN.inputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); + clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f); + clustererNN.mInputData_16[top_idx - 2] = (OrtDataType::Float16_t)(row / 152.f); + clustererNN.mInputData_16[top_idx - 1] = (OrtDataType::Float16_t)(static_cast(pad) / GPUTPCGeometry::NPads(row)); } else { - clustererNN.inputData_32[top_idx - 3] = sector / 36.f; - clustererNN.inputData_32[top_idx - 2] = row / 152.f; - clustererNN.inputData_32[top_idx - 1] = static_cast(pad) / GPUTPCGeometry::NPads(row); + clustererNN.mInputData_32[top_idx - 3] = sector / 36.f; + clustererNN.mInputData_32[top_idx - 2] = row / 152.f; + clustererNN.mInputData_32[top_idx - 1] = static_cast(pad) / GPUTPCGeometry::NPads(row); } - } else if ((int32_t)transient_index < 
(clustererNN.nnClusterizerElementSize - 3)) { + } else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) { int time = static_cast(peak.time()); - int r = CAMath::Floor(transient_index / ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1))) - clustererNN.nnClusterizerSizeInputRow; + int r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); if (is_row_boundary) { if (dtype == 0) { - clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.mNnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast(clustererNN.mNnClusterizerBoundaryFillValue); } } else { - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.nnClusterizerSizeInputRow); + int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); int pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r); - int rest_1 = transient_index % ((2 * clustererNN.nnClusterizerSizeInputPad + 1) * (2 * clustererNN.nnClusterizerSizeInputTime + 1)); - int p = CAMath::Floor(rest_1 / (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputPad + pad_offset; - bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + 
row_offset, pad + p, clustererNN.nnClusterizerSizeInputRow); + int rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); + int p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; + bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); if (!is_boundary) { float central_charge = static_cast(chargeMap[peak].unpack()); - int t = (rest_1 % (2 * clustererNN.nnClusterizerSizeInputTime + 1)) - clustererNN.nnClusterizerSizeInputTime; + int t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; CfChargePos tmp_pos(row + r, pad + p, time + t); if (dtype == 0) { - clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); + clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); } else if (dtype == 1) { - clustererNN.inputData_32[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; + clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast(chargeMap[tmp_pos].unpack()) / central_charge; } } else { if (dtype == 0) { - clustererNN.inputData_16[base_idx * clustererNN.nnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.nnClusterizerBoundaryFillValue)); + clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(clustererNN.mNnClusterizerBoundaryFillValue)); } else { - clustererNN.inputData_32[base_idx * 
clustererNN.nnClusterizerElementSize + transient_index] = static_cast(clustererNN.nnClusterizerBoundaryFillValue); + clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast(clustererNN.mNnClusterizerBoundaryFillValue); } } } @@ -183,9 +183,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread processors.tpcNNClusterer[sector].nnClassThreshold); + processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].mModelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].mNnClassThreshold); } else if (dtype == 1) { - processors.tpcNNClusterer[sector].outputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].modelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].nnClassThreshold); + processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)(processors.tpcNNClusterer[sector].mModelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].mNnClassThreshold); } } @@ -194,29 +194,29 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(clustererNN.modelProbabilities_16[pIdx]); + current_max_prob = static_cast(clustererNN.mModelProbabilities_16[pIdx]); } else if (dtype == 1) { - current_max_prob = clustererNN.modelProbabilities_32[pIdx]; + current_max_prob = clustererNN.mModelProbabilities_32[pIdx]; } } else { if (dtype == 0) { - current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_16[pIdx].ToFloat()); + current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat()); } else if (dtype == 1) { - current_max_prob = CAMath::Max(current_max_prob, clustererNN.modelProbabilities_32[pIdx]); + current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]); } } } - // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.nnClusterizerModelClassNumOutputNodes)); 
// Multiple outputs of the class network are the probabilities for each class. The highest one "wins" - clustererNN.outputDataClass[glo_idx + batchStart] = class_label; + // uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins" + clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label; if (class_label > 1) { - clustererNN.clusterFlags[2 * glo_idx] = 1; - clustererNN.clusterFlags[2 * glo_idx + 1] = 1; + clustererNN.mClusterFlags[2 * glo_idx] = 1; + clustererNN.mClusterFlags[2 * glo_idx + 1] = 1; } } @@ -235,11 +235,11 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread= 1)) { + if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes == -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { ClusterAccumulator pc; @@ -265,21 +265,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.pad()) + clustererNN.outputDataReg1_16[model_output_index].ToFloat(), - clustererNN.outputDataReg1_16[model_output_index + 2].ToFloat(), - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_16[model_output_index + 1].ToFloat(), - clustererNN.outputDataReg1_16[model_output_index + 3].ToFloat(), - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg1_16[model_output_index + 4].ToFloat(), + static_cast(peak.pad()) + clustererNN.mOutputDataReg1_16[model_output_index].ToFloat(), + clustererNN.mOutputDataReg1_16[model_output_index + 2].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg1_16[model_output_index + 1].ToFloat(), + clustererNN.mOutputDataReg1_16[model_output_index + 3].ToFloat(), + clustererNN.mClusterFlags[2 * glo_idx], + 
clustererNN.mClusterFlags[2 * glo_idx + 1]); } else if (dtype == 1) { - pc.setFull(central_charge * clustererNN.outputDataReg1_32[model_output_index + 4], - static_cast(peak.pad()) + clustererNN.outputDataReg1_32[model_output_index], - clustererNN.outputDataReg1_32[model_output_index + 2], - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg1_32[model_output_index + 1], - clustererNN.outputDataReg1_32[model_output_index + 3], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg1_32[model_output_index + 4], + static_cast(peak.pad()) + clustererNN.mOutputDataReg1_32[model_output_index], + clustererNN.mOutputDataReg1_32[model_output_index + 2], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg1_32[model_output_index + 1], + clustererNN.mOutputDataReg1_32[model_output_index + 3], + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } tpc::ClusterNative myCluster; @@ -330,9 +330,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread 0) { + if (clustererNN.mOutputDataClass[full_glo_idx] > 0) { ClusterAccumulator pc; @@ -358,21 +358,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 4].ToFloat(), - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 2].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 6].ToFloat(), - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 8].ToFloat(), + static_cast(peak.pad()) + clustererNN.mOutputDataReg2_16[model_output_index].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 4].ToFloat(), + 
(clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 2].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 6].ToFloat(), + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } else if (dtype == 1) { - pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 8], - static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index], - clustererNN.outputDataReg2_32[model_output_index + 4], - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 2], - clustererNN.outputDataReg2_32[model_output_index + 6], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 8], + static_cast(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index], + clustererNN.mOutputDataReg2_32[model_output_index + 4], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 2], + clustererNN.mOutputDataReg2_32[model_output_index + 6], + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } tpc::ClusterNative myCluster; @@ -403,21 +403,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(peak.pad()) + clustererNN.outputDataReg2_16[model_output_index + 1].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 5].ToFloat(), - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_16[model_output_index + 3].ToFloat(), - clustererNN.outputDataReg2_16[model_output_index + 7].ToFloat(), - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_16[model_output_index + 9].ToFloat(), + static_cast(peak.pad()) + 
clustererNN.mOutputDataReg2_16[model_output_index + 1].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 5].ToFloat(), + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_16[model_output_index + 3].ToFloat(), + clustererNN.mOutputDataReg2_16[model_output_index + 7].ToFloat(), + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } else if (dtype == 1) { - pc.setFull(central_charge * clustererNN.outputDataReg2_32[model_output_index + 9], - static_cast(peak.pad()) + clustererNN.outputDataReg2_32[model_output_index + 1], - clustererNN.outputDataReg2_32[model_output_index + 5], - (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.outputDataReg2_32[model_output_index + 3], - clustererNN.outputDataReg2_32[model_output_index + 7], - clustererNN.clusterFlags[2 * glo_idx], - clustererNN.clusterFlags[2 * glo_idx + 1]); + pc.setFull(central_charge * clustererNN.mOutputDataReg2_32[model_output_index + 9], + static_cast(peak.pad()) + clustererNN.mOutputDataReg2_32[model_output_index + 1], + clustererNN.mOutputDataReg2_32[model_output_index + 5], + (clusterer.mPmemory->fragment).start + static_cast(peak.time()) + clustererNN.mOutputDataReg2_32[model_output_index + 3], + clustererNN.mOutputDataReg2_32[model_output_index + 7], + clustererNN.mClusterFlags[2 * glo_idx], + clustererNN.mClusterFlags[2 * glo_idx + 1]); } rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap); From 67b81698f9c95edff48e630623b063a3c6fa9b51 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 10:10:26 +0200 Subject: [PATCH 0446/1914] GPU TPC: Make cluster rejection based on chi2 from current track position optional if in rejection based on interpolation mode --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git 
a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 2d8c2184e3b02..4c32c3e46e3a7 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -151,6 +151,7 @@ AddOptionRTC(forceEarlyTransform, int8_t, -1, "", 0, "Force early TPC transforma AddOptionRTC(dropLoopers, uint8_t, 0, "", 0, "Drop looping tracks starting from second loop") AddOptionRTC(mergerCovSource, uint8_t, 2, "", 0, "Method to obtain covariance in track merger: 0 = simple filterErrors method, 1 = use cov from track following, 2 = refit (default)") AddOptionRTC(mergerInterpolateErrors, uint8_t, 1, "", 0, "Use interpolation instead of extrapolation for chi2 based cluster rejection") +AddOptionRTC(mergerInterpolateRejectAlsoOnCurrentPosition, uint8_t, 1, "", 0, "When using mergerInterpolateErrors, reject based on chi2 twice computed with interpolated and current track position") AddOptionRTC(mergeCE, uint8_t, 1, "", 0, "Merge tracks accross the central electrode") AddOptionRTC(retryRefit, int8_t, 1, "", 0, "Retry refit with seeding errors and without cluster rejection when fit fails (=2 means retry in same kernel, =1 for separate kernel") AddOptionRTC(looperInterpolationInExtraPass, int8_t, -1, "", 0, "Perform looper interpolation in an extra pass") diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx index 9e23f9af3cf43..90612fc98f836 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx @@ -691,7 +691,7 @@ GPUd() int32_t GPUTPCGMPropagator::Update(float posY, float posZ, int32_t iRow, return 0; } - return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || rejectChi2 == rejectInterReject, err2Y, err2Z, ¶m); + return Update(posY, posZ, clusterState, rejectChi2 == rejectDirect || (param.rec.tpc.mergerInterpolateRejectAlsoOnCurrentPosition && rejectChi2 == rejectInterReject), err2Y, 
err2Z, ¶m); } GPUd() int32_t GPUTPCGMPropagator::InterpolateReject(const GPUParam& GPUrestrict() param, float posY, float posZ, int16_t clusterState, int8_t rejectChi2, gputpcgmmergertypes::InterpolationErrorHit* inter, float err2Y, float err2Z) From 8060987d94bb48a2b71c98066a8952630246723d Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 24 Apr 2025 12:06:38 +0200 Subject: [PATCH 0447/1914] Fix: TrackLTIntegral.addStep needs (q/p)^2 instead of 1/p^2 Since the charge dependence of beta is accounted internally. --- .../TrackLTIntegral.h | 2 +- .../TrackParametrization.h | 13 +++++++++++++ .../Reconstruction/src/TrackLTIntegral.cxx | 6 +++--- .../test/testLTOFIntegration.cxx | 4 ++-- Detectors/Base/src/Propagator.cxx | 18 +++++++++--------- Detectors/GlobalTracking/src/MatchTPCITS.cxx | 4 ++-- .../TRD/workflow/src/TRDGlobalTrackingSpec.cxx | 4 ++-- 7 files changed, 32 insertions(+), 19 deletions(-) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h index 6cf9ceda8e195..e799804805972 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackLTIntegral.h @@ -63,7 +63,7 @@ class TrackLTIntegral } } - GPUd() void addStep(float dL, float p2Inv); + GPUd() void addStep(float dL, float q2p2); GPUd() void addX2X0(float d) { mX2X0 += d; } GPUd() void addXRho(float d) { mXRho += d; } diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index a51ec3b7010a7..bfd56eb8f024f 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -191,6 +191,7 @@ class TrackParametrization GPUd() value_t getPhi() 
const; GPUd() value_t getPhiPos() const; + GPUd() value_t getQ2P2() const; GPUd() value_t getPtInv() const; GPUd() value_t getP2Inv() const; GPUd() value_t getP2() const; @@ -555,6 +556,18 @@ GPUdi() auto TrackParametrization::getPhiPos() const -> value_t return phi; } +//____________________________________________________________ +template +GPUdi() auto TrackParametrization::getQ2P2() const -> value_t +{ + // return the (q/p)^2 + value_t q2pt2 = mP[kQ2Pt] * mP[kQ2Pt]; + if (q2pt2 < MinPTInv * MinPTInv) { + q2pt2 = MinPTInv * MinPTInv; + } + return q2pt2 / (1.f + getTgl() * getTgl()); +} + //____________________________________________________________ template GPUdi() auto TrackParametrization::getPtInv() const -> value_t diff --git a/DataFormats/Reconstruction/src/TrackLTIntegral.cxx b/DataFormats/Reconstruction/src/TrackLTIntegral.cxx index 3efddff00f512..426c3da04726c 100644 --- a/DataFormats/Reconstruction/src/TrackLTIntegral.cxx +++ b/DataFormats/Reconstruction/src/TrackLTIntegral.cxx @@ -39,9 +39,9 @@ GPUd() void TrackLTIntegral::print() const } //_____________________________________________________ -GPUd() void TrackLTIntegral::addStep(float dL, float p2Inv) +GPUd() void TrackLTIntegral::addStep(float dL, float q2p2) { - ///< add step in cm to integrals + ///< add step in cm to integrals, q2p2 is (q/p)^2. 
mL += dL; if (isTimeNotNeeded()) { return; @@ -49,7 +49,7 @@ GPUd() void TrackLTIntegral::addStep(float dL, float p2Inv) const float dTns = dL * 1000.f / o2::constants::physics::LightSpeedCm2NS; // time change in ps for beta = 1 particle for (int id = 0; id < getNTOFs(); id++) { const float m2z = track::PID::getMass2Z(id); - const float betaInv = math_utils::sqrt(1.f + m2z * m2z * p2Inv); + const float betaInv = math_utils::sqrt(1.f + m2z * m2z * q2p2); mT[id] += dTns * betaInv; } } diff --git a/DataFormats/Reconstruction/test/testLTOFIntegration.cxx b/DataFormats/Reconstruction/test/testLTOFIntegration.cxx index bb65c60d08d18..f737b1df53666 100644 --- a/DataFormats/Reconstruction/test/testLTOFIntegration.cxx +++ b/DataFormats/Reconstruction/test/testLTOFIntegration.cxx @@ -33,8 +33,8 @@ BOOST_AUTO_TEST_CASE(TrackLTIntegral) const int nStep = 100; const float dx2x0 = 0.01f; for (int i = 0; i < nStep; i++) { - lt.addStep(1., trc.getP2Inv()); - lt1.addStep(1., trc1.getP2Inv()); + lt.addStep(1., trc.getQ2P2()); + lt1.addStep(1., trc1.getQ2P2()); lt1.addX2X0(dx2x0); } trc.printParam(); diff --git a/Detectors/Base/src/Propagator.cxx b/Detectors/Base/src/Propagator.cxx index c7c7b461034e5..754c0c14e6f60 100644 --- a/Detectors/Base/src/Propagator.cxx +++ b/Detectors/Base/src/Propagator.cxx @@ -189,14 +189,14 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackParCov_t& track, va res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), 
track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -258,14 +258,14 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackPar_t& track, value res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -324,14 +324,14 @@ GPUd() bool PropagatorImpl::propagateToX(TrackParCov_t& track, value_ty res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -390,14 +390,14 @@ GPUd() bool PropagatorImpl::propagateToX(TrackPar_t& track, value_type res = false; } if (tofInfo) { - tofInfo->addStep(mb.length, track.getP2Inv()); // fill L,ToF info using already calculated step length + tofInfo->addStep(mb.length, track.getQ2P2()); // fill L,ToF info using already calculated step length tofInfo->addX2X0(mb.meanX2X0); 
tofInfo->addXRho(mb.getXRho(signCorr)); } } else if (tofInfo) { // if tofInfo filling was requested w/o material correction, we need to calculate the step lenght auto xyz1 = track.getXYZGlo(); math_utils::Vector3D stepV(xyz1.X() - xyz0.X(), xyz1.Y() - xyz0.Y(), xyz1.Z() - xyz0.Z()); - tofInfo->addStep(stepV.R(), track.getP2Inv()); + tofInfo->addStep(stepV.R(), track.getQ2P2()); } return res; }; @@ -717,7 +717,7 @@ GPUd() value_T PropagatorImpl::estimateLTFast(o2::track::TrackLTIntegra // since we assume the track or its parent comes from the beam-line or decay, add XY(?) distance to it value_T dcaT = math_utils::detail::sqrt(xdca * xdca + ydca * ydca); length += dcaT; - lt.addStep(length, trc.getP2Inv()); + lt.addStep(length, trc.getQ2P2()); return dcaT; } diff --git a/Detectors/GlobalTracking/src/MatchTPCITS.cxx b/Detectors/GlobalTracking/src/MatchTPCITS.cxx index f689caed87351..c8c9dda6a4025 100644 --- a/Detectors/GlobalTracking/src/MatchTPCITS.cxx +++ b/Detectors/GlobalTracking/src/MatchTPCITS.cxx @@ -1714,7 +1714,7 @@ bool MatchTPCITS::refitTrackTPCITS(int slot, int iTPC, int& iITS, pmr::vectorestimateLTIncrement(tracOut, posStart, posEnd); - tofL.addStep(lInt, tracOut.getP2Inv()); + tofL.addStep(lInt, tracOut.getQ2P2()); tofL.addX2X0(lInt * mTPCmeanX0Inv); propagator->PropagateToXBxByBz(tracOut, o2::constants::geom::XTPCOuterRef, MaxSnp, 10., mUseMatCorrFlag, &tofL); @@ -1804,7 +1804,7 @@ bool MatchTPCITS::refitABTrack(int iITSAB, const TPCABSeed& seed, pmr::vectorestimateLTIncrement(tracOut, posStart, posEnd); - tofL.addStep(lInt, tracOut.getP2Inv()); + tofL.addStep(lInt, tracOut.getQ2P2()); tofL.addX2X0(lInt * mTPCmeanX0Inv); propagator->PropagateToXBxByBz(tracOut, o2::constants::geom::XTPCOuterRef, MaxSnp, 10., mUseMatCorrFlag, &tofL); const auto& trackTune = TrackTuneParams::Instance(); diff --git a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx index 375fa732007cc..b5a1530e83d8c 100644 --- 
a/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx +++ b/Detectors/TRD/workflow/src/TRDGlobalTrackingSpec.cxx @@ -638,7 +638,7 @@ bool TRDGlobalTracking::refitITSTPCTRDTrack(TrackTRD& trk, float timeTRD, o2::gl } auto posEnd = trk.getXYZGlo(); auto lInt = propagator->estimateLTIncrement(trk, posStart, posEnd); - trk.getLTIntegralOut().addStep(lInt, trk.getP2Inv()); + trk.getLTIntegralOut().addStep(lInt, trk.getQ2P2()); // trk.getLTIntegralOut().addX2X0(lInt * mTPCmeanX0Inv); // do we need to account for the material budget here? probably const auto& trackTune = TrackTuneParams::Instance(); @@ -733,7 +733,7 @@ bool TRDGlobalTracking::refitTPCTRDTrack(TrackTRD& trk, float timeTRD, o2::globa } auto posEnd = trk.getXYZGlo(); auto lInt = propagator->estimateLTIncrement(trk, posStart, posEnd); - trk.getLTIntegralOut().addStep(lInt, trk.getP2Inv()); + trk.getLTIntegralOut().addStep(lInt, trk.getQ2P2()); // trk.getLTIntegralOut().addX2X0(lInt * mTPCmeanX0Inv); // do we need to account for the material budget here? probably? 
if (!propagator->PropagateToXBxByBz(trk, o2::constants::geom::XTPCInnerRef, o2::base::Propagator::MAX_SIN_PHI, o2::base::Propagator::MAX_STEP, matCorr, &trk.getLTIntegralOut())) { From d56140caadf9045d58f85836bc9a0faed6a1a4c4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 11:16:19 +0200 Subject: [PATCH 0448/1914] GPU CMake: some cleanup / simplification --- GPU/Common/CMakeLists.txt | 6 +--- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 3 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 4 +-- GPU/GPUTracking/Base/opencl/CMakeLists.txt | 4 +-- GPU/GPUTracking/CMakeLists.txt | 33 +++++++++---------- GPU/GPUTracking/Interface/CMakeLists.txt | 2 +- GPU/GPUTracking/Standalone/CMakeLists.txt | 1 + .../Standalone/tools/dumpGPUDefParam.C | 2 +- GPU/GPUTracking/display/CMakeLists.txt | 4 +-- GPU/TPCFastTransformation/CMakeLists.txt | 6 ++-- GPU/Utils/CMakeLists.txt | 3 +- 11 files changed, 32 insertions(+), 36 deletions(-) diff --git a/GPU/Common/CMakeLists.txt b/GPU/Common/CMakeLists.txt index 6951c3b2339b8..b1a4b2107019c 100644 --- a/GPU/Common/CMakeLists.txt +++ b/GPU/Common/CMakeLists.txt @@ -62,10 +62,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") COMPONENT_NAME GPU LABELS gpu) endif() - install(FILES ${HDRS_INSTALL} DESTINATION include/GPU) endif() -if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") - install(FILES ${HDRS_INSTALL} - DESTINATION include) -endif() +install(FILES ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index dbdf6b606df18..c31dd0c8d3fe2 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -120,7 +120,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${CMAKE_SOURCE_DIR}/DataFormats/Reconstruction/src ${CMAKE_CURRENT_SOURCE_DIR} TARGETVARNAME targetName) - install(FILES ${HDRS} DESTINATION include/GPU) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -132,6 +131,8 @@ if(ALIGPU_BUILD_TYPE 
STREQUAL "Standalone") include_directories(${CMAKE_CURRENT_SOURCE_DIR}) endif() +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + target_compile_definitions(${targetName} PRIVATE $) if (onnxruntime_FOUND) diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 4689fee02d31e..6eded3499e46e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -169,8 +169,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") ${GPUCA_HIP_SOURCE_DIR} TARGETVARNAME targetName) - install(FILES ${HDRS} DESTINATION include/GPU) - # o2_add_test(GPUsortHIP NAME test_GPUsortHIP # SOURCES test/testGPUsortHIP.hip # PUBLIC_LINK_LIBRARIES O2::GPUCommon hip::host hip::device hip::hipcub roc::rocthrust @@ -187,6 +185,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") include_directories(${GPUCA_HIP_SOURCE_DIR}) endif() +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + target_compile_definitions(${targetName} PRIVATE $) if (onnxruntime_FOUND) diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 7ab70553958ef..99ec36615a1d1 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -96,8 +96,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") # the compile_defitions are not propagated automatically on purpose (they are # declared PRIVATE) so we are not leaking them outside of the GPU** # directories - - install(FILES ${HDRS} DESTINATION include/GPU) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -107,6 +105,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") set(targetName ${MODULE}) endif() +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + if(OPENCL_ENABLED_SPIRV) target_compile_definitions(${targetName} PRIVATE OPENCL_ENABLED_SPIRV) endif() diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 4c1de17025627..ec60d41484f81 100644 --- 
a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -242,12 +242,13 @@ set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h Definitions/GPUDefParametersLoad.template.inc) set(GENERATED_HEADERS_LIST "") -file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) +set(ON_THE_FLY_DIR ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) +file(MAKE_DIRECTORY ${ON_THE_FLY_DIR}) foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST}) get_filename_component(OUTPUT_FILE_NAME ${TEMPLATE_FILE} NAME) string(REPLACE ".template" "" OUTPUT_FILE_NAME ${OUTPUT_FILE_NAME}) - file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE}) - list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME}) + file(GENERATE OUTPUT ${ON_THE_FLY_DIR}/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE}) + list(APPEND GENERATED_HEADERS_LIST ${ON_THE_FLY_DIR}/${OUTPUT_FILE_NAME}) endforeach() set(GPUDEFPARAMETERSLBLIST "$,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,LB_>,\n>\n") string(APPEND GPUDEFPARAMETERSLBLIST "$,PREPEND,PAR_>,\n>\n") @@ -256,14 +257,14 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase CONTENT ${GPUDEFPARAMETERSLBLIST}) add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h - COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h + OUTPUT ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h + COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h COMMENT "Generating GPUDefParametersLoadPrepare.h" DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase VERBATIM 
COMMAND_EXPAND_LISTS ) -list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h) +list(APPEND GENERATED_HEADERS_LIST ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h) set(HDRS_INSTALL ${HDRS_INSTALL} ${GENERATED_HEADERS_LIST}) include(kernels.cmake) @@ -306,7 +307,7 @@ set(INCDIRS ${CMAKE_CURRENT_SOURCE_DIR}/Debug ${CMAKE_CURRENT_SOURCE_DIR}/DataCompression ${CMAKE_CURRENT_SOURCE_DIR}/TPCClusterFinder - ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly) + ${ON_THE_FLY_DIR}) # Main CMake part for O2 if(ALIGPU_BUILD_TYPE STREQUAL "O2") @@ -356,12 +357,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") HEADERS ${HDRS_CINT_O2} ${HDRS_CINT_O2_ADDITIONAL} LINKDEF GPUTrackingLinkDef_O2.h) - install(FILES ${HDRS_SRCS} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_INSTALL} - DESTINATION include/GPU) - install(DIRECTORY utils - DESTINATION include/GPU - FILES_MATCHING PATTERN *.h) - o2_add_test_root_macro(Standalone/tools/createGeo.C PUBLIC_LINK_LIBRARIES O2::GPUTracking LABELS its COMPILE_ONLY) @@ -390,13 +385,15 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") else() target_compile_definitions(${targetName} PRIVATE GPUCA_NO_ROOT) endif() - install(FILES ${HDRS_SRCS} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_INSTALL} - DESTINATION include) - install(DIRECTORY utils - DESTINATION include - FILES_MATCHING PATTERN *.h) endif() +install(FILES ${HDRS_SRCS} ${HDRS_CINT_O2} ${HDRS_CINT_DATATYPES} ${HDRS_INSTALL} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) +install(DIRECTORY utils + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU + FILES_MATCHING PATTERN *.h) + + # GPUReconstructionLibrary needs to know which GPU backends are enabled for proper error messages configure_file(Base/GPUReconstructionAvailableBackends.template.h ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionAvailableBackends.h) set_source_files_properties(Base/GPUReconstructionLibrary.cxx diff --git a/GPU/GPUTracking/Interface/CMakeLists.txt 
b/GPU/GPUTracking/Interface/CMakeLists.txt index baec5b941b4b6..e532b24f97908 100644 --- a/GPU/GPUTracking/Interface/CMakeLists.txt +++ b/GPU/GPUTracking/Interface/CMakeLists.txt @@ -46,4 +46,4 @@ target_include_directories(${targetName} target_compile_definitions(${targetName} PRIVATE $) -install(FILES ${HDRS} DESTINATION include/GPU) +install(FILES ${HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 0859223187f00..088269ae73cab 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.27 FATAL_ERROR) project(GPUTrackingStandalone) include(FeatureSummary) +include(GNUInstallDirs) set(CMAKE_INSTALL_MESSAGE LAZY) set(CMAKE_INSTALL_BINDIR "${CMAKE_INSTALL_PREFIX}") diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index 785c049816252..dcb12db6c9118 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -13,7 +13,7 @@ /// \author David Rohr // Run e.g. 
as: -// ROOT_INCLUDE_PATH="`pwd`/include" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' +// ROOT_INCLUDE_PATH="`pwd`/include/GPU" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' // Logic for testing to load the default parameters /* #define GPUCA_GPUCODE diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index 592ba3b38ff30..d7d08f7f06101 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -133,8 +133,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") target_compile_definitions(${targetName} PRIVATE GPUCA_BUILD_EVENT_DISPLAY_GLFW GPUCA_DISPLAY_GL3W GPUCA_DISPLAY_OPENGL_CORE) - install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION include/GPU) - o2_add_executable(field-uniform-exporter TARGETVARNAME exporterName COMPONENT_NAME gpu @@ -157,6 +155,8 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION displayTrackFilter) endif() +install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) + target_compile_definitions(${targetName} PRIVATE $) message(STATUS "Building GPU Event Display (Vulkan ${GPUCA_EVENT_DISPLAY_VULKAN}, Wayland ${GPUCA_EVENT_DISPLAY_WAYLAND}, Freetype ${GPUCA_EVENT_DISPLAY_FREETYPE}, Fontconfig ${Fontconfig_FOUND}, Qt ${GPUCA_EVENT_DISPLAY_QT})") diff --git a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index c7869467d15b5..317169c05f731 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -64,7 +64,6 @@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") HEADERS ${HDRS_CINT_O2} LINKDEF TPCFastTransformationLinkDef_O2.h) - install(FILES ${HDRS_CINT_O2} DESTINATION include/GPU) file(COPY ${HDRS_CINT_O2} DESTINATION ${CMAKE_BINARY_DIR}/stage/include/GPU) o2_add_test(${MODULE} @@ -115,6 +114,8 
@@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") LABELS gpu tpc) endforeach() + install (FILES macro/TPCFastTransformInit.C + DESTINATION share/macro/) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") @@ -123,8 +124,7 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") target_include_directories(${targetName} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) endif() -install (FILES macro/TPCFastTransformInit.C - DESTINATION share/macro/) +install(FILES ${HDRS_CINT_O2} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) # # FIXME: this one is misplaced : it depends (at least) on TPCSimulation which is diff --git a/GPU/Utils/CMakeLists.txt b/GPU/Utils/CMakeLists.txt index bf23c792c2034..5ea8b59db5cad 100644 --- a/GPU/Utils/CMakeLists.txt +++ b/GPU/Utils/CMakeLists.txt @@ -31,5 +31,6 @@ if(ALIGPU_BUILD_TYPE STREQUAL "O2") HEADERS ${HDRS_CINT} LINKDEF GPUUtilsLinkDef.h) - install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION include/GPU) endif() + +install(FILES ${HDRS_CINT} ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) From 68c73f4ca4c851d3542be7acbd217f3fd68a4127 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 11:17:39 +0200 Subject: [PATCH 0449/1914] GPU CMake: Create const param files for available default architectures to be loaded by RTC --- GPU/GPUTracking/CMakeLists.txt | 24 +++++++++++++++++ .../Definitions/GPUDefParametersDefaults.h | 11 ++++---- GPU/GPUTracking/Standalone/CMakeLists.txt | 1 + .../Standalone/tools/dumpGPUDefParam.C | 27 +++++++++---------- GPU/GPUTracking/display/CMakeLists.txt | 4 +-- GPU/TPCFastTransformation/CMakeLists.txt | 4 +-- 6 files changed, 47 insertions(+), 24 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index ec60d41484f81..b2852389398d0 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -407,6 +407,30 @@ target_sources(${targetName} FILES ${GENERATED_HEADERS_LIST} BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}) 
+make_directory(${CMAKE_CURRENT_BINARY_DIR}/genGPUArch) +set(GPU_CONST_PARAM_ARCHITECTUES AMPERE TURING VEGA MI2xx) +set(GPU_CONST_PARAM_FILES "") +foreach(GPU_ARCH ${GPU_CONST_PARAM_ARCHITECTUES}) + set(PARAMFILE ${CMAKE_CURRENT_BINARY_DIR}/genGPUArch/gpu_const_param_${GPU_ARCH}.par) + add_custom_command( + OUTPUT ${PARAMFILE} + COMMAND bash -c + "echo -e '#define GPUCA_GPUTYPE_${GPU_ARCH}\\n#define PARAMETER_FILE \"GPUDefParametersDefaults.h\"\\ngInterpreter->AddIncludePath(\"${CMAKE_CURRENT_SOURCE_DIR}/Definitions\");\\ngInterpreter->AddIncludePath(\"${ON_THE_FLY_DIR}\");\\n.x ${CMAKE_CURRENT_SOURCE_DIR}/Standalone/tools/dumpGPUDefParam.C(\"${PARAMFILE}\")\\n.q\\n'" + | root -l -b > /dev/null + VERBATIM + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/genGPUArch + MAIN_DEPENDENCY Standalone/tools/dumpGPUDefParam.C + DEPENDS Definitions/GPUDefParametersDefaults.h + ${ON_THE_FLY_DIR}/GPUDefParametersLoadPrepare.h + ${ON_THE_FLY_DIR}/GPUDefParametersLoad.inc + COMMENT "Generating GPU parameter set for architecture ${GPU_ARCH}") + LIST(APPEND GPU_CONST_PARAM_FILES ${PARAMFILE}) +endforeach() + +add_custom_target(${MODULE}_GPU_CONST_PARAM_ARCHS ALL DEPENDS ${GPU_CONST_PARAM_FILES}) +install(FILES ${GPU_CONST_PARAM_FILES} DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU/arch_param) + + # Add compile definitions and libraries depending on available optional dependencies if(GPUCA_QA) message(STATUS "Building GPU QA") diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index a56fb97771fe5..cdc5efd56ddfd 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -22,6 +22,7 @@ // GPU Run Configuration #if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
+ #define GPUCA_LB_SCAN 512 // GPU-architecture-dependent default settings #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 @@ -498,11 +499,11 @@ #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_THREAD_COUNT_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_THREAD_COUNT_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_LB_SCAN #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index 088269ae73cab..c112be6abac11 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -242,3 +242,4 @@ install(TARGETS ca TPCFastTransformation standalone_support) install(FILES "cmake/makefile" DESTINATION "${CMAKE_INSTALL_PREFIX}") install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${O2_DIR} ${CMAKE_INSTALL_PREFIX}/src)") install(CODE "execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_BINARY_DIR}/config.cmake ${CMAKE_INSTALL_PREFIX}/config.cmake)") 
+install(DIRECTORY tools DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU) diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index dcb12db6c9118..f82c537956ead 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -12,27 +12,24 @@ /// \file dumpGPUDefParam.C /// \author David Rohr -// Run e.g. as: -// ROOT_INCLUDE_PATH="`pwd`/include/GPU" root -l -q -b src/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C'()' +// Run e.g. as (replacing [FILE] and [OUTPUT]): +// echo -e '#define PARAMETER_FILE "[FILE]"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("[OUTPUT]")\n.q\n' | root -l -b +// To dump the defaults for AMPERE architecture, run +// echo -e '#define GPUCA_GPUTYPE_AMPERE\n#define PARAMETER_FILE "GPUDefParametersDefaults.h"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("default_AMPERE.par")\n.q\n' | root -l -b -// Logic for testing to load the default parameters -/* #define GPUCA_GPUCODE -#define GPUCA_GPUTYPE_AMPERE -#define GPUCA_MAXN 40 -#define GPUCA_ROW_COUNT 152 -#define GPUCA_TPC_COMP_CHUNK_SIZE 1024 -#include "GPUDefParametersConstants.h" -#include "GPUDefParametersDefaults.h" */ +#ifndef PARAMETER_FILE +#error Must provide the PARAMETER_FILE as preprocessor define, e.g.
-DHEADER_TO_INCLUDE='"GPUDefParametersDefaults.h"' +#endif -// Alternatively, logic to load file that sets GPUDefParameters -#include "testParam.h" +#define GPUCA_GPUCODE +#include PARAMETER_FILE #include "GPUDefParametersLoad.inc" -void dumpGPUDefParam() +void dumpGPUDefParam(const char* outputfile = "parameters.out") { auto param = o2::gpu::internal::GPUDefParametersLoad(); - printf("Loaded params:\n%s", o2::gpu::internal::GPUDefParametersExport(param, false).c_str()); - FILE* fp = fopen("parameters.out", "w+b"); + printf("Loaded params:\n%s\nWriting them to %s\n", o2::gpu::internal::GPUDefParametersExport(param, false).c_str(), outputfile); + FILE* fp = fopen(outputfile, "w+b"); fwrite(&param, 1, sizeof(param), fp); fclose(fp); } diff --git a/GPU/GPUTracking/display/CMakeLists.txt b/GPU/GPUTracking/display/CMakeLists.txt index d7d08f7f06101..a59d5189d6235 100644 --- a/GPU/GPUTracking/display/CMakeLists.txt +++ b/GPU/GPUTracking/display/CMakeLists.txt @@ -149,10 +149,10 @@ if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") target_link_libraries(${targetName} PUBLIC O2::GPUTracking) install(TARGETS ${MODULE}) - install(DIRECTORY filterMacros/ DESTINATION displayTrackFilter FILES_MATCHING PATTERN "*.C") + install(DIRECTORY filterMacros/ DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU/displayTrackFilter FILES_MATCHING PATTERN "*.C") get_property(GPU_DISPLAY_INCLUDE_PATH DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES) configure_file(filterMacros/setinclude.sh.in setinclude.sh @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION displayTrackFilter) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/setinclude.sh PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/GPU/displayTrackFilter) endif() install(FILES ${HDRS} ${HDRS_INSTALL} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/GPU) diff --git
a/GPU/TPCFastTransformation/CMakeLists.txt b/GPU/TPCFastTransformation/CMakeLists.txt index 317169c05f731..182a66fb28296 100644 --- a/GPU/TPCFastTransformation/CMakeLists.txt +++ b/GPU/TPCFastTransformation/CMakeLists.txt @@ -114,8 +114,8 @@ if(${ALIGPU_BUILD_TYPE} STREQUAL "O2") LABELS gpu tpc) endforeach() - install (FILES macro/TPCFastTransformInit.C - DESTINATION share/macro/) + install(FILES macro/TPCFastTransformInit.C + DESTINATION share/macro/) endif() if(ALIGPU_BUILD_TYPE STREQUAL "Standalone") From 3684fcc3d08b87fd2a0ff2d9077586a15d191e77 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 13:59:44 +0200 Subject: [PATCH 0450/1914] GPU: Remove obsolete .gitignore files --- GPU/GPUTracking/.gitignore | 1 - GPU/GPUTracking/Standalone/.gitignore | 5 ----- GPU/GPUTracking/Standalone/tools/.gitignore | 5 ----- GPU/GPUTracking/utils/.gitignore | 17 ----------------- 4 files changed, 28 deletions(-) delete mode 100644 GPU/GPUTracking/.gitignore delete mode 100644 GPU/GPUTracking/Standalone/.gitignore delete mode 100644 GPU/GPUTracking/Standalone/tools/.gitignore delete mode 100644 GPU/GPUTracking/utils/.gitignore diff --git a/GPU/GPUTracking/.gitignore b/GPU/GPUTracking/.gitignore deleted file mode 100644 index 7669bc79c7059..0000000000000 --- a/GPU/GPUTracking/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/utils/.svn diff --git a/GPU/GPUTracking/Standalone/.gitignore b/GPU/GPUTracking/Standalone/.gitignore deleted file mode 100644 index d0d3c34e96452..0000000000000 --- a/GPU/GPUTracking/Standalone/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -/config_options.mak -/release -/ca -/ca.exe -/libGPUTracking*.so diff --git a/GPU/GPUTracking/Standalone/tools/.gitignore b/GPU/GPUTracking/Standalone/tools/.gitignore deleted file mode 100644 index 6ddce7f156a23..0000000000000 --- a/GPU/GPUTracking/Standalone/tools/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -*.o -*.bc -*.cl -*.spv -*.spirv diff --git a/GPU/GPUTracking/utils/.gitignore b/GPU/GPUTracking/utils/.gitignore 
deleted file mode 100644 index ff145e23751f6..0000000000000 --- a/GPU/GPUTracking/utils/.gitignore +++ /dev/null @@ -1,17 +0,0 @@ -/get_private_profile.h -/os_low_level_helper.h -/affinity.cxx -/affinity.h -/qmath.h -/qmultialloc.* -/qmalloc.* -/sched_affinity_win32_wrapper.h -/switchtemplate.h -/util_adl.cxx -/util_adl.h -/vecpodtest.cxx -/*.cpp -/*.sh -/.svn -/as -/callvc.bat From 32319712858a1882e8826a7e62129ac619a548f9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 22:45:14 +0200 Subject: [PATCH 0451/1914] Update / add documentation for FST --- .../documentation/dpl-workflow-options.md | 55 ++++++++ .../documentation/env-variables.md | 51 +++++++ .../full-system-test-as-stress-test.md | 33 +++++ .../documentation/full-system-test-setup.md | 124 ++++++++++++++++++ .../full-system-test.md} | 6 +- .../documentation/raw-data-simulation.md | 43 ++++++ 6 files changed, 309 insertions(+), 3 deletions(-) create mode 100644 prodtests/full-system-test/documentation/dpl-workflow-options.md create mode 100644 prodtests/full-system-test/documentation/env-variables.md create mode 100644 prodtests/full-system-test/documentation/full-system-test-as-stress-test.md create mode 100644 prodtests/full-system-test/documentation/full-system-test-setup.md rename prodtests/full-system-test/{README.md => documentation/full-system-test.md} (95%) create mode 100644 prodtests/full-system-test/documentation/raw-data-simulation.md diff --git a/prodtests/full-system-test/documentation/dpl-workflow-options.md b/prodtests/full-system-test/documentation/dpl-workflow-options.md new file mode 100644 index 0000000000000..f79e481ce0723 --- /dev/null +++ b/prodtests/full-system-test/documentation/dpl-workflow-options.md @@ -0,0 +1,55 @@ +# Configuration options +You can use the following options to change the workflow behavior: +- `DDMODE` (default `processing`) : Must be `processing` (synchronous processing) or `processing-disk` (synchronous processing + storing of raw time frames 
to disk, note that this is the raw time frame not the CTF!). The `DDMODE` `discard` and `disk` are not compatible with the synchronous processing workflow, you must use the `no-processing.desc` workflow instead!. +- `WORKFLOW_DETECTORS` (default `ALL`) : Comma-separated list of detectors for which the processing is enabled. If these are less detectors than participating in the run, data of the other detectors is ignored. If these are more detectors than participating in the run, the processes for the additional detectors will be started but will not do anything. +- `WORKFLOW_DETECTORS_QC` (default `ALL`) : Comma-separated list of detectors for which to run QC, can be a subset of `WORKFLOW_DETECTORS` (for standalone detectors QC) and `WORKFLOW_DETECTORS_MATCHING` (for matching/vertexing QC). If a detector (matching/vertexing step) is not listed in `WORKFLOW_DETECTORS` (`WORKFLOW_DETECTORS_MATCHING`), the QC is automatically disabled for that detector. Only active if the `WORKFLOW_PARAMETER=QC` is set. +- `WORKFLOW_DETECTORS_CALIB` (default `ALL`) : Comma-separated list of detectors for which to run calibration, can be a subset of `WORKFLOW_DETECTORS`. If a detector is not listed in `WORKFLOW_DETECTORS`, the calibration is automatically disabled for that detector. Only active if the `WORKFLOW_PARAMETER=CALIB` is set. +- `WORKFLOW_DETECTORS_FLP_PROCESSING` (default `TOF` for sync processing on EPN, `NONE` otherwise) : Signals that these detectors have processing on the FLP enabled. The corresponding steps are thus inactive in the EPN epl-workflow, and the raw-proxy is configured to receive the FLP-processed data instead of the raw data in that case. +- `WORKFLOW_DETECTORS_RECO` (default `ALL`) : Comma-separated list of detectors for which to run reconstruction. +- `WORKFLOW_DETECTORS_CTF` (default `ALL`) : Comma-separated list of detectors to include in CTF. 
+- `WORKFLOW_DETECTORS_MATCHING` (default selected corresponding to default workflow for sync or async mode respectively) : Comma-separated list of matching / vertexing algorithms to run. Use `ALL` to enable all of them. Currently supported options (see LIST_OF_GLORECO in common/setenv.h): `ITSTPC`, `TPCTRD`, `ITSTPCTRD`, `TPCTOF`, `ITSTPCTOF`, `MFTMCH`, `PRIMVTX`, `SECVTX`. +- `WORKFLOW_EXTRA_PROCESSING_STEPS` Enable additional processing steps not in the preset for the SYNC / ASYNC mode. Possible values are: `MID_RECO` `MCH_RECO` `MFT_RECO` `FDD_RECO` `FV0_RECO` `ZDC_RECO` `ENTROPY_ENCODER` `MATCH_ITSTPC` `MATCH_TPCTRD` `MATCH_ITSTPCTRD` `MATCH_TPCTOF` `MATCH_ITSTPCTOF` `MATCH_MFTMCH` `MATCH_MFTMCH` `MATCH_PRIMVTX` `MATCH_SECVTX`. (Here `_RECO` means full async reconstruction, and can be used to enable it also in sync mode.) +- `WORKFLOW_PARAMETERS` (default `NONE`) : Comma-separated list, enables additional features of the workflow. Currently the following features are available: + - `GPU` : Performs the TPC processing on the GPU, otherwise everything is processed on the CPU. + - `CTF` : Write the CTF to disk (CTF creation is always enabled, but if this parameter is missing, it is not stored). + - `EVENT_DISPLAY` : Enable JSON export for event display. + - `QC` : Enable QC. + - `CALIB` : Enable calibration (not yet working!) +- `RECO_NUM_NODES_OVERRIDE` (default `0`) : Overrides the number of EPN nodes used for the reconstruction (`0` or empty means default). +- `MULTIPLICITY_FACTOR_RAWDECODERS` (default `1`) : Scales the number of parallel processes used for raw decoding by this factor. +- `MULTIPLICITY_FACTOR_CTFENCODERS` (default `1`) : Scales the number of parallel processes used for CTF encoding by this factor. +- `MULTIPLICITY_FACTOR_REST` (default `1`) : Scales the number of other reconstruction processes by this factor. 
+- `QC_JSON_EXTRA` (default `NONE`) : extra QC jsons to add (if does not fit to those defined in WORKFLOW_DETECTORS_QC & (WORKFLOW_DETECTORS | WORKFLOW_DETECTORS_MATCHING) +Most of these settings are configurable in the AliECS GUI. But some of the uncommon settings (`WORKFLOW_DETECTORS_FLP_PROCESSING`, `WORKFLOW_DETECTORS_CTF`, `WORKFLOW_DETECTORS_RECO`, `WORKFLOW_DETECTORS_MATCHING`, `WORKFLOW_EXTRA_PROCESSING_STEPS`, advanced `MULTIPLICITY_FACTOR` settings) can only be set via the "Additional environment variables field" in the GUI using bash syntax, e.g. `WORKFLOW_DETECTORS_FLP_PROCESSING=TPC`. + +# Process multiplicity factors +- The production workflow has internally a default value how many instances of a process to run in parallel (which was tuned for Pb-Pb processing) +- Some critical processes for synchronous pp processing are automatically scaled by the inverse of the number of nodes, i.e. the multiplicity is increased by a factor of 2 if 125 instead of 250 nodes are used, to enable the processing using only a subset of the nodes. +- Factors can be provided externally to scale the multiplicity of processes further. All these factors are multiplied. + - One factor can be provided based on the type of the processes: raw decoder (`MULTIPLICITY_FACTOR_RAWDECODERS`), CTF encoder (`MULTIPLICITY_FACTOR_CTFENCODERS`), or other reconstruction process (`MULTIPLICITY_FACTOR_REST`) + - One factor can be provided per detector via `MULTIPLICITY_FACTOR_DETECTOR_[DET]` using the 3 character detector representation, or `MATCH` for the global matching and vertexing workflows. + - One factor can be provided per process via `MULTIPLICITY_FACTOR_PROCESS_[PROCESS_NAME]`. In the process name, dashes `-` must be replaced by underscores `_`. +- The multiplicity of an individual process can be overridden externally (this is an override, no scaling factor) by using `MULTIPLICITY_PROCESS_[PROCESS_NAME]`. In the process name, dashes `-` must be replaced by underscores `_`. 
+- For example, creating the workflow with `MULTIPLICITY_FACTOR_RAWDECODERS=2 MULTIPLICITY_FACTOR_DETECTOR_ITS=3 MULTIPLICITY_FACTOR_PROCESS_mft_stf_decoder=5` will scale the number of ITS raw decoders by 6, of other ITS processes by 3, of other raw decoders by 2, and will run exactly 5 `mft-stf-decoder` processes. + +# Additional custom control variables +For user modification of the workflow settings, the following *EXTRA* environment variables exist: +- `ARGS_ALL_EXTRA` : Extra command line options added to all workflows +- `ALL_EXTRA_CONFIG` : Extra config key values added to all workflows +- `GPU_EXTRA_CONFIG` : Extra options added to the configKeyValues of the GPU workflow +- `ARGS_EXTRA_PROCESS_[WORKFLOW_NAME]` : Extra command line arguments for the workflow binary `WORKFLOW_NAME`. Dashes `-` must be replaced by underscores `_` in the name! E.g. `ARGS_EXTRA_PROCESS_o2_tof_reco_workflow='--output-type clusters'` +- `CONFIG_EXTRA_PROCESS_[WORKFLOW_NAME]` : Extra `--configKeyValues` arguments for the workflow binary `WORKFLOW_NAME`. Dashes `-` must be replaced by underscores `_` in the name! E.g. `CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow='GPU_proc.debugLevel=1;GPU_proc.ompKernels=0;'` + +**IMPORTANT:** When providing additional environment variables please always use single quotes `'` instead of double quotes `"`, because otherwise there can be issues with whitespaces. E.g. `ARGS_EXTRA_PROCESS_o2_eve_display='--filter-time-min 0 --filter-time-max 120'` does work while `ARGS_EXTRA_PROCESS_o2_eve_display="--filter-time-min 0 --filter-time-max 120"` does not. + +In case the CTF dictionaries were created from the data drastically different from the one being compressed, the default memory allocation for the CTF buffer might be insufficient. One can apply a scaling factor to the buffer size estimate (default=1.5) of a particular detector by defining a variable, e.g.
`TPC_ENC_MEMFACT=3.5` + +# File input for ctf-reader / raw-tf-reader +- The variable `$INPUT_FILE_LIST` can be a comma-separated list of files, or a file with a file-list of CTFs/raw TFs. +- The variable `$INPUT_FILE_COPY_CMD` can provide a custom copy command (default is to fetch the files from EOS). + +# Remarks on QC +The JSON files for the individual detectors are merged into one JSON file, which is cached during the run on the shared EPN home folder. +The default JSON file per detector is defined in `qc-workflow.sh`. +JSONs per detector can be overridden by exporting `QC_JSON_[DETECTOR_NAME]`, e.g. `QC_JSON_TPC`, when creating the workflow. +The global section of the merged qc JSON config is taken from qc-sync/qc-global.json diff --git a/prodtests/full-system-test/documentation/env-variables.md b/prodtests/full-system-test/documentation/env-variables.md new file mode 100644 index 0000000000000..b93622c0a0f94 --- /dev/null +++ b/prodtests/full-system-test/documentation/env-variables.md @@ -0,0 +1,51 @@ +The `setenv-sh` script sets the following environment options +* `NTIMEFRAMES`: Number of time frames to process. +* `TFDELAY`: Delay in seconds between publishing time frames (1 / rate). +* `NGPUS`: Number of GPUs to use, data distributed round-robin. +* `GPUTYPE`: GPU Tracking backend to use, can be CPU / CUDA / HIP / OCL / OCL2. +* `SHMSIZE`: Size of the global shared memory segment. +* `DDSHMSIZE`: Size of shared memory unmanaged region for DataDistribution Input. +* `GPUMEMSIZE`: Size of allocated GPU memory (if GPUTYPE != CPU) +* `HOSTMEMSIZE`: Size of allocated host memory for GPU reconstruction (0 = default). + * For `GPUTYPE = CPU`: TPC Tracking scratch memory size. (Default 0 -> dynamic allocation.) + * Otherwise : Size of page-locked host memory for GPU processing. (Default 0 -> 1 GB.) +* `CREATECTFDICT`: Create CTF dictionary. +* `SAVECTF`: Save the CTF to a root file. + * 0: Read `ctf_dictionary.root` as input. + * 1: Create `ctf_dictionary.root`.
Note that this was already done automatically if the raw data was simulated with `full_system_test.sh`. +* `SYNCMODE`: Run only reconstruction steps of the synchronous reconstruction. + * Note that there is no `ASYNCMODE` but instead the `CTFINPUT` option already enforces asynchronous processing. +* `NUMAGPUIDS`: NUMAID-aware GPU id selection. Needed for the full EPN configuration with 8 GPUs, 2 NUMA domains, 4 GPUs per domain. + In this configuration, 2 instances of `dpl-workflow.sh` must run in parallel. + To be used in combination with `NUMAID` to select the id per workflow. + `start_tmux.sh` will set up these variables automatically. +* `NUMAID`: SHM segment id to use for shipping data as well as set of GPUs to use (use `0` / `1` for 2 NUMA domains, 0 = GPUS `0` to `NGPUS - 1`, 1 = GPUS `NGPUS` to `2 * NGPUS - 1`) +* 0: Runs all reconstruction steps, of sync and of async reconstruction, using raw data input. +* 1: Runs only the steps of synchronous reconstruction, using raw data input. +* `EXTINPUT`: Receive input from raw FMQ channel instead of running o2-raw-file-reader. + * 0: `dpl-workflow.sh` can run as standalone benchmark, and will read the input itself. + * 1: To be used in combination with either `datadistribution.sh` or `raw-reader.sh` or with another DataDistribution instance. +* `CTFINPUT`: Read input from CTF ROOT file. This option is incompatible with EXTINPUT=1. The CTF ROOT file can be stored via SAVECTF=1. +* `NHBPERTF`: Time frame length (in HBF) +* `GLOBALDPLOPT`: Global DPL workflow options appended to o2-dpl-run. +* `EPNPIPELINES`: Set default EPN pipeline multiplicities. + Normally the workflow will start 1 dpl device per processor. + For some of the CPU parts, this is insufficient to keep step with the GPU processing rate, e.g. one ITS-TPC matcher on the CPU is slower than the TPC tracking on multiple GPUs. + This option adds some multiplicities for CPU processes using DPL's pipeline feature.
+ The settings were tuned for EPN processing with 4 GPUs (i.e. the default multiplicities are per NUMA domain). + The multiplicities are scaled with the `NGPUS` setting, i.e. with 1 GPU only 1/4th are applied. + You can pass an option different to 1, and then it will be applied as a factor on top of the multiplicities. + It is auto-selected by `start_tmux.sh`. +* `SEVERITY`: Log verbosity (e.g. info or error, default: info) +* `INFOLOGGER_SEVERITY`: Min severity for messages sent to Infologger. (default: `$SEVERITY`) +* `SHMTHROW`: Throw exception when running out of SHM memory. + It is suggested to leave this enabled (default) on tests on the laptop to get an actual error when it runs out of memory. + This is disabled in `start_tmux.sh`, to avoid breaking the processing while there is a chance that another process might free memory and we can continue. +* `NORATELOG`: Disable FairMQ Rate Logging. +* `INRAWCHANNAME`: FairMQ channel name used by the raw proxy, must match the name used by DataDistribution. +* `WORKFLOWMODE`: run (run the workflow (default)), print (print the command to stdout), dds (create partial DDS topology) +* `FILEWORKDIR`: directory for all input / output files. E.g. grp / geometry / dictionaries etc. are read from here, and dictionaries / ctf / etc. are written to there. + Some files have more fine grained control via other environment variables (e.g. to store the CTF to somewhere else). Such variables are initialized to `$FILEWORKDIR` by default but can be overridden. +* `EPNSYNCMODE`: Specify that this is a workflow running on the EPN for synchronous processing, e.g. logging goes to InfoLogger, DPL metrics to the AliECS monitoring, etc. +* `BEAMTYPE`: Beam type, must be PbPb, pp, pPb, cosmic, technical. +* `IS_SIMULATED_DATA` : 1 for MC data, 0 for RAW data.
diff --git a/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md new file mode 100644 index 0000000000000..0c4637ece0920 --- /dev/null +++ b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md @@ -0,0 +1,33 @@ +This is a quick summary how to run the full system test (FST) as stress test on the EPN. (For the full FST documentation, see https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md and https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test.md) + +# Preparing the data set +- I usually try to keep an up-to-date data set that can be used in `/home/drohr/alitest/tmp-fst*`. The folder with the highest number is the latest dataset. However, data formats are still evolving, and it requires rerunning the simulation regularly. I.e. please try my latest data set, if it doesn't work, please generate a new one as described below. +- Short overview how to generate a FST Pb-Pb 128 orbit data set: + - The O2 binaries installed on the EPN via RPMs use the `o2-dataflow` defaults and cannot run the simulation, and also they lack readout. Thus you need to build `O2PDPSuite` and `Readout` (the version matching the O2PDPSuite RPM you want to use for running the test) yourself with `alibuild` on an EPN: `aliBuild --defaults o2 build O2PDPSuite Readout --jobs 32 --debug`. The flag `--jobs` configures the number of parallel jobs and can be changed. + - Enter the O2PDPSuite environment via `alienv enter O2PDPSuite/latest Readout/latest`. + - Go to an empty directory. + - Run the FST simulation via: `NEvents=650 NEventsQED=10000 SHMSIZE=128000000000 TPCTRACKERSCRATCHMEMORY=40000000000 SPLITTRDDIGI=0 GENERATE_ITSMFT_DICTIONARIES=1 $O2_ROOT/prodtests/full_system_test.sh` + - Get a current matbud.root (e.g.
from here https://alice.its.cern.ch/jira/browse/O2-2288) and place it in that folder. + - Create a timeframe file from the raw files: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. + - Prepare the ramdisk folder: `mv raw/timeframe raw/timeframe-org; mkdir raw/timeframe-tmpfs; ln -s timeframe-tmpfs raw/timeframe` + +# Running the full system test +- Enter the environment! On an EPN do `module load O2PDPSuite` (this will load the latest O2 software installed on that EPN). +- Go into the folder with the data set (you might need to create one, see above). +- Prepare the ramdisk with the data: `sudo mount -t tmpfs tmpfs raw/timeframe-tmpfs; sudo cp raw/timeframe-org/* raw/timeframe` + - (NOTE that the ramdisk might already be present from previous tests, or in a different folder. Check the mounted tmpfs filesystems (`mount | grep tmpfs`), and don't mount multiple of them since memory is critical!) + - If you do not have root permissions and cannot create a ramdisk, the test will also work without. In that case you should decrease the publishing rate below to `TFDELAY=5`. +- Make sure disk caches are cleared: as ROOT do: `echo 1 > /proc/sys/vm/drop_caches` +- In order to run the Full System Test, the workflow must be able to access the CCDB. Normally, if you run as user, you must make sure to have an alien token present. On the EPN, one can use the EPN-internal CCDB server instead, which does not require alien access. If you use the `start-tmux.sh`, the env variables are set automatically to access the EPN-internal CCDB server. +- Start the FST with 2 NUMA domains: `TFDELAY=2.5 NTIMEFRAMES=1000000 $O2_ROOT/prodtests/full-system-test/start_tmux.sh dd` + +This will start a tmux session with 3 shells, the upper 2 shells are the 2 DPL workflows, one per NUMA domain, for the processing. The lower shell is the input with DataDistribution's StfBuilder. Leave it running and check that the StfBuilder doesn't complain that its buffer is full. 
Then the EPN can sustain the rate. + +# **NOTE** +- Attached to this ticket is a screenshot of what the console should look like: + - The DD console (on the bottom) should not show warnings about full buffers. + - The other 2 consoles (1 per NUMA domain) should show the processing times per TF for the GPU reconstruction: + ``` + [2974450:gpu-reconstruction_t3]: [10:50:38][INFO] GPU Reoncstruction time for this TF 26.77 s (cpu), 17.8823 s (wall) + ``` + This should be 17 to 18 seconds, and you should see it for all 4 GPUs on both NUMA domains (`reconstruction_t0` to `reconstruction_t3`) diff --git a/prodtests/full-system-test/documentation/full-system-test-setup.md b/prodtests/full-system-test/documentation/full-system-test-setup.md new file mode 100644 index 0000000000000..82ef9b7d0c74f --- /dev/null +++ b/prodtests/full-system-test/documentation/full-system-test-setup.md @@ -0,0 +1,124 @@ +This is some documentation for the full system test setup. + +If you just want to test a small dataset, you can skip the following steps, and just skip to the end, where you will find a download with a prepared data set! + +# Requirements: +- The FST needs a lot of memory. Please check the comments below, make sure your system has enough memory, and change the memory sizes in the command lines accordingly. +- ulimits: The FST needs large ulimits for memory and virtual memory (`ulimit -m` / `ulimit -v`). This is usually no problem since they are usually unlimited. If GPUs are used, the FST also needs `ulimit -l` (for locked memory) unlimited, which is usually not the system default. Finally, if data is replayed from raw files (not with DataDistribution), the FST will open many files, and `ulimit -n` should be at least 4096. Note that in most distributions the hard ulimits are configured in `/etc/security/limits.conf`. +- The FST needs to access the CCDB. For this, you should run the FST with an alien token. 
Alternatively, if you are on the EPN you can use the EPN-internal CCDB server by exporting `ALL_EXTRA_CONFIG="NameConf.mCCDBServer=http://o2-ccdb.internal;"` and by setting the DPL CCDB backend on the command line. If you are using `start-tmux.sh` for the 8 GPU FST, the CCDB backends are set automatically. + +# Creating the raw data and running the FST: +1. First some remarks on the number of events and the memory size: + - Generation (simulation) of the full time frame with ~550 collisions will need ~256 GB, processing will take less. + - Due to the sampling of the bunch crossings, the exact number of collisions that will be in the TF is not clear, thus one should simulate 600 collisions to generate a full 128 orbit TF. + - The default shared memory size is 2 GB, and must be increased significantly for large time frames, 128 GB is sufficient for 128 orbit TF, 160 GB is needed if MC labels are present in addition. + - The GPU memory allocation should be set to ~13 GB for 70 orbits and 21 GB for 128 orbits. + - I'd suggest to do a first small test with 1-5 events to check the machinery, 100 events is already a good size which should not exhaust the memory, I'd go to 600 only after 100 works. +1. Compile O2 with GPU support, in addition you need O2sim, DataDistribution, and Readout (latest versions from alidist will do). + GPUs for O2 should be auto-detected, but you can set the environment variables ALIBUILD_ENABLE_CUDA / ALIBUILD_ENABLE_HIP to enforce it (and get a failure when detection fails). Look for CMake log messages "Building GPUTracking with CUDA support" (etc) to verify. + For more information, see https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build.md +1. Optionally place some binary configuration files in the simulation folder. Default objects will be used if no such files are placed. There are instructions at the end of this post how to generate these files. 
(Currently, these files are: matbud.root, ITSdictionary.bin, ctf_dictionary.root, tpctransform.root, dedxsplines.root, and tpcpadgaincalib.root) +1. Load the O2sim environment (`alienv enter O2sim/latest`) and run the following full system test script for a full simulation and digits to raw conversion (this will already include 1 CPU reconstruction run): + ``` + NEvents=600 NEventsQED=35000 SHMSIZE=128000000000 TPCTRACKERSCRATCHMEMORY=30000000000 $O2_ROOT/prodtests/full_system_test.sh + ``` + - This creates a full 128 orbit TF with 550 collisions and uses 35000 interactions for the QED background + - It uses 128 GB of shared memory + - The scratch memory size for the TPC reconstruction is set to 24 GB (Note, this is the CPU-equivalent of the GPU memory size, since this phase will only run on the CPU). +1. Test of the workflow using the raw-file-reader: Run the so far largest workflow. The GPU and SHM memory sizes must be reasonably large (see above). + ``` + SHMSIZE=128000000000 NTIMEFRAMES=10 TFDELAY=100 GPUTYPE=CPU $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` + Note that this uses 128 GB of SHM, runs only on the CPU, and processes the time frame 10 times in a loop with 100 s delay between the publishing. + - For a documentation of the options, see https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test.md + - For running on the GPU (4 GPUs with the HIP backend), please do + ``` + SHMSIZE=128000000000 NTIMEFRAMES=10 TFDELAY=10 GPUTYPE=HIP NGPUS=4 GPUMEMSIZE=22000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` +This will use 4 GPUs with the HIP backend and allocate 22 GB of scratch memory on the GPU (should be sufficient for 128 orbit TF). You can change the GPU type as indicated in the linked README.md above, e.g. `GPUTYPE=CUDA NGPUS=1` for 1 CUDA GPU. +1. With this, the full chain is running inside O2 DPL. Next we are adding DataDistribution. + 1. 
Create the TF files as explained in the subtask (https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-data-simulation.md). For convenience, there is a script that should do it automatically, from a shell that has loaded both DataDistribution and Readout: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. + 1. Enter the O2 environment, and run the following script (please adjust the variables as in the test before). + ``` + EXTINPUT=1 SHMSIZE=128000000000 GPUTYPE=CPU $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` + - As a first optional test without DataDistribution, we can take the RawReader to feed the data in the way DataDistribution does. Run the following script in a second shell within the O2 environment. (Please adjust the variables as noted above) + ``` + SHMSIZE=128000000000 NTIMEFRAMES=10 TFDELAY=100 $O2_ROOT/prodtests/full-system-test/raw-reader.sh + ``` + 1. In a second shell with DataDistribution, run the following script (adjust the 2 variables for memory size as needed for your data, and set the TF_DIR variable to the folder where you recorded the time frame). Make sure you start this script ONLY AFTER the DPL workflow has fully started! There is no number of timeframes, it will run in an endless loop. + ``` + SHMSIZE=128000000000 DDSHMSIZE=32000 TFDELAY=100 $O2_ROOT/prodtests/full-system-test/datadistribution.sh + ``` +1. The full chain that will be running on the EPN farm is a bit more complicated. It consists of: + - 2 instances of the dpl-workflow driving 4 GPUs each, one per NUMA domain. + - 1 instance of data distribution feeding a shared input buffer. + The following script runs the full system test in the 8 GPU EPN configuration using tmux with 3 sessions: `TFDELAY=2.8457 NTIMEFRAMES=128 $O2_ROOT/prodtests/full-system-test/start-tmux.sh dd` + - Note that the number of GPUs / memory sizes are automatically set by start-tmux.sh. 
+ - This TFDELAY is the rate for processing 1/250th of 50 kHz Pb-Pb with average time frames. Since the occupancy of your simulated timeframe will fluctuate, it is suggested to scale the TFDELAY linearly with the number of tpc clusters (shown in the console output of the dpl-workflow), with the average corresponding to 2.8457 s being 313028012 clusters. + - You can for testing alternatively use the rawreader instead of datadistribution as input in the start_tmux.sh script by passing rr instead of dd. +1. On the EPN, an SHM management tool owns the memory in the background and keeps it locked. This is done in order to speed up the startup. This behavior can be reproduced in the full system test, by setting the env variable `SHM_MANAGER_SHMID` to the shm id to be used (must be set for both `start_tmux.sh` and `shm-tool.sh`; you can just use `SHM_MANAGER_SHMID=1` for a test) and running in a separate shell before starting `start_tmux.sh` + ``` + SHM_MANAGER_SHMID=1 SHMSIZE=$((128<<30)) DDSHMSIZE=$((128<<10)) $O2_ROOT/prodtests/full-system-test/shm-tool.sh + SHM_MANAGER_SHMID=1 TFDELAY=2.8457 NTIMEFRAMES=8 $O2_ROOT/prodtests/full-system-test/start-tmux.sh dd + ``` + +--- + +# Remarks for running with distortions: +1. To run the digitization with distortions, add the following to the digitizer command (using map inputSCDensity3D_8000_0 from file ../InputSCDensityHistograms_8000events.root): + ``` + --distortionType 2 --initialSpaceChargeDensity=../InputSCDensityHistograms_8000events.root,inputSCDensity3D_8000_0 + ``` +1. To rerun the digitization with the same BC sampling for the collisions add + ``` + --incontext collisioncontext.root + ``` +1. To create the tpc fast transform map from the SCD object run: + ``` + root -l -q -b ~/alice/O2/Detectors/TPC/reconstruction/macro/createTPCSpaceChargeCorrection.C++'("../InputSCDensityHistograms_8000events.root", "inputSCDensity3D_8000_0")' + ``` +1. 
In order to use the fast transform map for TPC tracking, add to the tpc-reco-workflow: + ``` + --configKeyValues "GPU_global.transformationFile=tpctransform.root" + ``` + +--- + +# Remarks for creating other prerequisite binary files: +1. To create the CTF dictionary: Run the full system test workflow once setting the env variable CREATECTFDICT=1: + ``` + CREATECTFDICT=1 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh + ``` +1. Create the ITS pattern dictionary + ``` + o2-its-reco-workflow --trackerCA --disable-mc --configKeyValues "fastMultConfig.cutMultClusLow=30000;fastMultConfig.cutMultClusHigh=2000000;fastMultConfig.cutMultVtxHigh=500" + root -b -q ~/alice/O2/Detectors/ITSMFT/ITS/macros/test/CheckTopologies.C++ + ``` + - Note that the ITS dictionary used for raw generation and for reconstruction must be the same. I.e., if you change this, you have to either restart from scratch with the new dictionary file or rerun the ITS raw generation part of `$O2_ROOT/prodtests/full_system_test.sh`. +1. To create the material lookup table + ``` + root -l -q -b $O2_ROOT/Detectors/Base/test/buildMatBudLUT.C + ``` +1. missing here: dedxsplines.root, tpcpadgaincalib.root + +--- + +# Measuring startup time: +- In order to measure the time for each individual GPU memory registration step, please add `CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.benchmarkMemoryRegistration=1;"`. This should show you 2 times ~2 seconds per GPU process for the 2 large segments (DD and the global segment, could also report some additional smaller segments, only 1 in case you don't use the readout proxy). +- In order to measure the total startup time, you can use the `start_tmux.sh` script with the option `FST_BENCHMARK_STARTUP=1`. It will print for both DPL chains 2 timestamps at the beginning: The first is when it starts the workflow JSON generation, the second is after the JSON generation when the actual workflow is started. 
For the process startup time, you have to take the difference from that time until the time when the last process has reached the READY state. (Note that this should be done with the `$O2_ROOT/prodtests/full-system-test/shm-tool.sh` as instructed above.) + ``` + Fri Jan 28 11:25:48 CET 2022 + Fri Jan 28 11:25:56 CET 2022 + [...] + [1456583:gpu-reconstruction_t0]: [11:26:18][INFO] fair::mq::Device running... + ``` + - This corresponds to a JSON creation time of 8 seconds (will usually not count for the startup since it is cached), and a process startup time of 22 seconds. +--- + +# Other remarks: +1. To run with low b-field, add to o2-sim: + ``` + --field -2 + ``` +1. To create a sample of multiple TF files for StfBuilder, use the script `$O2_ROOT/prodtests/full-system-test/generate_timeframe_files.sh`. diff --git a/prodtests/full-system-test/README.md b/prodtests/full-system-test/documentation/full-system-test.md similarity index 95% rename from prodtests/full-system-test/README.md rename to prodtests/full-system-test/documentation/full-system-test.md index a52dfbc5d1203..80cc08baa2255 100644 --- a/prodtests/full-system-test/README.md +++ b/prodtests/full-system-test/documentation/full-system-test.md @@ -10,7 +10,7 @@ The full system test consists of 2 parts (detailed below): The relevant scripts are `/prodtests/full_system_test.sh` and all scripts in `/prodtests/full-system-test`. Note that by default the `full_system_test.sh` script will do both, run the generation and then the sysc and the async workflow. -This is only a quickstart guide, for more information see https://alice.its.cern.ch/jira/browse/O2-1492. +This is only a quickstart guide, for more information see https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md. 
In order to run the full system test, you need to run in the O2sim environment (`alienv enter O2sim/latest`): ``` @@ -50,7 +50,7 @@ The generation part (in `prodtests/full_system_test.sh` runs the following steps The `prodtests/full_system_test.sh` uses `Utilities/Tools/jobutils.sh` for running the jobs, which creates a log file for each step, and which will automatically skip steps that have already succeeded if the test is rerun in the current folder. I.e. if you break the FST or it failed at some point, you can rerun the same command line and it will continue after the last successful step. See `Utilities/Tools/jobutils.sh` for details. Note that by default, the generation produces raw files, which can be consumed by the `raw-file-reader-workflow` and by `o2-readout-exe`. -The files can be converted into timeframes files readable by the StfBuilder as described in https://alice.its.cern.ch/jira/browse/O2-1492. +The files can be converted into timeframes files readable by the StfBuilder as described in https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md. ## Full system test DPL-workflow configuration and scripts @@ -80,7 +80,7 @@ The `dpl-workflow.sh` can run both the synchronous and the asynchronous workflow All settings are configured via environment variables. The default settings (if no env variable is exported) are defined in `setenv.sh` which is sourced by all other scripts. (Please note that `start_tmux.sh` overrides a couple of options with EPN defaults). 
-The environment variables are documented here: https://github.com/AliceO2Group/O2DPG/blob/master/DATA/common/README.md +The environment variables are documented here: https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-env-variables.md ## Files produced / required by the full system test diff --git a/prodtests/full-system-test/documentation/raw-data-simulation.md b/prodtests/full-system-test/documentation/raw-data-simulation.md new file mode 100644 index 0000000000000..fbf6ace7d6934 --- /dev/null +++ b/prodtests/full-system-test/documentation/raw-data-simulation.md @@ -0,0 +1,43 @@ +This procedure will create (S)TF files from raw data prepared as described in the main ticket. The data must be using RDHv6. +Create configuration for the readout.exe with all input files we want in the TF. This will create rdo_TF.cfg file. + +  +``` +ulimit -n 4096 # Make sure we can open sufficiently many files cd raw# ls raw: ITS TPC TOF ... + +# copy gen_rdo_cfg.sh script attached here to the raw directory +# Run the script with number of HBF/TF and list directories you want to include in the TF + +~raw> ./gen_rdo_cfg.sh 128 TPC ITS TOF # ... others{code} +```  + +In a separate shell load a recent DataDistribution module and start StfBuilder to record the TF: +``` +export TF_PATH=$(pwd) +StfBuilder --id=stfb --detector-rdh=6 --detector-subspec=feeid --stand-alone --channel-config "name=readout,type=pull,method=connect,address=ipc:///tmp/readout-to-datadist-0,transport=shmem,rateLogging=1" --data-sink-dir=${TF_PATH} --data-sink-sidecar --data-sink-enable +``` + +Start the readout.exe (at least v1.4.3) using the generated config file. The dataflow will have a 10-20 seconds of delay, in order to have all input files loaded. 
+``` +ulimit -n 4096 # Make sure we can open sufficiently many files +~raw> readout.exe file:rdo_TF.cfg{code} +``` +  +Upon data transfer to StfBuilder, readout will print the stats, like: +``` +2020-06-23 18:07:59.003364 Last interval (1.00s): blocksRx=0, block rate=0.00, bytesRx=0, rate=0.000 b/s +2020-06-23 18:08:00.003382 Last interval (1.00s): blocksRx=2930, block rate=2930.00, bytesRx=1156508880, rate=9.252 Gb/s +2020-06-23 18:08:01.003384 Last interval (1.00s): blocksRx=0, block rate=0.00, bytesRx=0, rate=0.000 b/s{noformat} +``` + +StfBuilder will print one warning regarding the timeout on the last received TF. This can be ignored in this case. The log should look like : + +```  +{noformat}[2020-06-23 18:07:59.928][I] readout[0]: in: 1224 (1156.52 MB) out: 0 (0 MB) +[2020-06-23 18:08:01.733][W] READOUT INTERFACE: finishing STF on a timeout. stf_id=1 size=1156508880 +[2020-06-23 18:08:02.607][I] Sending STF out. stf_id=1 channel=standalone-chan[0] stf_size=1156508880 unique_equipments=1224{noformat} +``` + +After this, both processes can be closed with Ctrl-C. 
The resulting TFs are stored in a new directory under TF_PATH (the name of the dir is the time of running) + +  From 8ed4d1083b9403972662c8bcf0cec3a29487e244 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 22:55:03 +0200 Subject: [PATCH 0452/1914] GPU: Add documentation --- GPU/documentation/README.md | 0 GPU/documentation/build-O2.md | 62 +++++++++++++++++++ GPU/documentation/build-standalone.md | 86 +++++++++++++++++++++++++++ 3 files changed, 148 insertions(+) create mode 100644 GPU/documentation/README.md create mode 100644 GPU/documentation/build-O2.md create mode 100644 GPU/documentation/build-standalone.md diff --git a/GPU/documentation/README.md b/GPU/documentation/README.md new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/GPU/documentation/build-O2.md b/GPU/documentation/build-O2.md new file mode 100644 index 0000000000000..809d1fe0d5439 --- /dev/null +++ b/GPU/documentation/build-O2.md @@ -0,0 +1,62 @@ +This ticket will serve as documentation how to enable which GPU features and collect related issues. + +So far, the following features exist: + * GPU Tracking with CUDA + * GPU Tracking with HIP + * GPU Tracking with OpenCL (>= 2.1) + * OpenGL visualization of the tracking + * ITS GPU tracking + +GPU support should be detected and enabled automatically. +If you just want to reproduce the GPU build locally without running it, it might be easiest to use the GPU CI container (see below). +The provisioning script of the container also demonstrates which patches need to be applied such that everything works correctly. + +*GPU Tracking with CUDA* + * The CMake option -DENABLE_CUDA=ON/OFF/AUTO steers whether CUDA is forced enabled / unconditionally disabled / auto-detected. + * The CMake option -DCUDA_COMPUTETARGET= fixes a GPU target, e.g. 61 for PASCAL or 75 for Turing (if unset, it compiles for the lowest supported architecture) + * CUDA is detected via the CMake language feature, so essentially nvcc must be in the Path. 
+ * We require CUDA version >= 11.2 + * CMake will report "Building GPUTracking with CUDA support" when enabled. + +*GPU Tracking with HIP* + * HIP and HCC must be installed, and CMake must be able to detect HIP via find_package(hip). + * If HIP and HCC are not installed to /opt/rocm, the environment variables $HIP_PATH and $HCC_HOME must point to the installation directories. + * HIP from ROCm >= 4.0 is required. + * The CMake option -DHIP_AMDGPUTARGET= forces a GPU target, e.g. gfx906 for Radeon VII (if unset, it auto-detects the GPU). + * CMake will report "Building GPUTracking with HIP support" when enabled. + * It may be that some patches must be applied to ROCm after the installation. You find the details in the provisioning script of the GPU CI container below. + +*GPU Tracking with OpenCL (Needs Clang >= 18 for compilation)* + * Needs OpenCL library with version >= 2.1, detectable via CMake find_package(OpenCL). + * Needs the SPIR-V LLVM translator together with LLVM to create the SPIR-V binaries, also detectable via CMake. + +*OpenGL visualization of TPC tracking* + * Needs the following libraries (all detectable via CMake find_package): libOpenGL, libGLEW, libGLFW, libGLU. + * OpenGL must be at least version 4.5, but this is not detectable at CMake time. If the supported OpenGL version is below that, the display is not/partially built, and not available at runtime. (Whether it is not or partially built depends on whether the maximum OpenGL version supported by GLEW or that of the system runtime is insufficient.) + * Note: If ROOT does not detect the system GLEW library, ROOT will install its own very outdated GLEW library, which will be insufficient for the display. Since the ROOT include path will come first in the order, this will prevent the display from being built. + * CMake will report "Building GPU Event Display" when enabled. + +*Vulkan visualization* + * similar to OpenGL visualization, but with Vulkan. 
+ +*ITS GPU Tracking* + * So far supports only CUDA and HIP, support for OpenCL might come. + * The build is enabled when the "GPU Tracking with CUDA" (as explained above) detects CUDA, same for HIP. + * CMake will report "Building ITS CUDA tracker" when enabled, same for HIP. + +*Using the GPU CI container* + * Setting up everything locally might be somewhat time-consuming, instead you can use the GPU CI docker container. + * The docker image is `alisw/slc8-gpu-builder`. + * The container exports the `ALIBUILD_O2_FORCE_GPU` env variable, which force-enables all GPU builds. + * Note that it might not be possible out-of-the-box to run the GPU version from within the container. In case of HIP it should work when you forward the necessary GPU devices in the container. For CUDA however, you would either need to (in addition to device forwarding) match the system CUDA driver and toolkit installation to the files present in the container, or you need to use the CUDA docker runtime, which is currently not installed in the container. + * There are currently some patches needed to install all the GPU backends in a proper way and together. Please refer to the container provisioning script https://github.com/alisw/docks/blob/master/slc9-gpu-builder/provision.sh. If you want to reproduce the installation locally, it is recommended to follow the steps from the script. + +*Summary* + +If you want to enforce the GPU builds on a system without GPU, please set the following CMake settings: + * ENABLE_CUDA=ON + * ENABLE_HIP=ON + * ENABLE_OPENCL=ON + * HIP_AMDGPUTARGET=gfx906;gfx908 + * CUDA_COMPUTETARGET=86 89 +Alternatively you can set the environment variables ALIBUILD_ENABLE_CUDA and ALIBUILD_ENABLE_HIP to enforce building CUDA or HIP without modifying the alidist scripts. 
diff --git a/GPU/documentation/build-standalone.md b/GPU/documentation/build-standalone.md new file mode 100644 index 0000000000000..d4e9da5cd5bf3 --- /dev/null +++ b/GPU/documentation/build-standalone.md @@ -0,0 +1,86 @@ +This ticket describes how to build the O2 GPU TPC Standalone benchmark (in its 2 build types), and how to run it. + +The purpose of the standalone benchmark is to make the O2 GPU TPC reconstruction code available standalone. It provides +- external tests when people do not have / want to build O2, have no access to alien for CCDB, etc. +- fast standalone tests without running O2 workflows and overhead from CCDB. +- faster build times than rebuilding O2 for development. + +# Compiling + +The standalone benchmark is built as part of O2, and it can be built standalone. + +As part of O2, it is available from the normal O2 build as the executable `o2-gpu-standalone-benchmark`, GPU support is available for all GPU types supported by the O2 build. + +Building it as standalone benchmark requires several dependencies, and provides more control over which features to enable / disable. +The dependencies can be taken from the system, or we can use alidist to build O2 and take the dependencies from there. + +In order to do the latter, please execute: +``` +cd ~/alice # or your alice folder +aliBuild build --defaults o2 O2 +source O2/GPU/GPUTracking/Standalone/cmake/prepare.sh +``` + +Then, in order to compile the standalone tool, assuming to have it in ~/standalone and build in ~/standalone/build, please run: +``` +mkdir -p ~/standalone/build +cd ~/standalone/build +cmake -DCMAKE_INSTALL_PREFIX=../ ~/alice/O2/GPU/GPUTracking/Standalone/ +nano config.cmake # edit config file to enable / disable dependencies as needed. In case cmake failed, and you disabled the dependency, just rerun the above command. +make install -j32 +``` + +You can edit certain build settings in `config.cmake`. Some of them are identical to the GPU build settings for O2, as described in O2-786. 
+And there are plenty of additional settings to enable/disable event display, qa, usage of ROOT, FMT, etc. libraries. + +This will create the `ca` binary in `~/standalone`, which is basically the same as the `o2-gpu-standalone-benchmark`, but built outside of O2. + +# Running + +The following command lines will use `./ca`, in case you use the executable from the O2 build, please replace by `o2-gpu-standalone-benchmark`. + +You can get a list of command line options by `./ca --help` and `./ca --helpall`. + +In order to run, you need a dataset. See the next section for how to create a dataset. Datasets are stored in `~/standalone/events`, and are identified by their folder names. The following commands assume a testdataset of name `o2-pbpb-100`. + +To run on that data, the simplest command is `./ca -e o2-pbpb-100`. This will automatically use a GPU if available, trying all backends, otherwise fall back to CPU. +You can force using GPU or CPU with `-g` and `-c`. +You can select the backend via `--gpuType CUDA|HIP|OCL|OCL2`, and inside the backend you can select the device number, if multiple devices exist, via `--gpuDevice i`. + +The flag `--debug` (-2 to 6) enables increasingly extensive debug output, and `--debug 6` stores full data dumps of all intermediate steps to files. +>= `--debug 1` has a performance impact since it adds serialization points for debugging. For timing individual kernels, `--debug 1` prints timing information for all kernels. +An example line would e.g. 
be +``` +./ca -e o2-pbpb-100 -g --gpuType CUDA --gpuDevice 0 --debug 1 +``` + +Some other noteworthy options are `--display` to run the GPU event display, `--qa` to run a QA task on MC data, `--runs` and `--runs2` to run multiple iterations of the benchmark, `--printSettings` to print all the settings that were used, `--memoryStat` to print memory statistics, `--sync` to run with settings for online reco, `--syncAsync` to run online reco first, and then offline reco on the produced TPC CTF data, `--setO2Settings` to use some defaults as they are in O2 not in the standalone version, `--PROCdoublePipeline` to enable the double-threaded pipeline for best performance (works only with multiple iterations, and not in async mode), and `--RTCenable` to enable the run time compilation improvements (check also `--RTCcacheOutput`). +An example for a benchmark in online mode would be: +``` +./ca -e o2-pbpb-100 -g --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 +``` + +# Generating a dataset + +The standalone benchmark supports running on Run2 data exported from AliRoot, or to run on Run3 data from O2. This document covers only the O2 case. +In o2, `o2-tpc-reco-workflow` and the `o2-gpu-reco-workflow` can dump event data with the `configKeyValue` `GPU_global.dump=1;`. +This will dump the event data to the local folder, all dumped files have a `.dump` file extension. If there are multiple TFs/events processed, there will be multiple `event.i.dump` files. In order to create a standalone dataset out of these, just copy all the `.dump` files to a subfolder in `~/standalone/events/[FOLDERNAME]`. + +Data can be dumped from raw data, or from MC data, e.g. generated by the Full System Test. In case of MC data, also MC labels are dumped, such that they are used in the `./ca --qa` mode. + +To get a dump from simulated data, please run e.g. the FST simulation as described in O2-2633. 
+A simple run as +``` +DISABLE_PROCESSING=1 NEvents=5 NEventsQED=100 SHMSIZE=16000000000 $O2_ROOT/prodtests/full_system_test.sh +``` +should be enough. + +Afterwards run the following command to dump the data: +``` +SYNCMODE=1 CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.dump=1;" WORKFLOW_DETECTORS=TPC SHMSIZE=16000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh +``` + +To dump standalone data from CTF raw data in `myctf.root`, you can use the same script, e.g.: +``` +CTFINPUT=1 INPUT_FILE_LIST=myctf.root CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.dump=1;" WORKFLOW_DETECTORS=TPC SHMSIZE=16000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh +``` From 80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 24 Apr 2025 16:39:38 +0200 Subject: [PATCH 0453/1914] GPU: Make TPC CF CF_SCAN_WORKGROUP_SIZE configureable --- GPU/GPUTracking/Base/GPUProcessor.h | 18 ++++++++++++++--- .../Definitions/GPUDefParametersConstants.h | 2 -- .../Definitions/GPUDefParametersDefaults.h | 17 ++++++++++------ .../Global/GPUChainTrackingClusterizer.cxx | 14 +++++++------ .../GPUTPCCFStreamCompaction.cxx | 14 +++++++++++-- .../GPUTPCCFStreamCompaction.h | 14 ++++++------- .../TPCClusterFinder/GPUTPCClusterFinder.cxx | 20 +++++++++++-------- .../TPCClusterFinder/GPUTPCClusterFinder.h | 5 ++--- GPU/GPUTracking/kernels.cmake | 3 ++- 9 files changed, 69 insertions(+), 38 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUProcessor.h b/GPU/GPUTracking/Base/GPUProcessor.h index 2e0e0a003f87d..df551c9f0330d 100644 --- a/GPU/GPUTracking/Base/GPUProcessor.h +++ b/GPU/GPUTracking/Base/GPUProcessor.h @@ -63,7 +63,7 @@ class GPUProcessor } template - static inline size_t getAlignmentMod(size_t addr) + static constexpr inline size_t getAlignmentMod(size_t addr) { static_assert((alignment & (alignment - 1)) == 0, "Invalid alignment, not power of 2"); if (alignment <= 1) { @@ -72,7 +72,7 @@ class GPUProcessor return addr & 
(alignment - 1); } template - static inline size_t getAlignment(size_t addr) + static constexpr inline size_t getAlignment(size_t addr) { size_t mod = getAlignmentMod(addr); if (mod == 0) { @@ -81,10 +81,22 @@ class GPUProcessor return (alignment - mod); } template - static inline size_t nextMultipleOf(size_t size) + static constexpr inline size_t nextMultipleOf(size_t size) { return size + getAlignment(size); } + static constexpr inline size_t nextMultipleOf(size_t size, size_t alignment) + { + if (alignment & (alignment - 1)) { + size_t tmp = size % alignment; + if (tmp) { + size += alignment - tmp; + } + return size; + } else { + return (size + alignment - 1) & ~(alignment - 1); + } + } template static inline void* alignPointer(void* ptr) { diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h index dd4a5dcbe7ba8..78036e47fc49d 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersConstants.h @@ -18,8 +18,6 @@ #define GPUDEFPARAMETERSCONSTANTS_H // clang-format off -#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix! - #if defined(__CUDACC__) || defined(__HIPCC__) #define GPUCA_SPECIALIZE_THRUST_SORTS // Not compiled with RTC, so must be compile-time constant #endif diff --git a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h index cdc5efd56ddfd..7879789bf91c8 100644 --- a/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h +++ b/GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h @@ -22,7 +22,6 @@ // GPU Run Configuration #if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection. 
- #define GPUCA_LB_SCAN 512 // GPU-architecture-dependent default settings #if defined(GPUCA_GPUTYPE_MI2xx) #define GPUCA_WARP_SIZE 64 @@ -499,11 +498,11 @@ #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels #define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_LB_SCAN - #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_LB_SCAN + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE #define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER #define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER @@ -541,6 +540,9 @@ #ifndef GPUCA_PAR_COMP_GATHER_MODE #define GPUCA_PAR_COMP_GATHER_MODE 2 #endif + #ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 512 + #endif #endif // defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) #ifndef GPUCA_GPUCODE_GENRTC @@ -578,6 +580,9 @@ #ifndef GPUCA_PAR_NO_ATOMIC_PRECHECK #define GPUCA_PAR_NO_ATOMIC_PRECHECK 0 #endif + #ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE + #define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 0 + #endif #ifndef GPUCA_PAR_DEDX_STORAGE_TYPE #define 
GPUCA_PAR_DEDX_STORAGE_TYPE float #endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 630c2200e5900..f188388e76a02 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -23,6 +23,7 @@ #include "CfChargePos.h" #include "CfArray2D.h" #include "GPUGeneralKernels.h" +#include "GPUDefParametersRuntime.h" #include "GPUTPCCFStreamCompaction.h" #include "GPUTPCCFChargeMapFiller.h" #include "GPUTPCCFDecodeZS.h" @@ -402,27 +403,28 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust exit(1); } + int32_t scanWorkgroupSize = mRec->getGPUParameters(doGPU).par_CF_SCAN_WORKGROUP_SIZE; size_t tmpCount = count; if (nSteps > 1) { for (uint32_t i = 1; i < nSteps; i++) { counts.push_back(tmpCount); if (i == 1) { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, stage); + runKernel({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, stage); } else { - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, tmpCount); + runKernel({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, tmpCount); } - tmpCount = (tmpCount + clusterer.mScanWorkGroupSize - 1) / clusterer.mScanWorkGroupSize; + tmpCount = (tmpCount + scanWorkgroupSize - 1) / scanWorkgroupSize; } - runKernel({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, nSteps, tmpCount); + runKernel({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, nSteps, tmpCount); for (uint32_t i = nSteps - 1; i > 1; i--) { tmpCount = counts[i - 1]; - runKernel({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, clusterer.mScanWorkGroupSize, tmpCount); + runKernel({GetGrid(tmpCount - scanWorkgroupSize, scanWorkgroupSize, lane), {iSector}}, i, scanWorkgroupSize, tmpCount); } } - runKernel({GetGrid(count, 
clusterer.mScanWorkGroupSize, lane), {iSector}}, 1, stage, in, out); + runKernel({GetGrid(count, scanWorkgroupSize, lane), {iSector}}, 1, stage, in, out); } else { auto& nOut = stage ? clusterer.mPmemory->counters.nClusters : clusterer.mPmemory->counters.nPeaks; auto& nIn = stage ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx index 1da5a1158a8c2..d43e96b19c5d0 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx @@ -24,6 +24,7 @@ using namespace o2::gpu::tpccf; template <> GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage) { +#ifdef GPUCA_GPUCODE int32_t nElems = CompactionElems(clusterer, stage); const auto* predicate = clusterer.mPisPeak; @@ -35,17 +36,19 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread(smem, pred); + int32_t nElemsInBlock = CfUtils::blockPredicateSum(smem, pred); int32_t lastThread = nThreads - 1; if (iThread == lastThread) { scanOffset[iBlock] = nElemsInBlock; } +#endif } template <> GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems) { +#ifdef GPUCA_GPUCODE auto* scanOffset = clusterer.GetScanBuffer(iBuf - 1); auto* scanOffsetNext = clusterer.GetScanBuffer(iBuf); @@ -59,11 +62,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread if (iThread == lastThread) { scanOffsetNext[iBlock] = offsetInBlock; } +#endif } template <> GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t 
nElems) { +#ifdef GPUCA_GPUCODE int32_t iThreadGlobal = get_global_id(0); int32_t* scanOffset = clusterer.GetScanBuffer(iBuf - 1); @@ -74,11 +79,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& /*smem*/, processorType& clusterer, int32_t iBuf, uint32_t offset, int32_t nElems) { +#ifdef GPUCA_GPUCODE int32_t iThreadGlobal = get_global_id(0) + offset; int32_t* scanOffsetPrev = clusterer.GetScanBuffer(iBuf - 1); @@ -89,11 +96,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread GPUdii() void GPUTPCCFStreamCompaction::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, CfChargePos* in, CfChargePos* out) { +#ifdef GPUCA_GPUCODE uint32_t nElems = CompactionElems(clusterer, stage); SizeT bufferSize = (stage) ? clusterer.mNMaxClusters : clusterer.mNMaxPeaks; @@ -105,7 +114,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread= nElems); int32_t pred = (iAmDummy) ? 
0 : predicate[iThreadGlobal]; - int32_t offsetInBlock = CfUtils::blockPredicateScan(smem, pred); + int32_t offsetInBlock = CfUtils::blockPredicateScan(smem, pred); SizeT globalOffsetOut = offsetInBlock; if (iBlock > 0) { @@ -129,6 +138,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Threadcounters.nPeaks = nFinal; } } +#endif } GPUdii() int32_t GPUTPCCFStreamCompaction::CompactionElems(processorType& clusterer, int32_t stage) diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h index a72907fe55e89..a5ea8b24e9522 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h @@ -35,14 +35,14 @@ class GPUTPCCFStreamCompaction : public GPUKernelTemplate compactDigits = 4, }; - struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { - }; #if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown)); - static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits)); + struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64 { + }; + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart)); + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp)); + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop)); + 
static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown)); + static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits)); #endif typedef GPUTPCClusterFinder processorType; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx index 051391f12cc6d..541edaa689c6c 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx @@ -17,6 +17,7 @@ #include "GPUMemorySizeScalers.h" #include "GPUHostDataTypes.h" #include "GPUSettings.h" +#include "GPUDefParametersRuntime.h" #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/ZeroSuppression.h" @@ -90,9 +91,10 @@ void* GPUTPCClusterFinder::SetPointersScratch(void* mem) computePointerWithAlignment(mem, mPisPeak, mNMaxDigitsFragment); computePointerWithAlignment(mem, mPchargeMap, TPCMapMemoryLayout::items(mRec->GetProcessingSettings().overrideClusterizerFragmentLen)); computePointerWithAlignment(mem, mPpeakMap, TPCMapMemoryLayout::items(mRec->GetProcessingSettings().overrideClusterizerFragmentLen)); - computePointerWithAlignment(mem, mPbuf, mBufSize * mNBufs); computePointerWithAlignment(mem, mPclusterByRow, GPUCA_ROW_COUNT * mNMaxClusterPerRow); - + if ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding)) { + computePointerWithAlignment(mem, mPscanBuf, mBufSize * mNBufs); + } return mem; } @@ -129,14 +131,15 @@ void GPUTPCClusterFinder::SetMaxData(const GPUTrackingInOutPointers& io) if (mRec->GetProcessingSettings().tpcIncreasedMinClustersPerRow) { mNMaxClusterPerRow = std::max(mNMaxClusterPerRow, mRec->GetProcessingSettings().tpcIncreasedMinClustersPerRow); } - - mBufSize = nextMultipleOf(GPUCA_MEMALIGN, mScanWorkGroupSize)>(mNMaxDigitsFragment); - mNBufs = getNSteps(mBufSize); + if ((mRec->GetRecoStepsGPU() & 
GPUDataTypes::RecoStep::TPCClusterFinding)) { + mBufSize = nextMultipleOf(mNMaxDigitsFragment, std::max(GPUCA_MEMALIGN, mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE)); + mNBufs = getNSteps(mBufSize); + } } void GPUTPCClusterFinder::SetNMaxDigits(size_t nDigits, size_t nPages, size_t nDigitsFragment, size_t nDigitsEndpointMax) { - mNMaxDigits = nextMultipleOf(GPUCA_MEMALIGN, mScanWorkGroupSize)>(nDigits); + mNMaxDigits = nextMultipleOf(nDigits, std::max(GPUCA_MEMALIGN, mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE)); mNMaxPages = nPages; mNMaxDigitsFragment = nDigitsFragment; mNMaxDigitsEndpoint = nDigitsEndpointMax; @@ -148,9 +151,10 @@ uint32_t GPUTPCClusterFinder::getNSteps(size_t items) const return 0; } uint32_t c = 1; - size_t capacity = mScanWorkGroupSize; + const size_t scanWorkgroupSize = mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE; + size_t capacity = scanWorkgroupSize; while (items > capacity) { - capacity *= mScanWorkGroupSize; + capacity *= scanWorkgroupSize; c++; } return c; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 96efe08be6dc6..37399f5e4863f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -111,10 +111,10 @@ class GPUTPCClusterFinder : public GPUProcessor uint32_t* mPclusterInRow = nullptr; tpc::ClusterNative* mPclusterByRow = nullptr; GPUTPCClusterMCInterimArray* mPlabelsByRow = nullptr; - int32_t* mPbuf = nullptr; + int32_t* mPscanBuf = nullptr; Memory* mPmemory = nullptr; - GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPbuf + iBuf * mBufSize; } + GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPscanBuf + iBuf * mBufSize; } 
o2::dataformats::ConstMCTruthContainerView const* mPinputLabels = nullptr; uint32_t* mPlabelsInRow = nullptr; @@ -122,7 +122,6 @@ class GPUTPCClusterFinder : public GPUProcessor uint32_t mPlabelsDataGlobalOffset = 0; int32_t mISector = 0; - constexpr static int32_t mScanWorkGroupSize = GPUCA_THREAD_COUNT_SCAN; uint32_t mNMaxClusterPerRow = 0; uint32_t mNMaxClusters = 0; uint32_t mNMaxPages = 0; diff --git a/GPU/GPUTracking/kernels.cmake b/GPU/GPUTracking/kernels.cmake index 937a92fef33df..08d879fbb8e9a 100644 --- a/GPU/GPUTracking/kernels.cmake +++ b/GPU/GPUTracking/kernels.cmake @@ -146,7 +146,8 @@ o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP NO_ATOMIC_PRECHECK COMP_GATHER_KERNEL COMP_GATHER_MODE - SORT_STARTHITS) + SORT_STARTHITS + CF_SCAN_WORKGROUP_SIZE) o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE MERGER_INTERPOLATION_ERROR_TYPE) From f1e0a1298ca6a1ba2bea97e4e8b403487dbfa13f Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Fri, 25 Apr 2025 10:14:07 +0200 Subject: [PATCH 0454/1914] Update CODEOWNERS for ALICE3 (#14221) @njacazio --- CODEOWNERS | 1 + 1 file changed, 1 insertion(+) diff --git a/CODEOWNERS b/CODEOWNERS index a22b122d0e6cd..5337622522bbb 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -73,6 +73,7 @@ /Detectors/TPC @davidrohr @wiechula @shahor02 /Detectors/TRD @f3sch @bazinski @wille10 /Detectors/Upgrades @mconcas +/Detectors/Upgrades/ALICE3 @mconcas @njacazio /Detectors/Upgrades/ITS3 @fgrosa @arossi81 @mconcas @f3sch /Detectors/ZDC @coppedis /Detectors/CTF @shahor02 From fc3ace17eca580c338751163ef4528e3ec47f9d6 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Fri, 25 Apr 2025 10:14:46 +0200 Subject: [PATCH 0455/1914] ALICE3-TRK: Add skeleton for the TRK reconstruction WF (#14222) --------- Co-authored-by: ALICE Builder --- .../ALICE3/TRK/workflow/CMakeLists.txt | 22 ++-- .../include/TRKWorkflow/RecoWorkflow.h | 33 +++++ .../include/TRKWorkflow/TrackerSpec.h | 56 +++++++++ .../ALICE3/TRK/workflow/src/RecoWorkflow.cxx | 31 +++++ 
.../ALICE3/TRK/workflow/src/TrackerSpec.cxx | 116 ++++++++++++++++++ .../TRK/workflow/src/trk-reco-workflow.cxx | 80 ++++++++++++ 6 files changed, 326 insertions(+), 12 deletions(-) create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx create mode 100644 Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt index c9f4099017717..e86ed7982c85b 100644 --- a/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/CMakeLists.txt @@ -10,23 +10,21 @@ # or submit itself to any jurisdiction. o2_add_library(TRKWorkflow + TARGETVARNAME targetName SOURCES src/DigitReaderSpec.cxx src/DigitWriterSpec.cxx - # src/RecoWorkflow.cxx - # src/ClusterWriterWorkflow.cxx - # src/ClustererSpec.cxx - # src/ClusterWriterSpec.cxx - # src/TrackerSpec.cxx - # src/TrackWriterSpec.cxx - # src/TrackReaderSpec.cxx - # src/VertexReaderSpec.cxx + src/TrackerSpec.cxx + src/RecoWorkflow.cxx PUBLIC_LINK_LIBRARIES O2::Framework + O2::GPUWorkflow O2::SimConfig O2::DataFormatsITSMFT O2::SimulationDataFormat O2::DPLUtils) -# o2_add_executable(reco-workflow -# SOURCES src/trk-reco-workflow.cxx -# COMPONENT_NAME alice3-trk -# PUBLIC_LINK_LIBRARIES O2::TRKWorkflow) \ No newline at end of file +o2_add_executable(reco-workflow + SOURCES src/trk-reco-workflow.cxx + COMPONENT_NAME alice3-trk + PUBLIC_LINK_LIBRARIES O2::TRKWorkflow + O2::TRKSimulation + O2::ITStracking) \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h 
new file mode 100644 index 0000000000000..0c2489aa4b9c4 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/RecoWorkflow.h @@ -0,0 +1,33 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef O2_TRK_RECOWORKFLOW_H +#define O2_TRK_RECOWORKFLOW_H + +#include "Framework/WorkflowSpec.h" +#include "GPUDataTypes.h" + +namespace o2::trk +{ +namespace reco_workflow +{ + +o2::framework::WorkflowSpec getWorkflow(bool useMC, + bool upstreamDigits = false, + bool upstreamClusters = false, + bool disableRootOutput = false, + bool useGPUWF = false, + o2::gpu::GPUDataTypes::DeviceType dType = o2::gpu::GPUDataTypes::DeviceType::CPU); +} + +} // namespace o2::trk + +#endif diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h new file mode 100644 index 0000000000000..3c82a4fd7b89d --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/include/TRKWorkflow/TrackerSpec.h @@ -0,0 +1,56 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// @file TrackerSpec.h + +#ifndef O2_TRK_TRACKERDPL +#define O2_TRK_TRACKERDPL + +#include "DataFormatsITSMFT/TopologyDictionary.h" + +#include "Framework/DataProcessorSpec.h" +#include "Framework/Task.h" + +#include "ITStracking/TrackingInterface.h" +#include "GPUDataTypes.h" + +#include "DetectorsBase/GRPGeomHelper.h" + +#include "TStopwatch.h" + +namespace o2::trk +{ +class TrackerDPL : public framework::Task +{ + public: + TrackerDPL(std::shared_ptr gr, + bool isMC, + gpu::GPUDataTypes::DeviceType dType = gpu::GPUDataTypes::DeviceType::CPU); + ~TrackerDPL() override = default; + void init(framework::InitContext& ic) final; + void run(framework::ProcessingContext& pc) final; + void endOfStream(framework::EndOfStreamContext& ec) final; + // void finaliseCCDB(framework::ConcreteDataMatcher& matcher, void* obj) final; + void stop() final; + + private: + void updateTimeDependentParams(framework::ProcessingContext& pc); + // std::unique_ptr mRecChain = nullptr; + // std::unique_ptr mChainITS = nullptr; + // std::shared_ptr mGGCCDBRequest; + // ITSTrackingInterface mITSTrackingInterface; + TStopwatch mTimer; +}; + +framework::DataProcessorSpec getTrackerSpec(bool useMC, gpu::GPUDataTypes::DeviceType dType = gpu::GPUDataTypes::DeviceType::CPU); + +} // namespace o2::trk +#endif /* O2_TRK_TRACKERDPL */ \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx new file mode 100644 index 0000000000000..3b2b44729b259 --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/RecoWorkflow.cxx @@ -0,0 +1,31 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#include "TRKWorkflow/RecoWorkflow.h" +#include "TRKWorkflow/TrackerSpec.h" +#include "Framework/CCDBParamSpec.h" + +namespace o2::trk::reco_workflow +{ + +framework::WorkflowSpec getWorkflow(bool useMC, + bool upstreamDigits, + bool upstreamClusters, + bool disableRootOutput, + bool useGPUWF, + o2::gpu::GPUDataTypes::DeviceType dtype) +{ + framework::WorkflowSpec specs; + specs.emplace_back(o2::trk::getTrackerSpec(useMC, dtype)); + return specs; +} + +} // namespace o2::trk::reco_workflow \ No newline at end of file diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx new file mode 100644 index 0000000000000..4057bab3b948f --- /dev/null +++ b/Detectors/Upgrades/ALICE3/TRK/workflow/src/TrackerSpec.cxx @@ -0,0 +1,116 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include + +#include "Framework/ControlService.h" +#include "Framework/ConfigParamRegistry.h" +#include "Framework/CCDBParamSpec.h" +#include "TRKWorkflow/TrackerSpec.h" + +namespace o2 +{ +using namespace framework; +namespace trk +{ +using Vertex = o2::dataformats::Vertex>; + +TrackerDPL::TrackerDPL(std::shared_ptr gr, + bool isMC, + o2::gpu::GPUDataTypes::DeviceType dType) +{ + // mITSTrackingInterface.setTrackingMode(trMode); +} + +void TrackerDPL::init(InitContext& ic) +{ + // mTimer.Stop(); + // mTimer.Reset(); + // o2::base::GRPGeomHelper::instance().setRequest(mGGCCDBRequest); + // mChainITS.reset(mRecChain->AddChain()); + // mITSTrackingInterface.setTraitsFromProvider(mChainITS->GetITSVertexerTraits(), + // mChainITS->GetITSTrackerTraits(), + // mChainITS->GetITSTimeframe()); +} + +void TrackerDPL::stop() +{ + LOGF(info, "CPU Reconstruction total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); +} + +void TrackerDPL::run(ProcessingContext& pc) +{ + auto cput = mTimer.CpuTime(); + auto realt = mTimer.RealTime(); + mTimer.Start(false); + // mITSTrackingInterface.updateTimeDependentParams(pc); + // mITSTrackingInterface.run(pc); + mTimer.Stop(); + LOGP(info, "CPU Reconstruction time for this TF {} s (cpu), {} s (wall)", mTimer.CpuTime() - cput, mTimer.RealTime() - realt); +} + +// void TrackerDPL::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) +// { +// // mITSTrackingInterface.finaliseCCDB(matcher, obj); +// } + +void TrackerDPL::endOfStream(EndOfStreamContext& ec) +{ + LOGF(info, "TRK CA-Tracker total timing: Cpu: %.3e Real: %.3e s in %d slots", mTimer.CpuTime(), mTimer.RealTime(), mTimer.Counter() - 1); +} + +DataProcessorSpec getTrackerSpec(bool useMC, o2::gpu::GPUDataTypes::DeviceType dType) +{ + std::vector inputs; + + // inputs.emplace_back("compClusters", "TRK", "COMPCLUSTERS", 0, Lifetime::Timeframe); + // inputs.emplace_back("patterns", "TRK", "PATTERNS", 0, Lifetime::Timeframe); + // 
inputs.emplace_back("ROframes", "TRK", "CLUSTERSROF", 0, Lifetime::Timeframe); + + // inputs.emplace_back("itscldict", "TRK", "CLUSDICT", 0, Lifetime::Condition, ccdbParamSpec("ITS/Calib/ClusterDictionary")); + // inputs.emplace_back("itsalppar", "TRK", "ALPIDEPARAM", 0, Lifetime::Condition, ccdbParamSpec("ITS/Config/AlpideParam")); + auto ggRequest = std::make_shared(false, // orbitResetTime + false, // GRPECS=true + false, // GRPLHCIF + false, // GRPMagField + false, // askMatLUT + o2::base::GRPGeomRequest::None, // geometry, but ignored until it will be put in the CCDB + inputs, + true); + std::vector outputs; + outputs.emplace_back("TRK", "TRACKS", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRACKCLSID", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRKTrackROF", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICES", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICESROF", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "IRFRAMES", 0, Lifetime::Timeframe); + + if (useMC) { + // inputs.emplace_back("trkmclabels", "TRK", "CLUSTERSMCTR", 0, Lifetime::Timeframe); + // inputs.emplace_back("TRKMC2ROframes", "TRK", "CLUSTERSMC2ROF", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICESMCTR", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "VERTICESMCPUR", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRACKSMCTR", 0, Lifetime::Timeframe); + // outputs.emplace_back("TRK", "TRKTrackMC2ROF", 0, Lifetime::Timeframe); + } + + return DataProcessorSpec{ + "trk-tracker", + inputs, + outputs, + AlgorithmSpec{adaptFromTask(ggRequest, + useMC, + dType)}, + Options{}}; +} + +} // namespace trk +} // namespace o2 diff --git a/Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx b/Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx new file mode 100644 index 0000000000000..0f75d42710400 --- /dev/null +++ 
b/Detectors/Upgrades/ALICE3/TRK/workflow/src/trk-reco-workflow.cxx @@ -0,0 +1,80 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#include "TRKWorkflow/RecoWorkflow.h" +#include "CommonUtils/ConfigurableParam.h" +#include "ITStracking/TrackingConfigParam.h" +#include "ITStracking/Configuration.h" + +#include "Framework/CallbacksPolicy.h" +#include "Framework/ConfigContext.h" +#include "Framework/CompletionPolicyHelpers.h" + +#include + +using namespace o2::framework; + +void customize(std::vector& policies) +{ + // o2::raw::HBFUtilsInitializer::addNewTimeSliceCallback(policies); +} + +void customize(std::vector& policies) +{ + // ordered policies for the writers + policies.push_back(CompletionPolicyHelpers::consumeWhenAllOrdered(".*(?:TRK|trk).*[W,w]riter.*")); +} + +void customize(std::vector& workflowOptions) +{ + // option allowing to set parameters + std::vector options{ + {"digits-from-upstream", VariantType::Bool, false, {"digits will be provided from upstream, skip digits reader"}}, + {"clusters-from-upstream", VariantType::Bool, false, {"clusters will be provided from upstream, skip clusterizer"}}, + {"disable-root-output", VariantType::Bool, false, {"do not write output root files"}}, + {"disable-mc", VariantType::Bool, false, {"disable MC propagation even if available"}}, + {"disable-tracking", VariantType::Bool, false, {"disable tracking step"}}, + {"configKeyValues", VariantType::String, "", {"Semicolon separated key=value strings"}}, + {"use-gpu-workflow", VariantType::Bool, false, {"use GPU workflow (default: false)"}}, + {"gpu-device", VariantType::Int, 1, {"use gpu device: CPU=1,CUDA=2,HIP=3 (default: CPU)"}}}; + std::swap(workflowOptions, options); +} + +#include "Framework/runDataProcessing.h" +#include "Framework/Logger.h" + +WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) +{ + // Update the (declared) parameters if changed from the command line + auto useMC = !configcontext.options().get("disable-mc"); + auto useGpuWF = configcontext.options().get("use-gpu-workflow"); + auto gpuDevice = static_cast(configcontext.options().get("gpu-device")); + auto 
extDigits = configcontext.options().get("digits-from-upstream"); + auto extClusters = configcontext.options().get("clusters-from-upstream"); + auto disableRootOutput = configcontext.options().get("disable-root-output"); + o2::conf::ConfigurableParam::updateFromString(configcontext.options().get("configKeyValues")); + + // write the configuration used for the reco workflow + o2::conf::ConfigurableParam::writeINI("o2itsrecoflow_configuration.ini"); + + return o2::trk::reco_workflow::getWorkflow(useMC, extDigits, extClusters, disableRootOutput, useGpuWF, gpuDevice); +} From ec8cf07940d0e6faedce2cb9815bd2043a5a7028 Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Tue, 22 Apr 2025 16:25:33 +0200 Subject: [PATCH 0456/1914] Fix energy values for special runs --- Generators/share/egconfig/pythia8_NeNe.cfg | 8 ++++---- Generators/share/egconfig/pythia8_OO.cfg | 2 +- Generators/share/egconfig/pythia8_pO.cfg | 8 +++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Generators/share/egconfig/pythia8_NeNe.cfg b/Generators/share/egconfig/pythia8_NeNe.cfg index fff1dbb5f3d59..75a77236f5b4e 100644 --- a/Generators/share/egconfig/pythia8_NeNe.cfg +++ b/Generators/share/egconfig/pythia8_NeNe.cfg @@ -1,8 +1,8 @@ ### beams Beams:idA 1000100200 # Neon -Beams:idB 1000100200 # Neon -Beams:eCM 10720. # GeV +Beams:idB 1000100200 # Neon +Beams:eCM 5360. # GeV ### decays -ParticleDecays:limitTau0 on -ParticleDecays:tau0Max 10. +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. diff --git a/Generators/share/egconfig/pythia8_OO.cfg b/Generators/share/egconfig/pythia8_OO.cfg index ff098e6b65135..9a4419309a800 100644 --- a/Generators/share/egconfig/pythia8_OO.cfg +++ b/Generators/share/egconfig/pythia8_OO.cfg @@ -1,7 +1,7 @@ ### beams Beams:idA 1000080160 # Oxygen Beams:idB 1000080160 # Oxygen -Beams:eCM 10720. # GeV +Beams:eCM 5360. 
# GeV ### decays ParticleDecays:limitTau0 on diff --git a/Generators/share/egconfig/pythia8_pO.cfg b/Generators/share/egconfig/pythia8_pO.cfg index aff9d3337cd9d..64efc6954b363 100644 --- a/Generators/share/egconfig/pythia8_pO.cfg +++ b/Generators/share/egconfig/pythia8_pO.cfg @@ -1,8 +1,10 @@ ### beams +Beams:frameType 2 # back-to-back beams of different energies and particles Beams:idA 2212 # proton Beams:idB 1000080160 # Oxygen -Beams:eCM 13600. # GeV +Beams:eA 6800. # Energy of proton beam in GeV moving in the +z direction +Beams:eB 3400. # Energy in GeV per Oxygen nucleon (6.8 Z TeV) moving in the -z direction ### decays -ParticleDecays:limitTau0 on -ParticleDecays:tau0Max 10. +ParticleDecays:limitTau0 on +ParticleDecays:tau0Max 10. \ No newline at end of file From 2a11afc3af82fde89b936c0bb86648326e34a08a Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Apr 2025 10:17:29 +0200 Subject: [PATCH 0457/1914] Improve / Add GPU documentation --- GPU/documentation/README.md | 13 ++++++++ GPU/documentation/build-O2.md | 24 +++++++------- GPU/documentation/build-standalone.md | 4 +-- GPU/documentation/deterministic-mode.md | 31 +++++++++++++++++++ GPU/documentation/run-time-compilation.md | 21 +++++++++++++ .../full-system-test/documentation/README.md | 17 ++++++++++ .../documentation/env-variables.md | 4 +-- .../full-system-test-as-stress-test.md | 2 +- .../documentation/full-system-test-setup.md | 4 +-- ...ata-simulation.md => raw-tf-conversion.md} | 0 10 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 GPU/documentation/deterministic-mode.md create mode 100644 GPU/documentation/run-time-compilation.md create mode 100644 prodtests/full-system-test/documentation/README.md rename prodtests/full-system-test/documentation/{raw-data-simulation.md => raw-tf-conversion.md} (100%) diff --git a/GPU/documentation/README.md b/GPU/documentation/README.md index e69de29bb2d1d..de888ab6e2436 100644 --- a/GPU/documentation/README.md +++ 
b/GPU/documentation/README.md @@ -0,0 +1,13 @@ +[build-O2.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-O2.md) : +- Instructions how to build O2 with GPU support. +- Description of the CMake variables used. + +[build-standalone.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-standalone.md) : +- Instructions how to build and run the standalone benchmark. +- Instructions how to extract data sets for the standalone benchmark from real data or using simulation. + +[deterministic-mode.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/deterministic-mode.md) : +- Instructions how to use the deterministic mode for both the standalone benchmark and O2. + +[run-time-compilation.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/run-time-compilation.md) : +- Instructions how to use run time compilation (RTC) for the GPU code. diff --git a/GPU/documentation/build-O2.md b/GPU/documentation/build-O2.md index 809d1fe0d5439..098629f45a832 100644 --- a/GPU/documentation/build-O2.md +++ b/GPU/documentation/build-O2.md @@ -12,17 +12,17 @@ If you just want to reproduce the GPU build locally without running it, it might The provisioning script of the container also demonstrates which patches need to be applied such that everything works correctly. *GPU Tracking with CUDA* - * The CMake option -DENABLE_CUDA=ON/OFF/AUTO steers whether CUDA is forced enabled / unconditionally disabled / auto-detected. - * The CMake option -DCUDA_COMPUTETARGET= fixes a GPU target, e.g. 61 for PASCAL or 75 for Turing (if unset, it compiles for the lowest supported architecture) + * The CMake option `-DENABLE_CUDA=ON/OFF/AUTO` steers whether CUDA is forced enabled / unconditionally disabled / auto-detected. + * The CMake option `-DCUDA_COMPUTETARGET=...` fixes a GPU target, e.g. 
61 for PASCAL or 75 for Turing (if unset, it compiles for the lowest supported architecture) * CUDA is detected via the CMake language feature, so essentially nvcc must be in the Path. - * We require CUDA version >= 11.2 + * We require CUDA version >= 12.8 * CMake will report "Building GPUTracking with CUDA support" when enabled. *GPU Tracking with HIP* * HIP and HCC must be installed, and CMake must be able to detect HIP via find_package(hip). - * If HIP and HCC are not installed to /opt/rocm, the environment variables $HIP_PATH and $HCC_HOME must point to the installation directories. + * If HIP and HCC are not installed to /opt/rocm, the environment variables `$HIP_PATH` and `$HCC_HOME` must point to the installation directories. * HIP from ROCm >= 4.0 is required. - * The CMake option -DHIP_AMDGPUTARGET= forces a GPU target, e.g. gfx906 for Radeon VII (if unset, it auto-detects the GPU). + * The CMake option `-DHIP_AMDGPUTARGET=...` forces a GPU target, e.g. gfx906 for Radeon VII (if unset, it auto-detects the GPU). * CMake will report "Building GPUTracking with HIP support" when enabled. * It may be that some patches must be applied to ROCm after the installation. You find the details in the provisioning script of the GPU CI container below. @@ -49,14 +49,14 @@ The provisioning script of the container also demonstrates which patches need to * The docker images is `alisw/slc8-gpu-builder`. * The container exports the `ALIBUILD_O2_FORCE_GPU` env variable, which force-enables all GPU builds. * Note that it might not be possible out-of-the-box to run the GPU version from within the container. In case of HIP it should work when you forwards the necessary GPU devices in the container. For CUDA however, you would either need to (in addition to device forwarding) match the system CUDA driver and toolkit installation to the files present in the container, or you need to use the CUDA docker runtime, which is currently not installed in the container. 
- * There are currently some patches needed to install all the GPU backends in a proper way and together. Please refer to the container provisioning script https://github.com/alisw/docks/blob/master/slc9-gpu-builder/provision.sh. If you want to reproduce the installation locally, it is recommended to follow the steps from the script. + * There are currently some patches needed to install all the GPU backends in a proper way and together. Please refer to the container provisioning script [provision.sh](https://github.com/alisw/docks/blob/master/slc9-gpu-builder/provision.sh). If you want to reproduce the installation locally, it is recommended to follow the steps from the script. *Summary* If you want to enforce the GPU builds on a system without GPU, please set the following CMake settings: - * ENABLE_CUDA=ON - * ENABLE_HIP=ON - * ENABLE_OPENCL=ON - * HIP_AMDGPUTARGET=gfx906;gfx908 - * CUDA_COMPUTETARGET=86 89 -Alternatively you can set the environment variables ALIBUILD_ENABLE_CUDA and ALIBUILD_ENABLE_HIP to enforce building CUDA or HIP without modifying the alidist scripts. + * `ENABLE_CUDA=ON` + * `ENABLE_HIP=ON` + * `ENABLE_OPENCL=ON` + * `HIP_AMDGPUTARGET=default` + * `CUDA_COMPUTETARGET=default` +Alternatively you can set the environment variables `ALIBUILD_ENABLE_CUDA=1` and `ALIBUILD_ENABLE_HIP=1` to enforce building CUDA or HIP without modifying the alidist scripts. diff --git a/GPU/documentation/build-standalone.md b/GPU/documentation/build-standalone.md index d4e9da5cd5bf3..891d16b4dc2c4 100644 --- a/GPU/documentation/build-standalone.md +++ b/GPU/documentation/build-standalone.md @@ -30,7 +30,7 @@ nano config.cmake # edit config file to enable / disable dependencies as needed. make install -j32 ``` -You can edit certain build settings in `config.cmake`. Some of them are identical to the GPU build settings for O2, as described in O2-786. +You can edit certain build settings in `config.cmake`.
Some of them are identical to the GPU build settings for O2, as described in [build-O2.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-O2.md). And there are plenty of additional settings to enable/disable event display, qa, usage of ROOT, FMT, etc. libraries. This will create the `ca` binary in `~/standalone`, which is basically the same as the `o2-gpu-standalone-benchmark`, but built outside of O2. @@ -68,7 +68,7 @@ This will dump the event data to the local folder, all dumped files have a `.dum Data can be dumped from raw data, or from MC data, e.g. generated by the Full System Test. In case of MC data, also MC labels are dumped, such that they are used in the `./ca --qa` mode. -To get a dump from simulated data, please run e.g. the FST simulation as described in O2-2633. +To get a dump from simulated data, please run e.g. the FST simulation as described in [full-system-test-setup.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md). A simple run as ``` DISABLE_PROCESSING=1 NEvents=5 NEventsQED=100 SHMSIZE=16000000000 $O2_ROOT/prodtests/full_system_test.sh diff --git a/GPU/documentation/deterministic-mode.md b/GPU/documentation/deterministic-mode.md new file mode 100644 index 0000000000000..9c8db2930ceaa --- /dev/null +++ b/GPU/documentation/deterministic-mode.md @@ -0,0 +1,31 @@ +The TPC tracking code is not fully deterministic, i.e. running multiple times on the same data set might yield a slightly different number of tracks on the O(per mille) level. +- This comes from concurrency, i.e. when tracks are processed in parallel, the output order might change, which might have small effects on the consecutive steps. +- Also compile options and optimizations play a role, e.g. using ffast-math or fused-multiply-add might slightly change the rounding of floating point, and in rare cases lead to the acceptance or rejection of a track, and thus a different number of tracks.
+ +For debugging, testing, and validation, a deterministic mode is implemented, which should yield 100% reproducible results, on CPU and on GPU and when running multiple times. +It uses a combination of +- Compile time options, e.g. disabling all optimizations that change floating point rounding. +- Run time options, e.g. to use deterministic sorting, and add additional sorting steps after kernels to make the output deterministic, also intermediate outputs. + +This is steered by 3 options: +- The `-DGPUCA_DETERMINISTIC_MODE` Cmake setting : Compile-time setting. +- The `--PROCdeterministicGPUReconstruction` command line option / `GPU_proc.deterministicGPUReconstruction` `--configKeyValue` setting : Run time setting. +- The `--RTCdeterministic` command line option / `GPU_proc_rtc.deterministic` `--configKeyValue` setting. (Auto-enabled by the `deterministicGPUReconstruction` setting.) : Compile-time setting for RTC code. + +In order to be fully deterministic, all settings must be enabled, where the RTC setting is automatically enabled if not explicitly disabled. + +`GPUCA_DETERMINISTIC_MODE` has multiple levels, which are described here: [FindO2GPU.cmake](https://github.com/AliceO2Group/AliceO2/blob/80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9/dependencies/FindO2GPU.cmake#L72). +- In order to have fully deterministic GPUReconstruction (i.e. all algorithms that come with the GPUTracking library, like TPC tracking), the level `GPUCA_DETERMINISTIC_MODE=GPU` is needed. +- In order to apply it to all of O2, e.g. for ITS tracking, please use `GPUCA_DETERMINISTIC_MODE=WHOLEO2` + +Enabling the options is a bit different for O2 and for the standalone benchmark: +- For enabling it in the standalone benchmark, please set GPUCA_DETERMINISTIC_MODE=GPU in [config.cmake](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/GPUTracking/Standalone/cmake/config.cmake) and use the command line argument `--PROCdeterministicGPUReconstruction 1`. 
+- For O2, either add `set(GPUCA_DETERMINISTIC_MODE GPU)` to the beginning of the [GPU CMakeLists.txt](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/CMakeLists.txt) or add `set(GPUCA_DETERMINISTIC_MODE WHOLEO2)` to the beginning of the [Global CMakeLists.txt](https://github.com/AliceO2Group/AliceO2/blob/dev/CMakeLists.txt), and use the `configKeyValue` `GPU_proc.deterministicGPUReconstruction`. In order to enable this for the Full-System-Test or with [dpl-workflow.sh](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/dpl-workflow.sh), please export `CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow=GPU_proc.deterministicGPUReconstruction=1;`. + +With these settings, if one runs multiple times, the number of clusters and number of tracks should be always fully identical. +Note that this yields a significant performance penalty during the processing, therefore the deterministic mode is not compiled in by default, but it must be enabled explicitly and code must be recompiled. + +Beyond comparing only the number of clusters and number of tracks, it is also possible to compare intermediate results. To do so, please use the standalone benchmark (either `./ca` or `o2-gpu-standalone-benchmark` binary) with the `--debug 6` option. +It will create a dump containing all (most) intermediate results in text form, which can be compared. The output file is called `CPU.out` if using the CPU backend, and `GPU.out` for the GPU backend. +Note that the dump files will be huge and the processing will be slow and consume much more memory than normal with `--debug 6`. It has been tested with datasets containing up to 50 Pb-Pb collisions, and might fail for larger data. +If the deterministic mode is used with both compile- and runtime-activation, the dump files should be 100% identical and can just be compared with `diff`.
diff --git a/GPU/documentation/run-time-compilation.md b/GPU/documentation/run-time-compilation.md new file mode 100644 index 0000000000000..accfceb47b870 --- /dev/null +++ b/GPU/documentation/run-time-compilation.md @@ -0,0 +1,21 @@ +Run time compilation is a feature of the GPUReconstruction library, which can recompile the GPU code for HIP and for CUDA at runtime, and apply some optimizations and changes. It is planned to add support for CPU code and OpenCL code in the future. + +The changes that can be applied are: +- `constexpr` optimization: configuration values that are constant during the processing are replaced by `constexpr` expressions, which allows the compiler to optimize the code better. Benchmarks in 2024 have shown 5% performance improvement with CUDA and 2% improvement with HIP. +- Disabling of unused code, in particular this is currently used to remove the TPC code for V/M shape correction during online processing, simplifying the code, and yielding better compiler optimization, for a 20%-30% speedup on the MI50 GPUs. +- Use different GPU constant parameters / launch bounds: These are tuning parameters, which are architecture-dependent. The default values are taken from the first architecture the GPU code is compiled for in the normal compilation phase. If the architecture we are running on is different, different parameters can be loaded for RTC. +- Compiling for different target architectures. This allows us to enable running on hardware, for which the code was not compiled in the original compilation. + +Generally, RTC is enabled via the `--RTCenable` flag for the standalone benchmark, or via the `GPU_proc_rtc.enable=1` `configKeyValue` for O2. +For a list of RTC options, please see [GPUSettingsList.h](https://github.com/AliceO2Group/AliceO2/blob/80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9/GPU/GPUTracking/Definitions/GPUSettingsList.h#L215).
+ +Caching the output: +- The RTC output can be cached and reused, so that when running multiple times, compilation is not repeated. This is enabled via the `--RTCcacheOutput` setting. The folder to store the cache files can be selected via `--RTCTECHcacheFolder` and with `--RTCTECHcacheMutex` (default: enabled), a file-lock mutex can be used to synchronize access to the cache folder. The cached code is checked against the to-be-compiled source code with SHA1 hashes, and only if the code has not changed is the cache used, otherwise the code is recompiled and the cache updated. It is possible to force using outdated cache files via the `--RTCTECHignoreCacheValid` option. + +For changing the launch bounds and other parameters, please consider `--RTCTECHloadLaunchBoundsFromFile` (and `--RTCTECHprintLaunchBounds`), which can load a parameter set which can be created via [dumpGPUDefParam.C](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C). A set of default parameters is stored in `[INSTALL_FOLDER]/share/GPU`. + +It is possible to select a different target architecture for the compilation via `--RTCTECHoverrideArchitecture`, and the compilation can be prepended by a command with `--RTCTECHprependCommand`, e.g. for CPU pinning. See for example [dpl-workflow.sh](https://github.com/AliceO2Group/AliceO2/blob/80a80a17f5a1d9cb77743e2a39b15b653fe1a4f9/prodtests/full-system-test/dpl-workflow.sh#L335). + +`--RTCdeterministic` enables the [Deterministic Mode](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/deterministic-mode.md) (compile-time setting) for RTC. Usually you don't need to bother, as for the deterministic mode it is autoenabled from `--PROCdeterministicGPUReconstruction`, but the explicit `--RTCdeterministic` is available for tests. + +Finally, `--RTCoptConstexpr` and `--RTCoptSpecialCode` enable the constexpr and code removal optimizations.
For an example how the TPC V/M shape corrections are removed, see [TPCFastTransform.h](https://github.com/AliceO2Group/AliceO2/blob/fc3ace17eca580c338751163ef4528e3ec47f9d6/GPU/TPCFastTransformation/TPCFastTransform.h#L445). diff --git a/prodtests/full-system-test/documentation/README.md b/prodtests/full-system-test/documentation/README.md new file mode 100644 index 0000000000000..1fdef1da36ecd --- /dev/null +++ b/prodtests/full-system-test/documentation/README.md @@ -0,0 +1,17 @@ +[full-system-test.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test.md) : +- Full system test quick start guide + +[full-system-test-setup.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-setup.md) : +- More detailed description of full-system-test scripts, simulation of data set, and script to run the workflow + +[full-system-test-as-stress-test.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md) : +- Details on how to use the full system test as stress test and for validation of an EPN online compute node + +[dpl-workflow-options.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/dpl-workflow-options.md) : +- Description of the main workflow script [dpl-workflow.sh](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/dpl-workflow.sh) and its options. 
+ +[env-variables.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/env-variables.md) : +- List of common environment variables used by the workflow scripts (defaults set by https://github.com/davidrohr/O2DPG/blob/master/DATA/common/setenv.sh) + +[raw-tf-conversion.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-tf-conversion.md) : +- This is automated in a script now, but just in case details how readout files are converted to a .tf file for usage in the full system test with replay from DataDistribution. diff --git a/prodtests/full-system-test/documentation/env-variables.md b/prodtests/full-system-test/documentation/env-variables.md index b93622c0a0f94..5a13f2ee9e19d 100644 --- a/prodtests/full-system-test/documentation/env-variables.md +++ b/prodtests/full-system-test/documentation/env-variables.md @@ -1,4 +1,4 @@ -The `setenv-sh` script sets the following environment options +The [setenv-sh](https://github.com/davidrohr/O2DPG/blob/master/DATA/common/setenv.sh) script sets the following environment options * `NTIMEFRAMES`: Number of time frames to process. * `TFDELAY`: Delay in seconds between publishing time frames (1 / rate). * `NGPUS`: Number of GPUs to use, data distributed round-robin. @@ -25,7 +25,7 @@ The `setenv-sh` script sets the following environment options * `EXTINPUT`: Receive input from raw FMQ channel instead of running o2-raw-file-reader. * 0: `dpl-workflow.sh` can run as standalone benchmark, and will read the input itself. * 1: To be used in combination with either `datadistribution.sh` or `raw-reader.sh` or with another DataDistribution instance. -* `CTFINPUT`: Read input from CTF ROOT file. This option is incompatible to EXTINPUT=1. The CTF ROOT file can be stored via SAVECTF=1. +* `CTFINPUT`: Read input from CTF ROOT file. This option is incompatible to `EXTINPUT=1`. The CTF ROOT file can be stored via `SAVECTF=1`. 
* `NHBPERTF`: Time frame length (in HBF) * `GLOBALDPLOPT`: Global DPL workflow options appended to o2-dpl-run. * `EPNPIPELINES`: Set default EPN pipeline multiplicities. diff --git a/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md index 0c4637ece0920..c78d81b236c1c 100644 --- a/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md +++ b/prodtests/full-system-test/documentation/full-system-test-as-stress-test.md @@ -7,7 +7,7 @@ This is a quick summary how to run the full system test (FST) as stress test on - Enter the O2PDPSuite environment either vie `alienv enter O2PDPSuite/latest Readout/latest`. - Go to an empty directory. - Run the FST simulation via: `NEvents=650 NEventsQED=10000 SHMSIZE=128000000000 TPCTRACKERSCRATCHMEMORY=40000000000 SPLITTRDDIGI=0 GENERATE_ITSMFT_DICTIONARIES=1 $O2_ROOT/prodtests/full_system_test.sh` - - Get a current matbud.root (e.g. from here https://alice.its.cern.ch/jira/browse/O2-2288) and place it in that folder. + - Material budget table (e.g. from here https://alice.its.cern.ch/jira/browse/O2-2288) now comes from CCDB, no need any more to pull it manually. - Create a timeframe file from the raw files: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. 
- Prepare the ramdisk folder: `mv raw/timeframe raw/timeframe-org; mkdir raw/timeframe-tmpfs; ln -s timeframe-tmpfs raw/timeframe` diff --git a/prodtests/full-system-test/documentation/full-system-test-setup.md b/prodtests/full-system-test/documentation/full-system-test-setup.md index 82ef9b7d0c74f..e90a3984dd3da 100644 --- a/prodtests/full-system-test/documentation/full-system-test-setup.md +++ b/prodtests/full-system-test/documentation/full-system-test-setup.md @@ -16,7 +16,7 @@ If you just want to test a small dataset, you can skip the following steps, and - I'd suggest to do a first small test with 1-5 events to check the machinery, 100 events is already a good size which should not exhaust the memory, I'd go to 600 only after 100 works. 1. Compile O2 with GPU support, in addition you need O2sim, DataDistribution, and Readout (latest versions from alidist will do). GPUs for O2 should be auto-detected, but you can set the environment variables ALIBUILD_ENABLE_CUDA / ALIBUILD_ENABLE_HIP to enforce it (and get a failure when detection fails). Look for CMake log messages "Building GPUTracking with CUDA support" (etc) to verify. - For more information, see https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build.md + For more information, see https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/build-O2.md 1. Optionally place some binary configuration files in the simulation folder. Default objects will be used if no such files are placed. There are instructions at the end of this post how to generate these files. (Currently, these files are: matbud.root, ITSdictionary.bin, ctf_dictionary.root, tpctransform.root, dedxsplines.root, and tpcpadgaincalib.root) 1. 
Load the O2sim environment (`alienv enter O2sim/latest`) and run the following full system test script for a full simulation and digits to raw conversion (this will already include 1 CPU reconstruction run): ``` @@ -37,7 +37,7 @@ If you just want to test a small dataset, you can skip the following steps, and ``` This will use 4 GPU with the HIP backend and allocate 22 GB of scratch memory on the GPU (should be sufficient for 128 orbit TF). You can change the GPU type as indicated in the linked README.md above, e.g. `GPUTYPE=CUDA NGPUS=1` for 1 CUDA GPU. 1. With this, the full chain is running inside O2 DPL. Next we are adding DataDistribution. - 1. Ceate the TF files as explained in the subtask (https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-data-simulation.md). For convenience, there is a script that should do it automatically, from a shell that has loaded both DataDistribution and Readout: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. + 1. Create the TF files as explained in the subtask ([raw-tf-conversion.md](https://github.com/AliceO2Group/AliceO2/blob/dev/prodtests/full-system-test/documentation/raw-tf-conversion.md)). For convenience, there is a script that should do it automatically, from a shell that has loaded both DataDistribution and Readout: `$O2_ROOT/prodtests/full-system-test/convert-raw-to-tf-file.sh`. 1. Enter the O2 environment, and run the following script (please adjust the variables as in the test before).
``` EXTINPUT=1 SHMSIZE=128000000000 GPUTYPE=CPU $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh diff --git a/prodtests/full-system-test/documentation/raw-data-simulation.md b/prodtests/full-system-test/documentation/raw-tf-conversion.md similarity index 100% rename from prodtests/full-system-test/documentation/raw-data-simulation.md rename to prodtests/full-system-test/documentation/raw-tf-conversion.md From 175d0147d5ecd5dcf776a774f8da28a6e893d509 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 25 Apr 2025 13:19:21 +0200 Subject: [PATCH 0458/1914] GPU: Add documentation for TPC Tracking QA --- GPU/documentation/README.md | 3 ++ GPU/documentation/tpc-tracking-qa.md | 78 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 GPU/documentation/tpc-tracking-qa.md diff --git a/GPU/documentation/README.md b/GPU/documentation/README.md index de888ab6e2436..0f6ba79df63ce 100644 --- a/GPU/documentation/README.md +++ b/GPU/documentation/README.md @@ -11,3 +11,6 @@ [run-time-compilation.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/run-time-compilation.md) : - Instructions how to use run time compilation (RTC) for the GPU code. + +[tpc-tracking-qa.md](https://github.com/AliceO2Group/AliceO2/blob/dev/GPU/documentation/tpc-tracking-qa.md) : +- Instructions how to run the QA for TPC tracking (efficiency, resolution, etc.). diff --git a/GPU/documentation/tpc-tracking-qa.md b/GPU/documentation/tpc-tracking-qa.md new file mode 100644 index 0000000000000..4487d10d0fe70 --- /dev/null +++ b/GPU/documentation/tpc-tracking-qa.md @@ -0,0 +1,78 @@ +This is a quick documentation on the TPC Tracking QA for Resolution, Efficiency, and Cluster Attachment based on MC data. + +The TPC QA can produce 3 different output types: +* *mergeable* histograms: A collection of ROOT histograms that can be merged from different inputs. They need to be postprocessed to get meaningful output.
+* *postprocessed* histograms: Histograms showing the efficiencies, resolutions etc. These histograms can no longer be merged from multiple inputs. +* *layouts*: TCanvases with multiple postprocessed histograms arranged in reasonable layouts. + +The TPC Tracking QA consists of multiple QA subtasks ((de)activated via a bitmask): +* *Efficiency* / *Clone Rate* / *Fake Rate* (1) +* *Resolution* (2) +* *Pulls* (4) +* *Cluster Attachment Statistics* (8) +* *nClusters and pt distribution* (16) +* *Cluster rejection counts* (32) (both as aggregate text report and as histogram) + +The TPC QA can run in 3 different ways: +* *Standalone* inside the tracking (o2-tpc-reco-workflow), it will write its output in pdf format to the plots folder in the current directory: supports *all subtasks* and will always produce the *layouts* output. +* As *external source* to QC: the QA is running inside the o2-tpc-reco-workflow and ships the histograms to QC via DPL: supports *all subtasks*, can produce the *postprocessed* or the *layouts* output. +* As *independent* QC tasks, operating on DPL input, that can e.g. be read from ROOT files: supports *subtasks* *1*, *2*, and *4*, currently is hardcoded to the *mergeable* output, but could be made configurable. +_(Note: the reason that the independent QC supports fewer subtasks is that the other tasks require internal tracking data structures that are not available a posteriori.)_ + +Remark on the *Cluster Rejection count histograms* for the *online QC*: +* These are mainly meant for monitoring the TPC compression during data taking without MC information, while most other subtasks rely on MC information. +* These are always in the mergeable format, the postprocessing will just forward them. +* By default they are disabled and only aggregate text output shows the rejection ratios, they must be enabled explicitly as explained below.
+ +Running the TPC QA standalone: +* It must be enabled via the configKeyValue GPU_proc.runQA in the o2-tpc-reco-workflow. The QA will run as part of the normal TPC tracking and will have access to all data structures of the tracking. Otherwise, the normal settings for the o2-tpc-reco-workflow apply. +* Example to run on digits: +{code}o2-tpc-reco-workflow -b --infile tpcdigits.root --configKeyValues "GPU_proc.runQA=1;" --output-type clusters,tracks{code} +* Example to run on clusters: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks --configKeyValues "GPU_proc.runQA=1"{code} + +Running the TPC QA inside the o2-tpc-reco-workflow as external source for qc: +* As in the standalone mode, the TPC Tracking QA will run as part of the o2-tpc-reco-workflow with full access to the tracking data structures. +* The output is shipped to QC as external qc-input in the form of ROOT histograms, which disables some subtasks that do not produce ROOT histograms (such as the cluster counts (32)). +* A merged workflow of the o2-tpc-reco-workflow and qc must be configured, such as: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks,qa | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTrackingFromExternal_direct.json{code} +(Note that by default, the output will be uploaded and visible at https://qcg-test.cern.ch/) +* By default, this will create postprocessed histograms, and it can be switched via configKeyValues to layouts output via: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks,qa --configKeyValues "GPU_QA.shipToQCAsCanvas=true" | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTrackingFromExternal_direct.json{code} +* This mode can be combined with the standalone QA mode. +* Running the *online cluster rejection histogram QA*: +** This mode works also without MC information, it must be enabled explicitly.
Irrespective of the output mode, it will always furnish the same mergeable histograms, since there is no postprocessing. An example to run them on ROOT files: +{code}o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type tracks,qa --disable-mc --configKeyValues "GPU_QA.clusterRejectionHistograms=1" | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTrackingFromExternal_direct.json{code} +** In order to set the x-axis scale (number of clusters), use the configKeyValue GPU_QA.histMaxNClusters. + +Running the QA as independent QC task: +* In this mode, the QA runs independently from the o2-tpc-reco-workflow, getting clusters, tracks, and MC labels via DPL. +* The example below uses the track reader and the reco workflow to fetch the input and ship them via DPL, but the inputs can of course also come from other sources. +* This mode is mostly foreseen for the mergeable output. In this way, many instances can run in parallel and the output can be merged before being postprocessed. +* Currently, the mergeable output is hardcoded, but the task could easily be extended for other outputs (see below). +* To run the tasks on ROOT file input, you can use the following example: +{code}o2-tpc-track-reader | o2-tpc-reco-workflow --input-type clusters --infile tpc-native-clusters.root --output-type disable-writer | o2-qc --config json:/${QUALITYCONTROL_ROOT}/etc/tpcQCTracking_direct.json{code} + +Postprocessing the mergeable output: +* The o2::tpc::qc::Tracking class can be used for the postprocessing in a standalone way. +* Initialize the class with the settings postprocessOnly = true and outputMode = outputPostprocessed or outputLayout. +* Call the postprocess(...) function, passing in std::vectors of the 3 types of ROOT histograms (TH1F, TH2F, TH1D) which are used by the QA. Note that the order of the histograms must be the same as obtained in the mergeable output.
+* Depending on the outputMode setting, the class will fill the out object with either the postprocessed histograms or the canvas layouts. + +The following classes in O2 / QC belong to the TPC tracking QA: +* o2::gpu::GPUQA (O2/GPU/GPUTracking/Standalone/qa/GPUQA.cxx): The main QA class, which can produce the standalone output, or can run with external input driven from the o2::gpu::GPUO2InterfaceQA. +* o2::gpu::GPUO2InterfaceQA (O2/GPU/GPUTracking/Interface/GPUO2InterfaceQA.cxx): Internal interface class, uses o2::gpu::GPUQA. +* o2::tpc::qc::Tracking (O2/Detectors/TPC/qc/src/Tracking.cxx): The main QC class for running the TPC QA independent from the o2-tpc-reco-workflow, uses o2::gpu::GPUO2InterfaceQA. +** Can produce all output types from tracks / clusters as input. +** Can produce postprocessed / layout output from mergeable input. +** Is limited to subtasks that do not require access to the internal tracking data structures. +* o2::quality_control_modules::tpc::Tracking (QC/Modules/TPC/src/Tracking.cxx): Mostly a QC wrapper for o2::tpc::qc::Tracking, with the necessary framework code to receive clusters / tracks / MC labels via DPL. + +Several additional settings can be configured via configKeyValues as listed in https://github.com/AliceO2Group/AliceO2/blob/dev/Detectors/TPC/qc/include/TPCQC/Tracking.h#L44: +* "GPU_QA.strict=[bool]" Strict QA mode: Only consider resolution of tracks where the fit ended within 5 cm of the reference, and remove outliers. (Default: true) +* "GPU_QA.qpt=[float]" Set cut for Q/Pt. (Default: 10.0) +* "GPU_QA.recThreshold=[float]" Compute the efficiency including impure tracks with fake contamination.
(Default: 0.9) +* "GPU_QA.maxResX=[float]" Maximum X (~radius) for reconstructed track position to take into account for resolution QA in cm (Default: no limit) +* "GPU_QA.nativeFitResolutions=[bool]" Create resolution histograms in the native fit units (sin(phi), tan(lambda), Q/Pt) (Default: false) +* "GPU_QA.filterCharge=[int]" Filter for positive (+1) or negative (-1) charge (Default: no filter) +* "GPU_QA.filterPID=[int]" Filter for Particle Type (0 Electron, 1 Muon, 2 Pion, 3 Kaon, 4 Proton) (Default: no filter) From 3eadf367abe68bd24ac39c6eb3ea1470dd596663 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 29 Apr 2025 10:52:37 +0200 Subject: [PATCH 0459/1914] dpl-workflow.sh: Add GEN_TOPO_NO_TF_RATE_UPSCALING setting --- GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 2 +- prodtests/full-system-test/dpl-workflow.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index d2d06df7b5710..db7a3b5884a12 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -117,7 +117,7 @@ class GPUTPCGMPropagator GPUd() float PredictChi2(float posY, float posZ, float err2Y, float err2Z) const; GPUd() int32_t RejectCluster(float chiY, float chiZ, uint8_t clusterState) { - if (chiY > 9.f || chiZ > 9.f) { + if (chiY > 9.f || chiZ > 9.f) { // TODO: Check how a track can have chi2/ncl > 18 return 2; } if ((chiY > 6.25f || chiZ > 6.25f) && (clusterState & (GPUTPCGMMergedTrackHit::flagSplit | GPUTPCGMMergedTrackHit::flagShared))) { diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index bb2712bedd92e..ebe6410d2c804 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -51,7 +51,11 @@ if [[ $EPNSYNCMODE == 1 ]] || type numactl >/dev/null 2>&1 && [[ `numactl -H | g [[ $NUMAGPUIDS != 0 ]] && ARGS_ALL+=" --child-driver
'numactl --membind $NUMAID --cpunodebind $NUMAID'" fi if [[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && [[ $DIGITINPUT != 1 ]]; then - RECO_NUM_NODES_WORKFLOW_CMP=$(($RECO_NUM_NODES_WORKFLOW > 15 ? ($RECO_NUM_NODES_WORKFLOW < 230 ? $RECO_NUM_NODES_WORKFLOW : 230) : 15)) + if [[ ${GEN_TOPO_NO_TF_RATE_UPSCALING:-0} == 1 ]]; then + RECO_NUM_NODES_WORKFLOW_CMP=$RECO_NUM_NODES_WORKFLOW + else + RECO_NUM_NODES_WORKFLOW_CMP=$(($RECO_NUM_NODES_WORKFLOW > 15 ? ($RECO_NUM_NODES_WORKFLOW < 230 ? $RECO_NUM_NODES_WORKFLOW : 230) : 15)) + fi TIMEFRAME_RATE_LIMIT=$((12 * 230 / ${RECO_NUM_NODES_WORKFLOW_CMP} * ($NUMAGPUIDS != 0 ? 1 : 2) * 128 / $NHBPERTF)) [[ $BEAMTYPE != "PbPb" && ${HIGH_RATE_PP:-0} == 0 ]] && TIMEFRAME_RATE_LIMIT=$(($TIMEFRAME_RATE_LIMIT * 3)) ! has_detector TPC && TIMEFRAME_RATE_LIMIT=$(($TIMEFRAME_RATE_LIMIT * 4)) From 2482a563ec50e43d51e1edba40dbfdf89ba36e21 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 30 Apr 2025 16:09:20 +0200 Subject: [PATCH 0460/1914] dpl-workflow.sh: make some MCH config key values fixed default --- prodtests/full-system-test/dpl-workflow.sh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index ebe6410d2c804..b51594115154c 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -355,16 +355,14 @@ has_detector_reco MID && has_detector_matching MCHMID && MFTMCHConf="FwdMatching if has_processing_step MUON_SYNC_RECO; then [[ -z ${ARGS_EXTRA_PROCESS_o2_mid_reco_workflow:-} ]] && ARGS_EXTRA_PROCESS_o2_mid_reco_workflow="--mid-tracker-keep-best" [[ -z ${ARGS_EXTRA_PROCESS_o2_mch_reco_workflow:-} ]] && ARGS_EXTRA_PROCESS_o2_mch_reco_workflow="--digits" - if [[ -z ${CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow:-} ]]; then - if [[ $IS_SIMULATED_DATA == 1 ]]; then - 
CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow="MCHTimeClusterizer.peakSearchSignalOnly=false;MCHDigitFilter.rejectBackground=false;" - elif [[ $RUNTYPE == "PHYSICS" && $BEAMTYPE == "pp" ]] || [[ $RUNTYPE == "COSMICS" ]]; then - CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow="MCHTracking.chamberResolutionX=0.4;MCHTracking.chamberResolutionY=0.4;MCHTracking.sigmaCutForTracking=7.;MCHTracking.sigmaCutForImprovement=6.;" - fi - has_detector_reco ITS && [[ $RUNTYPE != "COSMICS" ]] && CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow+="MCHTimeClusterizer.irFramesOnly=true;" - [[ ! -z ${CUT_RANDOM_FRACTION_MCH:-} ]] && CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow+="MCHTimeClusterizer.rofRejectionFraction=$CUT_RANDOM_FRACTION_MCH;" - CONFIG_EXTRA_PROCESS_o2_mch_reco_workflow+="MCHStatusMap.useHV=false;MCHDigitFilter.statusMask=3;" + if [[ $IS_SIMULATED_DATA == 1 ]]; then + MCH_CONFIG_KEY+="MCHTimeClusterizer.peakSearchSignalOnly=false;MCHDigitFilter.rejectBackground=false;" + elif [[ $RUNTYPE == "PHYSICS" && $BEAMTYPE == "pp" ]] || [[ $RUNTYPE == "COSMICS" ]]; then + MCH_CONFIG_KEY+="MCHTracking.chamberResolutionX=0.4;MCHTracking.chamberResolutionY=0.4;MCHTracking.sigmaCutForTracking=7.;MCHTracking.sigmaCutForImprovement=6.;" fi + has_detector_reco ITS && [[ $RUNTYPE != "COSMICS" ]] && MCH_CONFIG_KEY+="MCHTimeClusterizer.irFramesOnly=true;" + [[ ! 
-z ${CUT_RANDOM_FRACTION_MCH:-} ]] && MCH_CONFIG_KEY+="MCHTimeClusterizer.rofRejectionFraction=$CUT_RANDOM_FRACTION_MCH;" + MCH_CONFIG_KEY+="MCHStatusMap.useHV=false;MCHDigitFilter.statusMask=3;" [[ $RUNTYPE == "COSMICS" ]] && [[ -z ${CONFIG_EXTRA_PROCESS_o2_mft_reco_workflow:-} ]] && CONFIG_EXTRA_PROCESS_o2_mft_reco_workflow="MFTTracking.FullClusterScan=true" fi [[ $SYNCRAWMODE == 1 ]] && [[ -z ${CONFIG_EXTRA_PROCESS_o2_zdc_digits_reco:-} ]] && CONFIG_EXTRA_PROCESS_o2_zdc_digits_reco='RecoParamZDC.tdc_calib[9]=1;RecoParamZDC.tdc_calib[0]=1;RecoParamZDC.tdc_calib[8]=1;RecoParamZDC.tdc_calib[1]=1;RecoParamZDC.tdc_calib[3]=1;RecoParamZDC.tdc_calib[6]=1;RecoParamZDC.tdc_calib[5]=1;RecoParamZDC.tdc_calib[4]=1;RecoParamZDC.tdc_calib[2]=1;RecoParamZDC.tdc_calib[7]=1;RecoParamZDC.energy_calib[13]=1;RecoParamZDC.energy_calib[12]=1;RecoParamZDC.energy_calib[11]=1;RecoParamZDC.energy_calib[6]=1;RecoParamZDC.energy_calib[25]=1;RecoParamZDC.energy_calib[14]=1;RecoParamZDC.energy_calib[20]=1;RecoParamZDC.energy_calib[5]=1;RecoParamZDC.energy_calib[0]=1;RecoParamZDC.energy_calib[19]=1;RecoParamZDC.tower_calib[1]=1;RecoParamZDC.tower_calib[2]=1;RecoParamZDC.tower_calib[3]=1;RecoParamZDC.tower_calib[4]=1;RecoParamZDC.tower_calib[24]=1;RecoParamZDC.tower_calib[21]=1;RecoParamZDC.tower_calib[22]=1;RecoParamZDC.tower_calib[23]=1;RecoParamZDC.tower_calib[18]=1;RecoParamZDC.tower_calib[16]=1;RecoParamZDC.tower_calib[17]=1;RecoParamZDC.tower_calib[15]=1;RecoParamZDC.tower_calib[8]=1;RecoParamZDC.tower_calib[9]=1;RecoParamZDC.tower_calib[7]=1;RecoParamZDC.tower_calib[10]=1' From 82782fd2272801c9b0a961a1b2204c949091dd69 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 2 May 2025 11:26:58 +0200 Subject: [PATCH 0461/1914] DPL Analysis: fix case in which booleans are stored in more than one chunk (#14230) --- Framework/AnalysisSupport/src/TTreePlugin.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index 4481c06a8c6d9..90b862e5fc8d1 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -209,7 +209,7 @@ auto readBoolValues = [](uint8_t* target, ReadOps& op, TBufferFile& rootBuffer) int readLast = 0; while (readEntries < op.rootBranchEntries) { auto beginValue = readLast; - auto readLast = op.branch->GetBulkRead().GetBulkEntries(readEntries, rootBuffer); + readLast = op.branch->GetBulkRead().GetBulkEntries(readEntries, rootBuffer); int size = readLast * op.listSize; readEntries += readLast; for (int i = beginValue; i < beginValue + size; ++i) { From 77beb78fb1f066153157ee06738e1405544d381d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 2 May 2025 12:56:31 +0200 Subject: [PATCH 0462/1914] DPL Analysis: modernize expression parsing code (#14185) --- .../Core/include/Framework/Expressions.h | 31 +++--- Framework/Core/src/Expressions.cxx | 100 ++++++++---------- 2 files changed, 65 insertions(+), 66 deletions(-) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index af89e56f85835..9e00388ee5df8 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -69,6 +69,7 @@ struct ExpressionInfo { namespace o2::framework::expressions { +void unknownParameterUsed(const char* name); const char* stringType(atype::type t); template @@ -147,7 +148,7 @@ struct PlaceholderNode : LiteralNode { if constexpr (variant_trait_v::type> != VariantType::Unknown) { retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; } else { - runtime_error("Unknown parameter used in expression."); + unknownParameterUsed(name.c_str()); } } @@ -188,6 +189,19 @@ struct ParameterNode : LiteralNode { struct ConditionalNode { }; +/// concepts +template +concept 
is_literal_like = std::same_as || std::same_as || std::same_as; + +template +concept is_binding = std::same_as; + +template +concept is_operation = std::same_as; + +template +concept is_conditional = std::same_as; + /// A generic tree node struct Node { Node(LiteralNode&& v) : self{std::forward(v)}, left{nullptr}, right{nullptr}, condition{nullptr} @@ -267,7 +281,7 @@ struct NodeRecord { /// Tree-walker helper template -void walk(Node* head, L const& pred) +void walk(Node* head, L&& pred) { std::stack path; path.emplace(head, 0); @@ -512,16 +526,15 @@ inline Node binned(std::vector const& binning, std::vector const& paramete } template -Node updateParameters(Node const& pexp, int bins, std::vector const& parameters, int bin) +inline Node updateParameters(Node const& pexp, int bins, std::vector const& parameters, int bin) { Node result{pexp}; - auto updateParameter = [&bins, ¶meters, &bin](Node* node) { + walk(&result, [&bins, ¶meters, &bin](Node* node) { if (node->self.index() == 5) { auto* n = std::get_if<5>(&node->self); n->reset(parameters[n->index * bins + bin]); } - }; - walk(&result, updateParameter); + }); return result; } @@ -594,12 +607,6 @@ gandiva::ExpressionPtr makeExpression(gandiva::NodePtr node, gandiva::FieldPtr r /// Update placeholder nodes from context void updatePlaceholders(Filter& filter, InitContext& context); -template -std::vector makeProjectors(framework::pack) -{ - return {C::Projector()...}; -} - std::shared_ptr createProjectorHelper(size_t nColumns, expressions::Projector* projectors, std::shared_ptr schema, std::vector> const& fields); diff --git a/Framework/Core/src/Expressions.cxx b/Framework/Core/src/Expressions.cxx index 6f646515b7837..94649f8639a0a 100644 --- a/Framework/Core/src/Expressions.cxx +++ b/Framework/Core/src/Expressions.cxx @@ -24,6 +24,10 @@ using namespace o2::framework; namespace o2::framework::expressions { +void unknownParameterUsed(const char* name) +{ + runtime_error_f("Unknown parameter used in expression: %s", 
name); +} /// a map between BasicOp and gandiva node definitions /// note that logical 'and' and 'or' are created separately @@ -89,43 +93,41 @@ size_t Filter::designateSubtrees(Node* node, size_t index) return index; } -namespace +template +constexpr inline auto makeDatum(T const&) { -struct LiteralNodeHelper { - DatumSpec operator()(LiteralNode const& node) const - { - return DatumSpec{node.value, node.type}; - } -}; + return DatumSpec{}; +} -struct BindingNodeHelper { - DatumSpec operator()(BindingNode const& node) const - { - return DatumSpec{node.name, node.hash, node.type}; - } -}; +template +constexpr inline auto makeDatum(T const& node) +{ + return DatumSpec{node.value, node.type}; +} -struct OpNodeHelper { - ColumnOperationSpec operator()(OpNode const& node) const - { - return ColumnOperationSpec{node.op}; - } -}; +template +constexpr inline auto makeDatum(T const& node) +{ + return DatumSpec{node.name, node.hash, node.type}; +} -struct PlaceholderNodeHelper { - DatumSpec operator()(PlaceholderNode const& node) const - { - return DatumSpec{node.value, node.type}; - } -}; +template +constexpr inline auto makeOp(T const&, size_t const&) +{ + return ColumnOperationSpec{}; +} -struct ParameterNodeHelper { - DatumSpec operator()(ParameterNode const& node) const - { - return DatumSpec{node.value, node.type}; - } -}; -} // namespace +template +constexpr inline auto makeOp(T const& node, size_t const& index) +{ + return ColumnOperationSpec{node.op, index}; +} + +template +constexpr inline auto makeOp(T const&, size_t const& index) +{ + return ColumnOperationSpec{BasicOp::Conditional, index}; +} std::shared_ptr concreteArrowType(atype::type type) { @@ -169,7 +171,7 @@ std::string upcastTo(atype::type f) case atype::DOUBLE: return "castFLOAT8"; default: - throw runtime_error_f("Do not know how to cast to %d", f); + throw runtime_error_f("Do not know how to cast to %s", stringType(f)); } } @@ -196,13 +198,11 @@ std::ostream& operator<<(std::ostream& os, DatumSpec 
const& spec) void updatePlaceholders(Filter& filter, InitContext& context) { - auto updateNode = [&](Node* node) { + expressions::walk(filter.node.get(), [&](Node* node) { if (node->self.index() == 3) { std::get_if<3>(&node->self)->reset(context); } - }; - - expressions::walk(filter.node.get(), updateNode); + }); } const char* stringType(atype::type t) @@ -246,12 +246,7 @@ Operations createOperations(Filter const& expression) auto processLeaf = [](Node const* const node) { return std::visit( - overloaded{ - [lh = LiteralNodeHelper{}](LiteralNode const& node) { return lh(node); }, - [bh = BindingNodeHelper{}](BindingNode const& node) { return bh(node); }, - [ph = PlaceholderNodeHelper{}](PlaceholderNode const& node) { return ph(node); }, - [pr = ParameterNodeHelper{}](ParameterNode const& node) { return pr(node); }, - [](auto&&) { return DatumSpec{}; }}, + [](auto const& n) { return makeDatum(n); }, node->self); }; @@ -266,10 +261,7 @@ Operations createOperations(Filter const& expression) // create operation spec, pop the node and add its children auto operationSpec = std::visit( - overloaded{ - [&](OpNode node) { return ColumnOperationSpec{node.op, top.node_ptr->index}; }, - [&](ConditionalNode) { return ColumnOperationSpec{BasicOp::Conditional, top.node_ptr->index}; }, - [](auto&&) { return ColumnOperationSpec{}; }}, + [&](auto const& n) { return makeOp(n, top.node_ptr->index); }, top.node_ptr->self); operationSpec.result = DatumSpec{top.index, operationSpec.type}; @@ -623,15 +615,15 @@ gandiva::NodePtr createExpressionTree(Operations const& opSpecs, auto rightNode = datumNode(it->right); auto condNode = datumNode(it->condition); - auto insertUpcastNode = [&](gandiva::NodePtr node, atype::type t) { - if (t != it->type) { - auto upcast = gandiva::TreeExprBuilder::MakeFunction(upcastTo(it->type), {node}, concreteArrowType(it->type)); + auto insertUpcastNode = [](gandiva::NodePtr node, atype::type t0, atype::type t) { + if (t != t0) { + auto upcast = 
gandiva::TreeExprBuilder::MakeFunction(upcastTo(t0), {node}, concreteArrowType(t0)); node = upcast; } return node; }; - auto insertEqualizeUpcastNode = [&](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type t1, atype::type t2) { + auto insertEqualizeUpcastNode = [](gandiva::NodePtr& node1, gandiva::NodePtr& node2, atype::type t1, atype::type t2) { if (t2 > t1) { auto upcast = gandiva::TreeExprBuilder::MakeFunction(upcastTo(t2), {node1}, concreteArrowType(t2)); node1 = upcast; @@ -656,14 +648,14 @@ gandiva::NodePtr createExpressionTree(Operations const& opSpecs, default: if (it->op < BasicOp::Sqrt) { if (it->type != atype::BOOL) { - leftNode = insertUpcastNode(leftNode, it->left.type); - rightNode = insertUpcastNode(rightNode, it->right.type); + leftNode = insertUpcastNode(leftNode, it->type, it->left.type); + rightNode = insertUpcastNode(rightNode, it->type, it->right.type); } else if (it->op == BasicOp::Equal || it->op == BasicOp::NotEqual) { insertEqualizeUpcastNode(leftNode, rightNode, it->left.type, it->right.type); } temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode, rightNode}, concreteArrowType(it->type)); } else { - leftNode = insertUpcastNode(leftNode, it->left.type); + leftNode = insertUpcastNode(leftNode, it->type, it->left.type); temp_node = gandiva::TreeExprBuilder::MakeFunction(basicOperationsMap[it->op], {leftNode}, concreteArrowType(it->type)); } break; From e3fdb85e058e0112369e163260c6ca170e37365b Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 2 May 2025 13:41:58 +0200 Subject: [PATCH 0463/1914] DPL: fix reading of booleans from branches with more than 2 baskets. 
(#14231) --- Framework/AnalysisSupport/src/TTreePlugin.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/AnalysisSupport/src/TTreePlugin.cxx b/Framework/AnalysisSupport/src/TTreePlugin.cxx index 90b862e5fc8d1..28fd713112c94 100644 --- a/Framework/AnalysisSupport/src/TTreePlugin.cxx +++ b/Framework/AnalysisSupport/src/TTreePlugin.cxx @@ -208,7 +208,7 @@ auto readBoolValues = [](uint8_t* target, ReadOps& op, TBufferFile& rootBuffer) memset(target, 0, op.targetBuffer->size()); int readLast = 0; while (readEntries < op.rootBranchEntries) { - auto beginValue = readLast; + auto beginValue = readEntries; readLast = op.branch->GetBulkRead().GetBulkEntries(readEntries, rootBuffer); int size = readLast * op.listSize; readEntries += readLast; From 0938b3554fdce42d98b681bf173c9484b6b8784e Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 5 May 2025 15:16:58 +0200 Subject: [PATCH 0464/1914] GPU Standalone: Build OrtInterface only when ONNX is available --- GPU/GPUTracking/Standalone/CMakeLists.txt | 2 +- GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Standalone/CMakeLists.txt b/GPU/GPUTracking/Standalone/CMakeLists.txt index c112be6abac11..8ce95c8e96d3a 100644 --- a/GPU/GPUTracking/Standalone/CMakeLists.txt +++ b/GPU/GPUTracking/Standalone/CMakeLists.txt @@ -139,7 +139,6 @@ endif() add_subdirectory(../../ GPU) add_library(standalone_support SHARED ${O2_DIR}/Common/Field/src/MagFieldFast.cxx - ${O2_DIR}/Common/ML/src/OrtInterface.cxx ${O2_DIR}/Common/Utils/src/StringUtils.cxx ${O2_DIR}/DataFormats/Detectors/TPC/src/CompressedClusters.cxx ${O2_DIR}/DataFormats/Reconstruction/src/TrackParametrization.cxx @@ -228,6 +227,7 @@ endif() if(GPUCA_CONFIG_ONNX) target_link_libraries(standalone_support PRIVATE onnxruntime::onnxruntime) + target_sources(standalone_support PRIVATE ${O2_DIR}/Common/ML/src/OrtInterface.cxx) endif() if (GPUCA_BUILD_DEBUG_SANITIZE AND 
CMAKE_CXX_COMPILER MATCHES "clang\\+\\+") diff --git a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C index f82c537956ead..f6866bb80da05 100644 --- a/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C +++ b/GPU/GPUTracking/Standalone/tools/dumpGPUDefParam.C @@ -13,7 +13,7 @@ /// \author David Rohr // Run e.g. as (replacing [FILE] and [OUTPUT]: -// echo -e '#define PARAMETER_FILE "[FILE]]"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("[OUTPUT]")\n.q\n' | root -l -b +// echo -e '#define PARAMETER_FILE "[FILE]"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("[OUTPUT]")\n.q\n' | root -l -b // To dump the defaults for AMPERE architecture, run // echo -e '#define GPUCA_GPUTYPE_AMPERE\n#define PARAMETER_FILE "GPUDefParametersDefaults.h"\ngInterpreter->AddIncludePath("'`pwd`'/include/GPU");\n.x share/GPU/tools/dumpGPUDefParam.C("default_AMPERE.par")\n.q\n' | root -l -b From e786dc0644d91601d319b6a2bc25aed40eb12769 Mon Sep 17 00:00:00 2001 From: Hadi Hassan Date: Tue, 6 May 2025 03:03:10 +0300 Subject: [PATCH 0465/1914] [FOCAL-55] Open the detector in the middle in x (#14232) * Open the detector in the middle in x * Formatting --- .../FOCAL/base/include/FOCALBase/Geometry.h | 6 ++ Detectors/FOCAL/base/src/Geometry.cxx | 14 +++- .../geometryFiles/geometry_Spaghetti.txt | 4 ++ Detectors/FOCAL/simulation/src/Detector.cxx | 72 +++++++++++++++---- 4 files changed, 80 insertions(+), 16 deletions(-) diff --git a/Detectors/FOCAL/base/include/FOCALBase/Geometry.h b/Detectors/FOCAL/base/include/FOCALBase/Geometry.h index 4938ebb1925dd..770c0aa3c1cf5 100644 --- a/Detectors/FOCAL/base/include/FOCALBase/Geometry.h +++ b/Detectors/FOCAL/base/include/FOCALBase/Geometry.h @@ -135,6 +135,9 @@ class Geometry bool getInsertFrontPadLayers() const { return mInsertFrontPadLayers; } bool getInsertHCalReadoutMaterial() const { return 
mInsertFrontHCalReadoutMaterial; } + float getDetectorOpeningRight() const { return mGlobal_DetectorOpening_Right; } + float getDetectorOpeningLeft() const { return mGlobal_DetectorOpening_Left; } + std::vector getFOCALMicroModule(int layer) const; const Composition* getComposition(int layer, int stack) const; std::string_view getTowerGapMaterial() const { return mGlobal_Gap_Material; } @@ -175,6 +178,9 @@ class Geometry float mWaferSizeX = 0.0; // Wafer X size float mWaferSizeY = 0.0; // Wafer Y size + float mGlobal_DetectorOpening_Right = 0.0; // detector opening in X (right side) + float mGlobal_DetectorOpening_Left = 0.0; // detector opening in X (left side) + // PIX setup float mGlobal_Pixel_Size = 0.0; // pixel size float mGlobal_PIX_SizeX = 0.0; // sensor size X diff --git a/Detectors/FOCAL/base/src/Geometry.cxx b/Detectors/FOCAL/base/src/Geometry.cxx index 94d8c2cee049c..db47816ba8537 100644 --- a/Detectors/FOCAL/base/src/Geometry.cxx +++ b/Detectors/FOCAL/base/src/Geometry.cxx @@ -351,6 +351,16 @@ void Geometry::setParameters(std::string geometryfile) LOG(debug) << "Z-Location of the FoCAL is set to : " << mGlobal_FOCAL_Z0; } + if (command.find("DetectorOpen_Right") != std::string::npos) { + mGlobal_DetectorOpening_Right = std::stof(tokens[1]); + LOG(debug) << "Detector opening on the right : " << mGlobal_DetectorOpening_Right; + } + + if (command.find("DetectorOpen_Left") != std::string::npos) { + mGlobal_DetectorOpening_Left = std::stof(tokens[1]); + LOG(debug) << "Detector opening on the left : " << mGlobal_DetectorOpening_Left; + } + if (command.find("HCAL_TOWER_SIZE") != std::string::npos) { mGlobal_HCAL_Tower_Size = std::stof(tokens[1]); LOG(debug) << "The size of the HCAL readout tower will be : " << mGlobal_HCAL_Tower_Size; @@ -578,8 +588,8 @@ void Geometry::setParameters(std::string geometryfile) } } } // end for itowerY - } // end for itowerX - } // end else + } // end for itowerX + } // end else center_z += tmpComp.getThickness(); } // end loop over pad layer
compositions LOG(debug) << "============ Created all pad layer compositions (" << mPadCompositionBase.size() << " volumes)"; diff --git a/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt b/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt index 5209204cc5eca..31f6940224337 100644 --- a/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt +++ b/Detectors/FOCAL/simulation/geometryFiles/geometry_Spaghetti.txt @@ -59,6 +59,10 @@ COMMAND_INSERT_PIX_AT_L9 GLOBAL_TOWER_TOLX 0.02 Air GLOBAL_TOWER_TOLY 0.8 Al GLOBAL_FOCAL_Z 763.5 +# Open the detector on the right and left in cm, +# can only work if the GLOBAL_HCAL_TOWER_NY is odd number and GLOBAL_HCAL_TOWER_NX is even number +# GLOBAL_DetectorOpen_Right 5 +# GLOBAL_DetectorOpen_Left 5 GLOBAL_Tower_NX 2 GLOBAL_Tower_NY 11 GLOBAL_MIDDLE_TOWER_OFFSET 5 diff --git a/Detectors/FOCAL/simulation/src/Detector.cxx b/Detectors/FOCAL/simulation/src/Detector.cxx index 08df253d49f83..dc71c1066afdf 100644 --- a/Detectors/FOCAL/simulation/src/Detector.cxx +++ b/Detectors/FOCAL/simulation/src/Detector.cxx @@ -539,6 +539,8 @@ void Detector::CreateHCALSpaghetti() } } + bool splitDet = mGeometry->getDetectorOpeningRight() > 0.0 || mGeometry->getDetectorOpeningLeft() > 0.0; + double TowerSize = mGeometry->getHCALTowerSize(); double CuBoxThickness = 0.3; // Thickness of the Cu box carrying capillary tubes @@ -598,25 +600,57 @@ void Detector::CreateHCALSpaghetti() Columns = 0; RowPos = 0.; Int_t NumTowers = 1; - for (Rows = 0; Rows < nTowersY; Rows++) { - float ColumnPos = 0.; - RowPos = Rows * TowerSize; - for (Columns = 0; Columns < nTowersX; Columns++) { - ColumnPos = Columns * TowerSize; - TGeoTranslation* trans = new TGeoTranslation(ColumnPos - SizeXHCAL / 2 + TowerSize / 2, RowPos - SizeYHCAL / 2 + TowerSize / 2, 0.); + if (splitDet) { + SizeXHCAL = SizeXHCAL / 2; - // Remove the Towers that overlaps with the beam pipe - Double_t RadialDistance = TMath::Power(trans->GetTranslation()[0], 2) + 
TMath::Power(trans->GetTranslation()[1], 2); + TGeoVolumeAssembly* volHalfHCAL = new TGeoVolumeAssembly("HalfHCAL"); - if (RadialDistance < MinRadius * MinRadius || TMath::Abs(trans->GetTranslation()[0]) > SizeXHCAL / 2) { - continue; + for (Rows = 0; Rows < nTowersY; Rows++) { + + float ColumnPos = 0.; + RowPos = Rows * TowerSize; + for (Columns = 0; Columns < nTowersX / 2; Columns++) { + ColumnPos = Columns * TowerSize; + TGeoTranslation* trans = new TGeoTranslation(ColumnPos - SizeXHCAL / 2 + TowerSize / 2, RowPos - SizeYHCAL / 2 + TowerSize / 2, 0.); + + // Shift the beampipe towers by TowerSize/2 + if (Rows == nTowersY / 2) { + trans->SetDx(trans->GetTranslation()[0] + TowerSize / 2); + } + + // Adding the Tower to the HCAL + volHalfHCAL->AddNode(volTowerHCAL, NumTowers, trans); + + NumTowers++; } + volHCAL->AddNode(volHalfHCAL, 1, new TGeoTranslation(SizeXHCAL / 2 + mGeometry->getDetectorOpeningRight(), 0, 0)); + TGeoRotation* rotFlipZ = new TGeoRotation(); + rotFlipZ->RotateY(180); // Flip around Y to reverse Z + TGeoCombiTrans* combHalf = new TGeoCombiTrans(-SizeXHCAL / 2 - mGeometry->getDetectorOpeningLeft(), 0., 0., rotFlipZ); + volHCAL->AddNode(volHalfHCAL, 2, combHalf); + } + } else { + for (Rows = 0; Rows < nTowersY; Rows++) { - // Adding the Tower to the HCAL - volHCAL->AddNode(volTowerHCAL, NumTowers, trans); + float ColumnPos = 0.; + RowPos = Rows * TowerSize; + for (Columns = 0; Columns < nTowersX; Columns++) { + ColumnPos = Columns * TowerSize; + TGeoTranslation* trans = new TGeoTranslation(ColumnPos - SizeXHCAL / 2 + TowerSize / 2, RowPos - SizeYHCAL / 2 + TowerSize / 2, 0.); - NumTowers++; + // Remove the Towers that overlaps with the beam pipe + Double_t RadialDistance = TMath::Power(trans->GetTranslation()[0], 2) + TMath::Power(trans->GetTranslation()[1], 2); + + if (RadialDistance < MinRadius * MinRadius || TMath::Abs(trans->GetTranslation()[0]) > SizeXHCAL / 2) { + continue; + } + + // Adding the Tower to the HCAL +
volHCAL->AddNode(volTowerHCAL, NumTowers, trans); + + NumTowers++; + } } } @@ -791,6 +825,8 @@ void Detector::CreateECALGeometry() // this shifts all the pixel layers to the center near the beampipe double pixshift = geom->getTowerSizeX() - (geom->getGlobalPixelWaferSizeX() * geom->getNumberOfPIXsInX()); + bool splitDet = mGeometry->getDetectorOpeningRight() > 0.0 || mGeometry->getDetectorOpeningLeft() > 0.0; + float offset = pars[2]; // gMC->Gsvolu("EMSC1", "BOX", idtmed[3698], pars, 4);//Left towers (pixels shifted right) // gMC->Gsvolu("EMSC2", "BOX", idtmed[3698], pars, 4);//Right towers (pixels shifted left) @@ -977,9 +1013,13 @@ void Detector::CreateECALGeometry() // const auto towerCenter = geom->getGeoTowerCenter(number); //only ECAL part, second parameter = -1 by default // xp = std::get<0>towerCenter; // std::tie(xp, yp, zp) = geom->getGeoTowerCenter(number); - const auto [xp, yp, zp] = geom->getGeoTowerCenter(number); // only ECAL part, second parameter = -1 by default + auto [xp, yp, zp] = geom->getGeoTowerCenter(number); // only ECAL part, second parameter = -1 by default if (itowerx == 0) { + if (splitDet) { + xp -= geom->getDetectorOpeningLeft(); + } + TVirtualMC::GetMC()->Gspos("EMSC1", number + 1, "ECAL", xp, yp, 0, 0, "ONLY"); // Add the SiPad front volumes directly under the FOCAL volume if (geom->getInsertFrontPadLayers()) { @@ -992,6 +1032,10 @@ void Detector::CreateECALGeometry() } } if (itowerx == 1) { + if (splitDet) { + xp += geom->getDetectorOpeningRight(); + } + TVirtualMC::GetMC()->Gspos("EMSC2", number + 1, "ECAL", xp, yp, 0, 0, "ONLY"); // Add the SiPad front volumes directly under the FOCAL volume if (geom->getInsertFrontPadLayers()) { From 69f1fd10feb52387174f8b5024d7a5afbdf02dd2 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Mon, 5 May 2025 22:00:05 +0800 Subject: [PATCH 0466/1914] ITS3: Fix APTS response file generation Was unnecessarily generated on every built. 
--- Detectors/Upgrades/ITS3/data/CMakeLists.txt | 29 ++++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/Detectors/Upgrades/ITS3/data/CMakeLists.txt b/Detectors/Upgrades/ITS3/data/CMakeLists.txt index ba8b60c8aa7eb..7a807fd670370 100644 --- a/Detectors/Upgrades/ITS3/data/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/data/CMakeLists.txt @@ -9,17 +9,26 @@ # granted to it by virtue of its status as an Intergovernmental Organization # or submit itself to any jurisdiction. +set(APTS_RESPONSE_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root") + +add_custom_command( + OUTPUT ${APTS_RESPONSE_OUTPUT} + COMMAND ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator + -c APTS + -i ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ + -o ${CMAKE_CURRENT_BINARY_DIR}/ + DEPENDS GenerateAlpideResponse + ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ + COMMENT "Generating APTSResponseData.root" + VERBATIM +) + add_custom_target( GenerateAPTSResponse ALL - COMMAND - ${CMAKE_BINARY_DIR}/stage/bin/o2-alpide-response-generator -c APTS -i - ${ITSRESPONSE_DIR}/response/ITS3ChipResponseData/AptsResponseData/ -o - ${CMAKE_CURRENT_BINARY_DIR}/ - BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root - DEPENDS GenerateAlpideResponse - COMMENT "Generating APTSResponseData.root") + DEPENDS ${APTS_RESPONSE_OUTPUT} +) + install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/APTSResponseData.root" - DESTINATION - "${CMAKE_INSTALL_PREFIX}/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/" + FILES ${APTS_RESPONSE_OUTPUT} + DESTINATION "${CMAKE_INSTALL_PREFIX}/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/" ) From 15b4f5f19e1eef23d79bdb3225e36348a845722d Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 6 May 2025 08:56:19 +0200 Subject: [PATCH 0467/1914] DPL Analysis: prevent slice cache from updating when not required by enabled process functions (#14057) --- Framework/Core/include/Framework/ASoA.h | 
12 ++--- .../Core/include/Framework/AnalysisManagers.h | 14 ++++-- .../Core/include/Framework/AnalysisTask.h | 22 ++++----- .../Framework/ArrowTableSlicingCache.h | 45 +++++++++++------ .../Core/include/Framework/GroupSlicer.h | 2 +- Framework/Core/src/ASoA.cxx | 2 +- Framework/Core/src/ArrowSupport.cxx | 19 +++---- Framework/Core/src/ArrowTableSlicingCache.cxx | 49 ++++++++++--------- Framework/Core/test/test_GroupSlicer.cxx | 4 +- 9 files changed, 96 insertions(+), 73 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index e098cd89f6d5d..2e478a8ca64a6 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1400,10 +1400,10 @@ namespace o2::framework struct PreslicePolicyBase { const std::string binding; - StringPair bindingKey; + Entry bindingKey; bool isMissing() const; - StringPair const& getBindingKey() const; + Entry const& getBindingKey() const; }; struct PreslicePolicySorted : public PreslicePolicyBase { @@ -1428,7 +1428,7 @@ struct PresliceBase : public Policy { const std::string binding; PresliceBase(expressions::BindingNode index_) - : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, Entry(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { } @@ -1508,7 +1508,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; @@ -1545,7 +1545,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr 
(OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } auto selection = container.getSliceFor(value); @@ -1574,7 +1574,7 @@ auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 330eaf01f0be4..e310f3eef990c 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -534,39 +534,43 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template requires(!is_preslice) -bool registerCache(T&, std::vector&, std::vector&) +bool registerCache(T&, Cache&, Cache&) { return false; } template requires std::same_as -bool registerCache(T& preslice, std::vector& bsks, std::vector&) +bool registerCache(T& preslice, Cache& bsks, Cache&) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsks.end()) { bsks.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + locate->enabled = true; } return true; } template requires std::same_as -bool registerCache(T& preslice, std::vector&, std::vector& bsksU) +bool registerCache(T& preslice, Cache&, Cache& bsksU) { if constexpr (T::optional) { 
if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsksU.end()) { bsksU.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + locate->enabled = true; } return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index c7f3da1948c62..9bd2e2af173cc 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -66,20 +66,20 @@ concept is_enumeration = is_enumeration_v>; namespace { struct AnalysisDataProcessorBuilder { template - static void addGroupingCandidates(std::vector& bk, std::vector& bku) + static void addGroupingCandidates(Cache& bk, Cache& bku, bool enabled) { - [&bk, &bku](framework::pack) mutable { + [&bk, &bku, enabled](framework::pack) mutable { std::string key; if constexpr (soa::is_iterator>) { key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); } - ([&bk, &bku, &key]() mutable { + ([&bk, &bku, &key, enabled]() mutable { if constexpr (soa::relatedByIndex, std::decay_t>()) { auto binding = soa::getLabelFromTypeForKey>(key); if constexpr (o2::soa::is_smallgroups>) { - framework::updatePairList(bku, binding, key); + framework::updatePairList(bku, binding, key, enabled); } else { - framework::updatePairList(bk, binding, key); + framework::updatePairList(bk, binding, key, enabled); } } }(), @@ -147,7 +147,7 @@ struct AnalysisDataProcessorBuilder { /// helper to parse the process arguments /// 1. 
enumeration (must be the only argument) template - static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, Cache&, Cache&) { std::vector inputMetadata; // FIXME: for the moment we do not support begin, end and step. @@ -156,17 +156,17 @@ struct AnalysisDataProcessorBuilder { /// 2. grouping case - 1st argument is an iterator template - static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache& bk, Cache& bku) requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) { - addGroupingCandidates(bk, bku); + addGroupingCandidates(bk, bku, value); constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); } /// 3. generic case template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache&, Cache&) requires(std::is_lvalue_reference_v && ...) { constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); @@ -480,8 +480,8 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) std::vector inputs; std::vector options; std::vector expressionInfos; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; + Cache bindingsKeys; + Cache bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 2edc23a63ce76..292a67023fc5e 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -34,51 +34,64 @@ struct SliceInfoUnsortedPtr { gsl::span getSliceFor(int value) const; }; -using StringPair = std::pair; +struct Entry { + std::string binding; + std::string key; + bool enabled; + + Entry(std::string b, std::string k, bool e = true) + : binding{b}, + key{k}, + enabled{e} + { + } +}; + +using Cache = std::vector; -void updatePairList(std::vector& list, std::string const& binding, std::string const& key); +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled); struct ArrowTableSlicingCacheDef { constexpr static ServiceKind service_kind = ServiceKind::Global; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; + Cache bindingsKeys; + Cache bindingsKeysUnsorted; - void setCaches(std::vector&& bsks); - void setCachesUnsorted(std::vector&& bsks); + void setCaches(Cache&& bsks); + void setCachesUnsorted(Cache&& bsks); }; struct ArrowTableSlicingCache { constexpr static ServiceKind service_kind = ServiceKind::Stream; - std::vector bindingsKeys; + Cache bindingsKeys; std::vector>> values; std::vector>> counts; - std::vector bindingsKeysUnsorted; + Cache bindingsKeysUnsorted; std::vector> valuesUnsorted; std::vector groups; - ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted = 
{}); + ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted = {}); // set caching information externally - void setCaches(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + void setCaches(Cache&& bsks, Cache&& bsksUnsorted = {}); // update slicing info cache entry (assumes it is already present) arrow::Status updateCacheEntry(int pos, std::shared_ptr const& table); arrow::Status updateCacheEntryUnsorted(int pos, std::shared_ptr const& table); // helper to locate cache position - std::pair getCachePos(StringPair const& bindingKey) const; - int getCachePosSortedFor(StringPair const& bindingKey) const; - int getCachePosUnsortedFor(StringPair const& bindingKey) const; + std::pair getCachePos(Entry const& bindingKey) const; + int getCachePosSortedFor(Entry const& bindingKey) const; + int getCachePosUnsortedFor(Entry const& bindingKey) const; // get slice from cache for a given value - SliceInfoPtr getCacheFor(StringPair const& bindingKey) const; - SliceInfoUnsortedPtr getCacheUnsortedFor(StringPair const& bindingKey) const; + SliceInfoPtr getCacheFor(Entry const& bindingKey) const; + SliceInfoUnsortedPtr getCacheUnsortedFor(Entry const& bindingKey) const; SliceInfoPtr getCacheForPos(int pos) const; SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const; - static void validateOrder(StringPair const& bindingKey, std::shared_ptr const& input); + static void validateOrder(Entry const& bindingKey, std::shared_ptr const& input); }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index 64b1d863c59e6..b8436314b057e 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ b/Framework/Core/include/Framework/GroupSlicer.h @@ -55,7 +55,7 @@ struct GroupSlicer { { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = std::make_pair(binding, mIndexColumnName); + auto bk = 
Entry(binding, mIndexColumnName); if constexpr (!o2::soa::is_smallgroups>) { if (table.size() == 0) { return; diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 810398747de88..5940bc0427225 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -197,7 +197,7 @@ bool PreslicePolicyBase::isMissing() const return binding == "[MISSING]"; } -StringPair const& PreslicePolicyBase::getBindingKey() const +Entry const& PreslicePolicyBase::getBindingKey() const { return bindingKey; } diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 12a4c7131e828..3b13e30581f70 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -567,26 +567,27 @@ o2::framework::ServiceSpec ArrowSupport::arrowTableSlicingCacheSpec() .name = "arrow-slicing-cache", .uniqueId = CommonServices::simpleServiceId(), .init = [](ServiceRegistryRef services, DeviceState&, fair::mq::ProgOptions&) { return ServiceHandle{TypeIdHelpers::uniqueId(), - new ArrowTableSlicingCache(std::vector>{services.get().bindingsKeys}, std::vector{services.get().bindingsKeysUnsorted}), + new ArrowTableSlicingCache(Cache{services.get().bindingsKeys}, + Cache{services.get().bindingsKeysUnsorted}), ServiceKind::Stream, typeid(ArrowTableSlicingCache).name()}; }, .configure = CommonServices::noConfiguration(), .preProcessing = [](ProcessingContext& pc, void* service_ptr) { auto* service = static_cast(service_ptr); auto& caches = service->bindingsKeys; - for (auto i = 0; i < caches.size(); ++i) { - if (pc.inputs().getPos(caches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < caches.size(); ++i) { + if (caches[i].enabled && pc.inputs().getPos(caches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw 
runtime_error_f("Failed to update slice cache for %s/%s", caches[i].first.c_str(), caches[i].second.c_str()); + throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].binding.c_str(), caches[i].key.c_str()); } } } auto& unsortedCaches = service->bindingsKeysUnsorted; - for (auto i = 0; i < unsortedCaches.size(); ++i) { - if (pc.inputs().getPos(unsortedCaches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < unsortedCaches.size(); ++i) { + if (unsortedCaches[i].enabled && pc.inputs().getPos(unsortedCaches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].first.c_str(), unsortedCaches[i].second.c_str()); + throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].binding.c_str(), unsortedCaches[i].key.c_str()); } } } }, diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 4b31f96e32fba..12df5ef6c080b 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -11,6 +11,7 @@ #include "Framework/ArrowTableSlicingCache.h" #include "Framework/RuntimeError.h" +#include "Framework/Logger.h" #include #include @@ -19,10 +20,10 @@ namespace o2::framework { -void updatePairList(std::vector& list, std::string const& binding, std::string const& key) +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled = true) { - if (std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.first == binding) && (entry.second == key); }) == list.end()) { - list.emplace_back(binding, key); + if (std::find_if(list.begin(), 
list.end(), [&binding, &key](auto const& entry) { return (entry.binding == binding) && (entry.key == key); }) == list.end()) { + list.emplace_back(binding, key, enabled); } } @@ -65,17 +66,17 @@ gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const return {(*groups)[value].data(), (*groups)[value].size()}; } -void ArrowTableSlicingCacheDef::setCaches(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCaches(Cache&& bsks) { bindingsKeys = bsks; } -void ArrowTableSlicingCacheDef::setCachesUnsorted(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCachesUnsorted(Cache&& bsks) { bindingsKeysUnsorted = bsks; } -ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted) +ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted) : bindingsKeys{bsks}, bindingsKeysUnsorted{bsksUnsorted} { @@ -86,7 +87,7 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, s groups.resize(bindingsKeysUnsorted.size()); } -void ArrowTableSlicingCache::setCaches(std::vector&& bsks, std::vector&& bsksUnsorted) +void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) { bindingsKeys = bsks; bindingsKeysUnsorted = bsksUnsorted; @@ -111,7 +112,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, - arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].second)}, + arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].key)}, &options)); auto pair = static_cast(value_counts.array()); values[pos].reset(); @@ -128,7 +129,11 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (table->num_rows() == 0) { return arrow::Status::OK(); } - auto& [b, k] = bindingsKeysUnsorted[pos]; + auto& [b, k, e] = bindingsKeysUnsorted[pos]; + if (!e) { + 
LOG(debug) << "Update of disabled cache requested"; + return arrow::Status::OK(); + } auto column = table->GetColumnByName(k); auto row = 0; for (auto iChunk = 0; iChunk < column->num_chunks(); ++iChunk) { @@ -139,7 +144,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (std::find(valuesUnsorted[pos].begin(), valuesUnsorted[pos].end(), v) == valuesUnsorted[pos].end()) { valuesUnsorted[pos].push_back(v); } - if (groups[pos].size() <= v) { + if ((int)groups[pos].size() <= v) { groups[pos].resize(v + 1); } (groups[pos])[v].push_back(row); @@ -151,7 +156,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st return arrow::Status::OK(); } -std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindingKey) const +std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey) const { auto pos = getCachePosSortedFor(bindingKey); if (pos != -1) { @@ -161,41 +166,41 @@ std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindi if (pos != -1) { return {pos, false}; } - throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } -int ArrowTableSlicingCache::getCachePosSortedFor(StringPair const& bindingKey) const +int ArrowTableSlicingCache::getCachePosSortedFor(Entry const& bindingKey) const { - auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate != bindingsKeys.end()) { return std::distance(bindingsKeys.begin(), locate); } return -1; } -int 
ArrowTableSlicingCache::getCachePosUnsortedFor(StringPair const& bindingKey) const +int ArrowTableSlicingCache::getCachePosUnsortedFor(Entry const& bindingKey) const { - auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate_unsorted != bindingsKeysUnsorted.end()) { return std::distance(bindingsKeysUnsorted.begin(), locate_unsorted); } return -1; } -SliceInfoPtr ArrowTableSlicingCache::getCacheFor(StringPair const& bindingKey) const +SliceInfoPtr ArrowTableSlicingCache::getCacheFor(Entry const& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (!s) { - throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheForPos(p); } -SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const StringPair& bindingKey) const +SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const Entry& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (s) { - throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheUnsortedForPos(p); @@ -224,9 +229,9 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con }; } -void ArrowTableSlicingCache::validateOrder(StringPair const& bindingKey, const std::shared_ptr& input) +void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr& input) { - 
auto const& [target, key] = bindingKey; + auto const& [target, key, enabled] = bindingKey; auto column = input->GetColumnByName(key); auto array0 = static_cast>(column->chunk(0)->data()); int32_t prev = 0; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 161939141e790..091c21eeae229 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -683,7 +683,7 @@ TEST_CASE("ArrowDirectSlicing") std::vector slices; std::vector offsts; - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); auto s = cache.updateCacheEntry(0, {evtTable}); auto lcache = cache.getCacheFor(bk); @@ -741,7 +741,7 @@ TEST_CASE("TestSlicingException") } auto evtTable = builderE.finalize(); - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); try { From a8f5897522519647699a774697325e5e663619f5 Mon Sep 17 00:00:00 2001 From: Matteo Concas Date: Tue, 6 May 2025 11:43:50 +0200 Subject: [PATCH 0468/1914] Remove tmp file (#14239) Trivial and unaffecting anything else, merging. 
--- .../src/.ThresholdCalibratorSpec.cxx.swo | Bin 16384 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 Detectors/ITSMFT/ITS/workflow/src/.ThresholdCalibratorSpec.cxx.swo diff --git a/Detectors/ITSMFT/ITS/workflow/src/.ThresholdCalibratorSpec.cxx.swo b/Detectors/ITSMFT/ITS/workflow/src/.ThresholdCalibratorSpec.cxx.swo deleted file mode 100644 index 847bb24d5cf5f12814270f83c821f9c726d7964b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeHOUyK_^8DA&`N-2$s656V$6G!6g^v=G!w9vRo)I0m`;wX2%-uaTINpxE8j_pm> zyW82Z@7yD8Xaf&CqoN`ncqoV`gb)IWrPI$uE-^~q%Xo;sj@sjTcMBKnlH}R zTz@SW3M*G>2p%?k*Dj!DRhTHe9B?aZt*>v>Vj@ZyNEqlbaEEbVazZWjeW|_d(|4`( zM9H^=frNpCfrNpCfrNpCfrNpCfrNqodj@21hw%(X`C&cMK!1K{;PX9wx%&Q-1M+vZ zJg)!80r?dzZ|eK=x`DWz|7!UeE&s`Y{2eWKw0w{5a9sawEw5_%GXwIswES@`e|13q zrj{Sm@`JZTUt;}lX!)X+KQ|!%kCvCT{5J#ge{14S;5Oh-cNoTH;K#s^fG-1QfYZQH;0wSwkONWx1MUKL0e1o~ebO+V2LuoR zcLRHX-N2RG4db`KuYqTQi@-C$IB*Z}^G_JYbHJB?`+?5__W{4W%`kolJPTX|Y5)fo zfjn>!xEFZu^-N5#zrNyaHSVz6fjpj{zm%R^T!c11|y= zunOz}b_1^?fAJddBJea&1g>BX-vVeZ-=O*2?qckM!3M8Ghgj;2w}1bBR&+wM>N;MX zjmS>GN0{TWCYPpdN|X658MGv;IW7-Vk0>?A%X8^9Q+TM6VWg(yQIWtDCaQ6vDvyyW z=J}}Vw>&$AnirU5N~^)rp|taPzHaeAI=;s&-wWmE7|*Z`r5~@CgkwjV^XNi_lpJP` zmGxEGFrCTPxjdvagi@djN75aTQ%V~F-r6rmdbQ{ zWi`^GBha8fna{VR~h!?N}d^ zKFj8I%5O9$W2pVU2<{*7H5l%-IF#byW7^DeL{8t_l1W`3+hh^Cmg3u~v3Mf-L~QS2 zmde?@o%33*n~JPzM^i&~MEZwLPfz3Ztb!hK?`x|1hqtrk&}(?;jCF_FGr{50CHyrk zeo4zF8aL}*wD#4ouqynz=RC#jQ5LuyFOsnqFk*9o7sjO1<|CtQ1Zwc%3pUblq=j99 z*274j(Q_eu5p5~DN$5u-`UjTk1;$+{-k@dMUAVK3)ceHIQl(H{S*Vm2=BTr}G?_`P z?=WnmuXSY|!*$J;D%DQrYfZqGo)g${T?gYIQw?9c&yjn^z?{mcF_j* z=c!VGXOhMN!Fv`Ib5N+f1UX9M&bfMt{^JWlW0i)~rJkDgY zr*vjZ^CLUm0FimtRF!=jMhJ@yU|6-RMf%h&Xow3;V;>nTLH9CS^0)1Zf9DUhDvMTxkQY^S^36G^0R!a-VSdL9**nXCtU}LN@Z^}kKKO<`C^!obg@w2!G zmkM7pD3=bO6f&(!)6I#qnT#UT_FGjPSX|Ur9oZ1+==XlMzCOz0@2H^0SgIUdqLR!B znc!_MLT*QEZ)mTsyF=`O{&CTC_jrzfMyjtM?D1s$+Vhb8TD|g&3N&o?sB%jf+p3&;H5%nW 
zS=g#J9Z3ViCQp8)>2=t-mT*GbQ5$NOHkGeI#yC8LHd1(8*KZ$S>AA(@>;#OJ&VmEj9xOj_&~tRB ziVlz;uT(0AQiby3DV+W(HOLBeCG=|&(F&$GWHxqt;Z$2Ras(v^Rj4xE6HdJWb||Xh>jBUgOz!^)FcgW~@c#G(%&KBf3Se*XL}V*V?@_kat)Dc~e<7}y2;3-SLyfY*S( z0)GL%1{8s5;1=M|i1D8XiohF)>AwR!0h|Ip54?jo{w3fNPy%iTUO^21E8qt}8#o7? z1?~kdA#Q&V_ygkdr+@>%WyIf?fWM*tXMiVwIe^CVAdp-M0|^5O0|^5+k%0>g$MGh= zXy6WcPMfnU8!7G~p~%NV_yhO|ob@qj))9D~FogrZgTkwPe%kllQC2e%(2uf>)C$}M zcq36&hVpgtCUAg7T5#w(VTVB%9r(6WcgPW;Q_u8F*RR8O2`$r0UBDGZf=plC?pSMM zmM6!o2Di>%UuWPKs9N2#nf@@1;g8nn%tampjs)dN;6~90`e)iU+!;757!i3DNI8V) z+9(2`-qr0G_ro3Z2WbR2q~#hClH7#b1J|hyhk=+LqS`|=z)kxq#c<|E*#r^3&}vrE zapm`@qKFuDs0rN@Lv&KoD9W=AOeMO6$wKUhVQi>$Y&X}sm72c%)KFmoCQD>3VU{qeMlXWRf3%}z(mJCPP8bUy(6XkizB+-n<890{KvkMoxKLZw9 z<&<0Z1r#(X@Yn#=ft*E}ykdsu4-Jp2kHo=wMjLj-U>I4sO0VnhDVYD5)%|PE)f7TS zmGqK+oKYB-3BYR6k!cZ+A{K>j!Q+np7*r)?bbDFe3|s_pt5wrFpQiK|2u(d!SgtJL z%Miu1KWtmKP+piVPM^5J+Q=GJZm{ZdVQS$<>noXW@KC>vu}n`V%roCflx^k60& zm{lZZbRjZoSw`H1NscCJergc|A0I{7SmOu@akxNdv;&hd4p&I#h-y7$bJLXqqNV9U zT@`3lO2aU%Cnmk7E5z5#^~;*-tAc95Ov%4u79pd#A7s75d+HZF)0lk3>;6j zXjKuBbf89*>cn4Mpx!Hiflx;0s)fjq`WwDIsA*RDv~5#wH#3#opUtAd_UDw5V#16+ z#Ul$&tGV2qHxH#Bq%+uZ0$<2Hc<$yyqlw33guOZsTp%A}#t$hwe2olxq?M8y#o{t{ mJwKo)k`3R^!?VH0)nS&4Gex-xt|>W^)^r}B!p*#hng0Qi4P(y$ From 1c7a558df3442e42971b3c4b03dace6796a6b946 Mon Sep 17 00:00:00 2001 From: shahoian Date: Tue, 6 May 2025 11:44:43 +0200 Subject: [PATCH 0469/1914] Fix typos in rANS AlignedArrayIterator --- .../include/rANS/internal/containers/AlignedArray.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h b/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h index df4b0190cdc4f..c1f96df939809 100644 --- a/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h +++ b/Utilities/rANS/include/rANS/internal/containers/AlignedArray.h @@ -107,16 +107,16 @@ class AlignedArrayIterator inline constexpr difference_type operator-(const AlignedArrayIterator& other) const 
noexcept { - return this->mIter - other.mIter; + return this->mIndex - other.mIndex; }; // comparison inline constexpr bool operator==(const AlignedArrayIterator& other) const noexcept { return this->mIndex == other.mIndex; }; inline constexpr bool operator!=(const AlignedArrayIterator& other) const noexcept { return this->mIndex != other.mIndex; }; - inline constexpr bool operator<(const AlignedArrayIterator& other) const noexcept { return this->mIndex < other->mIndex; }; - inline constexpr bool operator>(const AlignedArrayIterator& other) const noexcept { return this->mIndex > other->mIndex; }; - inline constexpr bool operator>=(const AlignedArrayIterator& other) const noexcept { return this->mIndex >= other->mIndex; }; - inline constexpr bool operator<=(const AlignedArrayIterator& other) const noexcept { return this->mIndex <= other->mIndex; }; + inline constexpr bool operator<(const AlignedArrayIterator& other) const noexcept { return this->mIndex < other.mIndex; }; + inline constexpr bool operator>(const AlignedArrayIterator& other) const noexcept { return this->mIndex > other.mIndex; }; + inline constexpr bool operator>=(const AlignedArrayIterator& other) const noexcept { return this->mIndex >= other.mIndex; }; + inline constexpr bool operator<=(const AlignedArrayIterator& other) const noexcept { return this->mIndex <= other.mIndex; }; // dereference inline constexpr value_type operator*() const noexcept { return (*mContainer)[mIndex]; }; @@ -311,4 +311,4 @@ auto make_span(o2::rans::internal::simd::AlignedArray& array } // namespace gsl -#endif /* RANS_INTERNAL_CONTAINERS_ALIGNEDARRAY_H_ */ \ No newline at end of file +#endif /* RANS_INTERNAL_CONTAINERS_ALIGNEDARRAY_H_ */ From c4f4364b1c819dac3581db77f89c0968c661c7d3 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 6 May 2025 17:25:36 +0200 Subject: [PATCH 0470/1914] DPL: improve DataSpecUtils::describe API in case of buffers (#14238) Just like snprintf, it makes 
sense to return the size of the formatted output. --- .../Core/include/Framework/DataSpecUtils.h | 17 +++--- Framework/Core/src/DataSpecUtils.cxx | 58 ++++++++----------- .../Core/test/unittest_DataSpecUtils.cxx | 55 ++++++++++++++++++ 3 files changed, 88 insertions(+), 42 deletions(-) diff --git a/Framework/Core/include/Framework/DataSpecUtils.h b/Framework/Core/include/Framework/DataSpecUtils.h index 65f8585302aa7..588aa30da7e08 100644 --- a/Framework/Core/include/Framework/DataSpecUtils.h +++ b/Framework/Core/include/Framework/DataSpecUtils.h @@ -18,11 +18,12 @@ #include -namespace o2 -{ -namespace framework +namespace o2::framework { +template +concept HasMatcher = requires(T& t) { t.matcher; }; + struct DataSpecUtils { /// @return true if a given InputSpec @a spec matches with a @a target ConcreteDataMatcher static bool match(InputSpec const& spec, ConcreteDataMatcher const& target); @@ -152,10 +153,8 @@ struct DataSpecUtils { static bool validate(OutputSpec const& output); /// Same as the other describe, but uses a buffer to reduce memory churn. - static void describe(char* buffer, size_t size, InputSpec const& spec); - - /// Same as the other describe, but uses a buffer to reduce memory churn. - static void describe(char* buffer, size_t size, OutputSpec const& spec); + template + static size_t describe(char* buffer, size_t size, T const& spec); /// If possible extract the ConcreteDataMatcher from an InputSpec. 
This /// can be done either if the InputSpec is defined in terms for a ConcreteDataMatcher @@ -250,6 +249,6 @@ struct DataSpecUtils { static void updateOutputList(std::vector& list, OutputSpec&& input); }; -} // namespace framework -} // namespace o2 +} // namespace o2::framework + #endif // FRAMEWORK_DATASPECUTILS_H diff --git a/Framework/Core/src/DataSpecUtils.cxx b/Framework/Core/src/DataSpecUtils.cxx index 3babbaba2a6ca..48f5e6abcad5b 100644 --- a/Framework/Core/src/DataSpecUtils.cxx +++ b/Framework/Core/src/DataSpecUtils.cxx @@ -15,11 +15,13 @@ #include "Framework/RuntimeError.h" #include "Headers/DataHeaderHelpers.h" +#include #include #include #include #include #include +#include namespace o2::framework { @@ -87,39 +89,29 @@ std::string DataSpecUtils::describe(OutputSpec const& spec) spec.matcher); } -void DataSpecUtils::describe(char* buffer, size_t size, InputSpec const& spec) -{ - if (auto concrete = std::get_if(&spec.matcher)) { - char origin[5]; - origin[4] = 0; - char description[17]; - description[16] = 0; - snprintf(buffer, size, "%s/%s/%" PRIu32, (strncpy(origin, concrete->origin.str, 4), origin), - (strncpy(description, concrete->description.str, 16), description), concrete->subSpec); - } else if (auto matcher = std::get_if(&spec.matcher)) { - std::ostringstream ss; - ss << ""; - strncpy(buffer, ss.str().c_str(), size - 1); - } else { - throw runtime_error("Unsupported InputSpec"); - } -} - -void DataSpecUtils::describe(char* buffer, size_t size, OutputSpec const& spec) -{ - if (auto concrete = std::get_if(&spec.matcher)) { - char origin[5]; - origin[4] = 0; - char description[17]; - description[16] = 0; - snprintf(buffer, size, "%s/%s/%" PRIu32, (strncpy(origin, concrete->origin.str, 4), origin), - (strncpy(description, concrete->description.str, 16), description), concrete->subSpec); - } else if (auto concrete = std::get_if(&spec.matcher)) { - fmt::format_to(buffer, "", concrete->origin, concrete->description); - } else { - throw 
runtime_error("Unsupported OutputSpec"); - } -} +template +size_t DataSpecUtils::describe(char* buffer, size_t size, T const& spec) +{ + auto result = std::visit(overloaded{ + [buffer, size](ConcreteDataMatcher const& concrete) -> fmt::format_to_n_result { + return fmt::format_to_n(buffer, size - 1, "{:.4}/{:.16}/{}", concrete.origin.str, concrete.description.str, concrete.subSpec); + }, + [buffer, size](ConcreteDataTypeMatcher const& concrete) -> fmt::format_to_n_result { + return fmt::format_to_n(buffer, size - 1, "", concrete.origin, concrete.description); + }, + [buffer, size](DataDescriptorMatcher const& matcher) -> fmt::format_to_n_result { + std::ostringstream ss; + ss << ""; + return fmt::format_to_n(buffer, size - 1, "{}", ss.str()); + }, + [](...) -> fmt::format_to_n_result { throw std::runtime_error("Unsupported Input / Output Spec"); }}, + spec.matcher); + *result.out = '\0'; + return result.out - buffer; +} + +template size_t DataSpecUtils::describe(char* buffer, size_t size, InputSpec const& spec); +template size_t DataSpecUtils::describe(char* buffer, size_t size, OutputSpec const& spec); std::string DataSpecUtils::label(InputSpec const& spec) { diff --git a/Framework/Core/test/unittest_DataSpecUtils.cxx b/Framework/Core/test/unittest_DataSpecUtils.cxx index e6b2f4a22c018..6128183aefa11 100644 --- a/Framework/Core/test/unittest_DataSpecUtils.cxx +++ b/Framework/Core/test/unittest_DataSpecUtils.cxx @@ -42,6 +42,7 @@ TEST_CASE("ConcreteData") CHECK(std::string(concrete.description.as()) == "FOOO"); CHECK(concrete.subSpec == 1); CHECK(DataSpecUtils::describe(spec) == "TEST/FOOO/1"); + CHECK(DataSpecUtils::describe(spec) == "TEST/FOOO/1"); CHECK(*DataSpecUtils::getOptionalSubSpec(spec) == 1); ConcreteDataTypeMatcher dataType = DataSpecUtils::asConcreteDataTypeMatcher(spec); @@ -59,6 +60,44 @@ TEST_CASE("ConcreteData") } } +TEST_CASE("DescribeUsingBuffer") +{ + o2::framework::clean_all_runtime_errors(); + OutputSpec spec{ + "TEST", + "FOOO", + 1, + 
Lifetime::Timeframe}; + + InputSpec inputSpec{ + "binding", + "TEST", + "FOOO", + 1, + Lifetime::Timeframe}; + + REQUIRE(DataSpecUtils::validate(inputSpec)); + + { + char buffer[1024]; + + ConcreteDataMatcher concrete = DataSpecUtils::asConcreteDataMatcher(spec); + CHECK(std::string(concrete.origin.as()) == "TEST"); + CHECK(std::string(concrete.description.as()) == "FOOO"); + CHECK(concrete.subSpec == 1); + auto size = DataSpecUtils::describe(buffer, 1024, spec); + CHECK(std::string_view(buffer, size) == "TEST/FOOO/1"); + size = DataSpecUtils::describe(buffer, 1024, spec); + CHECK(std::string_view(buffer, size) == "TEST/FOOO/1"); + CHECK(*DataSpecUtils::getOptionalSubSpec(spec) == 1); + + char buffer2[1024]; + size = DataSpecUtils::describe(buffer2, 5, spec); + // We always nullterminate the string + CHECK(std::string_view(buffer2, size) == "TEST"); + } +} + TEST_CASE("WithWildCards") { OutputSpec spec{ @@ -78,6 +117,22 @@ TEST_CASE("WithWildCards") CHECK(DataSpecUtils::getOptionalSubSpec(spec) == std::nullopt); } +TEST_CASE("WithWildCardsBuffer") +{ + char buffer[1024]; + OutputSpec spec{ + {"TEST", "FOOO"}, + Lifetime::Timeframe}; + + auto size = DataSpecUtils::describe(buffer, 1024, spec); + CHECK(std::string_view(buffer, size) == ""); + + char buffer2[1024]; + size = DataSpecUtils::describe(buffer2, 5, spec); + // We always null terminate the buffer. 
+ CHECK(std::string_view(buffer2, size) == " Date: Tue, 6 May 2025 17:26:41 +0200 Subject: [PATCH 0471/1914] ITS3: ITS3 Digitisation Development after TDR (#14145) * ITS3 digitization: parameters, segmentation and container fixes - Add digitization parameter sets - Fix C2F/F2C conversion in SegmentationMosaix - Set scale function for Alpide as IB - Fix chip digits container initialization - Correct ordering of maxRows and maxCols - Add support for floating row/column numbers in D2L * Introduce ChipSimResponse with response-centre extraction logic Add more info in CreateDic macro * Add a macro to compare ITS3 clusters and digits on a pixel array Add the script to check hits and clusters on a track Add script for visualizing chip responses * Address reviewer comments --- .../ITSMFTSimulation/AlpideSimResponse.h | 5 +- .../include/ITS3Base/SegmentationMosaix.h | 89 +-- .../ITS3/base/include/ITS3Base/SpecsV2.h | 1 - .../Upgrades/ITS3/macros/test/CMakeLists.txt | 3 + .../ITS3/macros/test/CheckChipResponseFile.C | 192 ++++++ .../ITS3/macros/test/CheckDigitsITS3.C | 2 - .../test/CompareClustersAndDigitsOnChip.C | 579 ++++++++++++++++ .../ITS3/macros/test/CorrTracksClusters.C | 638 ++++++++++++++++++ .../ITS3/macros/test/CreateDictionariesITS3.C | 22 +- .../Upgrades/ITS3/simulation/CMakeLists.txt | 8 +- .../ITS3Simulation/ChipDigitsContainer.h | 59 ++ .../include/ITS3Simulation/ChipSimResponse.h | 41 ++ .../include/ITS3Simulation/DigiParams.h | 28 +- .../include/ITS3Simulation/Digitizer.h | 9 +- .../ITS3Simulation/ITS3DPLDigitizerParam.h | 32 + .../simulation/src/ChipDigitsContainer.cxx | 63 ++ .../ITS3/simulation/src/ChipSimResponse.cxx | 62 ++ .../ITS3/simulation/src/DigiParams.cxx | 62 +- .../ITS3/simulation/src/Digitizer.cxx | 84 ++- .../simulation/src/ITS3DPLDigitizerParam.cxx | 14 + .../simulation/src/ITS3SimulationLinkDef.h | 3 + .../src/ITS3DigitizerSpec.cxx | 7 + 22 files changed, 1886 insertions(+), 117 deletions(-) create mode 100644 
Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C create mode 100644 Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C create mode 100644 Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h create mode 100644 Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h create mode 100644 Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx create mode 100644 Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx create mode 100644 Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx diff --git a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h index 92656a16257a1..5714b51d5aa45 100644 --- a/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h +++ b/Detectors/ITSMFT/common/simulation/include/ITSMFTSimulation/AlpideSimResponse.h @@ -38,7 +38,7 @@ class AlpideRespSimMat static int constexpr getNPix() { return NPix; } AlpideRespSimMat() = default; - ~AlpideRespSimMat() = default; + virtual ~AlpideRespSimMat() = default; void adopt(const AlpideRespSimMat& src, bool flipRow = false, bool flipCol = false) { @@ -69,7 +69,7 @@ class AlpideRespSimMat private: std::array data; - ClassDefNV(AlpideRespSimMat, 1); + ClassDef(AlpideRespSimMat, 1); }; /* @@ -91,6 +91,7 @@ class AlpideSimResponse int getDepthBin(float pos) const; std::string composeDataName(int colBin, int rowBin); + protected: int mNBinCol = 0; /// number of bins in X(col direction) int mNBinRow = 0; /// number of bins in Y(row direction) int mNBinDpt = 0; /// number of bins in Z(sensor dept) diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h 
b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h index f8d4a784120a0..fbf9a59e6da4b 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SegmentationMosaix.h @@ -12,12 +12,11 @@ /// \file SegmentationMosaix.h /// \brief Definition of the SegmentationMosaix class /// \author felix.schlepper@cern.ch +/// \author chunzheng.wang@cern.ch #ifndef ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ #define ALICEO2_ITS3_SEGMENTATIONMOSAIX_H_ -#include - #include "MathUtils/Cartesian.h" #include "ITS3Base/SpecsV2.h" @@ -43,24 +42,22 @@ class SegmentationMosaix // 3. The detector coordinate system. Defined by the row and column segmentation // defined at the upper edge in the flat coord. - // row,col=0 - // | - // v - // x----------------------x - // | | | - // | | | - // | | | ^ x - // | | | | - // | | | | - // | | | | - // |-----------X----------| X marks (x,z)=(0,0) X----> z - // | | | + // O----------------------| // | | | + // | | | ^ x + // | | | | + // | | | | + // | | | | + // | | | X----> z X marks (x,z)=(0,0) + // |-----------X----------| + // | | | O----> col O marks (row,col)=(0,0) + // | | | | + // | | | | + // | | | v + // | | | row // | | | - // | | | - // | | | - // | | | - // x----------------------x + // |----------------------| + public: constexpr SegmentationMosaix(int layer) : mRadius(static_cast(constants::radiiMiddle[layer])) {} constexpr ~SegmentationMosaix() = default; @@ -79,7 +76,6 @@ class SegmentationMosaix static constexpr float PitchCol{constants::pixelarray::pixels::mosaix::pitchZ}; static constexpr float PitchRow{constants::pixelarray::pixels::mosaix::pitchX}; static constexpr float SensorLayerThickness{constants::totalThickness}; - static constexpr float NominalYShift{constants::nominalYShift}; /// Transformation from the curved surface to a flat surface. 
/// Additionally a shift in the flat coordinates must be applied because @@ -102,10 +98,10 @@ class SegmentationMosaix // stack float dist = std::hypot(xCurved, yCurved); float phi = std::atan2(yCurved, xCurved); - xFlat = (mRadius * phi) - WidthH; // the y position is in the silicon volume however we need the chip volume (silicon+metalstack) // this is accounted by a y shift - yFlat = dist - mRadius + NominalYShift; + xFlat = WidthH - mRadius * phi; + yFlat = dist - mRadius; } /// Transformation from the flat surface to a curved surface @@ -122,11 +118,12 @@ class SegmentationMosaix { // MUST align the flat surface with the curved surface with the original pixel array is on and account for metal // stack + float dist = yFlat + mRadius; + float phi = (WidthH - xFlat) / mRadius; // the y position is in the chip volume however we need the silicon volume // this is accounted by a -y shift - float dist = yFlat - NominalYShift + mRadius; - xCurved = dist * std::cos((xFlat + WidthH) / mRadius); - yCurved = dist * std::sin((xFlat + WidthH) / mRadius); + xCurved = dist * std::cos(phi); + yCurved = dist * std::sin(phi); } /// Transformation from Geant detector centered local coordinates (cm) to @@ -142,8 +139,11 @@ class SegmentationMosaix /// \param int iCol Detector z cell coordinate. constexpr bool localToDetector(float const xRow, float const zCol, int& iRow, int& iCol) const noexcept { + if (!isValidLoc(xRow, zCol)) { + return false; + } localToDetectorUnchecked(xRow, zCol, iRow, iCol); - if (!isValid(iRow, iCol)) { + if (!isValidDet(iRow, iCol)) { iRow = iCol = -1; return false; } @@ -167,49 +167,54 @@ class SegmentationMosaix /// center of the sensitive volume. /// If iRow and or iCol is outside of the segmentation range a value of -0.5*Dx() /// or -0.5*Dz() is returned. 
- constexpr bool detectorToLocal(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + bool detectorToLocal(float const row, float const col, float& xRow, float& zCol) const noexcept { - if (!isValid(iRow, iCol)) { + if (!isValidDet(row, col)) { return false; } - detectorToLocalUnchecked(iRow, iCol, xRow, zCol); - return isValid(xRow, zCol); + detectorToLocalUnchecked(row, col, xRow, zCol); + return isValidLoc(xRow, zCol); } // Same as detectorToLocal w.o. checks. // We position ourself in the middle of the pixel. - constexpr void detectorToLocalUnchecked(int const iRow, int const iCol, float& xRow, float& zCol) const noexcept + void detectorToLocalUnchecked(float const row, float const col, float& xRow, float& zCol) const noexcept { - xRow = -(static_cast(iRow) + 0.5f) * PitchRow + WidthH; - zCol = (static_cast(iCol) + 0.5f) * PitchCol - LengthH; + xRow = -(row + 0.5f) * PitchRow + WidthH; + zCol = (col + 0.5f) * PitchCol - LengthH; } - bool detectorToLocal(int const row, int const col, math_utils::Point3D& loc) const noexcept + bool detectorToLocal(float const row, float const col, math_utils::Point3D& loc) const noexcept { float xRow{0.}, zCol{0.}; if (!detectorToLocal(row, col, xRow, zCol)) { return false; } - loc.SetCoordinates(xRow, NominalYShift, zCol); + loc.SetCoordinates(xRow, 0.0f, zCol); return true; } - void detectorToLocalUnchecked(int const row, int const col, math_utils::Point3D& loc) const noexcept + void detectorToLocalUnchecked(float const row, float const col, math_utils::Point3D& loc) const noexcept { float xRow{0.}, zCol{0.}; detectorToLocalUnchecked(row, col, xRow, zCol); - loc.SetCoordinates(xRow, NominalYShift, zCol); + loc.SetCoordinates(xRow, 0.0f, zCol); } private: + // Check local coordinates (cm) validity. 
template - [[nodiscard]] constexpr bool isValid(T const row, T const col) const noexcept + constexpr bool isValidLoc(T const x, T const z) const noexcept { - if constexpr (std::is_floating_point_v) { // compares in local coord. - return (-WidthH < row && row < WidthH && -LengthH < col && col < LengthH); - } else { // compares in rows/cols - return !static_cast(row < 0 || row >= static_cast(NRows) || col < 0 || col >= static_cast(NCols)); - } + return (-WidthH < x && x < WidthH && -LengthH < z && z < LengthH); + } + + // Check detector coordinates validity. + template + constexpr bool isValidDet(T const row, T const col) const noexcept + { + return (row >= 0 && row < static_cast(NRows) && + col >= 0 && col < static_cast(NCols)); } float mRadius; diff --git a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h index fedaad9182cce..83db7632e72f4 100644 --- a/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h +++ b/Detectors/Upgrades/ITS3/base/include/ITS3Base/SpecsV2.h @@ -134,7 +134,6 @@ constexpr std::array radii{19.0006 * mm, 25.228 * mm, 31.4554 * constexpr std::array radiiInner{radii[0] - silicon::thicknessIn, radii[1] - silicon::thicknessIn, radii[2] - silicon::thicknessIn}; // inner silicon radius constexpr std::array radiiOuter{radii[0] + silicon::thicknessOut, radii[1] + silicon::thicknessOut, radii[2] + silicon::thicknessOut}; // outer silicon radius constexpr std::array radiiMiddle{(radiiInner[0] + radiiOuter[0]) / 2., (radiiInner[1] + radiiOuter[1]) / 2., (radiiInner[2] + radiiOuter[2]) / 2.}; // middle silicon radius -constexpr double nominalYShift{-metalstack::thickness / 2.}; // shift to position in silicion volume to the chip volume (silicon+metalstack) // extra information of pixels and their response functions namespace pixelarray::pixels diff --git a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt index 
39e435f0ba2e6..cb6812445283c 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/macros/test/CMakeLists.txt @@ -22,7 +22,10 @@ its3_add_macro(CompareClusterSize.C) its3_add_macro(CheckMosaixSegment.C) its3_add_macro(CheckMosaixSegmentTrans.C) its3_add_macro(CompareClustersAndDigits.C) +its3_add_macro(CompareClustersAndDigitsOnChip.C) its3_add_macro(CheckROFs.C) its3_add_macro(CheckTileNumbering.C) its3_add_macro(CreateITS3StaticDeadMap.C) its3_add_macro(TestSensorGeometry.C) +its3_add_macro(CorrTracksClusters.C) +its3_add_macro(CheckChipResponseFile.C) diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C b/Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C new file mode 100644 index 0000000000000..996a99d87ecbc --- /dev/null +++ b/Detectors/Upgrades/ITS3/macros/test/CheckChipResponseFile.C @@ -0,0 +1,192 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file CheckChipResponseFile.C +/// \brief Simple macro to check the chip response files + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include +#include +#include +#include +#include +#include +#include +#include + +#define ENABLE_UPGRADES +#include "ITSMFTSimulation/AlpideSimResponse.h" + +#include "ITS3Base/SegmentationMosaix.h" +#include "fairlogger/Logger.h" +#endif + +using SegmentationMosaix = o2::its3::SegmentationMosaix; + +double um2cm(double um) { return um * 1e-4; } +double cm2um(double cm) { return cm * 1e+4; } + +o2::itsmft::AlpideSimResponse *mAlpSimResp0 = nullptr, + *mAlpSimResp1 = nullptr, + *mAptSimResp1 = nullptr; + +o2::itsmft::AlpideSimResponse* loadResponse(const std::string& fileName, const std::string& respName) +{ + TFile* f = TFile::Open(fileName.data()); + if (!f) { + std::cerr << fileName << " not found" << std::endl; + return nullptr; + } + auto resp = (o2::itsmft::AlpideSimResponse*)f->Get(respName.data()); + if (!resp) + std::cerr << respName << " not found in " << fileName << std::endl; + return resp; +} + +void LoadRespFunc() +{ + std::string AptsFile = "$(O2_ROOT)/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/APTSResponseData.root"; + std::string AlpideFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; + + mAlpSimResp0 = loadResponse(AlpideFile, "response0"); // Vbb=0V + LOG(info) << "ALPIDE Vbb=0V response" << std::endl; + mAlpSimResp0->print(); + mAlpSimResp1 = loadResponse(AlpideFile, "response1"); // Vbb=-3V + LOG(info) << "ALPIDE Vbb=-3V response" << std::endl; + mAlpSimResp1->print(); + mAptSimResp1 = loadResponse(AptsFile, "response1"); // APTS + LOG(info) << "APTS response" << std::endl; + mAptSimResp1->print(); +} + +std::vector getCollectionSeediciencies(o2::itsmft::AlpideSimResponse* resp, + const std::vector& depths) +{ + std::vector seed; + bool flipRow = false, flipCol = false; + for (auto depth : depths) { + auto rspmat = resp->getResponse(0.0, 0.0, + 
um2cm(depth) + resp->getDepthMin() + 1.e-9, + flipRow, flipCol); + seed.push_back(rspmat ? rspmat->getValue(2, 2) : 0.f); + } + return seed; +} + +std::vector getShareValues(o2::itsmft::AlpideSimResponse* resp, + const std::vector& depths) +{ + std::vector share; + bool flipRow = false, flipCol = false; + for (auto depth : depths) { + auto rspmat = resp->getResponse(0.0, 0.0, + um2cm(depth) + resp->getDepthMin() + 1.e-9, + flipRow, flipCol); + float s = 0; + int npix = resp->getNPix(); + if (rspmat) { + for (int i = 0; i < npix; ++i) + for (int j = 0; j < npix; ++j) + if (!(i == npix / 2 && j == npix / 2)) + s += rspmat->getValue(i, j); + } + share.push_back(s); + } + return share; +} + +std::vector getEffValues(o2::itsmft::AlpideSimResponse* resp, + const std::vector& depths) +{ + std::vector all; + bool flipRow = false, flipCol = false; + for (auto depth : depths) { + auto rspmat = resp->getResponse(0.0, 0.0, + um2cm(depth) + resp->getDepthMin() + 1.e-9, + flipRow, flipCol); + float s = 0; + int npix = resp->getNPix(); + if (rspmat) { + for (int i = 0; i < npix; ++i) + for (int j = 0; j < npix; ++j) + s += rspmat->getValue(i, j); + } + all.push_back(s); + } + return all; +} + +void CheckChipResponseFile() +{ + LoadRespFunc(); + LOG(info) << "Response function loaded" << std::endl; + + std::vector vecDepth(50); + for (int i = 0; i < 50; ++i) + vecDepth[i] = i; + + int colors[] = {kOrange + 7, kRed + 1, kAzure + 4}; + struct RespInfo { + o2::itsmft::AlpideSimResponse* resp; + std::string title; + int color; + }; + std::vector responses = { + {mAptSimResp1, "APTS", colors[0]}, + {mAlpSimResp0, "ALPIDE Vbb=0V", colors[1]}, + {mAlpSimResp1, "ALPIDE Vbb=-3V", colors[2]}}; + + TCanvas* c1 = new TCanvas("c1", "c1", 800, 600); + TH1* frame = c1->DrawFrame(-1, -0.049, 50, 1.049); + frame->SetTitle(";Depth(um);Charge Collection Seed / Share / Eff"); + TLegend* leg = new TLegend(0.15, 0.5, 0.4, 0.85); + leg->SetFillStyle(0); + leg->SetBorderSize(0); + + for (auto& r : 
responses) { + if (!r.resp) + continue; + auto seed = getCollectionSeediciencies(r.resp, vecDepth); + auto shr = getShareValues(r.resp, vecDepth); + auto all = getEffValues(r.resp, vecDepth); + + TGraph* grSeed = new TGraph(vecDepth.size(), vecDepth.data(), seed.data()); + grSeed->SetTitle(Form("%s seed", r.title.c_str())); + grSeed->SetLineColor(r.color); + grSeed->SetLineWidth(2); + grSeed->SetMarkerColor(r.color); + grSeed->SetMarkerStyle(kFullCircle); + grSeed->SetMarkerSize(0.8); + grSeed->Draw("SAME LP"); + leg->AddEntry(grSeed, Form("%s seed", r.title.c_str()), "lp"); + + TGraph* grShare = new TGraph(vecDepth.size(), vecDepth.data(), shr.data()); + grShare->SetLineColor(r.color); + grShare->SetLineWidth(2); + grShare->SetMarkerColor(r.color); + grShare->SetMarkerStyle(kOpenSquare); + grShare->SetMarkerSize(1); + grShare->Draw("SAME LP"); + leg->AddEntry(grShare, Form("%s share", r.title.c_str()), "p"); + + TGraph* grEff = new TGraph(vecDepth.size(), vecDepth.data(), all.data()); + grEff->SetLineColor(r.color); + grEff->SetLineWidth(2); + grEff->SetMarkerColor(r.color); + grEff->SetMarkerStyle(kFullDiamond); + grEff->SetMarkerSize(1); + grEff->Draw("SAME LP"); + leg->AddEntry(grEff, Form("%s eff", r.title.c_str()), "p"); + } + leg->Draw(); + + c1->SaveAs("ChipResponse.pdf"); +} diff --git a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C index 1dc4a4e2d6b47..240b1bd344af5 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CheckDigitsITS3.C @@ -80,8 +80,6 @@ void CheckDigitsITS3(std::string digifile = "it3digits.root", std::string hitfil int nevD = digTree->GetEntries(); // digits in cont. 
readout may be grouped as few events per entry - int lastReadHitEv = -1; - int nDigitReadIB{0}, nDigitReadOB{0}; int nDigitFilledIB{0}, nDigitFilledOB{0}; diff --git a/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C new file mode 100644 index 0000000000000..310be8c5858ef --- /dev/null +++ b/Detectors/Upgrades/ITS3/macros/test/CompareClustersAndDigitsOnChip.C @@ -0,0 +1,579 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +/// \file CompareClustersAndDigitsOnChip.C +/// \brief Macro to compare ITS3 clusters and digits on a pixel array, + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#define ENABLE_UPGRADES +#include "DataFormatsITSMFT/CompCluster.h" +#include "DataFormatsITSMFT/Digit.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "DetectorsCommonDataFormats/DetID.h" +#include "DetectorsCommonDataFormats/DetectorNameConf.h" +#include "ITS3Base/SegmentationMosaix.h" +#include "ITS3Base/SpecsV2.h" +#include "ITS3Reconstruction/TopologyDictionary.h" +#include "DataFormatsITSMFT/CompCluster.h" +#include "DataFormatsITSMFT/ClusterTopology.h" +#include "ITSBase/GeometryTGeo.h" +#include "ITSMFTBase/SegmentationAlpide.h" +#include "ITSMFTSimulation/Hit.h" +#include "MathUtils/Cartesian.h" +#include "MathUtils/Utils.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/MCTruthContainer.h" +#include "SimulationDataFormat/ConstMCTruthContainer.h" +#include "SimulationDataFormat/IOMCTruthContainerView.h" + +struct Data { + TH2F* pixelArray; + TGraph* hitS; + TGraph* hitM; + TGraph* hitE; + TGraph* clusS; + TGraph* cog; + TLegend* leg; + std::vector* vClusBox; + void clear() + { + delete pixelArray; + delete hitS; + delete hitM; + delete hitE; + delete clusS; + delete cog; + delete leg; + for (auto& b : *vClusBox) { + delete b; + } + delete vClusBox; + } +}; + +void CompareClustersAndDigitsOnChip(std::string clusfile = "o2clus_its.root", + std::string digifile = "it3digits.root", + std::string dictfile = "", + std::string hitfile = "o2sim_HitsIT3.root", + std::string inputGeom = "o2sim_geometry.root", + bool batch = true) +{ + TH1::AddDirectory(kFALSE); + gROOT->SetBatch(batch); + gStyle->SetPalette(kRainBow); + gStyle->SetOptStat(0); + + using namespace o2::base; + using namespace 
o2::its; + using o2::itsmft::Hit; + using Segmentation = o2::itsmft::SegmentationAlpide; + using o2::itsmft::ClusterTopology; + using o2::itsmft::CompClusterExt; + using ROFRec = o2::itsmft::ROFRecord; + using MC2ROF = o2::itsmft::MC2ROFRecord; + using HitVec = std::vector; + using MC2HITS_map = std::unordered_map; // maps (track_ID<<16 + chip_ID) to entry in the hit vector + std::vector hitVecPool; + std::vector mc2hitVec; + + std::array mMosaixSegmentations{0, 1, 2}; + + // Geometry + o2::base::GeometryManager::loadGeometry(inputGeom); + auto gman = o2::its::GeometryTGeo::Instance(); + gman->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, + o2::math_utils::TransformType::T2GRot, + o2::math_utils::TransformType::L2G)); // request cached transforms + const int nChips = gman->getNumberOfChips(); + + LOGP(info, "Total number of chips is {} in ITS3 (IB and OB)", nChips); + + // Create all plots + LOGP(info, "Selecting chips to be visualised"); + std::set selectedChips; + std::map> chipGroups; + + for (int chipID{0}; chipID < nChips; ++chipID) { + TString tpath = gman->getMatrixPath(chipID); + std::string path = tpath.Data(); + + std::vector tokens; + std::istringstream iss(path); + std::string token; + while (std::getline(iss, token, '/')) { + if (!token.empty()) { + tokens.push_back(token); + } + } + + std::string segmentName, staveName, carbonFormName; + for (const auto& t : tokens) { + if (t.find("ITS3Segment") != std::string::npos) + segmentName = t; + if (t.find("ITSUStave") != std::string::npos) + staveName = t; + if (t.find("ITS3CarbonForm") != std::string::npos) + carbonFormName = t; + } + + std::string groupKey; + if (!segmentName.empty()) { + groupKey = segmentName + "_" + carbonFormName; + } else if (!staveName.empty()) { + groupKey = staveName; + } else { + continue; + } + + chipGroups[groupKey].push_back(chipID); + } + + LOGP(info, "From each IB Segment or OB Stave, 10 chipIDs are uniformly selected"); + LOGP(info, "Selected 
chipID: "); + for (auto& [groupName, ids] : chipGroups) { + std::vector sampled; + if (ids.size() <= 10) { + for (auto id : ids) { + selectedChips.insert(id); + sampled.push_back(id); + } + } else { + for (int i{0}; i < 10; ++i) { + int idx = i * (ids.size() - 1) / 9; // 9 intervals for 10 points + int id = ids[idx]; + if (selectedChips.insert(id).second) { + sampled.push_back(id); + } + } + } + + std::ostringstream oss; + std::string topOrBot = "N/A"; + std::smatch match; + std::regex rgxSegment(R"(Segment(\d+)_(\d+)_ITS3CarbonForm\d+_(\d+))"); + std::regex rgxStave(R"(Stave(\d+)_(\d+))"); + if (std::regex_search(groupName, match, rgxSegment)) { + int layer = std::stoi(match[1]); + int segment = std::stoi(match[2]); + int carbonForm = std::stoi(match[3]); + topOrBot = (carbonForm == 0 ? "TOP" : "BOT"); + oss << topOrBot << " segment " << segment << " at layer " << layer << ": "; + } else if (std::regex_search(groupName, match, rgxStave)) { + int layer = std::stoi(match[1]); + int stave = std::stoi(match[2]); + oss << "Stave " << stave << " at layer " << layer << ": "; + } else { + LOGP(error, "Cannot select the correct chipID in OB or IB"); + return; + } + for (auto id : sampled) { + oss << id << " "; + } + LOG(info) << oss.str(); + } + LOGP(info, "{} selected chips will be visualized and analyzed.", chipGroups.size()); + + // Hits + TFile fileH(hitfile.data()); + auto* hitTree = dynamic_cast(fileH.Get("o2sim")); + std::vector* hitArray = nullptr; + hitTree->SetBranchAddress("IT3Hit", &hitArray); + mc2hitVec.resize(hitTree->GetEntries()); + hitVecPool.resize(hitTree->GetEntries(), nullptr); + + // Digits + TFile* digFile = TFile::Open(digifile.data()); + TTree* digTree = (TTree*)digFile->Get("o2sim"); + std::vector* digArr = nullptr; + digTree->SetBranchAddress("IT3Digit", &digArr); + o2::dataformats::IOMCTruthContainerView* plabels = nullptr; + digTree->SetBranchAddress("IT3DigitMCTruth", &plabels); + + // Clusters + TFile fileC(clusfile.data()); + auto* clusTree 
= dynamic_cast(fileC.Get("o2sim")); + std::vector* clusArr = nullptr; + clusTree->SetBranchAddress("ITSClusterComp", &clusArr); + std::vector* patternsPtr = nullptr; + auto pattBranch = clusTree->GetBranch("ITSClusterPatt"); + if (pattBranch != nullptr) { + pattBranch->SetAddress(&patternsPtr); + } + + // Topology dictionary + o2::its3::TopologyDictionary dict; + bool hasAvailableDict = false; + if (!dictfile.empty()) { + std::ifstream file(dictfile.c_str()); + if (file.good()) { + LOGP(info, "Running with external topology dictionary: {}", dictfile); + dict.readFromFile(dictfile); + LOGP(info, "The IB dictionary size is {}, and the OB dictionary size is {}", dict.getSize(true), dict.getSize(false)); + hasAvailableDict = dict.getSize(true) != 0 && dict.getSize(false) != 0; + if (hasAvailableDict) { + LOGP(info, "Dictionaries is vaild."); + } else { + LOGP(info, "Dictionaries is NOT vaild!"); + } + } else { + LOGP(info, "Cannot open dictionary file: {}. Running without external dictionary!", dictfile); + dictfile = ""; + } + } else { + LOGP(info, "Running without external topology dictionary!"); + } + + // ROFrecords + std::vector rofRecVec, *rofRecVecP = &rofRecVec; + clusTree->SetBranchAddress("ITSClustersROF", &rofRecVecP); + + // Cluster MC labels + o2::dataformats::MCTruthContainer* clusLabArr = nullptr; + std::vector mc2rofVec, *mc2rofVecP = &mc2rofVec; + if ((hitTree != nullptr) && (clusTree->GetBranch("ITSClusterMCTruth") != nullptr)) { + clusTree->SetBranchAddress("ITSClusterMCTruth", &clusLabArr); + clusTree->SetBranchAddress("ITSClustersMC2ROF", &mc2rofVecP); + } + + clusTree->GetEntry(0); + unsigned int nROFRec = (int)rofRecVec.size(); + std::vector mcEvMin(nROFRec, hitTree->GetEntries()); + std::vector mcEvMax(nROFRec, -1); + + // Build min and max MC events used by each ROF + for (int imc = mc2rofVec.size(); imc--;) { + const auto& mc2rof = mc2rofVec[imc]; + if (mc2rof.rofRecordID < 0) { + continue; // this MC event did not contribute to any ROF + } + 
for (unsigned int irfd = mc2rof.maxROF - mc2rof.minROF + 1; irfd--;) { + unsigned int irof = mc2rof.rofRecordID + irfd; + if (irof >= nROFRec) { + LOGP(error, "ROF = {} from MC2ROF record is >= N ROFs = {}", irof, nROFRec); + } + if (mcEvMin[irof] > imc) { + mcEvMin[irof] = imc; + } + if (mcEvMax[irof] < imc) { + mcEvMax[irof] = imc; + } + } + } + + // Create all plots + LOGP(info, "Creating plots"); + std::unordered_map data; + auto initData = [&](int chipID, Data& dat) { + if (dat.pixelArray) + return; + + int nCol{0}, nRow{0}; + float lengthPixArr{0}, widthPixArr{0}; + bool isIB = o2::its3::constants::detID::isDetITS3(chipID); + int layer = gman->getLayer(chipID); + if (isIB) { + nCol = o2::its3::SegmentationMosaix::NCols; + nRow = o2::its3::SegmentationMosaix::NRows; + lengthPixArr = o2::its3::constants::pixelarray::pixels::mosaix::pitchZ * nCol; + widthPixArr = o2::its3::constants::pixelarray::pixels::mosaix::pitchX * nRow; + } else { + nCol = o2::itsmft::SegmentationAlpide::NCols; + nRow = o2::itsmft::SegmentationAlpide::NRows; + lengthPixArr = o2::itsmft::SegmentationAlpide::PitchCol * nCol; + widthPixArr = o2::itsmft::SegmentationAlpide::PitchRow * nRow; + } + + dat.pixelArray = new TH2F(Form("histSensor_%d", chipID), Form("SensorID=%d;z(cm);x(cm)", chipID), + nCol, -0.5 * lengthPixArr, 0.5 * lengthPixArr, + nRow, -0.5 * widthPixArr, 0.5 * widthPixArr); + dat.hitS = new TGraph(); + dat.hitS->SetMarkerStyle(kFullTriangleDown); + dat.hitS->SetMarkerColor(kGreen); + dat.hitM = new TGraph(); + dat.hitM->SetMarkerStyle(kFullCircle); + dat.hitM->SetMarkerColor(kGreen + 3); + dat.hitE = new TGraph(); + dat.hitE->SetMarkerStyle(kFullTriangleUp); + dat.hitE->SetMarkerColor(kGreen + 5); + dat.clusS = new TGraph(); + dat.clusS->SetMarkerStyle(kFullSquare); + dat.clusS->SetMarkerColor(kBlue); + dat.cog = new TGraph(); + dat.cog->SetMarkerStyle(kFullDiamond); + dat.cog->SetMarkerColor(kRed); + dat.leg = new TLegend(0.7, 0.7, 0.92, 0.92); + dat.leg->AddEntry(dat.hitS, 
"Hit Start"); + dat.leg->AddEntry(dat.hitM, "Hit Middle"); + dat.leg->AddEntry(dat.hitE, "Hit End"); + dat.leg->AddEntry(dat.clusS, "Cluster Start"); + dat.leg->AddEntry(dat.cog, "Cluster COG"); + dat.vClusBox = new std::vector; + }; + + LOGP(info, "Filling digits"); + for (int iDigit{0}; digTree->LoadTree(iDigit) >= 0; ++iDigit) { + digTree->GetEntry(iDigit); + for (const auto& digit : *digArr) { + const auto chipID = digit.getChipIndex(); + if (!selectedChips.count(chipID)) + continue; + const auto layer = gman->getLayer(chipID); + bool isIB = layer < 3; + float locDigiX{0}, locDigiZ{0}; + if (isIB) { + mMosaixSegmentations[layer].detectorToLocal(digit.getRow(), digit.getColumn(), locDigiX, locDigiZ); + } else { + o2::itsmft::SegmentationAlpide::detectorToLocal(digit.getRow(), digit.getColumn(), locDigiX, locDigiZ); + } + auto& dat = data[chipID]; + initData(chipID, dat); + data[chipID].pixelArray->Fill(locDigiZ, locDigiX); + } + } + + LOGP(info, "Building min and max MC events used by each ROF, total ROFs {}", nROFRec); + auto pattIt = patternsPtr->cbegin(); + bool isAllPattIDInvaild{true}; + for (unsigned int irof{0}; irof < nROFRec; irof++) { + const auto& rofRec = rofRecVec[irof]; + // >> read and map MC events contributing to this ROF + for (int im = mcEvMin[irof]; im <= mcEvMax[irof]; im++) { + if (hitVecPool[im] == nullptr) { + hitTree->SetBranchAddress("IT3Hit", &hitVecPool[im]); + hitTree->GetEntry(im); + auto& mc2hit = mc2hitVec[im]; + const auto* hitArray = hitVecPool[im]; + for (int ih = hitArray->size(); ih--;) { + const auto& hit = (*hitArray)[ih]; + uint64_t key = (uint64_t(hit.GetTrackID()) << 32) + hit.GetDetectorID(); + mc2hit.emplace(key, ih); + } + } + } + + // Clusters in this ROF + for (int icl{0}; icl < rofRec.getNEntries(); icl++) { + int clEntry = rofRec.getFirstEntry() + icl; // entry of icl-th cluster of this ROF in the vector of clusters + const auto& cluster = (*clusArr)[clEntry]; + const auto chipID = cluster.getSensorID(); + if 
(!selectedChips.count(chipID)) { + // Even if not selected, advance pattIt if patternID is InvalidPatternID + if (cluster.getPatternID() == o2::itsmft::CompCluster::InvalidPatternID) { + o2::itsmft::ClusterPattern::skipPattern(pattIt); + } + continue; + } + const auto pattID = cluster.getPatternID(); + const bool isIB = o2::its3::constants::detID::isDetITS3(chipID); + const auto layer = gman->getLayer(chipID); + auto& dat = data[chipID]; + initData(chipID, dat); + o2::itsmft::ClusterPattern pattern; + // Pattern extraction + if (cluster.getPatternID() != o2::itsmft::CompCluster::InvalidPatternID) { + isAllPattIDInvaild = false; + if (!hasAvailableDict) { + LOGP(error, "Encountered pattern ID {}, which is not equal to the invalid pattern ID {}", cluster.getPatternID(), o2::itsmft::CompCluster::InvalidPatternID); + LOGP(error, "Clusters have already been generated with a dictionary which was not provided properly!"); + return; + } + if (dict.isGroup(cluster.getPatternID(), isIB)) { + pattern.acquirePattern(pattIt); + } else { + pattern = dict.getPattern(cluster.getPatternID(), isIB); + } + } else { + pattern.acquirePattern(pattIt); + } + + // Hits + const auto& lab = (clusLabArr->getLabels(clEntry))[0]; + if (!lab.isValid()) + continue; + const int trID = lab.getTrackID(); + const auto& mc2hit = mc2hitVec[lab.getEventID()]; + const auto* hitArray = hitVecPool[lab.getEventID()]; + uint64_t key = (uint64_t(trID) << 32) + chipID; + auto hitEntry = mc2hit.find(key); + if (hitEntry == mc2hit.end()) + continue; + o2::math_utils::Point3D locHMiddle; + const auto& hit = (*hitArray)[hitEntry->second]; + auto locHEnd = gman->getMatrixL2G(chipID) ^ (hit.GetPos()); + auto locHStart = gman->getMatrixL2G(chipID) ^ (hit.GetPosStart()); + if (isIB) { + float xFlat{0.}, yFlat{0.}; + mMosaixSegmentations[layer].curvedToFlat(locHEnd.X(), locHEnd.Y(), xFlat, yFlat); + locHEnd.SetXYZ(xFlat, yFlat, locHEnd.Z()); + mMosaixSegmentations[layer].curvedToFlat(locHStart.X(), locHStart.Y(), 
xFlat, yFlat); + locHStart.SetXYZ(xFlat, yFlat, locHStart.Z()); + } + locHMiddle.SetXYZ(0.5f * (locHEnd.X() + locHStart.X()), + 0.5f * (locHEnd.Y() + locHStart.Y()), + 0.5f * (locHEnd.Z() + locHStart.Z())); + data[chipID].hitS->AddPoint(locHStart.Z(), locHStart.X()); + data[chipID].hitM->AddPoint(locHMiddle.Z(), locHMiddle.X()); + data[chipID].hitE->AddPoint(locHEnd.Z(), locHEnd.X()); + + // Cluster Start point + float locCluX{0}, locCluZ{0}; + if (isIB) { + mMosaixSegmentations[layer].detectorToLocal(cluster.getRow(), cluster.getCol(), locCluX, locCluZ); + } else { + o2::itsmft::SegmentationAlpide::detectorToLocal(cluster.getRow(), cluster.getCol(), locCluX, locCluZ); + } + data[chipID].clusS->AddPoint(locCluZ, locCluX); + + // COG + o2::math_utils::Point3D locCOG; + // Cluster COG using dictionary (if available) + if (hasAvailableDict && (pattID != o2::itsmft::CompCluster::InvalidPatternID && !dict.isGroup(pattID, isIB))) { + locCOG = dict.getClusterCoordinates(cluster); + } else { + if (isIB) { + locCOG = o2::its3::TopologyDictionary::getClusterCoordinates(cluster, pattern, false); + } else { + locCOG = o2::itsmft::TopologyDictionary::getClusterCoordinates(cluster, pattern, false); + } + } + if (isIB) { + float flatX{0}, flatY{0}; + mMosaixSegmentations[layer].curvedToFlat(locCOG.X(), locCOG.Y(), flatX, flatY); + locCOG.SetCoordinates(flatX, flatY, locCOG.Z()); + } + data[chipID].cog->AddPoint(locCOG.Z(), locCOG.X()); + + // Cluster Box using dictionary if available, otherwise use raw pattern + float lowLeftX{0}, lowLeftZ{0}, topRightX{0}, topRightZ{0}; + // Use dictionary-based cluster box + if (isIB) { + mMosaixSegmentations[layer].detectorToLocal(cluster.getRow(), cluster.getCol(), lowLeftX, lowLeftZ); + mMosaixSegmentations[layer].detectorToLocal(cluster.getRow() + pattern.getRowSpan() - 1, + cluster.getCol() + pattern.getColumnSpan() - 1, + topRightX, topRightZ); + lowLeftX += 0.5 * o2::its3::constants::pixelarray::pixels::mosaix::pitchX; + lowLeftZ -= 0.5 
* o2::its3::constants::pixelarray::pixels::mosaix::pitchZ; + topRightX -= 0.5 * o2::its3::constants::pixelarray::pixels::mosaix::pitchX; + topRightZ += 0.5 * o2::its3::constants::pixelarray::pixels::mosaix::pitchZ; + } else { + o2::itsmft::SegmentationAlpide::detectorToLocal(cluster.getRow(), cluster.getCol(), lowLeftX, lowLeftZ); + o2::itsmft::SegmentationAlpide::detectorToLocal(cluster.getRow() + pattern.getRowSpan() - 1, + cluster.getCol() + pattern.getColumnSpan() - 1, + topRightX, topRightZ); + lowLeftX += 0.5 * o2::itsmft::SegmentationAlpide::PitchRow; + lowLeftZ -= 0.5 * o2::itsmft::SegmentationAlpide::PitchCol; + topRightX -= 0.5 * o2::itsmft::SegmentationAlpide::PitchRow; + topRightZ += 0.5 * o2::itsmft::SegmentationAlpide::PitchCol; + } + auto clusBox = new TBox(lowLeftZ, lowLeftX, topRightZ, topRightX); + clusBox->SetFillColorAlpha(0, 0); + clusBox->SetFillStyle(0); + clusBox->SetLineWidth(4); + clusBox->SetLineColor(kBlack); + data[chipID].vClusBox->push_back(clusBox); + } + } + + if (isAllPattIDInvaild) { + LOGP(info, "Verified input cluster file was generated w/o topology dictionary"); + if (!dictfile.empty()) { + LOGP(error, "Non-dictionary cluster file processed by external dictionary! 
Please adjust input."); + return; + } + } + + LOGP(info, "Writing to root file"); + double x1, y1, x2, y2; + auto oFileOut = TFile::Open("CompareClustersAndDigitsOnChip.root", "RECREATE"); + oFileOut->cd(); + for (int chipID{0}; chipID < nChips; chipID++) { + if (!selectedChips.count(chipID)) + continue; + auto& dat = data[chipID]; + TString tpath = gman->getMatrixPath(chipID); + const std::string cpath{tpath.Data() + 39, tpath.Data() + tpath.Length()}; + const std::filesystem::path p{cpath}; + std::string nestedDir = p.parent_path().string(); + TDirectory* currentDir = oFileOut; + std::istringstream iss(nestedDir); + std::string token; + while (std::getline(iss, token, '/')) { + if (token.empty()) + continue; + TDirectory* nextDir = currentDir->GetDirectory(token.c_str()); + if (!nextDir) { + nextDir = currentDir->mkdir(token.c_str()); + } + if (!nextDir) { + LOGP(error, "Cannot create subdirectory: %s", token.c_str()); + break; + } + currentDir = nextDir; + currentDir->cd(); + } + if (!currentDir) { + LOGP(error, "Failed to create nested directory for chip %d", chipID); + continue; + } + + auto canv = new TCanvas(Form("%s_%d", p.filename().c_str(), chipID)); + canv->SetTitle(Form("%s_%d", p.filename().c_str(), chipID)); + canv->cd(); + gPad->SetGrid(1, 1); + dat.pixelArray->Draw("colz"); + dat.hitS->Draw("p;same"); + dat.hitM->Draw("p;same"); + dat.hitE->Draw("p;same"); + auto arr = new TArrow(); + arr->SetArrowSize(0.01); + for (int i{0}; i < dat.hitS->GetN(); ++i) { + dat.hitS->GetPoint(i, x1, y1); + dat.hitE->GetPoint(i, x2, y2); + arr->DrawArrow(x1, y1, x2, y2); + } + dat.clusS->Draw("p;same"); + if (dat.cog->GetN() != 0) + dat.cog->Draw("p;same"); + for (const auto& clusBox : *dat.vClusBox) { + clusBox->Draw(); + } + dat.leg->Draw(); + canv->SetEditable(false); + + currentDir->WriteTObject(canv, canv->GetName()); + dat.clear(); + delete canv; + delete arr; + printf("\rWriting chip %05d", chipID); + } + printf("\n"); + oFileOut->Write(); + oFileOut->Close(); 
+ LOGP(info, "Finished writing selected chip visualizations."); +} \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C b/Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C new file mode 100644 index 0000000000000..634d761366920 --- /dev/null +++ b/Detectors/Upgrades/ITS3/macros/test/CorrTracksClusters.C @@ -0,0 +1,638 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#if !defined(__CLING__) || defined(__ROOTCLING__) +#include +#include "TEfficiency.h" +#include +#include +#include + +#include "ITSMFTSimulation/Hit.h" +#include "DataFormatsITS/TrackITS.h" +#include "DetectorsBase/Propagator.h" +#include "Field/MagneticField.h" +#include "ITSBase/GeometryTGeo.h" +#include "DataFormatsITSMFT/CompCluster.h" +#include "SimulationDataFormat/MCCompLabel.h" +#include "SimulationDataFormat/MCEventHeader.h" +#include "SimulationDataFormat/MCTrack.h" +#include "DataFormatsITSMFT/ROFRecord.h" +#include "SimulationDataFormat/MCTruthContainer.h" +#include "SimulationDataFormat/TrackReference.h" +#include "ITS3Reconstruction/TopologyDictionary.h" +#include "ITSMFTBase/SegmentationAlpide.h" +#include "ITS3Base/SegmentationMosaix.h" + +#include +#include +#include +#include +#endif + +using namespace std; +using namespace o2::itsmft; +using namespace o2::its; +using SegmentationIB = o2::its3::SegmentationMosaix; +using SegmentationOB = o2::itsmft::SegmentationAlpide; +static constexpr int kNLayer = 7; +static constexpr int 
INVALID_INT = -99; +static constexpr float INVALID_FLOAT = -99.f; + +//______________________________________________________________________________ +// ParticleInfo structure +struct ParticleInfo { + int event{}; + int pdg{}; + float pt{}; + float recpt{}; + float eta{}; + float phi{}; + float pvx{}; + float pvy{}; + float pvz{}; + float dcaxy{}; + float dcaz{}; + int mother{}; + int first{}; + unsigned short clusters = 0u; + unsigned char isReco = 0u; + unsigned char isFake = 0u; + bool isPrimary = false; + unsigned char storedStatus = 2; /// not stored = 2, fake = 1, good = 0 + std::array clusterSize; + std::array clusterPattern; + std::array clusterLocX; + std::array clusterLocZ; + std::array hitLocX; + std::array hitLocY; + std::array hitLocZ; + o2::its::TrackITS track; + ParticleInfo() + { + clusterSize.fill(INVALID_INT); + clusterPattern.fill(INVALID_INT); + clusterLocX.fill(INVALID_FLOAT); + clusterLocZ.fill(INVALID_FLOAT); + hitLocX.fill(INVALID_FLOAT); + hitLocY.fill(INVALID_FLOAT); + hitLocZ.fill(INVALID_FLOAT); + } +}; + +//______________________________________________________________________________ +// Convert curved local coordinates to flat coordinates +void CurvedLocalToFlat(o2::math_utils::Point3D& point, const SegmentationIB& seg) +{ + float xFlat = 0.f, yFlat = 0.f; + seg.curvedToFlat(point.X(), point.Y(), xFlat, yFlat); + point.SetXYZ(xFlat, yFlat, point.Z()); +} + +//______________________________________________________________________________ +// Resolve pattern from patternID and iterator +bool resolvePattern(const o2::itsmft::CompClusterExt& cluster, + decltype(std::declval>().cbegin())& pattIt, + const o2::its3::TopologyDictionary& dict, + bool isIB, + o2::itsmft::ClusterPattern& pattOut) +{ + auto pattID = cluster.getPatternID(); + if (pattID != o2::itsmft::CompCluster::InvalidPatternID) { + if (!dict.getSize(true) && !dict.getSize(false)) { + LOGP(error, "Encountered non-invalid pattern ID {} but dictionary is missing!", pattID); + 
return false; + } + if (dict.isGroup(pattID, isIB)) { + pattOut.acquirePattern(pattIt); + } else { + pattOut = dict.getPattern(pattID, isIB); + } + } else { + pattOut.acquirePattern(pattIt); + } + return true; +} + +//______________________________________________________________________________ +// Function to analyze reconstructed tracks +void analyzeRecoTracks(TTree* recTree, + const std::vector* recArr, + const std::vector* trkLabArr, + std::vector>& info, + float bz, + ULong_t& unaccounted, + ULong_t& good, + ULong_t& fakes, + ULong_t& total) +{ + unaccounted = good = fakes = total = 0; + for (int frame = 0; frame < recTree->GetEntriesFast(); frame++) { // reco tracks frames + if (recTree->GetEvent(frame) == 0) + continue; + total += trkLabArr->size(); + for (unsigned int iTrack = 0; iTrack < trkLabArr->size(); ++iTrack) { + auto lab = trkLabArr->at(iTrack); + if (!lab.isSet()) { + unaccounted++; + continue; + } + int trackID, evID, srcID; + bool fake; + lab.get(trackID, evID, srcID, fake); + if (evID < 0 || evID >= (int)info.size()) { + unaccounted++; + continue; + } + if (trackID < 0 || trackID >= (int)info[evID].size()) { + unaccounted++; + continue; + } + info[evID][trackID].isReco += !fake; + info[evID][trackID].isFake += fake; + if (recArr->at(iTrack).isBetter(info[evID][trackID].track, 1.e9)) { + info[evID][trackID].storedStatus = fake; + info[evID][trackID].track = recArr->at(iTrack); + float ip[2]{0., 0.}; + info[evID][trackID].track.getImpactParams(info[evID][trackID].pvx, + info[evID][trackID].pvy, + info[evID][trackID].pvz, bz, ip); + info[evID][trackID].dcaxy = ip[0]; + info[evID][trackID].dcaz = ip[1]; + info[evID][trackID].recpt = info[evID][trackID].track.getPt(); + } + fakes += static_cast(fake); + good += static_cast(!fake); + } + } + LOGP(info, "** Some statistics:"); + LOGP(info, "\t- Total number of tracks: {}", total); + LOGP(info, "\t- Total number of tracks not corresponding to particles: {} ({:.2f}%)", unaccounted, unaccounted * 100. 
/ total); + LOGP(info, "\t- Total number of fakes: {} ({:.2f}%)", fakes, fakes * 100. / total); + LOGP(info, "\t- Total number of good: {} ({:.2f}%)", good, good * 100. / total); +} + +//______________________________________________________________________________ +// Read and map hit information from hitTree +void mapHitsForMCEvents(TTree* hitTree, + std::vector*>& hitVecPool, + std::vector>& mc2hitVec, + const std::vector& mcEvMin, + const std::vector& mcEvMax, + size_t nROFRec) +{ + for (unsigned int irof = 0; irof < nROFRec; irof++) { + for (int im = mcEvMin[irof]; im <= mcEvMax[irof]; im++) { + if (!hitVecPool[im]) { + hitTree->SetBranchAddress("IT3Hit", &hitVecPool[im]); + hitTree->GetEntry(im); + auto& mc2hit = mc2hitVec[im]; + const auto* hitArray = hitVecPool[im]; + for (int ih = hitArray->size(); ih--;) { + const auto& hit = (*hitArray)[ih]; + uint64_t key = (uint64_t(hit.GetTrackID()) << 32) + hit.GetDetectorID(); + mc2hit.emplace(key, ih); + } + } + } + } +} + +//______________________________________________________________________________ +// Load geometry and magnetic field information +void loadGeometryAndField(const std::string& magfile, const std::string& inputGeom, float& bz, o2::its::GeometryTGeo*& gman) +{ + o2::base::Propagator::initFieldFromGRP(magfile); + bz = o2::base::Propagator::Instance()->getNominalBz(); + o2::base::GeometryManager::loadGeometry(inputGeom); + gman = o2::its::GeometryTGeo::Instance(); + gman->fillMatrixCache(o2::math_utils::bit2Mask(o2::math_utils::TransformType::T2L, + o2::math_utils::TransformType::T2GRot, + o2::math_utils::TransformType::L2G)); +} + +//______________________________________________________________________________ +// Load topology dictionary +void loadTopologyDictionary(const std::string& dictfile, o2::its3::TopologyDictionary& dict) +{ + std::ifstream iofile(dictfile); + if (iofile.good()) { + LOG(info) << "Running with dictionary: " << dictfile; + dict.readFromFile(dictfile); + } else { + LOG(info) 
<< "Dictionary file not found: " << dictfile; + } +} + +//______________________________________________________________________________ +// Build ROF +void buildMcEvRangePerROF(const std::vector& mc2rofVec, + size_t nROFRec, + std::vector& mcEvMin, + std::vector& mcEvMax) +{ + for (size_t imc = 0; imc < mc2rofVec.size(); ++imc) { + const auto& mc2rof = mc2rofVec[imc]; + if (mc2rof.rofRecordID < 0) + continue; + for (size_t i = mc2rof.minROF; i <= mc2rof.maxROF; ++i) { + if (i >= nROFRec) + continue; + mcEvMin[i] = std::min(mcEvMin[i], static_cast(imc)); + mcEvMax[i] = std::max(mcEvMax[i], static_cast(imc)); + } + } +} + +//______________________________________________________________________________ +// Load Hits data +void prepareHitAccess(const std::string& hitfile, + TTree*& hitTree, + std::vector*>& hitVecPool, + std::vector>& mc2hitVec) +{ + TFile* fHit = TFile::Open(hitfile.data()); + hitTree = (TTree*)fHit->Get("o2sim"); + mc2hitVec.resize(hitTree->GetEntries()); + hitVecPool.resize(hitTree->GetEntries(), nullptr); +} + +void loadCluster(const std::string& clusfile, + TTree*& clusTree, + std::vector*& clusArr, + o2::dataformats::MCTruthContainer*& clusLabArr, + std::vector& mc2rofVec, + std::vector*& patternsPtr, + std::vector& rofRecVec) +{ + // Open file and let it persist + TFile* fileC = TFile::Open(clusfile.data()); + // Get tree + clusTree = dynamic_cast(fileC->Get("o2sim")); + // Cluster array + clusArr = nullptr; + clusTree->SetBranchAddress("ITSClusterComp", &clusArr); + // MC truth + clusLabArr = nullptr; + clusTree->SetBranchAddress("ITSClusterMCTruth", &clusLabArr); + clusTree->SetBranchAddress("ITSClusterPatt", &patternsPtr); + // ROF records + std::vector* rofRecVecP = &rofRecVec; + clusTree->SetBranchAddress("ITSClustersROF", &rofRecVecP); + // MC2ROF mapping + std::vector* mc2rofVecP = &mc2rofVec; + clusTree->SetBranchAddress("ITSClustersMC2ROF", &mc2rofVecP); + clusTree->GetEntry(0); + // After setting all branch addresses, trigger preload 
of the first entr +} + +//______________________________________________________________________________ +// Load Reconstructed Tracks data +void loadRecoTracks(const std::string& tracfile, + TTree*& recTree, + std::vector*& recArr, + std::vector*& trkLabArr) +{ + TFile* fTrk = TFile::Open(tracfile.data()); + recTree = (TTree*)fTrk->Get("o2sim"); + recTree->SetBranchAddress("ITSTrack", &recArr); + recTree->SetBranchAddress("ITSTrackMCTruth", &trkLabArr); +} + +//______________________________________________________________________________ +// Load MC Track information +void loadMCTrackInfo(const std::string& kinefile, + std::vector>& info, + std::vector*& mcArr, + o2::dataformats::MCEventHeader*& mcEvent, + TTree*& mcTree) +{ + TFile* kineFile = TFile::Open(kinefile.data()); + mcTree = (TTree*)kineFile->Get("o2sim"); + mcTree->SetBranchStatus("*", 0); + mcTree->SetBranchStatus("MCTrack*", 1); + mcTree->SetBranchStatus("MCEventHeader*", 1); + mcTree->SetBranchAddress("MCTrack", &mcArr); + mcTree->SetBranchAddress("MCEventHeader.", &mcEvent); + + int nev = mcTree->GetEntriesFast(); + info.resize(nev); + for (int n = 0; n < nev; n++) { + mcTree->GetEvent(n); + info[n].resize(mcArr->size()); + for (unsigned int mcI = 0; mcI < mcArr->size(); ++mcI) { + auto part = mcArr->at(mcI); + info[n][mcI].pvx = mcEvent->GetX(); + info[n][mcI].pvy = mcEvent->GetY(); + info[n][mcI].pvz = mcEvent->GetZ(); + info[n][mcI].event = n; + info[n][mcI].pdg = part.GetPdgCode(); + info[n][mcI].pt = part.GetPt(); + info[n][mcI].phi = part.GetPhi(); + info[n][mcI].eta = part.GetEta(); + info[n][mcI].isPrimary = part.isPrimary(); + } + } +} + +//______________________________________________________________________________ +// Main function CorrTracksClusters +void CorrTracksClusters(const std::string& tracfile = "o2trac_its.root", + const std::string& clusfile = "o2clus_its.root", + const std::string& kinefile = "o2sim_Kine.root", + const std::string& magfile = "o2sim_grp.root", + const 
std::string& hitfile = "o2sim_HitsIT3.root", + const std::string& dictfile = "IT3dictionary.root", + const std::string& inputGeom = "", + bool batch = false) +{ + gROOT->SetBatch(batch); + + // Geo and Field + LOGP(info, "Geo and Field loading"); + float bz{0.f}; + o2::its::GeometryTGeo* gman = nullptr; + loadGeometryAndField(magfile, inputGeom, bz, gman); + LOGP(info, "Finished Geo and Field loading"); + + // MC tracks + LOGP(info, "MC Track Info loading"); + std::vector* mcArr = nullptr; + o2::dataformats::MCEventHeader* mcEvent = nullptr; + TTree* mcTree = nullptr; + std::vector> info; + loadMCTrackInfo(kinefile, info, mcArr, mcEvent, mcTree); + LOGP(info, "Finished MC Track Info loading"); + + // Reconstructed tracks + LOGP(info, "Reco Tracks loading"); + TTree* recTree = nullptr; + std::vector* recArr = nullptr; + std::vector* trkLabArr = nullptr; + loadRecoTracks(tracfile, recTree, recArr, trkLabArr); + LOGP(info, "Finished Reco Tracks loading"); + + // Run analyzeRecoTracks + LOGP(info, "Track analysis (analyzeRecoTracks)"); + ULong_t unaccounted{0}, good{0}, fakes{0}, total{0}; + analyzeRecoTracks(recTree, recArr, trkLabArr, info, bz, unaccounted, good, fakes, total); + LOGP(info, "Finished track analysis (analyzeRecoTracks)"); + + // Topology dictionary + LOGP(info, "Topology Dictionary loading"); + o2::its3::TopologyDictionary dict; + loadTopologyDictionary(dictfile, dict); + LOGP(info, "Finished Topology Dictionary loading"); + + // Clusters + LOGP(info, "Cluster Data loading"); + TTree* clusTree = nullptr; + std::vector* clusArr = nullptr; + o2::dataformats::MCTruthContainer* clusLabArr = nullptr; + std::vector* patternsPtr = nullptr; + std::vector mc2rofVec; + std::vector rofRecVec; + loadCluster(clusfile, clusTree, clusArr, clusLabArr, mc2rofVec, patternsPtr, rofRecVec); + LOGP(info, "Finished Cluster Data loading"); + // clusTree->GetEntry(0); + + // Hits + LOGP(info, "Hits loading"); + TTree* hitTree = nullptr; + std::vector*> hitVecPool; + 
std::vector> mc2hitVec; + prepareHitAccess(hitfile, hitTree, hitVecPool, mc2hitVec); + LOGP(info, "Finished Hits loading"); + + // Build min and max MC events used by each ROF + LOGP(info, "Building MC event ranges"); + std::vector mcEvMin, mcEvMax; + mcEvMin.assign(rofRecVec.size(), hitTree->GetEntries()); + mcEvMax.assign(rofRecVec.size(), -1); + buildMcEvRangePerROF(mc2rofVec, rofRecVec.size(), mcEvMin, mcEvMax); + LOGP(info, "Initial MC event ranges built"); + unsigned int nROFRec = rofRecVec.size(); + + // Map hits for MC events + LOGP(info, "Map hits for MC events"); + mapHitsForMCEvents(hitTree, hitVecPool, mc2hitVec, mcEvMin, mcEvMax, nROFRec); + LOGP(info, "Mapped hits for MC events"); + + // Run cluster particle matching + auto pattIt = patternsPtr->cbegin(); + for (unsigned int iClus = 0; iClus < clusArr->size(); ++iClus) { + auto lab = (clusLabArr->getLabels(iClus))[0]; + const auto& c = (*clusArr)[iClus]; + // Ensure pattIt is advanced even if cluster is skipped + if (!lab.isValid() || lab.getSourceID() != 0 || !lab.isCorrect()) { + if (c.getPatternID() == CompCluster::InvalidPatternID) { + o2::itsmft::ClusterPattern::skipPattern(pattIt); + } + continue; + } + + int trackID{0}, evID{0}, srcID{0}; + bool fake{false}; + lab.get(trackID, evID, srcID, fake); + if (evID < 0 || static_cast(evID) >= info.size() || trackID < 0 || static_cast(trackID) >= info[evID].size()) { + if (c.getPatternID() == CompCluster::InvalidPatternID) { + o2::itsmft::ClusterPattern::skipPattern(pattIt); + } + continue; + } + UShort_t chipID = c.getSensorID(); + int layer = gman->getLayer(chipID); + bool isIB = layer < 3; + info[evID][trackID].clusters |= 1 << layer; + + o2::math_utils::Point3D clusterPos; + int clusterSize; + auto pattID = c.getPatternID(); + o2::itsmft::ClusterPattern patt; + if (!resolvePattern(c, pattIt, dict, isIB, patt)) { + continue; + } + clusterSize = patt.getNPixels(); + clusterPos = dict.getClusterCoordinates(c, patt, false); + + if (isIB) { + 
CurvedLocalToFlat(clusterPos, SegmentationIB(layer)); + } + + info[evID][trackID].clusterSize[layer] = clusterSize; + info[evID][trackID].clusterPattern[layer] = pattID; + info[evID][trackID].clusterLocX[layer] = clusterPos.X(); + info[evID][trackID].clusterLocZ[layer] = clusterPos.Z(); + + const auto& mc2hit = mc2hitVec[lab.getEventID()]; + const auto* hitArray = hitVecPool[lab.getEventID()]; + uint64_t key = (uint64_t(trackID) << 32) + c.getSensorID(); + auto hitIt = mc2hit.find(key); + if (hitIt == mc2hit.end()) + continue; + const auto& hit = (*hitArray)[hitIt->second]; + + auto hitLocSta = gman->getMatrixL2G(chipID) ^ hit.GetPosStart(); + auto hitLocEnd = gman->getMatrixL2G(chipID) ^ hit.GetPos(); + + if (isIB) { + CurvedLocalToFlat(hitLocSta, SegmentationIB(layer)); + CurvedLocalToFlat(hitLocEnd, SegmentationIB(layer)); + info[evID][trackID].hitLocX[layer] = 0.5f * (hitLocSta.X() + hitLocEnd.X()); + info[evID][trackID].hitLocY[layer] = 0.5f * (hitLocSta.Y() + hitLocEnd.Y()); + info[evID][trackID].hitLocZ[layer] = 0.5f * (hitLocSta.Z() + hitLocEnd.Z()); + } else { + auto x0 = hitLocSta.X(), dx = hitLocEnd.X() - x0; + auto y0 = hitLocSta.Y(), dy = hitLocEnd.Y() - y0; + auto z0 = hitLocSta.Z(), dz = hitLocEnd.Z() - z0; + auto r = (0.5f * (SegmentationOB::SensorLayerThickness - SegmentationOB::SensorLayerThicknessEff) - y0) / dy; + info[evID][trackID].hitLocX[layer] = x0 + r * dx; + info[evID][trackID].hitLocY[layer] = y0 + r * dy; + info[evID][trackID].hitLocZ[layer] = z0 + r * dz; + } + } + + LOGP(info, "Finished cluster-to-particle matching"); + + // The following part generates statistical histograms and outputs a TTree + int nb = 100; + double xbins[nb + 1], ptcutl = 0.01, ptcuth = 10.; + double a = std::log(ptcuth / ptcutl) / nb; + for (int i = 0; i <= nb; ++i) { + xbins[i] = ptcutl * std::exp(i * a); + } + auto* h_pt_num = new TH1D("h_pt_num", ";#it{p}_{T} (GeV/#it{c});Number of tracks", nb, xbins); + auto* h_pt_den = new TH1D("h_pt_den", ";#it{p}_{T} 
(GeV/#it{c});Number of generated primary particles", nb, xbins); + auto* h_pt_eff = new TEfficiency("h_pt_eff", "Tracking Efficiency;#it{p}_{T} (GeV/#it{c});Eff.", nb, xbins); + + auto* h_eta_num = new TH1D("h_eta_num", ";#it{#eta};Number of tracks", 60, -3, 3); + auto* h_eta_den = new TH1D("h_eta_den", ";#it{#eta};Number of generated particles", 60, -3, 3); + auto* h_eta_eff = new TEfficiency("h_eta_eff", "Tracking Efficiency;#it{#eta};Eff.", 60, -3, 3); + + auto* h_phi_num = new TH1D("h_phi_num", ";#varphi;Number of tracks", 360, 0., 2 * TMath::Pi()); + auto* h_phi_den = new TH1D("h_phi_den", ";#varphi;Number of generated particles", 360, 0., 2 * TMath::Pi()); + auto* h_phi_eff = new TEfficiency("h_phi_eff", "Tracking Efficiency;#varphi;Eff.", 360, 0., 2 * TMath::Pi()); + + auto* h_pt_fake = new TH1D("h_pt_fake", ";#it{p}_{T} (GeV/#it{c});Number of fake tracks", nb, xbins); + auto* h_pt_multifake = new TH1D("h_pt_multifake", ";#it{p}_{T} (GeV/#it{c});Number of multifake tracks", nb, xbins); + auto* h_pt_clones = new TH1D("h_pt_clones", ";#it{p}_{T} (GeV/#it{c});Number of cloned tracks", nb, xbins); + auto* h_dcaxy_vs_pt = new TH2D("h_dcaxy_vs_pt", ";#it{p}_{T} (GeV/#it{c});DCA_{xy} (#mum)", nb, xbins, 2000, -500., 500.); + auto* h_dcaxy_vs_eta = new TH2D("h_dcaxy_vs_eta", ";#it{#eta};DCA_{xy} (#mum)", 60, -3, 3, 2000, -500., 500.); + auto* h_dcaxy_vs_phi = new TH2D("h_dcaxy_vs_phi", ";#varphi;DCA_{xy} (#mum)", 360, 0., 2 * TMath::Pi(), 2000, -500., 500.); + auto* h_dcaz_vs_pt = new TH2D("h_dcaz_vs_pt", ";#it{p}_{T} (GeV/#it{c});DCA_{z} (#mum)", nb, xbins, 2000, -500., 500.); + auto* h_dcaz_vs_eta = new TH2D("h_dcaz_vs_eta", ";#it{#eta};DCA_{z} (#mum)", 60, -3, 3, 2000, -500., 500.); + auto* h_dcaz_vs_phi = new TH2D("h_dcaz_vs_phi", ";#varphi;DCA_{z} (#mum)", 360, 0., 2 * TMath::Pi(), 2000, -500., 500.); + auto* h_chi2 = new TH2D("h_chi2", ";#it{p}_{T} (GeV/#it{c});#chi^{2};Number of tracks", nb, xbins, 200, 0., 100.); + + for (auto& evInfo : info) { + for (auto& 
part : evInfo) { + if ((part.clusters & 0x7f) != 0x7f) { + // part.clusters != 0x3f && part.clusters != 0x3f << 1 && + // part.clusters != 0x1f && part.clusters != 0x1f << 1 && part.clusters + // != 0x1f << 2 && part.clusters != 0x0f && part.clusters != 0x0f << 1 + // && part.clusters != 0x0f << 2 && part.clusters != 0x0f << 3) { + continue; + } + if (!part.isPrimary) { + continue; + } + + h_pt_den->Fill(part.pt); + h_eta_den->Fill(part.eta); + h_phi_den->Fill(part.phi); + + if (part.isReco != 0u) { + h_pt_num->Fill(part.pt); + h_eta_num->Fill(part.eta); + h_phi_num->Fill(part.phi); + if (std::abs(part.eta) < 0.5) { + h_dcaxy_vs_pt->Fill(part.pt, part.dcaxy * 10000); + h_dcaz_vs_pt->Fill(part.pt, part.dcaz * 10000); + } + h_dcaz_vs_eta->Fill(part.eta, part.dcaz * 10000); + h_dcaxy_vs_eta->Fill(part.eta, part.dcaxy * 10000); + h_dcaxy_vs_phi->Fill(part.phi, part.dcaxy * 10000); + h_dcaz_vs_phi->Fill(part.phi, part.dcaz * 10000); + + h_chi2->Fill(part.pt, part.track.getChi2()); + + if (part.isReco > 1) { + for (int _i{0}; _i < part.isReco - 1; ++_i) { + h_pt_clones->Fill(part.pt); + } + } + } + if (part.isFake != 0u) { + h_pt_fake->Fill(part.pt); + if (part.isFake > 1) { + for (int _i{0}; _i < part.isFake - 1; ++_i) { + h_pt_multifake->Fill(part.pt); + } + } + } + } + } + + LOGP(info, "Streaming output TTree to file"); + TFile file("CorrTracksClusters.root", "recreate"); + TTree tree("ParticleInfo", "ParticleInfo"); + ParticleInfo pInfo; + tree.Branch("particle", &pInfo); + for (auto& event : info) { + for (auto& part : event) { + int nCl{0}; + for (unsigned int bit{0}; bit < sizeof(pInfo.clusters) * 8; ++bit) { + nCl += bool(part.clusters & (1 << bit)); + } + if (nCl < 3) { + continue; + } + pInfo = part; + tree.Fill(); + } + } + tree.Write(); + h_pt_num->Write(); + h_eta_num->Write(); + h_phi_num->Write(); + h_pt_den->Write(); + h_eta_den->Write(); + h_phi_den->Write(); + h_pt_multifake->Write(); + h_pt_fake->Write(); + h_dcaxy_vs_pt->Write(); + 
h_dcaz_vs_pt->Write(); + h_dcaxy_vs_eta->Write(); + h_dcaxy_vs_phi->Write(); + h_dcaz_vs_eta->Write(); + h_dcaz_vs_phi->Write(); + h_pt_clones->Write(); + h_chi2->Write(); + + h_pt_eff->SetTotalHistogram(*h_pt_den, ""); + h_pt_eff->SetPassedHistogram(*h_pt_num, ""); + h_pt_eff->SetTitle("Tracking Efficiency;#it{p}_{T} (GeV/#it{c});Eff."); + h_pt_eff->Write(); + + h_phi_eff->SetTotalHistogram(*h_phi_den, ""); + h_phi_eff->SetPassedHistogram(*h_phi_num, ""); + h_phi_eff->SetTitle("Tracking Efficiency;#varphi;Eff."); + h_phi_eff->Write(); + + h_eta_eff->SetTotalHistogram(*h_eta_den, ""); + h_eta_eff->SetPassedHistogram(*h_eta_num, ""); + h_eta_eff->SetTitle("Tracking Efficiency;#it{#eta};Eff."); + h_eta_eff->Write(); + + file.Close(); + LOGP(info, "Finished streaming output TTree to file"); + LOGP(info, "done."); +} diff --git a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C index cc241afb3357a..76d7bf09de77f 100644 --- a/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C +++ b/Detectors/Upgrades/ITS3/macros/test/CreateDictionariesITS3.C @@ -52,7 +52,7 @@ #endif -void CreateDictionariesITS3(bool saveDeltas = false, +void CreateDictionariesITS3(bool saveDeltas = true, float probThreshold = 1e-6, std::string clusDictFile = "", std::string clusfile = "o2clus_its.root", @@ -94,7 +94,7 @@ void CreateDictionariesITS3(bool saveDeltas = false, TNtuple* nt = nullptr; if (saveDeltas) { fout = TFile::Open("CreateDictionaries.root", "recreate"); - nt = new TNtuple("nt", "hashes ntuple", "hash:dx:dz"); + nt = new TNtuple("nt", "hashes ntuple", "hash:layer:chipID:xhf:zhf:xcf:zcf:dx:dz:outlimDx:outlimDz"); } const o2::steer::DigitizationContext* digContext = nullptr; @@ -284,19 +284,25 @@ void CreateDictionariesITS3(bool saveDeltas = false, dZ = xyzLocM.Z() - locC.Z(); dX /= (ib) ? o2::its3::SegmentationMosaix::PitchRow : o2::itsmft::SegmentationAlpide::PitchRow; dZ /= (ib) ? 
o2::its3::SegmentationMosaix::PitchCol : o2::itsmft::SegmentationAlpide::PitchCol; - if (saveDeltas) { - nt->Fill(topology.getHash(), dX, dZ); - } + + float outLimitDx{-1}, outLimitDz{-1}; if (checkOutliers > 0.) { - if (bool bX = std::abs(dX) > topology.getRowSpan() * checkOutliers, bZ = std::abs(dZ) > topology.getColumnSpan() * checkOutliers; bX || bZ) { // ignore outlier + outLimitDx = topology.getRowSpan() * checkOutliers; + outLimitDz = topology.getColumnSpan() * checkOutliers; + bool isOutDx = std::abs(dX) > outLimitDx; + bool isOutDz = std::abs(dZ) > outLimitDz; + if (isOutDx || isOutDz) { // ignore outlier (ib) ? ++cOutliersIB : ++cOutliersOB; - LOGP(debug, "Ignored Value dX={} > {} * {} -> {}", dX, topology.getRowSpan(), checkOutliers, bX); - LOGP(debug, "Ignored Value dZ={} > {} * {} -> {}", dZ, topology.getColumnSpan(), checkOutliers, bZ); + LOGP(debug, "Ignored Value dX={} > {} * {} -> {}", dX, topology.getRowSpan(), checkOutliers, isOutDx); + LOGP(debug, "Ignored Value dZ={} > {} * {} -> {}", dZ, topology.getColumnSpan(), checkOutliers, isOutDz); dX = dZ = BuildTopologyDictionary::IgnoreVal; } else { (ib) ? 
++cOkIB : ++cOkOB; } } + if (saveDeltas) { + nt->Fill(topology.getHash(), layer, chipID, xyzLocM.X(), xyzLocM.Z(), locC.X(), locC.Z(), dX, dZ, outLimitDx, outLimitDz); + } } } else { /* LOGP(info, " Failed to find MC hit entry for Tr: {} chipID: {}", trID, chipID); */ diff --git a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt index 2fad72a96426d..8c4722012224d 100644 --- a/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt +++ b/Detectors/Upgrades/ITS3/simulation/CMakeLists.txt @@ -15,8 +15,11 @@ o2_add_library(ITS3Simulation src/DescriptorInnerBarrelITS3.cxx src/Digitizer.cxx src/DigiParams.cxx + src/ITS3DPLDigitizerParam.cxx + src/ChipDigitsContainer.cxx + src/ChipSimResponse.cxx PUBLIC_LINK_LIBRARIES O2::SimulationDataFormat - O2::ITSBase O2::ITSMFTSimulation + O2::ITSBase O2::ITSMFTSimulation O2::ITSMFTBase ROOT::Physics) o2_target_root_dictionary(ITS3Simulation @@ -25,6 +28,9 @@ o2_target_root_dictionary(ITS3Simulation include/ITS3Simulation/DescriptorInnerBarrelITS3.h include/ITS3Simulation/Digitizer.h include/ITS3Simulation/DigiParams.h + include/ITS3Simulation/ITS3DPLDigitizerParam.h + include/ITS3Simulation/ChipDigitsContainer.h + include/ITS3Simulation/ChipSimResponse.h ) o2_data_file(COPY data DESTINATION Detectors/ITS3/simulation) diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h new file mode 100644 index 0000000000000..0c9627fe412c3 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipDigitsContainer.h @@ -0,0 +1,59 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. 
+// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_ITS3_CHIPDIGITSCONTAINER_ +#define ALICEO2_ITS3_CHIPDIGITSCONTAINER_ + +#include "ITSMFTBase/SegmentationAlpide.h" // Base class in o2::itsmft namespace +#include "ITSMFTSimulation/ChipDigitsContainer.h" // Base class in o2::itsmft namespace +#include "ITS3Base/SegmentationMosaix.h" // OB segmentation implementation +#include "ITS3Base/SpecsV2.h" // Provides SpecsV2::isDetITS3() interface +#include "ITS3Simulation/DigiParams.h" // ITS3-specific DigiParams interface +#include + +namespace o2::its3 +{ + +class ChipDigitsContainer : public o2::itsmft::ChipDigitsContainer +{ + private: + bool innerBarrel; ///< true if the chip belongs to the inner barrel (IB), false if outer barrel (OB) + int maxRows; ///< maximum number of rows + int maxCols; ///< maximum number of columns + + public: + explicit ChipDigitsContainer(UShort_t idx = 0); + + using SegmentationIB = SegmentationMosaix; + using SegmentationOB = o2::itsmft::SegmentationAlpide; + + /// Returns whether the chip is in the inner barrel (IB) + void setChipIndex(UShort_t idx) + { + o2::itsmft::ChipDigitsContainer::setChipIndex(idx); + innerBarrel = constants::detID::isDetITS3(getChipIndex()); + maxRows = innerBarrel ? SegmentationIB::NRows : SegmentationOB::NRows; + maxCols = innerBarrel ? 
SegmentationIB::NCols : SegmentationOB::NCols; + } + + int getMaxRows() const { return maxRows; } + int getMaxCols() const { return maxCols; } + bool isIB() const; + /// Adds noise digits, deleted the one using the itsmft::DigiParams interface + void addNoise(UInt_t rofMin, UInt_t rofMax, const o2::itsmft::DigiParams* params, int maxRows = o2::itsmft::SegmentationAlpide::NRows, int maxCols = o2::itsmft::SegmentationAlpide::NCols) = delete; + void addNoise(UInt_t rofMin, UInt_t rofMax, const o2::its3::DigiParams* params); + + ClassDefNV(ChipDigitsContainer, 1); +}; + +} // namespace o2::its3 + +#endif // ALICEO2_ITS3_CHIPDIGITSCONTAINER_ \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h new file mode 100644 index 0000000000000..f96fde9fb0d55 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ChipSimResponse.h @@ -0,0 +1,41 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +#ifndef ALICEO2_ITS3SIMULATION_CHIPSIMRESPONSE_H +#define ALICEO2_ITS3SIMULATION_CHIPSIMRESPONSE_H + +#include "ITSMFTSimulation/AlpideSimResponse.h" + +namespace o2 +{ +namespace its3 +{ + +class ChipSimResponse : public o2::itsmft::AlpideSimResponse +{ + public: + ChipSimResponse() = default; + ChipSimResponse(const ChipSimResponse& other) = default; + + float getRespCentreDep() const { return mRespCentreDep; } + void computeCentreFromData(); + void initData(int tableNumber, std::string dataPath, const bool quiet = true); + + private: + float mRespCentreDep = 0.f; + + ClassDef(ChipSimResponse, 1); +}; + +} // namespace its3 +} // namespace o2 + +#endif // ALICEO2_ITS3SIMULATION_CHIPSIMRESPONSE_H \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h index eca0a71949ba7..5764dfbd7d593 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/DigiParams.h @@ -13,21 +13,43 @@ #define ITS3_DIGIPARAMS_H #include "ITSMFTSimulation/DigiParams.h" +#include "ITS3Simulation/ChipSimResponse.h" namespace o2::its3 { class DigiParams final : public o2::itsmft::DigiParams { + private: + float mIBNoisePerPixel = 1.e-8; + int mIBChargeThreshold = 150; ///< charge threshold in Nelectrons + int mIBMinChargeToAccount = 15; ///< minimum charge contribution to account + int mIBNSimSteps = 18; ///< number of steps in response simulation + float mIBNSimStepsInv = 0; ///< its inverse + public: + DigiParams(); + + void setIBNoisePerPixel(float v) { mIBNoisePerPixel = v; } + float getIBNoisePerPixel() const { return mIBNoisePerPixel; } + + void setIBChargeThreshold(int v, float frac2Account = 0.1); + int getIBChargeThreshold() const { return mIBChargeThreshold; } + + void setIBNSimSteps(int v); + int getIBNSimSteps() const { return mIBNSimSteps; } + float 
getIBNSimStepsInv() const { return mIBNSimStepsInv; } + + int getIBMinChargeToAccount() const { return mIBMinChargeToAccount; } + const o2::itsmft::AlpideSimResponse* getAlpSimResponse() const = delete; void setAlpSimResponse(const o2::itsmft::AlpideSimResponse* par) = delete; const o2::itsmft::AlpideSimResponse* getOBSimResponse() const { return mOBSimResponse; } void setOBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mOBSimResponse = response; } - const o2::itsmft::AlpideSimResponse* getIBSimResponse() const { return mIBSimResponse; } - void setIBSimResponse(const o2::itsmft::AlpideSimResponse* response) { mIBSimResponse = response; } + o2::its3::ChipSimResponse* getIBSimResponse() const { return mIBSimResponse; } + void setIBSimResponse(o2::its3::ChipSimResponse* response); bool hasResponseFunctions() const { return mIBSimResponse != nullptr && mOBSimResponse != nullptr; } @@ -35,7 +57,7 @@ class DigiParams final : public o2::itsmft::DigiParams private: const o2::itsmft::AlpideSimResponse* mOBSimResponse = nullptr; //!< pointer to external response - const o2::itsmft::AlpideSimResponse* mIBSimResponse = nullptr; //!< pointer to external response + o2::its3::ChipSimResponse* mIBSimResponse = nullptr; //!< pointer to external response ClassDef(DigiParams, 1); }; diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h index 8d0f06a27343b..edc5583c03d5a 100644 --- a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/Digitizer.h @@ -21,12 +21,13 @@ #include "Rtypes.h" #include "TObject.h" -#include "ITSMFTSimulation/ChipDigitsContainer.h" #include "ITSMFTSimulation/AlpideSimResponse.h" #include "ITSMFTSimulation/Hit.h" #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/SegmentationMosaix.h" #include "ITS3Simulation/DigiParams.h" +#include 
"ITS3Simulation/ChipDigitsContainer.h" +#include "ITS3Simulation/ChipSimResponse.h" #include "DataFormatsITSMFT/Digit.h" #include "DataFormatsITSMFT/ROFRecord.h" #include "CommonDataFormat/InteractionRecord.h" @@ -78,7 +79,7 @@ class Digitizer : public TObject private: void processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID, int srcID); - void registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, + void registerDigits(o2::its3::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF, uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl); ExtraDig* getExtraDigBuffer(uint32_t roFrame) @@ -108,7 +109,7 @@ class Digitizer : public TObject static constexpr std::array mIBSegmentations{0, 1, 2}; - o2::itsmft::AlpideSimResponse* mSimRespIB = nullptr; // simulated response for IB + o2::its3::ChipSimResponse* mSimRespIB = nullptr; // simulated response for IB o2::itsmft::AlpideSimResponse* mSimRespOB = nullptr; // simulated response for OB bool mSimRespIBOrientation{false}; // wether the orientation in the IB response function is flipped float mSimRespIBShift{0.f}; // adjusting the Y-shift in the IB response function to match sensor local coord. @@ -118,7 +119,7 @@ class Digitizer : public TObject const o2::its::GeometryTGeo* mGeometry = nullptr; ///< ITS3 geometry - std::vector mChips; ///< Array of chips digits containers + std::vector mChips; ///< Array of chips digits containers std::deque> mExtraBuff; ///< burrer (per roFrame) for extra digits std::vector* mDigits = nullptr; //! 
output digits diff --git a/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h new file mode 100644 index 0000000000000..3192f73fb8f79 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/include/ITS3Simulation/ITS3DPLDigitizerParam.h @@ -0,0 +1,32 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +#ifndef ALICEO2_ITS3DPLDIGITIZERPARAM_H_ +#define ALICEO2_ITS3DPLDIGITIZERPARAM_H_ + +#include "CommonUtils/ConfigurableParam.h" +#include "CommonUtils/ConfigurableParamHelper.h" + +namespace o2::its3 +{ + +struct ITS3DPLDigitizerParam : public o2::conf::ConfigurableParamHelper { + float IBNoisePerPixel = 1.e-8; ///< MOSAIX Noise per channel + int IBChargeThreshold = 150; ///< charge threshold in Nelectrons for IB + int IBMinChargeToAccount = 15; ///< minimum charge contribution to account for IB + int nIBSimSteps = 18; ///< number of steps in response for IB + + O2ParamDef(ITS3DPLDigitizerParam, "ITS3DPLDigitizerParam"); +}; + +} // namespace o2::its3 + +#endif \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx b/Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx new file mode 100644 index 0000000000000..0611f7002f160 --- /dev/null +++ b/Detectors/Upgrades/ITS3/simulation/src/ChipDigitsContainer.cxx @@ -0,0 +1,63 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+
+#include "ITS3Simulation/ChipDigitsContainer.h"
+
+namespace o2
+{
+namespace its3
+{
+
+// Calls setChipIndex() so that innerBarrel/maxRows/maxCols are initialised at construction.
+ChipDigitsContainer::ChipDigitsContainer(UShort_t idx)
+  : o2::itsmft::ChipDigitsContainer(idx) { setChipIndex(idx); }
+
+bool ChipDigitsContainer::isIB() const
+{
+  return innerBarrel;
+}
+
+/// Adds Poisson-distributed noise digits to every ROF in [rofMin, rofMax] (inclusive),
+/// taking the noise level and charge threshold from the IB or OB settings of `params`.
+void ChipDigitsContainer::addNoise(UInt_t rofMin, UInt_t rofMax, const o2::its3::DigiParams* params)
+{
+  float mean = 0.f; // expected number of noise hits per ROF on this chip
+  int nel = 0;      // charge given to a noise digit: 10% above the threshold
+
+  if (isIB()) {
+    // Inner barrel: ITS3-specific noise parameters, scaled by the IB segmentation pixel count.
+    mean = params->getIBNoisePerPixel() * SegmentationIB::NPixels;
+    nel = static_cast<int>(params->getIBChargeThreshold() * 1.1);
+  } else {
+    // Outer barrel: base-class noise parameters, scaled by the OB segmentation pixel count.
+    mean = params->getNoisePerPixel() * SegmentationOB::NPixels;
+    nel = static_cast<int>(params->getChargeThreshold() * 1.1);
+  }
+
+  // Pixels flagged by the noise/dead-channel maps or already holding a digit in that ROF are skipped.
+  for (UInt_t rof = rofMin; rof <= rofMax; ++rof) {
+    const Int_t nhits = gRandom->Poisson(mean);
+    for (Int_t i = 0; i < nhits; ++i) {
+      const UInt_t row = gRandom->Integer(maxRows);
+      const UInt_t col = gRandom->Integer(maxCols);
+      if (mNoiseMap && mNoiseMap->isNoisy(mChipIndex, row, col))
+        continue;
+      if (mDeadChanMap && mDeadChanMap->isNoisy(mChipIndex, row, col))
+        continue;
+      auto key = getOrderingKey(rof, row, col);
+      if (!findDigit(key))
+        addDigit(key, rof, row, col, nel, o2::MCCompLabel(true));
+    }
+  }
+}
+
+} // namespace its3
+} // namespace o2
\ No newline at end of file
diff --git a/Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx b/Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx
new file mode 100644
index 0000000000000..1c482983f0d0a
--- /dev/null
+++ b/Detectors/Upgrades/ITS3/simulation/src/ChipSimResponse.cxx
@@ -0,0 +1,62 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+ +#include "ITS3Simulation/ChipSimResponse.h" +#include +#include + +using namespace o2::its3; + +ClassImp(o2::its3::ChipSimResponse); + +void ChipSimResponse::initData(int tableNumber, std::string dataPath, const bool quiet) +{ + AlpideSimResponse::initData(tableNumber, dataPath, quiet); + computeCentreFromData(); +} + +void ChipSimResponse::computeCentreFromData() +{ + std::vector zVec, qVec; + const int npix = o2::itsmft::AlpideRespSimMat::getNPix(); + + for (int iz = 0; iz < mNBinDpt; ++iz) { + size_t bin = iz + mNBinDpt * (0 + mNBinRow * 0); + const auto& mat = mData[bin]; + float val = mat.getValue(npix / 2, npix / 2); + float gz = mDptMin + iz / mStepInvDpt; + zVec.push_back(gz); + qVec.push_back(val); + } + + std::vector> zqPairs; + for (size_t i = 0; i < zVec.size(); ++i) { + zqPairs.emplace_back(zVec[i], qVec[i]); + } + std::sort(zqPairs.begin(), zqPairs.end()); + zVec.clear(); + qVec.clear(); + for (auto& p : zqPairs) { + zVec.push_back(p.first); + qVec.push_back(p.second); + } + + float intQ = 0.f, intZQ = 0.f; + for (size_t i = 0; i + 1 < zVec.size(); ++i) { + float z0 = zVec[i], z1 = zVec[i + 1]; + float q0 = qVec[i], q1 = qVec[i + 1]; + float dz = z1 - z0; + intQ += 0.5f * (q0 + q1) * dz; + intZQ += 0.5f * (z0 * q0 + z1 * q1) * dz; + } + + mRespCentreDep = (intQ > 0.f) ? intZQ / intQ : 0.f; +} diff --git a/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx index a9f17a544b3c4..afa02ec44741d 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/DigiParams.cxx @@ -14,27 +14,67 @@ #include "Framework/Logger.h" #include "ITS3Simulation/DigiParams.h" +#include ClassImp(o2::its3::DigiParams); namespace o2::its3 { +DigiParams::DigiParams() +{ + // make sure the defaults are consistent + setIBNSimSteps(mIBNSimSteps); +} + +void DigiParams::setIBNSimSteps(int v) +{ + // set number of sampling steps in silicon + mIBNSimSteps = v > 0 ? 
v : 1; + mIBNSimStepsInv = 1.f / mIBNSimSteps; +} + +void DigiParams::setIBChargeThreshold(int v, float frac2Account) +{ + // set charge threshold for digits creation and its fraction to account + // contribution from single hit + mIBChargeThreshold = v; + mIBMinChargeToAccount = v * frac2Account; + if (mIBMinChargeToAccount < 0 || mIBMinChargeToAccount > mIBChargeThreshold) { + mIBMinChargeToAccount = mIBChargeThreshold; + } + LOG(info) << "Set Mosaix charge threshold to " << mIBChargeThreshold + << ", single hit will be accounted from " << mIBMinChargeToAccount + << " electrons"; +} + void DigiParams::print() const { // print settings - LOGF(info, "ITS3 DigiParams settings:"); - LOGF(info, "Continuous readout : %s", isContinuous() ? "ON" : "OFF"); - LOGF(info, "Readout Frame Length(ns) : %f", getROFrameLength()); - LOGF(info, "Strobe delay (ns) : %f", getStrobeDelay()); - LOGF(info, "Strobe length (ns) : %f", getStrobeLength()); - LOGF(info, "Threshold (N electrons) : %d", getChargeThreshold()); - LOGF(info, "Min N electrons to account : %d", getMinChargeToAccount()); - LOGF(info, "Number of charge sharing steps : %d", getNSimSteps()); - LOGF(info, "ELoss to N electrons factor : %e", getEnergyToNElectrons()); - LOGF(info, "Noise level per pixel : %e", getNoisePerPixel()); - LOGF(info, "Charge time-response:\n"); + printf("ITS3 DigiParams settings:\n"); + printf("Continuous readout : %s\n", isContinuous() ? 
"ON" : "OFF"); + printf("Readout Frame Length(ns) : %f\n", getROFrameLength()); + printf("Strobe delay (ns) : %f\n", getStrobeDelay()); + printf("Strobe length (ns) : %f\n", getStrobeLength()); + printf("IB Threshold (N electrons) : %d\n", getIBChargeThreshold()); + printf("OB Threshold (N electrons) : %d\n", getChargeThreshold()); + printf("Min N electrons to account for IB : %d\n", getIBMinChargeToAccount()); + printf("Min N electrons to account for OB : %d\n", getMinChargeToAccount()); + printf("Number of charge sharing steps of IB : %d\n", getIBNSimSteps()); + printf("Number of charge sharing steps of OB : %d\n", getNSimSteps()); + printf("ELoss to N electrons factor : %e\n", getEnergyToNElectrons()); + printf("Noise level per pixel of IB : %e\n", getIBNoisePerPixel()); + printf("Noise level per pixel of OB : %e\n", getNoisePerPixel()); + printf("Charge time-response:\n"); getSignalShape().print(); } +void DigiParams::setIBSimResponse(o2::its3::ChipSimResponse* response) +{ + mIBSimResponse = response; + if (mIBSimResponse) { + mIBSimResponse->computeCentreFromData(); + } +} + } // namespace o2::its3 diff --git a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx index 3c75bf3e8f680..1d1d15a91f89b 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx +++ b/Detectors/Upgrades/ITS3/simulation/src/Digitizer.cxx @@ -27,7 +27,8 @@ #include using o2::itsmft::Hit; -using SegmentationAlpide = o2::itsmft::SegmentationAlpide; +using SegmentationOB = o2::itsmft::SegmentationAlpide; +using SegmentationIB = o2::its3::SegmentationMosaix; using o2::itsmft::AlpideRespSimMat; using o2::itsmft::PreDigit; @@ -46,8 +47,8 @@ void Digitizer::init() } if (!mParams.hasResponseFunctions()) { - auto loadSetResponseFunc = [&](const char* name, const char* fileIB, const char* nameIB, const char* fileOB, const char* nameOB) { - LOGP(info, "Loading response function for {}: IB={}:{} ; OB={}:{}", name, nameIB, fileIB, 
nameOB, fileOB); + auto loadSetResponseFunc = [&](const char* fileIB, const char* nameIB, const char* fileOB, const char* nameOB) { + LOGP(info, "Loading response function IB={}:{} ; OB={}:{}", nameIB, fileIB, nameOB, fileOB); auto fIB = TFile::Open(fileIB, "READ"); if (!fIB || fIB->IsZombie() || !fIB->IsOpen()) { LOGP(fatal, "Cannot open file {}", fileIB); @@ -56,7 +57,7 @@ void Digitizer::init() if (!fOB || fOB->IsZombie() || !fOB->IsOpen()) { LOGP(fatal, "Cannot open file {}", fileOB); } - mParams.setIBSimResponse(mSimRespIB = fIB->Get(nameIB)); + mParams.setIBSimResponse(mSimRespIB = fIB->Get(nameIB)); mParams.setOBSimResponse(mSimRespOB = fOB->Get(nameOB)); fIB->Close(); fOB->Close(); @@ -64,25 +65,27 @@ void Digitizer::init() if (const auto& func = ITS3Params::Instance().chipResponseFunction; func == "Alpide") { constexpr const char* responseFile = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; - loadSetResponseFunc("Alpide", responseFile, "response0", responseFile, "response1"); - mSimRespIBShift = mSimRespIB->getDepthMax() - SegmentationMosaix::SensorLayerThickness / 2.f + 10.e-4f; - mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; + loadSetResponseFunc(responseFile, "response0", responseFile, "response0"); + mSimRespIBScaleX = o2::itsmft::SegmentationAlpide::PitchRow / SegmentationIB::PitchRow; + mSimRespIBScaleZ = o2::itsmft::SegmentationAlpide::PitchCol / SegmentationIB::PitchCol; } else if (func == "APTS") { constexpr const char* responseFileIB = "$(O2_ROOT)/share/Detectors/Upgrades/ITS3/data/ITS3ChipResponseData/APTSResponseData.root"; constexpr const char* responseFileOB = "$(O2_ROOT)/share/Detectors/ITSMFT/data/AlpideResponseData/AlpideResponseData.root"; - loadSetResponseFunc("APTS", responseFileIB, "response1", responseFileOB, "response1"); - mSimRespIBShift = mSimRespIB->getDepthMax() + (float)constants::pixelarray::pixels::apts::responseYShift; - mSimRespOBShift = 
mSimRespOB->getDepthMax() - SegmentationAlpide::SensorLayerThickness / 2.f; - mSimRespIBScaleX = 0.5f * constants::pixelarray::pixels::apts::pitchX / SegmentationMosaix::PitchRow; - mSimRespIBScaleZ = 0.5f * constants::pixelarray::pixels::apts::pitchZ / SegmentationMosaix::PitchCol; + loadSetResponseFunc(responseFileIB, "response1", responseFileOB, "response0"); + mSimRespIBScaleX = constants::pixelarray::pixels::apts::pitchX / SegmentationIB::PitchRow; + mSimRespIBScaleZ = constants::pixelarray::pixels::apts::pitchZ / SegmentationIB::PitchCol; mSimRespIBOrientation = true; } else { LOGP(fatal, "ResponseFunction '{}' not implemented!", func); } + mSimRespIBShift = mSimRespIB->getDepthMax() - constants::silicon::thickness / 2.f; + mSimRespOBShift = mSimRespOB->getDepthMax() - SegmentationOB::SensorLayerThickness / 2.f; } + mParams.print(); - LOGP(info, "IBShift = {} ; OBShift = {}", mSimRespIBShift, mSimRespOBShift); - LOGP(info, "IB-Scale: X={} ; Z={}", mSimRespIBScaleX, mSimRespIBScaleZ); + LOGP(info, "IB shift = {} ; OB shift = {}", mSimRespIBShift, mSimRespOBShift); + LOGP(info, "IB pixel scale on x = {} ; z = {}", mSimRespIBScaleX, mSimRespIBScaleZ); + LOGP(info, "IB response orientation: {}", mSimRespIBOrientation ? 
"flipped" : "normal"); mIRFirstSampledTF = o2::raw::HBFUtils::Instance().getFirstSampledTFIR(); } @@ -173,11 +176,7 @@ void Digitizer::fillOutputContainer(uint32_t frameLast) auto& extra = *(mExtraBuff.front().get()); for (size_t iChip{0}; iChip < mChips.size(); ++iChip) { auto& chip = mChips[iChip]; - if (constants::detID::isDetITS3(iChip)) { // Check if this is a chip of ITS3 - chip.addNoise(mROFrameMin, mROFrameMin, &mParams, SegmentationMosaix::NRows, SegmentationMosaix::NCols); - } else { - chip.addNoise(mROFrameMin, mROFrameMin, &mParams); - } + chip.addNoise(mROFrameMin, mROFrameMin, &mParams); auto& buffer = chip.getPreDigits(); if (buffer.empty()) { continue; @@ -190,7 +189,7 @@ void Digitizer::fillOutputContainer(uint32_t frameLast) break; // is the digit ROFrame from the key > the max requested frame } auto& preDig = iter->second; // preDigit - if (preDig.charge >= mParams.getChargeThreshold()) { + if (preDig.charge >= (chip.isIB() ? mParams.getIBChargeThreshold() : mParams.getChargeThreshold())) { int digID = mDigits->size(); mDigits->emplace_back(chip.getChipIndex(), preDig.row, preDig.col, preDig.charge); mMCLabels->addElement(digID, preDig.labelRef.label); @@ -257,16 +256,15 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } // here we start stepping in the depth of the sensor to generate charge diffision - float nStepsInv = mParams.getNSimStepsInv(); - int nSteps = mParams.getNSimSteps(); int detID{hit.GetDetectorID()}; int layer = mGeometry->getLayer(detID); const auto& matrix = mGeometry->getMatrixL2G(detID); - bool innerBarrel{layer < 3}; + int nSteps = chip.isIB() ? mParams.getIBNSimSteps() : mParams.getNSimSteps(); + float nStepsInv = chip.isIB() ? 
mParams.getIBNSimStepsInv() : mParams.getNSimStepsInv(); math_utils::Vector3D xyzLocS, xyzLocE; xyzLocS = matrix ^ (hit.GetPosStart()); // Global hit coordinates to local detector coordinates xyzLocE = matrix ^ (hit.GetPos()); - if (innerBarrel) { + if (chip.isIB()) { // transform the point on the curved surface to a flat one float xFlatE{0.f}, yFlatE{0.f}, xFlatS{0.f}, yFlatS{0.f}; mIBSegmentations[layer].curvedToFlat(xyzLocS.X(), xyzLocS.Y(), xFlatS, yFlatS); @@ -284,7 +282,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID xyzLocS += stepH; // Adjust start position to the middle of the first step xyzLocE -= stepH; // Adjust end position to the middle of the last step int rowS = -1, colS = -1, rowE = -1, colE = -1, nSkip = 0; - if (innerBarrel) { + if (chip.isIB()) { // get entrance pixel row and col while (!mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { @@ -301,14 +299,14 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID } } else { // get entrance pixel row and col - while (!SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? + while (!SegmentationOB::localToDetector(xyzLocS.X(), xyzLocS.Z(), rowS, colS)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } xyzLocS += step; } // get exit pixel row and col - while (!SegmentationAlpide::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? + while (!SegmentationOB::localToDetector(xyzLocE.X(), xyzLocE.Z(), rowE, colE)) { // guard-ring ? if (++nSkip >= nSteps) { return; // did not enter to sensitive matrix } @@ -327,8 +325,8 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID rowE += AlpideRespSimMat::NPix / 2; rowS = std::max(rowS, 0); - const int maxNrows{innerBarrel ? 
SegmentationMosaix::NRows : SegmentationAlpide::NRows}; - const int maxNcols{innerBarrel ? SegmentationMosaix::NCols : SegmentationAlpide::NCols}; + const int maxNrows{chip.isIB() ? SegmentationIB::NRows : SegmentationOB::NRows}; + const int maxNcols{chip.isIB() ? SegmentationIB::NCols : SegmentationOB::NCols}; rowE = std::min(rowE, maxNrows - 1); colS -= AlpideRespSimMat::NPix / 2; @@ -352,22 +350,22 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID // take into account that the AlpideSimResponse depth defintion has different min/max boundaries // although the max should coincide with the surface of the epitaxial layer, which in the chip // local coordinates has Y = +SensorLayerThickness/2 - xyzLocS.SetY(xyzLocS.Y() + ((innerBarrel) ? mSimRespIBShift : mSimRespOBShift)); + xyzLocS.SetY(xyzLocS.Y() + ((chip.isIB()) ? mSimRespIBShift : mSimRespOBShift)); // collect charge in evey pixel which might be affected by the hit for (int iStep = nSteps; iStep--;) { // Get the pixel ID - if (innerBarrel) { + if (chip.isIB()) { mIBSegmentations[layer].localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } else { - SegmentationAlpide::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); + SegmentationOB::localToDetector(xyzLocS.X(), xyzLocS.Z(), row, col); } if (row != rowPrev || col != colPrev) { // update pixel and coordinates of its center - if (innerBarrel) { + if (chip.isIB()) { if (!mIBSegmentations[layer].detectorToLocal(row, col, cRowPix, cColPix)) { continue; } - } else if (!SegmentationAlpide::detectorToLocal(row, col, cRowPix, cColPix)) { + } else if (!SegmentationOB::detectorToLocal(row, col, cRowPix, cColPix)) { continue; // should not happen } rowPrev = row; @@ -377,13 +375,13 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID // note that response needs coordinates along column row (locX) (locZ) then depth (locY) float rowMax{}, colMax{}; const AlpideRespSimMat* rspmat{nullptr}; - if (innerBarrel) 
{
-      rowMax = 0.5f * SegmentationMosaix::PitchRow;
-      colMax = 0.5f * SegmentationMosaix::PitchCol;
+    if (chip.isIB()) {
+      rowMax = 0.5f * SegmentationIB::PitchRow * mSimRespIBScaleX;
+      colMax = 0.5f * SegmentationIB::PitchCol * mSimRespIBScaleZ;
       rspmat = mSimRespIB->getResponse(mSimRespIBScaleX * (xyzLocS.X() - cRowPix), mSimRespIBScaleZ * (xyzLocS.Z() - cColPix), xyzLocS.Y(), flipRow, flipCol, rowMax, colMax);
     } else {
-      rowMax = 0.5f * SegmentationAlpide::PitchRow;
-      colMax = 0.5f * SegmentationAlpide::PitchCol;
+      rowMax = 0.5f * SegmentationOB::PitchRow;
+      colMax = 0.5f * SegmentationOB::PitchCol;
       rspmat = mSimRespOB->getResponse(xyzLocS.X() - cRowPix, xyzLocS.Z() - cColPix, xyzLocS.Y(), flipRow, flipCol, rowMax, colMax);
     }
 
@@ -402,7 +400,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID
         if (colDest < 0 || colDest >= colSpan) {
           continue;
         }
-        respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, ((innerBarrel && mSimRespIBOrientation) ? !flipRow : flipRow), flipCol);
+        respMatrix[rowDest][colDest] += rspmat->getValue(irow, icol, ((chip.isIB() && mSimRespIBOrientation) ? !flipRow : flipRow), flipCol);
       }
     }
   }
@@ -419,7 +417,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID
       }
       int nEle = gRandom->Poisson(nElectrons * nEleResp); // total charge in given pixel
       // ignore charge which have no chance to fire the pixel
-      if (nEle < mParams.getMinChargeToAccount()) {
+      if (nEle < (chip.isIB() ? mParams.getIBMinChargeToAccount() : mParams.getMinChargeToAccount())) { // per-contribution cut, not the digit threshold
         continue;
       }
       uint16_t colIS = icol + colS;
@@ -428,7 +426,7 @@ void Digitizer::processHit(const o2::itsmft::Hit& hit, uint32_t& maxFr, int evID
   }
 }
 
-void Digitizer::registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF,
+void Digitizer::registerDigits(o2::its3::ChipDigitsContainer& chip, uint32_t roFrame, float tInROF, int nROF,
                                uint16_t row, uint16_t col, int nEle, o2::MCCompLabel& lbl)
 {
   // Register digits for given pixel, accounting for the possible signal contribution to
@@ -442,7 +440,7 @@ void Digitizer::registerDigits(o2::itsmft::ChipDigitsContainer& chip, uint32_t r
     tStrobe += mParams.getROFrameLength(); // for the next ROF
 
     // discard too small contributions, they have no chance to produce a digit
-    if (nEleROF < mParams.getMinChargeToAccount()) {
+    if (nEleROF < (chip.isIB() ? mParams.getIBMinChargeToAccount() : mParams.getMinChargeToAccount())) { // per-contribution cut, not the digit threshold
       continue;
     }
     if (roFr > mEventROFrameMax) {
diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx b/Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx
new file mode 100644
index 0000000000000..69314b8a0be9b
--- /dev/null
+++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3DPLDigitizerParam.cxx
@@ -0,0 +1,14 @@
+// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
+// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
+// All rights not expressly granted are reserved.
+//
+// This software is distributed under the terms of the GNU General Public
+// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
+//
+// In applying this license CERN does not waive the privileges and immunities
+// granted to it by virtue of its status as an Intergovernmental Organization
+// or submit itself to any jurisdiction.
+ +#include "ITS3Simulation/ITS3DPLDigitizerParam.h" + +O2ParamImpl(o2::its3::ITS3DPLDigitizerParam) \ No newline at end of file diff --git a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h index fca3f5d63c2c4..921512193f98b 100644 --- a/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h +++ b/Detectors/Upgrades/ITS3/simulation/src/ITS3SimulationLinkDef.h @@ -20,5 +20,8 @@ #pragma link C++ class o2::its3::DescriptorInnerBarrelITS3 + ; #pragma link C++ class o2::its3::DigiParams + ; #pragma link C++ class o2::its3::Digitizer + ; +#pragma link C++ class o2::its3::ITS3DPLDigitizerParam + ; +#pragma link C++ class o2::its3::ChipDigitsContainer + ; +#pragma link C++ class o2::its3::ChipSimResponse + ; #endif diff --git a/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx b/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx index 27f876f7bc24b..af0af091d40e8 100644 --- a/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx +++ b/Steer/DigitizerWorkflow/src/ITS3DigitizerSpec.cxx @@ -27,6 +27,7 @@ #include "DataFormatsITSMFT/ROFRecord.h" #include "ITS3Simulation/Digitizer.h" #include "ITSMFTSimulation/DPLDigitizerParam.h" +#include "ITS3Simulation/ITS3DPLDigitizerParam.h" #include "ITSMFTBase/DPLAlpideParam.h" #include "ITSBase/GeometryTGeo.h" #include "ITS3Base/ITS3Params.h" @@ -216,6 +217,7 @@ class ITS3DPLDigitizerTask : BaseDPLDigitizer mDigitizer.setGeometry(geom); const auto& dopt = o2::itsmft::DPLDigitizerParam::Instance(); + const auto& doptIB = o2::its3::ITS3DPLDigitizerParam::Instance(); pc.inputs().get*>("ITS_alppar"); const auto& aopt = o2::itsmft::DPLAlpideParam::Instance(); digipar.setContinuous(dopt.continuous); @@ -238,6 +240,11 @@ class ITS3DPLDigitizerTask : BaseDPLDigitizer digipar.setTimeOffset(dopt.timeOffset); digipar.setNSimSteps(dopt.nSimSteps); + // ITS3 inner barrel specific parameters + digipar.setIBChargeThreshold(doptIB.IBChargeThreshold); + 
digipar.setIBNSimSteps(doptIB.nIBSimSteps); + digipar.setIBNoisePerPixel(doptIB.IBNoisePerPixel); + mROMode = digipar.isContinuous() ? o2::parameters::GRPObject::CONTINUOUS : o2::parameters::GRPObject::PRESENT; LOG(info) << mID.getName() << " simulated in " << ((mROMode == o2::parameters::GRPObject::CONTINUOUS) ? "CONTINUOUS" : "TRIGGERED") From e4399252ad5567ec680ed552fe5330ac9a51ce24 Mon Sep 17 00:00:00 2001 From: Hadi Hassan Date: Wed, 7 May 2025 13:09:49 +0300 Subject: [PATCH 0472/1914] Increasing the FOCAL volume based on the opening (#14241) --- Detectors/FOCAL/simulation/src/Detector.cxx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/FOCAL/simulation/src/Detector.cxx b/Detectors/FOCAL/simulation/src/Detector.cxx index dc71c1066afdf..164b531e60918 100644 --- a/Detectors/FOCAL/simulation/src/Detector.cxx +++ b/Detectors/FOCAL/simulation/src/Detector.cxx @@ -451,7 +451,7 @@ void Detector::ConstructGeometry() } float pars[4]; - pars[0] = (mGeometry->getFOCALSizeX() + 2 * mGeometry->getMiddleTowerOffset()) / 2; + pars[0] = (mGeometry->getFOCALSizeX() + 2 * mGeometry->getMiddleTowerOffset() + mGeometry->getDetectorOpeningRight() + mGeometry->getDetectorOpeningLeft()) / 2; pars[1] = mGeometry->getFOCALSizeY() / 2; pars[2] = mGeometry->getFOCALSizeZ() / 2; // Add space to place 2 SiPad layers in front of ECAL @@ -984,7 +984,7 @@ void Detector::CreateECALGeometry() // Place the towers in the ECAL // --- Place the ECAL in FOCAL float fcal_pars[4]; - fcal_pars[0] = (geom->getFOCALSizeX() + 2. * geom->getMiddleTowerOffset()) / 2.; + fcal_pars[0] = (geom->getFOCALSizeX() + 2. 
* geom->getMiddleTowerOffset() + mGeometry->getDetectorOpeningRight() + mGeometry->getDetectorOpeningLeft()) / 2.; fcal_pars[1] = geom->getFOCALSizeY() / 2.; fcal_pars[2] = geom->getECALSizeZ() / 2.; fcal_pars[3] = 0.; From a28ef94e0010e819d44d01b6dcdec64db75c6ff9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 7 May 2025 13:33:27 +0200 Subject: [PATCH 0473/1914] GPU TPC: Do not shift track in Z after the last fit --- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 5bd8fd556aa3f..2524c01f0c00b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -407,7 +407,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ break; // bad chi2 for the whole track, stop the fit } } - if (((nWays - iWay) & 1) && (clusters[0].sector < 18) == (clusters[maxN - 1].sector < 18)) { + if (((nWays - iWay) & 1) && (iWay != nWays - 1) && (clusters[0].sector < 18) == (clusters[maxN - 1].sector < 18)) { ShiftZ2(clusters, clustersXYZ, merger, maxN); } } From 8789c46104bc200f83a423b5840a783441e872fa Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 7 May 2025 16:02:04 +0200 Subject: [PATCH 0474/1914] GPU: Fix Debug Dump of deterministic mode without dEdx (was segfaulting in sync mode) --- GPU/GPUTracking/Base/GPUParam.cxx | 6 +++--- GPU/GPUTracking/Base/GPUParam.h | 2 +- GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 6 +++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 6 +++--- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUParam.cxx b/GPU/GPUTracking/Base/GPUParam.cxx index b835e1b198eea..57f23792e472a 100644 --- 
a/GPU/GPUTracking/Base/GPUParam.cxx +++ b/GPU/GPUTracking/Base/GPUParam.cxx @@ -135,9 +135,9 @@ void GPUParam::UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessi UpdateRun3ClusterErrors(p->param.tpcErrorParamY, p->param.tpcErrorParamZ); } if (w) { - par.dodEdx = dodEdxDownscaled = w->steps.isSet(GPUDataTypes::RecoStep::TPCdEdx); - if (par.dodEdx && p && p->tpcDownscaledEdx != 0) { - dodEdxDownscaled = (rand() % 100) < p->tpcDownscaledEdx; + par.dodEdx = dodEdxEnabled = w->steps.isSet(GPUDataTypes::RecoStep::TPCdEdx); + if (dodEdxEnabled && p && p->tpcDownscaledEdx != 0) { + dodEdxEnabled = (rand() % 100) < p->tpcDownscaledEdx; } } if (d) { diff --git a/GPU/GPUTracking/Base/GPUParam.h b/GPU/GPUTracking/Base/GPUParam.h index 4b77628c88775..076d25d72d178 100644 --- a/GPU/GPUTracking/Base/GPUParam.h +++ b/GPU/GPUTracking/Base/GPUParam.h @@ -54,7 +54,7 @@ struct GPUParam_t { float bzCLight; float qptB5Scaler; - int8_t dodEdxDownscaled; + int8_t dodEdxEnabled; int32_t continuousMaxTimeBin; int32_t tpcCutTimeBin; diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 84835a1695071..a38148ccb375a 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -256,7 +256,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; - if (param().dodEdxDownscaled) { + if (param().dodEdxEnabled) { GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); 
diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index b6241ad36b5de..6e7de7ee48ca6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -299,7 +299,7 @@ void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) void* GPUTPCGMMerger::SetPointersOutput(void* mem) { computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); - if (mRec->GetParam().dodEdxDownscaled) { + if (mRec->GetParam().dodEdxEnabled) { computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 2f8fbecadce5f..ac55f423b1c42 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -203,7 +203,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() - << " dEdx " << (trk.OK() ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() ? mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) + << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << "\n"; } out << std::setprecision(ss); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index ea219a02a1887..9dc6ddc59c2b4 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -44,7 +44,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlock constexpr uint8_t flagsReject = getFlagsReject(); const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); - bool cutOnTrackdEdx = merger.Param().par.dodEdx && merger.Param().dodEdxDownscaled && merger.Param().rec.tpc.minTrackdEdxMax2Tot > 0.f; + bool cutOnTrackdEdx = merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.minTrackdEdxMax2Tot > 0.f; GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); uint2* GPUrestrict() tmpData = merger.ClusRefTmp(); @@ -130,7 +130,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if (merger.Param().par.dodEdx && merger.Param().dodEdxDownscaled) { + if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled) { oTrack.setdEdx(tracksdEdx[i]); oTrack.setdEdxAlt(tracksdEdxAlt[i]); } @@ -148,7 +148,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks outerPar.C[6], outerPar.C[7], outerPar.C[8], outerPar.C[9], outerPar.C[10], outerPar.C[11], outerPar.C[12], outerPar.C[13], outerPar.C[14]})); - if (merger.Param().par.dodEdx && merger.Param().dodEdxDownscaled && merger.Param().rec.tpc.enablePID) { + if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.enablePID) { PIDResponse pidResponse{}; auto pid = pidResponse.getMostProbablePID(oTrack, merger.Param().rec.tpc.PID_EKrangeMin, merger.Param().rec.tpc.PID_EKrangeMax, 
merger.Param().rec.tpc.PID_EPrangeMin, merger.Param().rec.tpc.PID_EPrangeMax, merger.Param().rec.tpc.PID_EDrangeMin, merger.Param().rec.tpc.PID_EDrangeMax, merger.Param().rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); auto pidRemap = merger.Param().rec.tpc.PID_remap[pid]; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 2524c01f0c00b..f5bfbe985fb8c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -216,7 +216,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - bool dodEdx = param.par.dodEdx && param.dodEdxDownscaled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; + bool dodEdx = param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); @@ -367,7 +367,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, prop.GetAlpha()); } - if (param.par.dodEdx && param.dodEdxDownscaled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters + if (param.par.dodEdx && param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option 
to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { float qtot = 0, qmax = 0, pad = 0, relTime = 0; @@ -426,7 +426,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. - if (param.par.dodEdx && param.dodEdxDownscaled) { + if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); } From 42e1f6ac595fe09ada3cf36ae4baddd2b03aba44 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 7 May 2025 20:00:01 +0200 Subject: [PATCH 0475/1914] GPU: debug files of multiple GPU Reconstruction instances shall not overwrite each other --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 1 + GPU/GPUTracking/Base/GPUReconstruction.h | 2 ++ GPU/GPUTracking/Global/GPUChainTracking.cxx | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index ad2ee2e840d00..3ef995b9f9561 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -87,6 +87,7 @@ GPUReconstruction::GPUReconstruction(const GPUSettingsDeviceBackend& cfg) : mHos throw std::invalid_argument("Cannot be slave to a slave"); } mMaster = cfg.master; + mSlaveId = cfg.master->mSlaves.size(); cfg.master->mSlaves.emplace_back(this); } param().SetDefaults(mGRPSettings.get()); diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index f5b39cb370b9e..d5c0b8e828087 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -190,6 +190,7 @@ class GPUReconstruction //
Helpers to fetch processors from other shared libraries virtual void GetITSTraits(std::unique_ptr* trackerTraits, std::unique_ptr* vertexerTraits, std::unique_ptr* timeFrame); bool slavesExist() { return mSlaves.size() || mMaster; } + int slaveId() { return mSlaveId; } // Getters / setters for parameters DeviceType GetDeviceType() const; @@ -339,6 +340,7 @@ class GPUReconstruction GPUReconstruction* mMaster = nullptr; // Ptr to a GPUReconstruction object serving as master, sharing GPU memory, events, etc. std::vector mSlaves; // Ptr to slave GPUReconstructions + int mSlaveId = -1; // Id of this slave (-1 for master) // Others bool mInitialized = false; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index a3f9b996e070d..f8d4165477220 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -378,7 +378,8 @@ int32_t GPUChainTracking::Init() } if (GetProcessingSettings().debugLevel >= 6) { - mDebugFile->open(mRec->IsGPU() ? "GPU.out" : "CPU.out"); + std::string filename = std::string(mRec->IsGPU() ? "GPU" : "CPU") + (mRec->slaveId() != -1 ? (std::string("_slave") + std::to_string(mRec->slaveId())) : std::string(mRec->slavesExist() ? "_master" : "")) + ".out"; + mDebugFile->open(filename.c_str()); } return 0; From 689970d53e9172450ec9ee1b0c14e125df741e8a Mon Sep 17 00:00:00 2001 From: jokonig Date: Wed, 7 May 2025 15:09:50 +0200 Subject: [PATCH 0476/1914] [EMCAL-539] Fix in trigger simulation to not accept every event as MB - Only events where EMCal is triggered should be given the trigger flag. 
This was previously not the case --- .../include/EMCALSimulation/Digitizer.h | 1 + .../EMCALSimulation/DigitsWriteoutBuffer.h | 17 +++++++++++++++-- .../EMCAL/workflow/src/EMCALDigitizerSpec.cxx | 12 +++++++----- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h b/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h index ad296a4d65a58..66f85184c98e6 100644 --- a/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h +++ b/Detectors/EMCAL/simulation/include/EMCALSimulation/Digitizer.h @@ -72,6 +72,7 @@ class Digitizer : public TObject double getEventTime() const { return mDigits.getEventTime(); } bool isLive(double t) const { return mDigits.isLive(t); } bool isLive() const { return mDigits.isLive(); } + bool isCurrentEventTriggered() const { return mDigits.isCurrentEventTriggered(); } void setDebugStreaming(bool doStreaming) { mEnableDebugStreaming = doStreaming; } diff --git a/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h b/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h index b6f486ddf2add..5713f2ef18ad9 100644 --- a/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h +++ b/Detectors/EMCAL/simulation/include/EMCALSimulation/DigitsWriteoutBuffer.h @@ -60,8 +60,21 @@ class DigitsWriteoutBuffer double getTriggerTime() const { return mTriggerTime; } double getEventTime() const { return mLastEventTime; } - bool isLive(double t) const { return ((t - mTriggerTime) < mLiveTime || (t - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); } - bool isLive() const { return ((mLastEventTime - mTriggerTime) < mLiveTime || (mLastEventTime - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); } + bool isLive(double t) const + { + return ((t - mTriggerTime) < mLiveTime || (t - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); + } + bool isLive() const + { + return ((mLastEventTime - 
mTriggerTime) < (mLiveTime - mPreTriggerTime) || (mLastEventTime - mTriggerTime) >= (mLiveTime + mBusyTime - mPreTriggerTime)); + } + + /// Check if current collision was triggered + /// \return true if event was triggered + bool isCurrentEventTriggered() const + { + return mLastEventTime == mTriggerTime; + } // function returns true if the collision occurs 600ns before the readout window is open // Look here for more details https://alice.its.cern.ch/jira/browse/EMCAL-681 diff --git a/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx b/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx index 5de966d1b6a4c..cabdb2c74d818 100644 --- a/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx +++ b/Detectors/EMCAL/workflow/src/EMCALDigitizerSpec.cxx @@ -332,15 +332,17 @@ void DigitizerSpec::run(framework::ProcessingContext& ctx) if (!trigger.any()) { continue; } - // Trigger sim: Prepare CTP input digit - acceptedTriggers.push_back(std::make_tuple(timesview[collID], trigger)); - LOG(debug) << "EMCAL TRU simulation: Sending trg = " << trigger << " to CTP"; mDigitizer.setEventTime(timesview[collID], trigger.any()); - - if (!mDigitizer.isLive()) { + if (!mDigitizer.isCurrentEventTriggered()) { + LOG(debug) << "reject collision"; continue; } + LOG(debug) << "accept collision"; + + // Trigger sim: Prepare CTP input digit + acceptedTriggers.push_back(std::make_tuple(timesview[collID], trigger)); + LOG(debug) << "EMCAL TRU simulation: Sending trg = " << trigger << " to CTP"; // for each collision, loop over the constituents event and source IDs // (background signal merging is basically taking place here) From 440e50040510d18bcc9742e75bfc2a4898a17c8c Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Thu, 8 May 2025 18:18:01 +0200 Subject: [PATCH 0477/1914] =?UTF-8?q?Revert=20"DPL=20Analysis:=20prevent?= =?UTF-8?q?=20slice=20cache=20from=20updating=20when=20not=20required=20by?= =?UTF-8?q?=20=E2=80=A6"=20(#14252)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- Framework/Core/include/Framework/ASoA.h | 12 ++--- .../Core/include/Framework/AnalysisManagers.h | 14 ++---- .../Core/include/Framework/AnalysisTask.h | 22 ++++----- .../Framework/ArrowTableSlicingCache.h | 45 ++++++----------- .../Core/include/Framework/GroupSlicer.h | 2 +- Framework/Core/src/ASoA.cxx | 2 +- Framework/Core/src/ArrowSupport.cxx | 19 ++++--- Framework/Core/src/ArrowTableSlicingCache.cxx | 49 +++++++++---------- Framework/Core/test/test_GroupSlicer.cxx | 4 +- 9 files changed, 73 insertions(+), 96 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index 2e478a8ca64a6..e098cd89f6d5d 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1400,10 +1400,10 @@ namespace o2::framework struct PreslicePolicyBase { const std::string binding; - Entry bindingKey; + StringPair bindingKey; bool isMissing() const; - Entry const& getBindingKey() const; + StringPair const& getBindingKey() const; }; struct PreslicePolicySorted : public PreslicePolicyBase { @@ -1428,7 +1428,7 @@ struct PresliceBase : public Policy { const std::string binding; PresliceBase(expressions::BindingNode index_) - : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, Entry(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { } @@ -1508,7 +1508,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); } } uint64_t offset = 0; @@ -1545,7 +1545,7 @@ 
auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); } } auto selection = container.getSliceFor(value); @@ -1574,7 +1574,7 @@ auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase().data(), container.bindingKey.key.c_str()); + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.second.c_str()); } } uint64_t offset = 0; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index e310f3eef990c..330eaf01f0be4 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -534,43 +534,39 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template requires(!is_preslice) -bool registerCache(T&, Cache&, Cache&) +bool registerCache(T&, std::vector&, std::vector&) { return false; } template requires std::same_as -bool registerCache(T& preslice, Cache& bsks, Cache&) +bool registerCache(T& preslice, std::vector& bsks, std::vector&) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); if (locate == bsks.end()) { bsks.emplace_back(preslice.getBindingKey()); - } else if (locate->enabled == false) { - locate->enabled = true; } return true; } template requires std::same_as -bool registerCache(T& preslice, Cache&, Cache& bsksU) +bool 
registerCache(T& preslice, std::vector&, std::vector& bsksU) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); if (locate == bsksU.end()) { bsksU.emplace_back(preslice.getBindingKey()); - } else if (locate->enabled == false) { - locate->enabled = true; } return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index 9bd2e2af173cc..c7f3da1948c62 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -66,20 +66,20 @@ concept is_enumeration = is_enumeration_v>; namespace { struct AnalysisDataProcessorBuilder { template - static void addGroupingCandidates(Cache& bk, Cache& bku, bool enabled) + static void addGroupingCandidates(std::vector& bk, std::vector& bku) { - [&bk, &bku, enabled](framework::pack) mutable { + [&bk, &bku](framework::pack) mutable { std::string key; if constexpr (soa::is_iterator>) { key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); } - ([&bk, &bku, &key, enabled]() mutable { + ([&bk, &bku, &key]() mutable { if constexpr (soa::relatedByIndex, std::decay_t>()) { auto binding = soa::getLabelFromTypeForKey>(key); if constexpr (o2::soa::is_smallgroups>) { - framework::updatePairList(bku, binding, key, enabled); + framework::updatePairList(bku, binding, key); } else { - framework::updatePairList(bk, binding, key, enabled); + framework::updatePairList(bk, binding, key); } } }(), @@ -147,7 +147,7 @@ struct AnalysisDataProcessorBuilder { /// helper to parse the process arguments /// 1. 
enumeration (must be the only argument) template - static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, Cache&, Cache&) + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) { std::vector inputMetadata; // FIXME: for the moment we do not support begin, end and step. @@ -156,17 +156,17 @@ struct AnalysisDataProcessorBuilder { /// 2. grouping case - 1st argument is an iterator template - static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache& bk, Cache& bku) + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) { - addGroupingCandidates(bk, bku, value); + addGroupingCandidates(bk, bku); constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); } /// 3. generic case template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, Cache&, Cache&) + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) requires(std::is_lvalue_reference_v && ...) { constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); @@ -480,8 +480,8 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) std::vector inputs; std::vector options; std::vector expressionInfos; - Cache bindingsKeys; - Cache bindingsKeysUnsorted; + std::vector bindingsKeys; + std::vector bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 292a67023fc5e..2edc23a63ce76 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -34,64 +34,51 @@ struct SliceInfoUnsortedPtr { gsl::span getSliceFor(int value) const; }; -struct Entry { - std::string binding; - std::string key; - bool enabled; - - Entry(std::string b, std::string k, bool e = true) - : binding{b}, - key{k}, - enabled{e} - { - } -}; - -using Cache = std::vector; +using StringPair = std::pair; -void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled); +void updatePairList(std::vector& list, std::string const& binding, std::string const& key); struct ArrowTableSlicingCacheDef { constexpr static ServiceKind service_kind = ServiceKind::Global; - Cache bindingsKeys; - Cache bindingsKeysUnsorted; + std::vector bindingsKeys; + std::vector bindingsKeysUnsorted; - void setCaches(Cache&& bsks); - void setCachesUnsorted(Cache&& bsks); + void setCaches(std::vector&& bsks); + void setCachesUnsorted(std::vector&& bsks); }; struct ArrowTableSlicingCache { constexpr static ServiceKind service_kind = ServiceKind::Stream; - Cache bindingsKeys; + std::vector bindingsKeys; std::vector>> values; std::vector>> counts; - Cache bindingsKeysUnsorted; + std::vector bindingsKeysUnsorted; std::vector> valuesUnsorted; std::vector groups; - ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted = {}); + 
ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted = {}); // set caching information externally - void setCaches(Cache&& bsks, Cache&& bsksUnsorted = {}); + void setCaches(std::vector&& bsks, std::vector&& bsksUnsorted = {}); // update slicing info cache entry (assumes it is already present) arrow::Status updateCacheEntry(int pos, std::shared_ptr const& table); arrow::Status updateCacheEntryUnsorted(int pos, std::shared_ptr const& table); // helper to locate cache position - std::pair getCachePos(Entry const& bindingKey) const; - int getCachePosSortedFor(Entry const& bindingKey) const; - int getCachePosUnsortedFor(Entry const& bindingKey) const; + std::pair getCachePos(StringPair const& bindingKey) const; + int getCachePosSortedFor(StringPair const& bindingKey) const; + int getCachePosUnsortedFor(StringPair const& bindingKey) const; // get slice from cache for a given value - SliceInfoPtr getCacheFor(Entry const& bindingKey) const; - SliceInfoUnsortedPtr getCacheUnsortedFor(Entry const& bindingKey) const; + SliceInfoPtr getCacheFor(StringPair const& bindingKey) const; + SliceInfoUnsortedPtr getCacheUnsortedFor(StringPair const& bindingKey) const; SliceInfoPtr getCacheForPos(int pos) const; SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const; - static void validateOrder(Entry const& bindingKey, std::shared_ptr const& input); + static void validateOrder(StringPair const& bindingKey, std::shared_ptr const& input); }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index b8436314b057e..64b1d863c59e6 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ b/Framework/Core/include/Framework/GroupSlicer.h @@ -55,7 +55,7 @@ struct GroupSlicer { { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = Entry(binding, mIndexColumnName); + auto bk = 
std::make_pair(binding, mIndexColumnName); if constexpr (!o2::soa::is_smallgroups>) { if (table.size() == 0) { return; diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 5940bc0427225..810398747de88 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -197,7 +197,7 @@ bool PreslicePolicyBase::isMissing() const return binding == "[MISSING]"; } -Entry const& PreslicePolicyBase::getBindingKey() const +StringPair const& PreslicePolicyBase::getBindingKey() const { return bindingKey; } diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 3b13e30581f70..12a4c7131e828 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -567,27 +567,26 @@ o2::framework::ServiceSpec ArrowSupport::arrowTableSlicingCacheSpec() .name = "arrow-slicing-cache", .uniqueId = CommonServices::simpleServiceId(), .init = [](ServiceRegistryRef services, DeviceState&, fair::mq::ProgOptions&) { return ServiceHandle{TypeIdHelpers::uniqueId(), - new ArrowTableSlicingCache(Cache{services.get().bindingsKeys}, - Cache{services.get().bindingsKeysUnsorted}), + new ArrowTableSlicingCache(std::vector>{services.get().bindingsKeys}, std::vector{services.get().bindingsKeysUnsorted}), ServiceKind::Stream, typeid(ArrowTableSlicingCache).name()}; }, .configure = CommonServices::noConfiguration(), .preProcessing = [](ProcessingContext& pc, void* service_ptr) { auto* service = static_cast(service_ptr); auto& caches = service->bindingsKeys; - for (auto i = 0u; i < caches.size(); ++i) { - if (caches[i].enabled && pc.inputs().getPos(caches[i].binding.c_str()) >= 0) { - auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].binding.c_str())->asArrowTable()); + for (auto i = 0; i < caches.size(); ++i) { + if (pc.inputs().getPos(caches[i].first.c_str()) >= 0) { + auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].first.c_str())->asArrowTable()); if (!status.ok()) { - 
throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].binding.c_str(), caches[i].key.c_str()); + throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].first.c_str(), caches[i].second.c_str()); } } } auto& unsortedCaches = service->bindingsKeysUnsorted; - for (auto i = 0u; i < unsortedCaches.size(); ++i) { - if (unsortedCaches[i].enabled && pc.inputs().getPos(unsortedCaches[i].binding.c_str()) >= 0) { - auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].binding.c_str())->asArrowTable()); + for (auto i = 0; i < unsortedCaches.size(); ++i) { + if (pc.inputs().getPos(unsortedCaches[i].first.c_str()) >= 0) { + auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].first.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].binding.c_str(), unsortedCaches[i].key.c_str()); + throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].first.c_str(), unsortedCaches[i].second.c_str()); } } } }, diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 12df5ef6c080b..4b31f96e32fba 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -11,7 +11,6 @@ #include "Framework/ArrowTableSlicingCache.h" #include "Framework/RuntimeError.h" -#include "Framework/Logger.h" #include #include @@ -20,10 +19,10 @@ namespace o2::framework { -void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled = true) +void updatePairList(std::vector& list, std::string const& binding, std::string const& key) { - if (std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.binding == binding) && (entry.key == key); }) == list.end()) { - list.emplace_back(binding, key, enabled); + if 
(std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.first == binding) && (entry.second == key); }) == list.end()) { + list.emplace_back(binding, key); } } @@ -66,17 +65,17 @@ gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const return {(*groups)[value].data(), (*groups)[value].size()}; } -void ArrowTableSlicingCacheDef::setCaches(Cache&& bsks) +void ArrowTableSlicingCacheDef::setCaches(std::vector&& bsks) { bindingsKeys = bsks; } -void ArrowTableSlicingCacheDef::setCachesUnsorted(Cache&& bsks) +void ArrowTableSlicingCacheDef::setCachesUnsorted(std::vector&& bsks) { bindingsKeysUnsorted = bsks; } -ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted) +ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted) : bindingsKeys{bsks}, bindingsKeysUnsorted{bsksUnsorted} { @@ -87,7 +86,7 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorte groups.resize(bindingsKeysUnsorted.size()); } -void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) +void ArrowTableSlicingCache::setCaches(std::vector&& bsks, std::vector&& bsksUnsorted) { bindingsKeys = bsks; bindingsKeysUnsorted = bsksUnsorted; @@ -112,7 +111,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, - arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].key)}, + arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].second)}, &options)); auto pair = static_cast(value_counts.array()); values[pos].reset(); @@ -129,11 +128,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (table->num_rows() == 0) { return arrow::Status::OK(); } - auto& [b, k, e] = bindingsKeysUnsorted[pos]; - if (!e) { - LOG(debug) 
<< "Update of disabled cache requested"; - return arrow::Status::OK(); - } + auto& [b, k] = bindingsKeysUnsorted[pos]; auto column = table->GetColumnByName(k); auto row = 0; for (auto iChunk = 0; iChunk < column->num_chunks(); ++iChunk) { @@ -144,7 +139,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (std::find(valuesUnsorted[pos].begin(), valuesUnsorted[pos].end(), v) == valuesUnsorted[pos].end()) { valuesUnsorted[pos].push_back(v); } - if ((int)groups[pos].size() <= v) { + if (groups[pos].size() <= v) { groups[pos].resize(v + 1); } (groups[pos])[v].push_back(row); @@ -156,7 +151,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st return arrow::Status::OK(); } -std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey) const +std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindingKey) const { auto pos = getCachePosSortedFor(bindingKey); if (pos != -1) { @@ -166,41 +161,41 @@ std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey if (pos != -1) { return {pos, false}; } - throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); } -int ArrowTableSlicingCache::getCachePosSortedFor(Entry const& bindingKey) const +int ArrowTableSlicingCache::getCachePosSortedFor(StringPair const& bindingKey) const { - auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); + auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); if (locate != bindingsKeys.end()) { return std::distance(bindingsKeys.begin(), locate); } return -1; } -int 
ArrowTableSlicingCache::getCachePosUnsortedFor(Entry const& bindingKey) const +int ArrowTableSlicingCache::getCachePosUnsortedFor(StringPair const& bindingKey) const { - auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); + auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); if (locate_unsorted != bindingsKeysUnsorted.end()) { return std::distance(bindingsKeysUnsorted.begin(), locate_unsorted); } return -1; } -SliceInfoPtr ArrowTableSlicingCache::getCacheFor(Entry const& bindingKey) const +SliceInfoPtr ArrowTableSlicingCache::getCacheFor(StringPair const& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (!s) { - throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); } return getCacheForPos(p); } -SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const Entry& bindingKey) const +SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const StringPair& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (s) { - throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); } return getCacheUnsortedForPos(p); @@ -229,9 +224,9 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con }; } -void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr& input) +void ArrowTableSlicingCache::validateOrder(StringPair const& bindingKey, const std::shared_ptr& input) { - 
auto const& [target, key, enabled] = bindingKey; + auto const& [target, key] = bindingKey; auto column = input->GetColumnByName(key); auto array0 = static_cast>(column->chunk(0)->data()); int32_t prev = 0; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 091c21eeae229..161939141e790 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -683,7 +683,7 @@ TEST_CASE("ArrowDirectSlicing") std::vector slices; std::vector offsts; - auto bk = Entry(soa::getLabelFromType(), "fID"); + auto bk = std::make_pair(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); auto s = cache.updateCacheEntry(0, {evtTable}); auto lcache = cache.getCacheFor(bk); @@ -741,7 +741,7 @@ TEST_CASE("TestSlicingException") } auto evtTable = builderE.finalize(); - auto bk = Entry(soa::getLabelFromType(), "fID"); + auto bk = std::make_pair(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); try { From 9647d481ee3e039dec060e2f005a49617e21d4e5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 00:18:02 +0200 Subject: [PATCH 0478/1914] GPU: Add debug dumps for compressed / uncompressed TPC clusters --- GPU/GPUTracking/CMakeLists.txt | 1 + .../DataCompression/GPUTPCCompression.cxx | 56 +++++++++++++ .../DataCompression/GPUTPCCompression.h | 4 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- GPU/GPUTracking/Global/GPUChain.h | 31 +++----- GPU/GPUTracking/Global/GPUChainTracking.h | 1 + .../Global/GPUChainTrackingClusterizer.cxx | 19 ++--- .../Global/GPUChainTrackingCompression.cxx | 3 + .../Global/GPUChainTrackingDebug.h | 79 +++++++++++++++++++ .../GPUChainTrackingDebugAndProfiling.cxx | 15 ++++ .../Global/GPUChainTrackingMerger.cxx | 19 ++--- .../Global/GPUChainTrackingSectorTracker.cxx | 15 ++-- .../TPCClusterFinder/GPUTPCClusterFinder.h | 1 + .../GPUTPCClusterFinderDump.cxx | 2 +- 14 files changed, 201 insertions(+), 47 deletions(-) create mode 
100644 GPU/GPUTracking/Global/GPUChainTrackingDebug.h diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index b2852389398d0..2e26622d05291 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -138,6 +138,7 @@ set(HDRS_INSTALL Definitions/GPULogging.h Definitions/GPUSettingsList.h Global/GPUChainTrackingDefs.h + Global/GPUChainTrackingDebug.h Global/GPUChainTrackingGetters.inc Global/GPUErrorCodes.h Merger/GPUTPCGMBorderTrack.h diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 2a0c5b58d8a83..a107f749ddd77 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -130,3 +130,59 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters += 16 - (mMaxClusters % 16); } } + +void GPUTPCCompression::DumpCompressedClusters(std::ostream& out) +{ + const o2::tpc::CompressedClusters O = *mOutputFlat; + out << "\n\nCompressed Clusters:\n"; + out << O.nTracks << " Tracks\n"; + out << "Slice Row Clusters:\n"; + for (uint32_t i = 0; i < NSECTORS; i++) { + out << "Sector " << i << ": "; + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] << ", "; + } + out << "\n"; + } + out << "\nTrack Clusters:\n"; + for (uint32_t i = 0; i < O.nTracks; i++) { + if (i && i % 100 == 0) { + out << "\n"; + } + out << O.nTrackClusters[i] << ", "; + } + out << "\n\nUnattached Clusters\n"; + uint32_t offset = 0; + for (uint32_t i = 0; i < NSECTORS; i++) { + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << "Sector " << i << " Row " << j << ": "; + for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l = k + offset; + out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " 
<< (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] "; + } + offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; + out << "\n"; + } + } + out << "\n\nAttached Clusters\n"; + offset = 0; + for (uint32_t i = 0; i < O.nTracks; i++) { + out << "Track " << i << ": {" << (uint32_t)O.qPtA[i] << ", " << (uint32_t)O.rowA[i] << ", " << (uint32_t)O.sliceA[i] << ", " << (uint32_t)O.timeA[i] << ", " << (uint32_t)O.padA[i] << "} - "; + for (uint32_t k = 0; k < O.nTrackClusters[i]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l1 = k + offset, l2 = k + offset - i; + out << "["; + if (k) { + out << (int32_t)O.rowDiffA[l2] << ", " << (int32_t)O.sliceLegDiffA[l2] << ", " << (uint32_t)O.padResA[l2] << ", " << (uint32_t)O.timeResA[l2] << ", "; + } + out << (uint32_t)O.qTotA[l1] << ", " << (uint32_t)O.qMaxA[l1] << ", " << (uint32_t)O.flagsA[l1] << ", " << (uint32_t)O.sigmaPadA[l1] << ", " << (uint32_t)O.sigmaTimeA[l1] << "] "; + } + offset += O.nTrackClusters[i]; + out << "\n"; + } +} diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h index c1d9fe283fbea..52585b4c08b24 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.h +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.h @@ -57,6 +57,10 @@ class GPUTPCCompression : public GPUProcessor GPUd() static void truncateSignificantBitsChargeMax(uint16_t& charge, const GPUParam& param) { truncateSignificantBits(charge, param.rec.tpc.sigBitsCharge, P_MAX_QMAX); } GPUd() static void truncateSignificantBitsWidth(uint8_t& width, const GPUParam& param) { truncateSignificantBits(width, param.rec.tpc.sigBitsWidth, P_MAX_SIGMA); } +#ifndef GPUCA_GPUCODE + void DumpCompressedClusters(std::ostream& out); +#endif + protected: struct memory { uint32_t nStoredTracks = 0; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h 
b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 4c32c3e46e3a7..638a3ed43d2aa 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -295,7 +295,7 @@ AddOption(trdNCandidates, int32_t, 3, "", 0, "Number of branching track candidat AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GPU track model for TRD tracking") AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") -AddOption(debugMask, int32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1)) diff --git a/GPU/GPUTracking/Global/GPUChain.h b/GPU/GPUTracking/Global/GPUChain.h index 5df324fcba648..aca1bb2420fb6 100644 --- a/GPU/GPUTracking/Global/GPUChain.h +++ b/GPU/GPUTracking/Global/GPUChain.h @@ -20,6 +20,7 @@ #include "GPUKernelClassesFwd.h" #include +#include namespace o2::gpu { @@ -226,12 +227,19 @@ class GPUChain virtual int32_t DoStuckProtection(int32_t stream, deviceEvent event) { return 0; } template - bool DoDebugAndDump(RecoStep step, int32_t mask, T& processor, S T::*func, Args&&... args) + bool DoDebugAndDump(RecoStep step, uint32_t mask, T& processor, S T::*func, Args&&... 
args) { return DoDebugAndDump(step, mask, true, processor, func, args...); } template - bool DoDebugAndDump(RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args); + bool DoDebugAndDump(RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args); + template + bool DoDebugDump(uint32_t mask, std::function func, Args&... args); + template + bool DoDebugDump(uint32_t mask, S* func, Args&&... args) + { + return DoDebugDump(mask, std::function([&func](Args&&... args_tmp) { (*func)(args_tmp...); }), args...); + } template int32_t runRecoStep(RecoStep step, S T::*func, Args... args); @@ -278,24 +286,7 @@ inline void GPUChain::timeCpy(RecoStep step, int32_t toGPU, S T::*func, Args... } template -bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, int32_t mask, bool transfer, T& processor, S T::*func, Args&&... args) -{ - if (GetProcessingSettings().keepAllMemory) { - if (transfer) { - TransferMemoryResourcesToHost(step, &processor, -1, true); - } - if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) { - if (func) { - (processor.*func)(args...); - } - return true; - } - } - return false; -} - -template -int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) +inline int32_t GPUChain::runRecoStep(RecoStep step, S T::*func, Args... args) { if (GetRecoSteps().isSet(step)) { auto* timer = GetProcessingSettings().recoTaskTiming ? 
&mRec->getRecoStepTimer(step) : nullptr; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 8664652b549e3..13773a97d4e3d 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -234,6 +234,7 @@ class GPUChainTracking : public GPUChain void PrepareDebugOutput(); void PrintDebugOutput(); void PrintOutputStat(); + static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index f188388e76a02..2cdd1bb76bf00 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -14,6 +14,7 @@ #include "GPUChainTracking.h" #include "GPUChainTrackingDefs.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" @@ -813,7 +814,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (fragment.index == 0) { runKernel({GetGridAutoStep(lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy, TPC_PADS_IN_SECTOR * sizeof(*clustererShadow.mPpadIsNoisy)); } - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererZeroedCharges, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Zeroed Charges"); if (doGPU) { if (mIOPtrs.tpcZS && mCFContext->nPagesSector[iSector] && mCFContext->zsVersion != -1) { @@ -900,7 +901,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (!mIOPtrs.tpcZS) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); } - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 1, 
clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererDigits, clusterer, &GPUTPCClusterFinder::DumpDigits, *mDebugFile)) { clusterer.DumpChargeMap(*mDebugFile, "Charges"); } @@ -919,13 +920,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Peaks"); } RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 0, doGPU, lane); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 2, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererPeaks, clusterer, &GPUTPCClusterFinder::DumpPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { uint32_t iSector = iSectorBase + lane; @@ -939,13 +940,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); runKernel({GetGrid(clusterer.mPmemory->counters.nPeaks, lane), {iSector}}); - if (DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { + if (DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaks, *mDebugFile)) { clusterer.DumpPeakMap(*mDebugFile, "Suppressed Peaks"); } 
RunTPCClusterizer_compactPeaks(clusterer, clustererShadow, 1, doGPU, lane); TransferMemoryResourceLinkToHost(RecoStep::TPCClusterFinding, clusterer.mMemoryId, lane); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 3, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererSuppressedPeaks, clusterer, &GPUTPCClusterFinder::DumpSuppressedPeaksCompacted, *mDebugFile); // clang-format off }); mRec->runParallelOuterLoop(doGPU, maxLane, [&](uint32_t lane) { uint32_t iSector = iSectorBase + lane; @@ -979,7 +980,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); } // float time_clusterizer = 0, time_fill = 0, time_networks = 0; @@ -1092,7 +1093,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) #endif } else { runKernel({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}); - DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges"); runKernel({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0); } @@ -1111,7 +1112,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) 
TransferMemoryResourcesToHost(RecoStep::TPCClusterFinding, &clusterer, lane); laneHasData[lane] = true; // Include clusters in default debug mask, exclude other debug output by default - DoDebugAndDump(RecoStep::TPCClusterFinding, 131072, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off + DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererClusters, clusterer, &GPUTPCClusterFinder::DumpClusters, *mDebugFile); // clang-format off }); mRec->SetNActiveThreadsOuterLoop(1); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 24c74a661f18e..534c02a4c0a84 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUTrackingInputProvider.h" @@ -202,6 +203,7 @@ int32_t GPUChainTracking::RunTPCCompression() ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec); } mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR")); + DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile); return 0; } @@ -425,5 +427,6 @@ int32_t GPUChainTracking::RunTPCDecompression() } mRec->PopNonPersistentMemory(RecoStep::TPCDecompression, qStr2Tag("TPCDCMPR")); } + DoDebugDump(GPUChainTrackingDebugFlags::TPCDecompressedClusters, &GPUChainTracking::DumpClusters, *mDebugFile, mIOPtrs.clustersNative); return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h new file mode 100644 index 0000000000000..810f40a1d8654 --- /dev/null +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -0,0 +1,79 @@ +// Copyright 2019-2020 
CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUChainTrackingDEBUG.h +/// \author David Rohr + +#ifndef GPUCHAINTRACKINGDEBUG_H +#define GPUCHAINTRACKINGDEBUG_H + +#include +#include +#include + +namespace o2::gpu +{ +// NOTE: Values below 262144 are activated by default with --debug 6 in GPUSettingsList.h::debugMask +enum GPUChainTrackingDebugFlags : uint32_t { + TPCSectorTrackingData = 1, + TPCPreLinks = 2, + TPCLinks = 4, + TPCStartHits = 8, + TPCTracklets = 16, + TPCSectorTracks = 32, + TPCHitWeights = 256, + TPCCompressedClusters = 512, + TPCDecompressedClusters = 1024, + TPCMergingRanges = 2048, + TPCMergingSectorTracks = 4096, + TPCMergingMergedTracks = 8192, + TPCMergingCollectedTracks = 16384, + TPCMergingCE = 32768, + TPCMergingRefit = 65536, + TPCClustererClusters = 131072, + TPCClusterer = 262144, + TPCClustererDigits = 262144 << 1, + TPCClustererPeaks = 262144 << 2, + TPCClustererSuppressedPeaks = 262144 << 3, + TPCClustererChargeMap = 262144 << 4, + TPCClustererZeroedCharges = 262144 << 5 +}; + +template +inline bool GPUChain::DoDebugAndDump(GPUChain::RecoStep step, uint32_t mask, bool transfer, T& processor, S T::*func, Args&&... args) +{ + if (GetProcessingSettings().keepAllMemory) { + if (transfer) { + TransferMemoryResourcesToHost(step, &processor, -1, true); + } + std::function lambda = [&processor, &func](Args&... 
args_tmp) { + if (func) { + (processor.*func)(args_tmp...); + } + }; + return DoDebugDump(mask, lambda, args...); + } + return false; +} + +template +inline bool GPUChain::DoDebugDump(uint32_t mask, std::function func, Args&... args) +{ + if (GetProcessingSettings().debugLevel >= 6 && (mask == 0 || (GetProcessingSettings().debugMask & mask))) { + func(args...); + return true; + } + return false; +} + +} // namespace o2::gpu + +#endif diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 53bdfbadd4b25..903505068ad2c 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -333,3 +333,18 @@ void GPUChainTracking::RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* cluster } } } + +void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters) +{ + out << "\nTPC Clusters:\n"; + for (uint32_t iSec = 0; iSec < GPUCA_NSECTORS; iSec++) { + out << "TPCClusters - Sector " << iSec << "\n"; + for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { + out << " Row: " << i << ": " << clusters->nClusters[iSec][i] << " clusters:\n"; + for (uint32_t j = 0; j < clusters->nClusters[iSec][i]; j++) { + const auto& cl = clusters->clusters[iSec][i][j]; + out << " " << std::hex << cl.timeFlagsPacked << std::dec << " " << cl.padPacked << " " << int32_t{cl.sigmaTimePacked} << " " << int32_t{cl.sigmaPadPacked} << " " << cl.qMax << " " << cl.qTot << "\n"; + } + } + } +} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index a38148ccb375a..6e86be03e7950 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include 
"GPUDefParametersRuntime.h" #include "GPUO2DataTypes.h" @@ -72,7 +73,7 @@ void GPUChainTracking::RunTPCTrackingMerger_MergeBorderTracks(int8_t withinSecto runKernel(GetGridAuto(0, deviceType), i, withinSector, mergeMode); } } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRanges, doGPU, Merger, &GPUTPCGMMerger::DumpMergeRanges, *mDebugFile, withinSector, mergeMode); mRec->ReturnVolatileDeviceMemory(); } @@ -135,14 +136,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({{GPUCA_NSECTORS, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingSectorTracks, doGPU, Merger, &GPUTPCGMMerger::DumpSectorTracks, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -157,7 +158,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, 
numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -167,14 +168,14 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel({{1, -WarpSize(), 0, deviceType}}, 1); runKernel({{1, -WarpSize(), 0, deviceType}}, 1); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCollectedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpCollected, *mDebugFile); if (param().rec.tpc.mergeCE) { runKernel(GetGridAuto(0, deviceType), true); RunTPCTrackingMerger_MergeBorderTracks(-1, 1, deviceType); RunTPCTrackingMerger_MergeBorderTracks(-1, 2, deviceType); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingCE, doGPU, Merger, &GPUTPCGMMerger::DumpMergeCE, *mDebugFile); } int32_t waitForTransfer = 0; if (doGPU) { @@ -201,7 +202,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); if (doGPU) { 
CondWaitEvent(waitForTransfer, &mEvents->single); @@ -227,7 +228,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0)); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, Merger, &GPUTPCGMMerger::DumpRefit, *mDebugFile); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); @@ -240,7 +241,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); } - DoDebugAndDump(RecoStep::TPCMerging, 2048, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); if (doGPU) { RecordMarker(&mEvents->single, 0); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index 635641c00ae14..ef38d53173c2b 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -13,6 +13,7 @@ /// \author David Rohr #include "GPUChainTracking.h" +#include "GPUChainTrackingDebug.h" #include "GPULogging.h" #include "GPUO2DataTypes.h" #include "GPUMemorySizeScalers.h" @@ -176,7 +177,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().debugLevel >= 6) { *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; - if (GetProcessingSettings().debugMask & 1) { + if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCSectorTrackingData) { if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); } @@ 
-191,13 +192,13 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().keepDisplayMemory) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); memcpy(trk.LinkTmpMemory(), mRec->Res(trk.MemoryResLinks()).Ptr(), mRec->Res(trk.MemoryResLinks()).Size()); - if (GetProcessingSettings().debugMask & 2) { + if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCPreLinks) { trk.DumpLinks(*mDebugFile, 0); } } runKernel({GetGridBlk(GPUCA_ROW_COUNT - 2, useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 4, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCLinks, trk, &GPUTPCTracker::DumpLinks, *mDebugFile, 1); runKernel({GetGridBlk(GPUCA_ROW_COUNT - 6, useStream), {iSector}}); if (mRec->getGPUParameters(doGPU).par_SORT_STARTHITS) { @@ -206,7 +207,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().deterministicGPUReconstruction) { runKernel({GetGrid(1, 1, useStream), {iSector}}); } - DoDebugAndDump(RecoStep::TPCSectorTracking, 32, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCStartHits, trk, &GPUTPCTracker::DumpStartHits, *mDebugFile); if (GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { trk.UpdateMaxData(); @@ -215,8 +216,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } runKernel({GetGridAuto(useStream), {iSector}}); - DoDebugAndDump(RecoStep::TPCSectorTracking, 128, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); - if (GetProcessingSettings().debugMask & 256 && GetProcessingSettings().deterministicGPUReconstruction < 2) { + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCTracklets, trk, &GPUTPCTracker::DumpTrackletHits, *mDebugFile); + if (GetProcessingSettings().debugMask & 
GPUChainTrackingDebugFlags::TPCHitWeights && GetProcessingSettings().deterministicGPUReconstruction < 2) { trk.DumpHitWeights(*mDebugFile); } @@ -230,7 +231,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() if (GetProcessingSettings().debugLevel >= 3) { GPUInfo("Sector %u, Number of tracks: %d", iSector, *trk.NTracks()); } - DoDebugAndDump(RecoStep::TPCSectorTracking, 512, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); + DoDebugAndDump(RecoStep::TPCSectorTracking, GPUChainTrackingDebugFlags::TPCSectorTracks, trk, &GPUTPCTracker::DumpTrackHits, *mDebugFile); }); mRec->SetNActiveThreadsOuterLoop(1); if (error) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h index 37399f5e4863f..35e2a7297338f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h @@ -36,6 +36,7 @@ class ConstMCTruthContainerView; namespace tpc { struct ClusterNative; +struct ClusterNativeAccess; class Digit; } // namespace tpc diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx index da30375149b7c..d676cf9cd3887 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinderDump.cxx @@ -157,7 +157,7 @@ void GPUTPCClusterFinder::DumpClusters(std::ostream& out) { out << "\nClusterer - Clusters - Sector " << mISector << " - Fragment " << mPmemory->fragment.index << "\n"; - for (int32_t i = 0; i < GPUCA_ROW_COUNT; i++) { + for (uint32_t i = 0; i < GPUCA_ROW_COUNT; i++) { size_t N = mPclusterInRow[i]; const tpc::ClusterNative* row = &mPclusterByRow[i * mNMaxClusterPerRow]; From f1fbd3547c5f944b35e114a04a06f8a80367dcea Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Thu, 8 May 2025 21:41:40 +0200 Subject: [PATCH 0479/1914] ctpdev: consistencycheck debug and ctpcfg config added (#14247) 
* dev: checkConsistency and ctp.cfg * dev: consistency checker * dev:ctpcfg to CCDB * clang * dev:ctpcfg * clang * fix * fix --------- Co-authored-by: Roman Lietava --- .../include/DataFormatsCTP/Configuration.h | 13 +++ .../Detectors/CTP/src/Configuration.cxx | 65 +++++++++++ .../Detectors/CTP/src/DataFormatsCTPLinkDef.h | 2 + .../CTPReconstruction/RawDataDecoder.h | 14 ++- .../CTP/reconstruction/src/RawDataDecoder.cxx | 107 ++++++++---------- Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 17 ++- .../include/CTPWorkflowScalers/RunManager.h | 2 +- .../CTPWorkflowScalers/ctpCCDBManager.h | 7 +- .../CTP/workflowScalers/src/RunManager.cxx | 10 +- .../CTP/workflowScalers/src/ctp-proxy.cxx | 7 +- .../workflowScalers/src/ctpCCDBManager.cxx | 30 +++++ 11 files changed, 205 insertions(+), 69 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h index 4ff0256f33827..fdd73986f1eaf 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h @@ -173,6 +173,8 @@ class CTPConfiguration uint64_t getDecrtiptorInputsMask(const std::string& name) const; std::map> getDet2InputMap(); uint64_t getTriggerClassMask() const; + uint64_t getTriggerClassMaskWInputs() const; + uint64_t getTriggerClassMaskWInputsNoTrgDets() const; std::vector getTriggerClassList() const; uint32_t getRunNumber() { return mRunNumber; }; std::vector getDetectorList() const; @@ -203,6 +205,17 @@ class CTPConfiguration std::ostream& operator<<(std::ostream& in, const CTPConfiguration& conf); +struct CtpCfg { + CtpCfg() = default; + std::string filename = "ctp.cfg"; + int readAndSave(std::string& path); + uint32_t TFOrbits = 0; + int ccdb = -1; // -1 means def constructor was called + uint32_t orbitShift = 0; + uint32_t irInputs_1_24 = 0; + uint32_t irInputs_25_48 = 0; + ClassDefNV(CtpCfg, 1) +}; } // namespace ctp } // 
namespace o2 #endif //_CTP_CONFIGURATION_H_ diff --git a/DataFormats/Detectors/CTP/src/Configuration.cxx b/DataFormats/Detectors/CTP/src/Configuration.cxx index 03f9b38db0e9f..38a49132db3d1 100644 --- a/DataFormats/Detectors/CTP/src/Configuration.cxx +++ b/DataFormats/Detectors/CTP/src/Configuration.cxx @@ -905,6 +905,30 @@ uint64_t CTPConfiguration::getTriggerClassMask() const } return clsmask; } +uint64_t CTPConfiguration::getTriggerClassMaskWInputs() const +{ + uint64_t clsmask = 0; + for (auto const& cls : mCTPClasses) { + if (cls.name.find("TRUE") != std::string::npos) { // ignoring internal ctp generators + continue; + } + clsmask |= cls.classMask; + } + return clsmask; +} +uint64_t CTPConfiguration::getTriggerClassMaskWInputsNoTrgDets() const +{ + uint64_t clsmask = 0; + for (auto const& cls : mCTPClasses) { + bool exclude = cls.name.find("TRUE") != std::string::npos; // ignoring internal ctp generators + exclude += cls.name.find("EMC") != std::string::npos; + exclude += cls.name.find("TRD") != std::string::npos; + exclude += cls.name.find("HMP") != std::string::npos; + if (!exclude) + clsmask |= cls.classMask; + } + return clsmask; +} // Hardware positions of classes std::vector CTPConfiguration::getTriggerClassList() const { @@ -1153,6 +1177,47 @@ int CTPInputsConfiguration::getInputIndexFromName(std::string& name) return 0xff; } +int CtpCfg::readAndSave(std::string& path) +{ + std::string file = path + filename; + std::ifstream ctpcfg(file); + if (ctpcfg.is_open()) { + std::string line; + while (std::getline(ctpcfg, line)) { + o2::utils::Str::trim(line); + if (line.size() == 0) { + continue; + } + if (line[0] == '#') { + continue; + } + std::vector tokens = o2::utils::Str::tokenize(line, ' '); + size_t ntokens = tokens.size(); + if (ntokens < 2) { + LOG(warn) << "Not enough tokens"; + continue; + } + if (tokens[0].find("TForbits") != std::string::npos) { + TFOrbits = std::atol(tokens[1].c_str()); + } else if (tokens[0].find("ccdb") != std::string::npos) { 
+ ccdb = std::atoi(tokens[1].c_str()); + } else if (tokens[0].find("orbitshift") != std::string::npos) { + orbitShift = std::atol(tokens[1].c_str()); + } else if (tokens[0].find("ir_inputs") != std::string::npos) { + irInputs_1_24 = std::stoul(tokens[2].c_str(), nullptr, 16); + irInputs_25_48 = std::stoul(tokens[1].c_str(), nullptr, 16); + } else { + LOG(warn) << " Token not found:" << tokens[0]; + } + } + LOG(warn) << "Open file success:" << file; + } else { + LOG(warn) << "Can not open file:" << file; + return 1; + } + return 0; +} + std::ostream& o2::ctp::operator<<(std::ostream& in, const o2::ctp::CTPConfiguration& conf) { conf.printStream(in); diff --git a/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h b/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h index da21f779723f8..ac2a83d31edda 100644 --- a/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h +++ b/DataFormats/Detectors/CTP/src/DataFormatsCTPLinkDef.h @@ -55,4 +55,6 @@ #pragma link C++ class o2::ctp::TriggerOffsetsParam + ; #pragma link C++ class o2::conf::ConfigurableParamHelper < o2::ctp::TriggerOffsetsParam> + ; +#pragma link C++ class o2::ctp::CtpCfg + ; + #endif diff --git a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h index 7579e9dc1d6f5..8ebc7e0304561 100644 --- a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h +++ b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h @@ -55,7 +55,12 @@ class RawDataDecoder int init(); static int shiftNew(const o2::InteractionRecord& irin, uint32_t TFOrbit, std::bitset<48>& inpmask, int64_t shift, int level, std::map& digmap); static int shiftInputs(std::map& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask = 0xffffffffffffffff); - int checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask = 0xffffffffffffffff); + int checkReadoutConsistentncy(o2::pmr::vector& 
digits, uint64_t trgclassmask = 0xffffffffffffffff, uint64_t trigclassmaskNoTrgDets = 0xffffffffffffffff); + std::array getClassErrorsA() { return mClassErrorsA; } + std::array getClassErrorsB() { return mClassErrorsB; } + std::array getClassCountersA() { return mClassCountersA; } + std::array getClassCountersB() { return mClassCountersB; } + int getLostDueToShift() { return mLostDueToShift; } private: static constexpr uint32_t TF_TRIGGERTYPE_MASK = 0x800; @@ -83,8 +88,13 @@ class RawDataDecoder // error verbosness int mErrorIR = 0; int mErrorTCR = 0; - int mErrorMax = 3; + int mErrorMax = 5; bool mStickyError = false; + std::array mClassErrorsA{}; + std::array mClassErrorsB{}; // from inputs + std::array mClassCountersA{}; + std::array mClassCountersB{}; // from inputs + int mLostDueToShift = 0; CTPConfiguration mCTPConfig; }; } // namespace ctp diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index fa7fd673c7e85..797dd0e300519 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -294,13 +294,17 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask) { // int nClasswoInp = 0; // counting classes without input which should never happen - int nLM = 0; - int nL0 = 0; - int nL1 = 0; - int nTwI = 0; - int nTwoI = 0; - int nTwoIlost = 0; std::map digitsMapShifted; auto L0shift = o2::ctp::TriggerOffsetsParam::Instance().LM_L0; auto L1shift = L0shift + o2::ctp::TriggerOffsetsParam::Instance().L0_L1; @@ -593,86 +591,77 @@ int RawDataDecoder::shiftInputs(std::map& digit } } for (auto const& dig : digitsMapShifted) { - auto d = dig.second; - if ((d.CTPInputMask & LMMASKInputs).count()) { - nLM++; - } - if ((d.CTPInputMask & L0MASKInputs).count()) { - nL0++; - } - if ((d.CTPInputMask & L1MASKInputs).count()) { - nL1++; - } - if 
((d.CTPClassMask).to_ulong() & trgclassmask) { - if (d.CTPInputMask.count()) { - nTwI++; - } else { - if (d.intRecord.bc == (o2::constants::lhc::LHCMaxBunches - L1shift)) { // input can be lost because latency class-l1input = 1 - nTwoIlost++; - } else { - // LOG(error) << d.intRecord << " " << d.CTPClassMask << " " << d.CTPInputMask; - // std::cout << "ERROR:" << std::hex << d.CTPClassMask << " " << d.CTPInputMask << std::dec << std::endl; - nTwoI++; - } - } - } digits.push_back(dig.second); } - int ret = 0; - if (nTwoI) { // Trigger class wo Input - LOG(error) << "LM:" << nLM << " L0:" << nL0 << " L1:" << nL1 << " TwI:" << nTwI << " Trigger classes wo input:" << nTwoI; - ret = 64; - } - if (nTwoIlost) { - LOG(warn) << " Trigger classes wo input from diff latency 1:" << nTwoIlost; - } - return ret; + return 0; } // -int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask) +int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask, uint64_t trgclassmaskNoTrgDet) { + LOG(debug) << "Checking readout"; int ret = 0; - int lost = 0; + static int nerror = 0; for (auto const& digit : digits) { // if class mask => inps for (int i = 0; i < digit.CTPClassMask.size(); i++) { - if (digit.CTPClassMask[i] & trgclassmask) { + bool trgcls = trgclassmask & (1ull << i); + if (digit.CTPClassMask[i] & trgcls) { const CTPClass* cls = mCTPConfig.getCTPClassFromHWIndex(i); if (cls == nullptr) { - LOG(error) << "Class mask index not found in CTP config:" << i; + if (nerror < mErrorMax) { + LOG(error) << "Class mask index not found in CTP config:" << i; + nerror++; + } ret = 128; continue; } + mClassCountersA[i]++; + if (cls->descriptor == nullptr) + continue; uint64_t clsinpmask = cls->descriptor->getInputsMask(); uint64_t diginpmask = digit.CTPInputMask.to_ullong(); if (!((clsinpmask & diginpmask) == clsinpmask)) { - LOG(error) << "CTP class:" << cls->name << " inpmask:" << clsinpmask << " not compatible with inputs 
mask:" << diginpmask; + if (nerror < mErrorMax) { + LOG(error) << "Cls=>Inps: CTP class:" << cls->name << " inpmask:" << clsinpmask << " not compatible with inputs mask:" << diginpmask; + nerror++; + } + mClassErrorsA[i]++; ret = 128; } } } // if inps => class mask for (auto const& cls : mCTPConfig.getCTPClasses()) { - uint64_t clsinpmask = cls.descriptor->getInputsMask(); + // cls.printStream(std::cout); + if (cls.descriptor == nullptr) + continue; + uint64_t clsinpmask = cls.descriptor->getInputsMask(); // class definition uint64_t diginpmask = digit.CTPInputMask.to_ullong(); uint64_t digclsmask = digit.CTPClassMask.to_ullong(); if ((clsinpmask & diginpmask) == clsinpmask) { - if ((cls.classMask & digclsmask) == 0) { - int32_t BCShiftCorrection = -o2::ctp::TriggerOffsetsParam::Instance().customOffset[o2::detectors::DetID::CTP]; - int32_t offset = BCShiftCorrection + o2::ctp::TriggerOffsetsParam::Instance().LM_L0 + o2::ctp::TriggerOffsetsParam::Instance().L0_L1_classes - 1; - offset = o2::constants::lhc::LHCMaxBunches - offset; - if (digit.intRecord.bc < offset) { - LOG(error) << "CTP class:" << cls.name << " inpmask:" << clsinpmask << " cls mask:" << cls.classMask << " not found in digit:" << digit; - ret = 256; - } else { - lost++; + if (cls.classMask & trgclassmask) { + mClassCountersB[cls.getIndex()]++; + if ((cls.classMask & digclsmask) == 0) { + int32_t BCShiftCorrection = -o2::ctp::TriggerOffsetsParam::Instance().customOffset[o2::detectors::DetID::CTP]; + int32_t offset = BCShiftCorrection + o2::ctp::TriggerOffsetsParam::Instance().LM_L0 + o2::ctp::TriggerOffsetsParam::Instance().L0_L1_classes - 1; + offset = o2::constants::lhc::LHCMaxBunches - offset; + if (digit.intRecord.bc < offset) { + if ((nerror < mErrorMax) && (cls.classMask & ~trgclassmaskNoTrgDet)) { + LOG(info) << "Inp=>Cls: CTP class:" << cls.name << " inpmask:" << clsinpmask << " cls mask:" << cls.classMask << " not found in digit:" << digit; + nerror++; + } + mClassErrorsB[cls.getIndex()]++; + 
ret = 256; + } else { + mLostDueToShift++; + } } } } } } - if (lost) { - LOG(info) << "LOST classes because of shift:" << lost; + if (mLostDueToShift) { + LOG(debug) << "LOST classes because of shift:" << mLostDueToShift; } return ret; } diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 753f88114a14b..3f7c729b351a3 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -69,8 +69,21 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) o0 = TFOrbits[i]; } std::cout << std::endl; - std::cout << "Number of missing TF:" << nmiss << std::endl; - std::cout << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl; + LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); + LOG(info) << "Number of missing TF:" << nmiss << std::endl; + if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) + LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl; + std::array clsA = mDecoder.getClassCountersA(); + std::array clsB = mDecoder.getClassCountersB(); + std::array clsEA = mDecoder.getClassErrorsA(); + std::array clsEB = mDecoder.getClassErrorsB(); + + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; + if (clsEA[i]) + LOG(error) << " Class without inputs:"; + LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; + } } void RawDecoderSpec::run(framework::ProcessingContext& ctx) { diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h index 72fb9c2056367..6d2172e3da165 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h +++ 
b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/RunManager.h @@ -72,7 +72,7 @@ class CTPRunManager : public ctpCCDBManager int mEOX = 0; // redundancy check int mNew = 1; // 1 - no CCDB: used for QC int mQCWritePeriod = 3; // Time in 10secs between two writes to QCCD - ClassDefNV(CTPRunManager, 7); + ClassDefNV(CTPRunManager, 8); }; } // namespace ctp } // namespace o2 diff --git a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h index c968a83183624..4237ad4501fcc 100644 --- a/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h +++ b/Detectors/CTP/workflowScalers/include/CTPWorkflowScalers/ctpCCDBManager.h @@ -29,11 +29,13 @@ class ctpCCDBManager int saveRunConfigToCCDB(CTPConfiguration* cfg, long timeStart); int saveSoxOrbit(uint32_t runNumber, uint32_t soxOrbit, long timeStart); int saveOrbitReset(long timeStamp); + int saveCtpCfg(uint32_t runNumber, long timeStamp); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run, bool& ok); static CTPConfiguration getConfigFromCCDB(long timestamp, std::string run); CTPRunScalers getScalersFromCCDB(long timestamp, std::string, bool& ok); static void setCCDBHost(std::string host) { mCCDBHost = host; }; static void setQCDBHost(std::string host) { mQCDBHost = host; }; + void setCtpCfgDir(std::string& ctpcfgdir) { mCtpCfgDir = ctpcfgdir; }; protected: /// Database constants @@ -46,7 +48,10 @@ class ctpCCDBManager const std::string mQCDBPathCTPScalers = "qc/CTP/Scalers"; const std::string mCCDBPathSoxOrbit = "CTP/Calib/FirstRunOrbit"; const std::string mCCDBPathOrbitReset = "CTP/Calib/OrbitReset"; - ClassDefNV(ctpCCDBManager, 1); + const std::string mCCDBPathCtpCfg = "CTP/Config/CtpCfg"; + std::string mCtpCfgDir; + + ClassDefNV(ctpCCDBManager, 2); }; } // namespace ctp } // namespace o2 diff --git a/Detectors/CTP/workflowScalers/src/RunManager.cxx 
b/Detectors/CTP/workflowScalers/src/RunManager.cxx index 5d0b906e28088..054505aea7ba6 100644 --- a/Detectors/CTP/workflowScalers/src/RunManager.cxx +++ b/Detectors/CTP/workflowScalers/src/RunManager.cxx @@ -87,6 +87,7 @@ void CTPRunManager::init() LOG(info) << "QCDB writing every:" << mQCWritePeriod << " 10 secs"; LOG(info) << "CCDB host:" << mCCDBHost; LOG(info) << "CTP vNew cfg:" << mNew; + LOG(info) << "ctp.cfg dir:" << mCtpCfgDir; LOG(info) << "CTPRunManager initialised."; } int CTPRunManager::loadRun(const std::string& cfg) @@ -106,7 +107,7 @@ int CTPRunManager::loadRun(const std::string& cfg) timeStamp = (tt * 1000.); LOG(info) << "Timestamp file:" << timeStamp; cfgmod = cfg.substr(pos, cfg.size()); - LOG(info) << "ctpcfg: using ctp time"; + LOG(info) << "ctpconfig: using ctp time"; } } CTPActiveRun* activerun = new CTPActiveRun; @@ -122,7 +123,9 @@ int CTPRunManager::loadRun(const std::string& cfg) // mRunsLoaded[runnumber] = activerun; saveRunConfigToCCDB(&activerun->cfg, timeStamp); - + if (mCtpCfgDir != "none") { + saveCtpCfg(runnumber, timeStamp); + } return 0; } int CTPRunManager::setRunConfigBK(uint32_t runNumber, const std::string& cfg) @@ -272,6 +275,9 @@ int CTPRunManager::processMessage(std::string& topic, const std::string& message } return ret; } + if (topic.find("rocnts") != std::string::npos) { + return 0; + } static int nerror = 0; if (topic.find("sox") != std::string::npos) { // get config diff --git a/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx b/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx index b2896215d1c6a..f8f8ad3c95fbb 100644 --- a/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx +++ b/Detectors/CTP/workflowScalers/src/ctp-proxy.cxx @@ -46,13 +46,14 @@ #include "BookkeepingApi/BkpClient.h" using namespace o2::framework; using DetID = o2::detectors::DetID; -InjectorFunction dcs2dpl(std::string& ccdbhost, std::string& bkhost, std::string& qchost, int qcwriteperiod) +InjectorFunction dcs2dpl(std::string& ccdbhost, std::string& 
bkhost, std::string& qchost, int qcwriteperiod, std::string& ctpcfgdir) { auto runMgr = std::make_shared(); runMgr->setCCDBHost(ccdbhost); runMgr->setBKHost(bkhost); runMgr->setQCDBHost(qchost); runMgr->setQCWritePeriod(qcwriteperiod); + runMgr->setCtpCfgDir(ctpcfgdir); runMgr->init(); // runMgr->setClient(client); return [runMgr](TimingInfo&, ServiceRegistryRef const& services, fair::mq::Parts& parts, ChannelRetriever channelRetriever, size_t newTimesliceId, bool& stop) -> bool { @@ -78,6 +79,7 @@ void customize(std::vector& workflowOptions) workflowOptions.push_back(ConfigParamSpec{"ccdb-host", VariantType::String, "http://o2-ccdb.internal:8080", {"ccdb host"}}); workflowOptions.push_back(ConfigParamSpec{"bk-host", VariantType::String, "none", {"bk host"}}); workflowOptions.push_back(ConfigParamSpec{"qc-host", VariantType::String, "none", {"qc host"}}); + workflowOptions.push_back(ConfigParamSpec{"ctpcfg-dir", VariantType::String, "none", {"ctp.cfg file directory"}}); workflowOptions.push_back(ConfigParamSpec{"qc-writeperiod", VariantType::Int, 30, {"Period of writing to QCDB in units of 10secs, default = 30 (5 mins)"}}); } @@ -104,6 +106,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) std::string bkhost = config.options().get("bk-host"); std::string qchost = config.options().get("qc-host"); int qcwriteperiod = config.options().get("qc-writeperiod"); + std::string ctpcfgdir = config.options().get("ctpcfg-dir"); if (chan.empty()) { throw std::runtime_error("input channel is not provided"); } @@ -118,7 +121,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& config) std::move(ctpCountersOutputs), // this is just default, can be overriden by --ctp-config-proxy '--channel-config..' 
chan.c_str(), - dcs2dpl(ccdbhost, bkhost, qchost, qcwriteperiod)); + dcs2dpl(ccdbhost, bkhost, qchost, qcwriteperiod, ctpcfgdir)); ctpProxy.labels.emplace_back(DataProcessorLabel{"input-proxy"}); LOG(info) << "===> Proxy done"; WorkflowSpec workflow; diff --git a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx index cbe8fe5dd675f..58850d88eb2c6 100644 --- a/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx +++ b/Detectors/CTP/workflowScalers/src/ctpCCDBManager.cxx @@ -167,6 +167,36 @@ int ctpCCDBManager::saveOrbitReset(long timeStamp) } return 0; } +int ctpCCDBManager::saveCtpCfg(uint32_t runNumber, long timeStart) +{ + if (mCCDBHost == "none") { + LOG(info) << "CtpCfg not written to CCDB none"; + return 0; + } + CtpCfg ctpcfg; + int ret = ctpcfg.readAndSave(mCtpCfgDir); + if (ret == 0) { + using namespace std::chrono_literals; + std::chrono::seconds days3 = 259200s; + std::chrono::seconds min10 = 600s; + long time3days = std::chrono::duration_cast(days3).count(); + long time10min = std::chrono::duration_cast(min10).count(); + long tmin = timeStart - time10min; + long tmax = timeStart + time3days; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + metadata["runNumber"] = std::to_string(runNumber); + api.init(mCCDBHost.c_str()); // or http://localhost:8080 for a local installation + // store abitrary user object in strongly typed manner + ret = api.storeAsTFileAny(&ctpcfg, mCCDBPathCtpCfg, metadata, tmin, tmax); + if (ret == 0) { + LOG(info) << "CtpCfg saved in ccdb:" << mCCDBHost << " tmin:" << tmin << " tmax:" << tmax; + } else { + LOG(error) << "CtpCfg Problem writing to database ret:" << ret; + } + } + return ret; +} CTPConfiguration ctpCCDBManager::getConfigFromCCDB(long timestamp, std::string run, bool& ok) { auto& mgr = o2::ccdb::BasicCCDBManager::instance(); From edea1644f2e9190af8aa8c50f372dc23a5902155 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 21:16:48 +0200 
Subject: [PATCH 0480/1914] GPU: Add debugSuffix option for debug files --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 6 +++--- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 638a3ed43d2aa..9e0aa32155f0d 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -296,6 +296,7 @@ AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GP AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugLogSuffix, std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") AddOption(deterministicGPUReconstruction, int32_t, -1, "", 0, "Make CPU and GPU debug output comparable (sort / skip concurrent parts), -1 = automatic if debugLevel >= 6", def(1)) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index f8d4165477220..c1c3e368ce90c 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -378,7 +378,7 @@ int32_t GPUChainTracking::Init() } if (GetProcessingSettings().debugLevel >= 6) { - std::string filename = std::string(mRec->IsGPU() ? "GPU" : "CPU") + (mRec->slaveId() != -1 ? 
(std::string("_slave") + std::to_string(mRec->slaveId())) : std::string(mRec->slavesExist() ? "_master" : "")) + ".out"; + std::string filename = std::string(mRec->IsGPU() ? "GPU" : "CPU") + (mRec->slaveId() != -1 ? (std::string("_slave") + std::to_string(mRec->slaveId())) : std::string(mRec->slavesExist() ? "_master" : "")) + GetProcessingSettings().debugLogSuffix + ".out"; mDebugFile->open(filename.c_str()); } @@ -838,7 +838,7 @@ int32_t GPUChainTracking::RunChainFinalize() int32_t iKey; do { - Sleep(10); + usleep(10000); if (GetProcessingSettings().eventDisplay->EnableSendKey()) { iKey = kbhit() ? getch() : 0; if (iKey == 27) { @@ -847,7 +847,7 @@ int32_t GPUChainTracking::RunChainFinalize() break; } else if (iKey) { while (GetProcessingSettings().eventDisplay->getSendKey() != 0) { - Sleep(1); + usleep(1000); } GetProcessingSettings().eventDisplay->setSendKey(iKey); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 5e7672022b3ff..035e257ca7952 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -47,8 +47,6 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" -#include "utils/linux_helpers.h" - using namespace o2::gpu; #include "GPUO2DataTypes.h" From 408bae4d0040288a7740d6b0b9328ea41e9585ff Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 20:53:50 +0200 Subject: [PATCH 0481/1914] GPU: Add sorting of tracks of attached compressed clusters in deterministic mode --- .../DataCompression/GPUTPCCompression.cxx | 26 ++++++------ GPU/GPUTracking/Global/GPUChainTracking.h | 1 + .../Global/GPUChainTrackingCompression.cxx | 4 ++ .../GPUChainTrackingDebugAndProfiling.cxx | 42 +++++++++++++++++++ 4 files changed, 61 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index a107f749ddd77..82834a694d0ba 100644 --- 
a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -140,7 +140,7 @@ void GPUTPCCompression::DumpCompressedClusters(std::ostream& out) for (uint32_t i = 0; i < NSECTORS; i++) { out << "Sector " << i << ": "; for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - out << O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] << ", "; + out << (O.nSliceRowClusters ? O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j] : 0) << ", "; } out << "\n"; } @@ -153,18 +153,20 @@ void GPUTPCCompression::DumpCompressedClusters(std::ostream& out) } out << "\n\nUnattached Clusters\n"; uint32_t offset = 0; - for (uint32_t i = 0; i < NSECTORS; i++) { - for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - out << "Sector " << i << " Row " << j << ": "; - for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) { - if (k && k % 10 == 0) { - out << "\n "; + if (O.nSliceRowClusters) { + for (uint32_t i = 0; i < NSECTORS; i++) { + for (uint32_t j = 0; j < GPUCA_ROW_COUNT; j++) { + out << "Sector " << i << " Row " << j << ": "; + for (uint32_t k = 0; k < O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; k++) { + if (k && k % 10 == 0) { + out << "\n "; + } + const uint32_t l = k + offset; + out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " << (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] "; } - const uint32_t l = k + offset; - out << "[" << (uint32_t)O.qTotU[l] << ", " << (uint32_t)O.qMaxU[l] << ", " << (uint32_t)O.flagsU[l] << ", " << (int32_t)O.padDiffU[l] << ", " << (int32_t)O.timeDiffU[l] << ", " << (uint32_t)O.sigmaPadU[l] << ", " << (uint32_t)O.sigmaTimeU[l] << "] "; + offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; + out << "\n"; } - offset += O.nSliceRowClusters[i * GPUCA_ROW_COUNT + j]; - out << "\n"; } } out << "\n\nAttached Clusters\n"; @@ -175,7 +177,7 @@ void 
GPUTPCCompression::DumpCompressedClusters(std::ostream& out) if (k && k % 10 == 0) { out << "\n "; } - const uint32_t l1 = k + offset, l2 = k + offset - i; + const uint32_t l1 = offset + k, l2 = offset - i + k - 1; out << "["; if (k) { out << (int32_t)O.rowDiffA[l2] << ", " << (int32_t)O.sliceLegDiffA[l2] << ", " << (uint32_t)O.padResA[l2] << ", " << (uint32_t)O.timeResA[l2] << ", "; diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 13773a97d4e3d..2a2996895dbcf 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -235,6 +235,7 @@ class GPUChainTracking : public GPUChain void PrintDebugOutput(); void PrintOutputStat(); static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); + static void DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx index 534c02a4c0a84..3bcd2390eae52 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx @@ -203,6 +203,10 @@ int32_t GPUChainTracking::RunTPCCompression() ((GPUChainTracking*)GetNextChainInQueue())->mRec->BlockStackedMemory(mRec); } mRec->PopNonPersistentMemory(RecoStep::TPCCompression, qStr2Tag("TPCCOMPR")); + if (GetProcessingSettings().deterministicGPUReconstruction) { + SynchronizeGPU(); + DebugSortCompressedClusters(Compressor.mOutputFlat); + } DoDebugAndDump(RecoStep::TPCCompression, GPUChainTrackingDebugFlags::TPCCompressedClusters, Compressor, &GPUTPCCompression::DumpCompressedClusters, *mDebugFile); return 0; } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index 903505068ad2c..00cf127162b94 100644 --- 
a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -20,6 +20,7 @@ #include #include #include +#include #ifdef GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE #include "bitmapfile.h" @@ -348,3 +349,44 @@ void GPUChainTracking::DumpClusters(std::ostream& out, const o2::tpc::ClusterNat } } } + +void GPUChainTracking::DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls) +{ + o2::tpc::CompressedClusters c = *cls; + std::vector sorted(c.nTracks), offsets(c.nTracks); + std::iota(sorted.begin(), sorted.end(), 0); + auto sorter = [&c](const auto a, const auto b) { + return std::tie(c.sliceA[a], c.rowA[a], c.timeA[a], c.padA[a], c.qPtA[a]) < + std::tie(c.sliceA[b], c.rowA[b], c.timeA[b], c.padA[b], c.qPtA[b]); + }; + std::sort(sorted.begin(), sorted.end(), sorter); + uint32_t offset = 0; + for (uint32_t i = 0; i < c.nTracks; i++) { + offsets[i] = offset; + offset += c.nTrackClusters[i]; + } + + auto sortArray = [&c, &sorted, &offsets](auto* src, size_t totalSize, auto getOffset, auto getSize) { + auto buf = std::make_unique[]>(totalSize); + memcpy(buf.get(), src, totalSize * sizeof(*src)); + uint32_t targetOffset = 0; + for (uint32_t i = 0; i < c.nTracks; i++) { + const uint32_t j = sorted[i]; + memcpy(src + targetOffset, buf.get() + getOffset(offsets[j], j), getSize(j) * sizeof(*src)); + targetOffset += getSize(j); + } + }; + auto sortMultiple = [&sortArray](size_t totalSize, auto getOffset, auto getSize, auto&&... 
arrays) { + (..., sortArray(std::forward(arrays), totalSize, getOffset, getSize)); + }; + auto getFullOffset = [](uint32_t off, uint32_t ind) { return off; }; + auto getReducedOffset = [](uint32_t off, uint32_t ind) { return off - ind; }; + auto getIndex = [](uint32_t off, uint32_t ind) { return ind; }; + auto getN = [&c](uint32_t j) { return c.nTrackClusters[j]; }; + auto getN1 = [&c](uint32_t j) { return c.nTrackClusters[j] - 1; }; + auto get1 = [](uint32_t j) { return 1; }; + + sortMultiple(c.nAttachedClusters, getFullOffset, getN, c.qTotA, c.qMaxA, c.flagsA, c.sigmaPadA, c.sigmaTimeA); + sortMultiple(c.nAttachedClustersReduced, getReducedOffset, getN1, c.rowDiffA, c.sliceLegDiffA, c.padResA, c.timeResA); + sortMultiple(c.nTracks, getIndex, get1, c.qPtA, c.rowA, c.sliceA, c.timeA, c.padA, c.nTrackClusters); // NOTE: This must be last, since nTrackClusters is used for handling the arrays above! +} From 64dd944342374edfef4da03b1e5b64c2cfb5bc3f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 23:26:22 +0200 Subject: [PATCH 0482/1914] GPU: Make GPUCommonAlgorithm::sortInBlock deterministic with GPUCA_DETERMINISTIC_MODE --- GPU/Common/GPUCommonAlgorithm.h | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index 417c9e0d1f8c1..d0643391246a8 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -283,21 +283,29 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp) #ifndef GPUCA_GPUCODE GPUCommonAlgorithm::sort(begin, end, comp); #else - int32_t n = end - begin; - for (int32_t i = 0; i < n; i++) { - for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { - int32_t offset = i % 2; - int32_t curPos = 2 * tIdx + offset; - int32_t nextPos = curPos + 1; - - if (nextPos < n) { - if (!comp(begin[curPos], begin[nextPos])) { - IterSwap(&begin[curPos], &begin[nextPos]); + 
GPUCA_DETERMINISTIC_CODE( // clang-format off + GPUbarrier(); + if (get_local_id(0) == 0) { + GPUCommonAlgorithm::sort(begin, end, comp); + } + GPUbarrier(); + , // !GPUCA_DETERMINISTIC_CODE + int32_t n = end - begin; + for (int32_t i = 0; i < n; i++) { + for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { + int32_t offset = i % 2; + int32_t curPos = 2 * tIdx + offset; + int32_t nextPos = curPos + 1; + + if (nextPos < n) { + if (!comp(begin[curPos], begin[nextPos])) { + IterSwap(&begin[curPos], &begin[nextPos]); + } } } + GPUbarrier(); } - GPUbarrier(); - } + ) // clang-format on #endif } From fff75cfa60148feb0a51b524e956aeec7167a572 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 8 May 2025 23:51:00 +0200 Subject: [PATCH 0483/1914] GPU: Use total sorting in deterministic mode for unattached clusters --- .../GPUTPCCompressionKernels.cxx | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 5dbbf63ca8264..3b88c8764d0fd 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -148,19 +148,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread -GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<0>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked(); } template <> -GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<1>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { return mClsPtr[a].padPacked < mClsPtr[b].padPacked; } template <> -GPUd() bool 
GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<2>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { if (mClsPtr[a].getTimePacked() >> 3 == mClsPtr[b].getTimePacked() >> 3) { return mClsPtr[a].padPacked < mClsPtr[b].padPacked; @@ -169,7 +169,7 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<2>::opera } template <> -GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<3>::operator()(uint32_t a, uint32_t b) const +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare::operator()(uint32_t a, uint32_t b) const { if (mClsPtr[a].padPacked >> 3 == mClsPtr[b].padPacked >> 3) { return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked(); @@ -177,6 +177,18 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<3>::opera return mClsPtr[a].padPacked < mClsPtr[b].padPacked; } +template <> // Deterministic comparison +GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<4>::operator()(uint32_t a, uint32_t b) const +{ + if (mClsPtr[a].getTimePacked() != mClsPtr[b].getTimePacked()) { + return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked(); + } + if (mClsPtr[a].padPacked != mClsPtr[b].padPacked) { + return mClsPtr[a].padPacked < mClsPtr[b].padPacked; + } + return mClsPtr[a].qTot < mClsPtr[b].qTot; +} + template <> GPUdii() void GPUTPCCompressionKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors) { @@ -261,15 +273,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder 
== GPUSettings::SortPad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } + , // !GPUCA_DETERMINISTIC_CODE + if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } + ) // clang-format on GPUbarrier(); } From b1c76a5365321adbe8f61bd8085ccb258a6304f1 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Fri, 9 May 2025 11:40:19 +0200 Subject: [PATCH 0484/1914] fix: remove debug --- Detectors/CTP/reconstruction/src/RawDataDecoder.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 797dd0e300519..74bd08ce943ee 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -302,8 +302,7 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector Date: Fri, 9 May 2025 12:18:39 +0200 Subject: [PATCH 0485/1914] Avoid including BasicCCDBManager in headers 
exposed to ROOT. (#14254) Root will happily embed references to curl.h and / or the kernel headers if found on the build machine and die if they are not there on the node. --- .../CTP/include/DataFormatsCTP/CTPRateFetcher.h | 12 +++++++----- DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx | 1 + 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h index 6b7802feb15ad..78c4245b16b20 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/CTPRateFetcher.h @@ -14,14 +14,16 @@ #include -#include "CCDB/BasicCCDBManager.h" #include "DataFormatsParameters/GRPLHCIFData.h" #include "DataFormatsCTP/Configuration.h" #include "DataFormatsCTP/Scalers.h" -namespace o2 +namespace o2::ccdb { -namespace ctp +class BasicCCDBManager; +} + +namespace o2::ctp { class CTPRateFetcher @@ -54,7 +56,7 @@ class CTPRateFetcher o2::parameters::GRPLHCIFData mLHCIFdata{}; ClassDefNV(CTPRateFetcher, 1); }; -} // namespace ctp -} // namespace o2 +} // namespace o2::ctp + #endif // COMMON_CCDB_CTPRATEFETCHER_H_ diff --git a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx index 67e59aad3ea24..d899fcafec47d 100644 --- a/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx +++ b/DataFormats/Detectors/CTP/src/CTPRateFetcher.cxx @@ -10,6 +10,7 @@ // or submit itself to any jurisdiction. 
#include "DataFormatsCTP/CTPRateFetcher.h" +#include "CCDB/BasicCCDBManager.h" #include #include From fb08c353c9118f73c75fd4a41c1545ca541aef57 Mon Sep 17 00:00:00 2001 From: ariedel-cern <85537041+ariedel-cern@users.noreply.github.com> Date: Fri, 9 May 2025 19:23:07 +0200 Subject: [PATCH 0486/1914] TPC QC: Add getter for occupancy to Cluster class (#14251) Feat: add getOccupancy method to Cluster class --- Detectors/TPC/qc/include/TPCQC/Clusters.h | 6 ++++-- Detectors/TPC/qc/src/Clusters.cxx | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Detectors/TPC/qc/include/TPCQC/Clusters.h b/Detectors/TPC/qc/include/TPCQC/Clusters.h index ca9c53a43b3f6..b61c27e8bf0e6 100644 --- a/Detectors/TPC/qc/include/TPCQC/Clusters.h +++ b/Detectors/TPC/qc/include/TPCQC/Clusters.h @@ -17,10 +17,10 @@ #ifndef AliceO2_TPC_CLUSTERS_H #define AliceO2_TPC_CLUSTERS_H -//root includes +// root includes #include "TCanvas.h" -//o2 includes +// o2 includes #include "TPCBase/CalDet.h" #include "TPCBase/Sector.h" #include "DataFormatsTPC/Defs.h" @@ -75,6 +75,8 @@ class Clusters CalPad& getSigmaPad() { return mSigmaPad; } CalPad& getTimeBin() { return mTimeBin; } + CalPad getOccupancy(int nHBFPerTF = 32); + void endTF() { ++mProcessedTFs; } size_t getProcessedTFs() { return mProcessedTFs; } diff --git a/Detectors/TPC/qc/src/Clusters.cxx b/Detectors/TPC/qc/src/Clusters.cxx index 3a7b1d8481b16..4bf59ced195ed 100644 --- a/Detectors/TPC/qc/src/Clusters.cxx +++ b/Detectors/TPC/qc/src/Clusters.cxx @@ -22,8 +22,10 @@ #include "TPCBase/ROC.h" #include "TPCBase/CRU.h" #include "TPCBase/Mapper.h" +#include "TPCBase/ParameterElectronics.h" #include "DataFormatsTPC/ClusterNative.h" #include "DataFormatsTPC/KrCluster.h" +#include "CommonConstants/LHCConstants.h" ClassImp(o2::tpc::qc::Clusters); @@ -152,6 +154,13 @@ void Clusters::reset() mProcessedTFs = 0; } +//______________________________________________________________________________ +o2::tpc::CalPad Clusters::getOccupancy(int 
nHBFPerTF) +{ + o2::tpc::CalPad occupancy = mNClusters; + occupancy /= float(mProcessedTFs * (o2::constants::lhc::LHCMaxBunches * nHBFPerTF) / float(o2::tpc::ParameterElectronics::TIMEBININBC)); + return occupancy; +} //______________________________________________________________________________ void Clusters::merge(Clusters& clusters) { From a917b6b5c387e4a6ec2a232b317d5e7468b99091 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 9 May 2025 19:56:48 +0200 Subject: [PATCH 0487/1914] Add TPC occupancy in 10bins/drift in trackStudy --- .../study/src/TrackingStudy.cxx | 76 ++++++++++++------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index f206c43f7f57a..a74349bdeba15 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -95,7 +95,7 @@ class TrackingStudySpec : public Task std::unique_ptr mDBGOut; std::unique_ptr mDBGOutVtx; std::unique_ptr mTPCRefitter; ///< TPC refitter used for TPC tracks refit during the reconstruction - std::vector mTBinClOccAft, mTBinClOccBef, mTBinClOccWgh; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength + std::vector mMltHistTB, mTBinClOccAft, mTBinClOccBef, mTBinClOccWgh; ///< TPC occupancy histo: i-th entry is the integrated occupancy for ~1 orbit starting/preceding from the TB = i*mNTPCOccBinLength std::unique_ptr mOccWghFun; float mITSROFrameLengthMUS = 0.f; float mTPCTBinMUS = 0.f; // TPC bin in microseconds @@ -107,6 +107,7 @@ class TrackingStudySpec : public Task float mMinX = 46.; float mMaxEta = 0.8; float mMinPt = 0.1; + int mNOccBinsDrift = 10; int mMinTPCClusters = 60; int mNTPCOccBinLength = 0; ///< TPC occ. 
histo bin length in TBs int mNHBPerTF = 0; @@ -142,6 +143,10 @@ void TrackingStudySpec::init(InitContext& ic) mDCAYFormula = ic.options().get("dcay-vs-pt"); mDCAZFormula = ic.options().get("dcaz-vs-pt"); mDoPairsCorr = ic.options().get("pair-correlations"); + mNOccBinsDrift = ic.options().get("noccbins"); + if (mNOccBinsDrift < 3) { + mNOccBinsDrift = 3; + } auto str = ic.options().get("occ-weight-fun"); if (!str.empty()) { mOccWghFun = std::make_unique("occFun", str.c_str(), -100., 100.); @@ -172,42 +177,23 @@ void TrackingStudySpec::run(ProcessingContext& pc) mTBinClOccAft.resize(nTPCOccBins); mTBinClOccBef.resize(nTPCOccBins); float sm = 0., tb = 0.5 * mNTPCOccBinLength; - /* // at the moment not used - if (mOccWghFun) { - mTBinClOccWgh.resize(nTPCBins); - float occBin2MUS = 8 * o2::constants::lhc::LHCBunchSpacingMUS; - int covWghTB = TMath::NInt(100./occBin2MUS); // coverage of weighted occ. in TBins - for (int i = 0; i < nTPCBins; i++) { - sm = 0.; - for (int j=-covWghTB;j=nTPCBins) { - continue; - } - sm += mOccWghFun->Eval(j*occBin2MUS)*mTPCRefitter->getParam()->GetUnscaledMult(j+i); - } - mTBinClOccWgh[i] = sm; - } - } else { - mTBinClOccWgh.resize(1); - } - */ - std::vector mltHistTB(nTPCOccBins); + mMltHistTB.resize(nTPCOccBins); for (int i = 0; i < nTPCOccBins; i++) { - mltHistTB[i] = mTPCRefitter->getParam()->GetUnscaledMult(tb); + mMltHistTB[i] = mTPCRefitter->getParam()->GetUnscaledMult(tb); tb += mNTPCOccBinLength; } for (int i = nTPCOccBins; i--;) { - sm += mltHistTB[i]; + sm += mMltHistTB[i]; if (i + sumBins < nTPCOccBins) { - sm -= mltHistTB[i + sumBins]; + sm -= mMltHistTB[i + sumBins]; } mTBinClOccAft[i] = sm; } sm = 0; for (int i = 0; i < nTPCOccBins; i++) { - sm += mltHistTB[i]; + sm += mMltHistTB[i]; if (i - sumBins > 0) { - sm -= mltHistTB[i - sumBins]; + sm -= mMltHistTB[i - sumBins]; } mTBinClOccBef[i] = sm; } @@ -271,13 +257,17 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) 
o2::dataformats::PrimaryVertexExt pveDummy; o2::dataformats::PrimaryVertexExt vtxDummy(mMeanVtx.getPos(), {}, {}, 0); std::vector pveVec(nv); + std::vector tpcOccAftV, tpcOccBefV; pveVec.back() = vtxDummy; const auto& alpParams = o2::itsmft::DPLAlpideParam::Instance(); float tBiasITS = alpParams.roFrameBiasInBC * o2::constants::lhc::LHCBunchSpacingMUS; const o2::ft0::InteractionTag& ft0Params = o2::ft0::InteractionTag::Instance(); std::vector trcExtVec; std::vector trcPairsVec; - auto vdrit = mTPCVDriftHelper.getVDriftObject().getVDrift(); + auto vdrift = mTPCVDriftHelper.getVDriftObject().getVDrift(); + float maxDriftTB = 250.f / vdrift / (o2::constants::lhc::LHCBunchSpacingMUS * 8); + int groupOcc = std::ceil(maxDriftTB / mNOccBinsDrift / mNTPCOccBinLength); + bool tpcTrackOK = recoData.isTrackSourceLoaded(GTrackID::TPC); auto fillTPCClInfo = [&recoData, this](const o2::tpc::TrackTPC& trc, o2::dataformats::TrackInfoExt& trExt, float timestampTB = -1e9) { @@ -391,6 +381,8 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } } }; + tpcOccAftV.resize(mNOccBinsDrift); + tpcOccBefV.resize(mNOccBinsDrift); for (int iv = 0; iv < nv; iv++) { LOGP(debug, "processing PV {} of {}", iv, nv); @@ -455,7 +447,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) continue; } if (iv < nv - 1 && is == GTrackID::TPC && tpcTr && !tpcTr->hasBothSidesClusters()) { // for unconstrained TPC tracks correct track Z - float corz = vdrit * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); + float corz = vdrift * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); if (tpcTr->hasASideClustersOnly()) { corz = -corz; // A-side } @@ -500,7 +492,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } else { o2::track::TrackParCov tmpTPC(*tpcTr); if (iv < nv - 1 && is == GTrackID::TPC && tpcTr && !tpcTr->hasBothSidesClusters()) { // for unconstrained TPC tracks 
correct track Z - float corz = vdrit * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); + float corz = vdrift * (tpcTr->getTime0() * mTPCTBinMUS - pvvec[iv].getTimeStamp().getTimeStamp()); if (tpcTr->hasASideClustersOnly()) { corz = -corz; // A-side } @@ -554,10 +546,35 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) int tb = pveVec[iv].getTimeStamp().getTimeStamp() * mTPCTBinMUSInv * mNTPCOccBinLengthInv; tpcOccBef = tb < 0 ? mTBinClOccBef[0] : (tb >= mTBinClOccBef.size() ? mTBinClOccBef.back() : mTBinClOccBef[tb]); tpcOccAft = tb < 0 ? mTBinClOccAft[0] : (tb >= mTBinClOccAft.size() ? mTBinClOccAft.back() : mTBinClOccAft[tb]); + int tbc = pveVec[iv].getTimeStamp().getTimeStamp() * mTPCTBinMUSInv * mNTPCOccBinLengthInv - groupOcc / 2.; + for (int iob = 0; iob < mNOccBinsDrift; iob++) { + float sm = 0; + for (int ig = 0; ig < groupOcc; ig++) { + int ocb = tbc + ig + groupOcc * iob; + if (ocb < 0 || ocb >= (int)mMltHistTB.size()) { + sm = -1; + break; + } + sm += mMltHistTB[ocb]; + } + tpcOccAftV[iob] = sm; + // + sm = 0; + for (int ig = 0; ig < groupOcc; ig++) { + int ocb = tbc + ig - groupOcc * iob; + if (ocb < 0 || ocb >= (int)mMltHistTB.size()) { + sm = -1; + break; + } + sm += mMltHistTB[ocb]; + } + tpcOccBefV[iob] = sm; + } } (*mDBGOut) << "trpv" << "orbit=" << recoData.startIR.orbit << "tfID=" << TFCount << "tpcOccBef=" << tpcOccBef << "tpcOccAft=" << tpcOccAft + << "tpcOccBefV=" << tpcOccBefV << "tpcOccAftV=" << tpcOccAftV << "pve=" << pveVec[iv] << "trc=" << trcExtVec << "\n"; if (mDoPairsCorr) { @@ -752,6 +769,7 @@ DataProcessorSpec getTrackingStudySpec(GTrackID::mask_t srcTracks, GTrackID::mas {"with-its-only", VariantType::Bool, false, {"Store tracks with ITS only"}}, {"pair-correlations", VariantType::Bool, false, {"Do pairs correlation"}}, {"occ-weight-fun", VariantType::String, "(x>=-40&&x<-5) ? (1./1225*pow(x+40,2)) : ((x>-5&&x<15) ? 1. : ((x>=15&&x<40) ? (-0.4/25*x+1.24 ) : ( (x>40&&x<100) ? 
-0.4/60*x+0.6+0.8/3 : 0)))", {"Occupancy weighting f-n vs time in musec"}}, + {"noccbins", VariantType::Int, 10, {"Number of occupancy bins per full drift time"}}, {"min-x-prop", VariantType::Float, 100.f, {"track should be propagated to this X at least"}}, }; o2::tpc::VDriftHelper::requestCCDBInputs(dataRequest->inputs); From 1bcf367115a918253404a92a9537311646c895e1 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Sun, 11 May 2025 18:47:32 +0200 Subject: [PATCH 0488/1914] DPL: fix rate limiting handling (#14255) On success, FairMQ returns a positive number of bytes, not 0. --- Framework/Core/src/CommonDataProcessors.cxx | 26 ++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/Framework/Core/src/CommonDataProcessors.cxx b/Framework/Core/src/CommonDataProcessors.cxx index 737e1b7e635c8..c2431b3ab068d 100644 --- a/Framework/Core/src/CommonDataProcessors.cxx +++ b/Framework/Core/src/CommonDataProcessors.cxx @@ -30,9 +30,11 @@ #include "Framework/RuntimeError.h" #include "Framework/RateLimiter.h" #include "Framework/PluginManager.h" +#include "Framework/Signpost.h" #include #include +#include #include #include #include @@ -40,6 +42,9 @@ using namespace o2::framework::data_matcher; +// Special log to track callbacks we know about +O2_DECLARE_DYNAMIC_LOG(callbacks); + namespace o2::framework { @@ -145,6 +150,10 @@ DataProcessorSpec CommonDataProcessors::getGlobalFairMQSink(std::vectordata; auto& timesliceIndex = services->get(); @@ -152,20 +161,35 @@ void retryMetricCallback(uv_async_t* async) auto channel = device->GetChannels().find("metric-feedback"); auto oldestPossingTimeslice = timesliceIndex.getOldestPossibleOutput().timeslice.value; if (channel == device->GetChannels().end()) { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", "Could not find metric-feedback channel."); return; } fair::mq::MessagePtr payload(device->NewMessage()); payload->Rebuild(&oldestPossingTimeslice, 
sizeof(int64_t), nullptr, nullptr); auto consumed = oldestPossingTimeslice; + size_t start = uv_hrtime(); int64_t result = channel->second[0].Send(payload, 100); + size_t stop = uv_hrtime(); // If the sending worked, we do not retry. - if (result != 0) { + if (result <= 0) { + // Forcefully slow down in case FairMQ returns earlier than expected... + int64_t ellapsed = (stop - start) / 1000000; + if (ellapsed < 100) { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", + "FairMQ returned %llu earlier than expected. Sleeping %llu ms more before, retrying.", + result, ellapsed); + uv_sleep(100 - ellapsed); + } else { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", + "FairMQ returned %llu, unable to send last consumed timeslice to source for %llu ms, retrying.", result, ellapsed); + } // If the sending did not work, we keep trying until it actually works. // This will schedule other tasks in the queue, so the processing of the // data will still happen. uv_async_send(async); } else { + O2_SIGNPOST_EVENT_EMIT(callbacks, cid, "rate-limiting", "Send %llu bytes, Last timeslice now set to %zu.", result, consumed); lastTimeslice = consumed; } } From f926be7e0b3e05ddce8e040f264b3eadf25a5a84 Mon Sep 17 00:00:00 2001 From: shahoian Date: Sun, 11 May 2025 15:37:02 +0200 Subject: [PATCH 0489/1914] Fix typo in the RecoContainer::getTrackTimeTPCTRD Thanks for Felix for spotting --- DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index 39cc05d8a69e7..d4b4e2b89cbb0 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -1594,7 +1594,7 @@ void RecoContainer::getTrackTimeITSTPCTRD(GTrackID gid, float& t, float& tErr) c //________________________________________________________ void 
RecoContainer::getTrackTimeTPCTRD(GTrackID gid, float& t, float& tErr) const { - const auto trigTPCTRD = getITSTPCTRDTriggers(); + const auto trigTPCTRD = getTPCTRDTriggers(); // very slow: find the trigger this track belongs to for (const auto& trig : trigTPCTRD) { if (trig.getTrackRefs().getEntriesBound() > gid.getIndex()) { From 04ba3bd18306dd47a245d32f6556eec86d546a4b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 10:46:51 +0200 Subject: [PATCH 0490/1914] GPU: Fix performance regression: DETERMINISTIC CODE was used unintentionally in 2 places --- GPU/Common/GPUCommonAlgorithm.h | 37 +++++++++---------- .../Base/cuda/GPUReconstructionCUDArtc.cu | 5 ++- .../GPUTPCCompressionKernels.cxx | 24 ++++++------ 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/GPU/Common/GPUCommonAlgorithm.h b/GPU/Common/GPUCommonAlgorithm.h index d0643391246a8..8cd53ec5e0609 100644 --- a/GPU/Common/GPUCommonAlgorithm.h +++ b/GPU/Common/GPUCommonAlgorithm.h @@ -282,30 +282,27 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp) { #ifndef GPUCA_GPUCODE GPUCommonAlgorithm::sort(begin, end, comp); +#elif defined(GPUCA_DETERMINISTIC_MODE) // Not using GPUCA_DETERMINISTIC_CODE, which is enforced in TPC compression + if (get_local_id(0) == 0) { + GPUCommonAlgorithm::sort(begin, end, comp); + } + GPUbarrier(); #else - GPUCA_DETERMINISTIC_CODE( // clang-format off - GPUbarrier(); - if (get_local_id(0) == 0) { - GPUCommonAlgorithm::sort(begin, end, comp); - } - GPUbarrier(); - , // !GPUCA_DETERMINISTIC_CODE - int32_t n = end - begin; - for (int32_t i = 0; i < n; i++) { - for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { - int32_t offset = i % 2; - int32_t curPos = 2 * tIdx + offset; - int32_t nextPos = curPos + 1; - - if (nextPos < n) { - if (!comp(begin[curPos], begin[nextPos])) { - IterSwap(&begin[curPos], &begin[nextPos]); - } + int32_t n = end - begin; + for (int32_t i = 0; i < n; i++) { + for (int32_t tIdx 
= get_local_id(0); tIdx < n; tIdx += get_local_size(0)) { + int32_t offset = i % 2; + int32_t curPos = 2 * tIdx + offset; + int32_t nextPos = curPos + 1; + + if (nextPos < n) { + if (!comp(begin[curPos], begin[nextPos])) { + IterSwap(&begin[curPos], &begin[nextPos]); } } - GPUbarrier(); } - ) // clang-format on + GPUbarrier(); + } #endif } diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 50a568ab345cf..805397c9b430e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -14,9 +14,12 @@ #define GPUCA_GPUCODE_GENRTC #define GPUCA_GPUCODE_COMPILEKERNELS + +// Keep some preprocessor calls unprocessed #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) -// GPUReconstructionCUDAIncludesSystem.h prependended without preprocessor running + +// GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running #include "GPUReconstructionCUDADef.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index 3b88c8764d0fd..bba97e9eace9b 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -273,19 +273,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->clusters[iSector][iRow])); +#else // GPUCA_DETERMINISTIC_MODE + if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - , // !GPUCA_DETERMINISTIC_CODE - if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, 
GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { - CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); - } - ) // clang-format on + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) { + CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare(clusters->clusters[iSector][iRow])); + } +#endif // GPUCA_DETERMINISTIC_MODE GPUbarrier(); } From ea6e536b48052186c6bd263cd9df1e83e3f16ce7 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Mon, 12 May 2025 20:49:42 +0200 Subject: [PATCH 0491/1914] DPL: do not create unneeded statics (#14261) --- .../Core/include/Framework/TableBuilder.h | 28 ++++++++----------- Framework/Core/src/TableBuilder.cxx | 4 +-- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 32fe78b852eff..936a8a04d5a5a 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ 
b/Framework/Core/include/Framework/TableBuilder.h @@ -855,7 +855,7 @@ auto makeEmptyTable(const char* name, framework::pack p) } std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, std::vector> const& fields, const char* name, std::shared_ptr& projector); + expressions::Projector* projectors, const char* name, std::shared_ptr& projector); /// Expression-based column generator to materialize columns template @@ -867,10 +867,9 @@ auto spawner(std::vector>&& tables, const char* na if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(placeholders_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template @@ -881,10 +880,9 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, o if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(placeholders_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template @@ -896,15 +894,15 @@ auto spawner(std::vector>&& tables, const char* na if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - 
static auto fields = o2::soa::createFieldsFromColumns(expression_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); + auto projectors = [](framework::pack) -> std::array { return {{std::move(C::Projector())...}}; } (expression_pack_t{}); - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); } template @@ -915,15 +913,14 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, s if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - static auto fields = o2::soa::createFieldsFromColumns(expression_pack_t{}); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); auto projectors = [](framework::pack) -> std::array { return {{std::move(C::Projector())...}}; } (expression_pack_t{}); - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); } template @@ -933,10 +930,9 @@ auto spawner(framework::pack columns, std::vectornum_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } - static auto fields = o2::soa::createFieldsFromColumns(columns); - static auto new_schema = std::make_shared(fields); + static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(columns)); std::array projectors{{std::move(C::Projector())...}}; - return spawnerHelper(fullTable, new_schema, sizeof...(C), projectors.data(), fields, name, projector); + return spawnerHelper(fullTable, new_schema, sizeof...(C), 
projectors.data(), name, projector); } template diff --git a/Framework/Core/src/TableBuilder.cxx b/Framework/Core/src/TableBuilder.cxx index d9827559c2148..eb19f8d3fe642 100644 --- a/Framework/Core/src/TableBuilder.cxx +++ b/Framework/Core/src/TableBuilder.cxx @@ -85,11 +85,11 @@ void TableBuilder::setLabel(const char* label) } std::shared_ptr spawnerHelper(std::shared_ptr const& fullTable, std::shared_ptr newSchema, size_t nColumns, - expressions::Projector* projectors, std::vector> const& fields, const char* name, + expressions::Projector* projectors, const char* name, std::shared_ptr& projector) { if (projector == nullptr) { - projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), fields); + projector = framework::expressions::createProjectorHelper(nColumns, projectors, fullTable->schema(), newSchema->fields()); } arrow::TableBatchReader reader(*fullTable); From 240812f69d5d2cb39cfc5b30ce9ef63a3254f394 Mon Sep 17 00:00:00 2001 From: shahoian Date: Mon, 12 May 2025 15:04:30 +0200 Subject: [PATCH 0492/1914] add TPC chi2 to trackstudy output --- .../study/include/GlobalTrackingStudy/TrackInfoExt.h | 3 ++- Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h index 935e57873bbd9..26eeea858d14b 100644 --- a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TrackInfoExt.h @@ -35,6 +35,7 @@ struct TrackInfoExt { float ttime = 0; float ttimeE = 0; float xmin = 0; + float chi2TPC = 0.f; float chi2ITSTPC = 0.f; float q2ptITS = 0.f; float q2ptTPC = 0.f; @@ -56,7 +57,7 @@ struct TrackInfoExt { float getTPCInY0() const { return innerTPCPos0[1]; } float getTPCInZ0() const { return 
innerTPCPos0[2]; } - ClassDefNV(TrackInfoExt, 5); + ClassDefNV(TrackInfoExt, 6); }; } // namespace dataformats diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx index a74349bdeba15..a2bf6abd35fef 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx +++ b/Detectors/GlobalTrackingWorkflow/study/src/TrackingStudy.cxx @@ -503,6 +503,7 @@ void TrackingStudySpec::process(o2::globaltracking::RecoContainer& recoData) } } fillTPCClInfo(*tpcTr, trcExt, tsuse); + trcExt.chi2TPC = tpcTr->getChi2(); } auto gidRefs = recoData.getSingleDetectorRefs(vid); if (gidRefs[GTrackID::ITS].isIndexSet()) { From 751bb12f5b93dd6612ad15f470eec418db2800bb Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 08:42:44 +0200 Subject: [PATCH 0493/1914] Move JSON parsing out of line (#14264) --- Framework/Core/CMakeLists.txt | 1 + .../include/Framework/VariantJSONHelpers.h | 430 +--------------- Framework/Core/src/VariantJSONHelpers.cxx | 464 ++++++++++++++++++ 3 files changed, 468 insertions(+), 427 deletions(-) create mode 100644 Framework/Core/src/VariantJSONHelpers.cxx diff --git a/Framework/Core/CMakeLists.txt b/Framework/Core/CMakeLists.txt index 2691d9d33a0c6..17320348d9272 100644 --- a/Framework/Core/CMakeLists.txt +++ b/Framework/Core/CMakeLists.txt @@ -140,6 +140,7 @@ o2_add_library(Framework src/Task.cxx src/Array2D.cxx src/Variant.cxx + src/VariantJSONHelpers.cxx src/VariantPropertyTreeHelpers.cxx src/WorkflowCustomizationHelpers.cxx src/WorkflowHelpers.cxx diff --git a/Framework/Core/include/Framework/VariantJSONHelpers.h b/Framework/Core/include/Framework/VariantJSONHelpers.h index eab78d547ca13..811e6f13d4985 100644 --- a/Framework/Core/include/Framework/VariantJSONHelpers.h +++ b/Framework/Core/include/Framework/VariantJSONHelpers.h @@ -19,439 +19,15 @@ #include #include -#include -#include -#include +#include namespace 
o2::framework { -namespace -{ -template -struct VariantReader : public rapidjson::BaseReaderHandler, VariantReader> { - using Ch = rapidjson::UTF8<>::Ch; - using SizeType = rapidjson::SizeType; - - enum struct State { - IN_START, - IN_STOP, - IN_DATA, - IN_KEY, - IN_ARRAY, - IN_ROW, - IN_ERROR - }; - - VariantReader() - : states{}, - rows{0}, - cols{0} - { - debug << "Start" << std::endl; - states.push(State::IN_START); - } - - bool Null() - { - debug << "Null value encountered" << std::endl; - return true; - } - - bool Int(int i) - { - debug << "Int(" << i << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!std::is_same_v>) { - states.push(State::IN_ERROR); - return true; - } else { - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - debug << "added to array" << std::endl; - accumulatedData.push_back(i); - return true; - } - } - states.push(State::IN_ERROR); - return true; - } - - bool Uint(unsigned i) - { - debug << "Uint -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Int64(int64_t i) - { - debug << "Int64 -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Uint64(uint64_t i) - { - debug << "Uint64 -> Int" << std::endl; - return Int(static_cast(i)); - } - - bool Double(double d) - { - debug << "Double(" << d << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!(std::is_same_v> || std::is_same_v>)) { - states.push(State::IN_ERROR); - return true; - } - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - if constexpr (std::is_same_v>) { - debug << "added to array as double" << std::endl; - accumulatedData.push_back(d); - return true; - } else if constexpr (std::is_same_v>) { - debug << "added to array as float" << std::endl; - accumulatedData.push_back(static_cast(d)); - return true; - } - } - 
states.push(State::IN_ERROR); - return true; - } - - bool Bool(bool b) - { - debug << "Bool(" << b << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!std::is_same_v>) { - states.push(State::IN_ERROR); - return false; - } else { - if (states.top() == State::IN_ARRAY) { - debug << "added to array" << std::endl; - accumulatedData.push_back(b); - return true; - } - states.push(State::IN_ERROR); - return true; - } - } - - bool String(const Ch* str, SizeType, bool) - { - debug << "String(" << str << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if constexpr (!(V == VariantType::ArrayString || isLabeledArray())) { - states.push(State::IN_ERROR); - return true; - } else { - if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { - debug << "added to array" << std::endl; - if constexpr (isLabeledArray()) { - if (currentKey == labels_rows_str) { - labels_rows.push_back(str); - return true; - } - if (currentKey == labels_cols_str) { - labels_cols.push_back(str); - return true; - } - } - if (currentKey == "values") { - if constexpr (std::is_same_v>) { - accumulatedData.push_back(str); - } else { - states.push(State::IN_ERROR); - } - return true; - } - return true; - } - states.push(State::IN_ERROR); - return true; - } - } - - bool StartObject() - { - debug << "StartObject()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_START) { - states.push(State::IN_DATA); - return true; - } - states.push(State::IN_ERROR); - return true; - } - - bool Key(const Ch* str, SizeType, bool) - { - debug << "Key(" << str << ")" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - currentKey = str; - return false; - } - if (states.top() == State::IN_DATA) { - // no previous 
keys - states.push(State::IN_KEY); - currentKey = str; - return true; - } - if (states.top() == State::IN_KEY) { - currentKey = str; - if constexpr (!isLabeledArray()) { - debug << "extra keys in a single-key variant" << std::endl; - states.push(State::IN_ERROR); - return true; - } - return true; - } - currentKey = str; - states.push(State::IN_ERROR); - return true; - } - - bool EndObject(SizeType) - { - debug << "EndObject()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_KEY) { - if constexpr (isArray()) { - debug << "creating 1d-array variant" << std::endl; - result = Variant(accumulatedData); - } else if constexpr (isArray2D()) { - debug << "creating 2d-array variant" << std::endl; - assert(accumulatedData.size() == rows * cols); - result = Variant(Array2D{accumulatedData, rows, cols}); - } else if constexpr (isLabeledArray()) { - debug << "creating labeled array variant" << std::endl; - assert(accumulatedData.size() == rows * cols); - if (labels_rows.empty() == false) { - assert(labels_rows.size() == rows); - } - if (labels_cols.empty() == false) { - assert(labels_cols.size() == cols); - } - result = Variant(LabeledArray{Array2D{accumulatedData, rows, cols}, labels_rows, labels_cols}); - } - states.push(State::IN_STOP); - return true; - } - states.push(State::IN_ERROR); - return true; - } - - bool StartArray() - { - debug << "StartArray()" << std::endl; - if (states.top() == State::IN_ERROR) { - debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_KEY) { - states.push(State::IN_ARRAY); - return true; - } else if (states.top() == State::IN_ARRAY) { - if constexpr (isArray2D() || isLabeledArray()) { - states.push(State::IN_ROW); - return true; - } - } - states.push(State::IN_ERROR); - return true; - } - - bool EndArray(SizeType elementCount) - { - debug << "EndArray()" << std::endl; - if (states.top() == State::IN_ERROR) { 
- debug << "In ERROR state" << std::endl; - return false; - } - if (states.top() == State::IN_ARRAY) { - // finish up array - states.pop(); - if constexpr (isArray2D() || isLabeledArray()) { - rows = elementCount; - } - return true; - } else if (states.top() == State::IN_ROW) { - // finish up row - states.pop(); - if constexpr (isArray2D() || isLabeledArray()) { - cols = elementCount; - } - return true; - } - states.push(State::IN_ERROR); - return true; - } - - std::stack states; - std::ostringstream debug; - - uint32_t rows; - uint32_t cols; - std::string currentKey; - std::vector> accumulatedData; - std::vector labels_rows; - std::vector labels_cols; - Variant result; -}; - -template -void writeVariant(std::ostream& o, Variant const& v) -{ - if constexpr (isArray() || isArray2D() || isLabeledArray()) { - using type = variant_array_element_type_t; - rapidjson::OStreamWrapper osw(o); - rapidjson::Writer w(osw); - - auto writeArray = [&](auto* values, size_t size) { - using T = std::remove_pointer_t; - w.StartArray(); - for (auto i = 0u; i < size; ++i) { - if constexpr (std::is_same_v) { - w.Int(values[i]); - } else if constexpr (std::is_same_v || std::is_same_v) { - w.Double(values[i]); - } else if constexpr (std::is_same_v) { - w.Bool(values[i]); - } else if constexpr (std::is_same_v) { - w.String(values[i].c_str()); - } - } - w.EndArray(); - }; - - auto writeVector = [&](auto&& vector) { - return writeArray(vector.data(), vector.size()); - }; - - auto writeArray2D = [&](auto&& array2d) { - using T = typename std::decay_t::element_t; - w.StartArray(); - for (auto i = 0u; i < array2d.rows; ++i) { - w.StartArray(); - for (auto j = 0u; j < array2d.cols; ++j) { - if constexpr (std::is_same_v) { - w.Int(array2d(i, j)); - } else if constexpr (std::is_same_v || std::is_same_v) { - w.Double(array2d(i, j)); - } else if constexpr (std::is_same_v) { - w.String(array2d(i, j).c_str()); - } - } - w.EndArray(); - } - w.EndArray(); - }; - - auto writeLabeledArray = [&](auto&& 
array) { - w.Key(labels_rows_str); - writeVector(array.getLabelsRows()); - w.Key(labels_cols_str); - writeVector(array.getLabelsCols()); - w.Key("values"); - writeArray2D(array.getData()); - }; - - w.StartObject(); - if constexpr (isArray()) { - w.Key("values"); - writeArray(v.get(), v.size()); - } else if constexpr (isArray2D()) { - w.Key("values"); - writeArray2D(v.get>()); - } else if constexpr (isLabeledArray()) { - writeLabeledArray(v.get>()); - } else if constexpr (V == VariantType::Dict) { - // nothing to do for dicts - } - w.EndObject(); - } -} -} // namespace - struct VariantJSONHelpers { template - static Variant read(std::istream& s) - { - rapidjson::Reader reader; - rapidjson::IStreamWrapper isw(s); - VariantReader vreader; - bool ok = reader.Parse(isw, vreader); - - if (ok == false) { - std::stringstream error; - error << "Cannot parse serialized Variant, error: " << rapidjson::GetParseError_En(reader.GetParseErrorCode()) << " at offset: " << reader.GetErrorOffset(); - throw std::runtime_error(error.str()); - } - return vreader.result; - } + static Variant read(std::istream& s); - static void write(std::ostream& o, Variant const& v) - { - switch (v.type()) { - case VariantType::ArrayInt: - writeVariant(o, v); - break; - case VariantType::ArrayFloat: - writeVariant(o, v); - break; - case VariantType::ArrayDouble: - writeVariant(o, v); - break; - case VariantType::ArrayBool: - throw std::runtime_error("Bool vectors not implemented yet"); - // writeVariant(o, v); - break; - case VariantType::ArrayString: - writeVariant(o, v); - break; - case VariantType::Array2DInt: - writeVariant(o, v); - break; - case VariantType::Array2DFloat: - writeVariant(o, v); - break; - case VariantType::Array2DDouble: - writeVariant(o, v); - break; - case VariantType::LabeledArrayInt: - writeVariant(o, v); - break; - case VariantType::LabeledArrayFloat: - writeVariant(o, v); - break; - case VariantType::LabeledArrayDouble: - writeVariant(o, v); - break; - case 
VariantType::LabeledArrayString: - writeVariant(o, v); - break; - case VariantType::Dict: - writeVariant(o, v); - default: - break; - } - } + static void write(std::ostream& o, Variant const& v); }; } // namespace o2::framework diff --git a/Framework/Core/src/VariantJSONHelpers.cxx b/Framework/Core/src/VariantJSONHelpers.cxx new file mode 100644 index 0000000000000..fbb5abb331867 --- /dev/null +++ b/Framework/Core/src/VariantJSONHelpers.cxx @@ -0,0 +1,464 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+#include "Framework/VariantJSONHelpers.h" +#include "Framework/Variant.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace o2::framework +{ +namespace +{ +template +struct VariantReader : public rapidjson::BaseReaderHandler, VariantReader> { + using Ch = rapidjson::UTF8<>::Ch; + using SizeType = rapidjson::SizeType; + + enum struct State { + IN_START, + IN_STOP, + IN_DATA, + IN_KEY, + IN_ARRAY, + IN_ROW, + IN_ERROR + }; + + VariantReader() + : states{}, + rows{0}, + cols{0} + { + debug << "Start" << std::endl; + states.push(State::IN_START); + } + + bool Null() + { + debug << "Null value encountered" << std::endl; + return true; + } + + bool Int(int i) + { + debug << "Int(" << i << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!std::is_same_v>) { + states.push(State::IN_ERROR); + return true; + } else { + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + debug << "added to array" << std::endl; + accumulatedData.push_back(i); + return true; + } + } + states.push(State::IN_ERROR); + return true; + } + + bool Uint(unsigned i) + { + debug << "Uint -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Int64(int64_t i) + { + debug << "Int64 -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Uint64(uint64_t i) + { + debug << "Uint64 -> Int" << std::endl; + return Int(static_cast(i)); + } + + bool Double(double d) + { + debug << "Double(" << d << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!(std::is_same_v> || std::is_same_v>)) { + states.push(State::IN_ERROR); + return true; + } + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + if constexpr (std::is_same_v>) { + debug << "added to array as double" << std::endl; + accumulatedData.push_back(d); + return true; + } else if 
constexpr (std::is_same_v>) { + debug << "added to array as float" << std::endl; + accumulatedData.push_back(static_cast(d)); + return true; + } + } + states.push(State::IN_ERROR); + return true; + } + + bool Bool(bool b) + { + debug << "Bool(" << b << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!std::is_same_v>) { + states.push(State::IN_ERROR); + return false; + } else { + if (states.top() == State::IN_ARRAY) { + debug << "added to array" << std::endl; + accumulatedData.push_back(b); + return true; + } + states.push(State::IN_ERROR); + return true; + } + } + + bool String(const Ch* str, SizeType, bool) + { + debug << "String(" << str << ")" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if constexpr (!(V == VariantType::ArrayString || isLabeledArray())) { + states.push(State::IN_ERROR); + return true; + } else { + if (states.top() == State::IN_ARRAY || states.top() == State::IN_ROW) { + debug << "added to array" << std::endl; + if constexpr (isLabeledArray()) { + if (currentKey == labels_rows_str) { + labels_rows.push_back(str); + return true; + } + if (currentKey == labels_cols_str) { + labels_cols.push_back(str); + return true; + } + } + if (currentKey == "values") { + if constexpr (std::is_same_v>) { + accumulatedData.push_back(str); + } else { + states.push(State::IN_ERROR); + } + return true; + } + return true; + } + states.push(State::IN_ERROR); + return true; + } + } + + bool StartObject() + { + debug << "StartObject()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_START) { + states.push(State::IN_DATA); + return true; + } + states.push(State::IN_ERROR); + return true; + } + + bool Key(const Ch* str, SizeType, bool) + { + debug << "Key(" << str << ")" << std::endl; + if (states.top() == 
State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + currentKey = str; + return false; + } + if (states.top() == State::IN_DATA) { + // no previous keys + states.push(State::IN_KEY); + currentKey = str; + return true; + } + if (states.top() == State::IN_KEY) { + currentKey = str; + if constexpr (!isLabeledArray()) { + debug << "extra keys in a single-key variant" << std::endl; + states.push(State::IN_ERROR); + return true; + } + return true; + } + currentKey = str; + states.push(State::IN_ERROR); + return true; + } + + bool EndObject(SizeType) + { + debug << "EndObject()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_KEY) { + if constexpr (isArray()) { + debug << "creating 1d-array variant" << std::endl; + result = Variant(accumulatedData); + } else if constexpr (isArray2D()) { + debug << "creating 2d-array variant" << std::endl; + assert(accumulatedData.size() == rows * cols); + result = Variant(Array2D{accumulatedData, rows, cols}); + } else if constexpr (isLabeledArray()) { + debug << "creating labeled array variant" << std::endl; + assert(accumulatedData.size() == rows * cols); + if (labels_rows.empty() == false) { + assert(labels_rows.size() == rows); + } + if (labels_cols.empty() == false) { + assert(labels_cols.size() == cols); + } + result = Variant(LabeledArray{Array2D{accumulatedData, rows, cols}, labels_rows, labels_cols}); + } + states.push(State::IN_STOP); + return true; + } + states.push(State::IN_ERROR); + return true; + } + + bool StartArray() + { + debug << "StartArray()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_KEY) { + states.push(State::IN_ARRAY); + return true; + } else if (states.top() == State::IN_ARRAY) { + if constexpr (isArray2D() || isLabeledArray()) { + states.push(State::IN_ROW); + return true; + } + } + 
states.push(State::IN_ERROR); + return true; + } + + bool EndArray(SizeType elementCount) + { + debug << "EndArray()" << std::endl; + if (states.top() == State::IN_ERROR) { + debug << "In ERROR state" << std::endl; + return false; + } + if (states.top() == State::IN_ARRAY) { + // finish up array + states.pop(); + if constexpr (isArray2D() || isLabeledArray()) { + rows = elementCount; + } + return true; + } else if (states.top() == State::IN_ROW) { + // finish up row + states.pop(); + if constexpr (isArray2D() || isLabeledArray()) { + cols = elementCount; + } + return true; + } + states.push(State::IN_ERROR); + return true; + } + + std::stack states; + std::ostringstream debug; + + uint32_t rows; + uint32_t cols; + std::string currentKey; + std::vector> accumulatedData; + std::vector labels_rows; + std::vector labels_cols; + Variant result; +}; +} // namespace + +template +Variant VariantJSONHelpers::read(std::istream& s) +{ + rapidjson::Reader reader; + rapidjson::IStreamWrapper isw(s); + VariantReader vreader; + bool ok = reader.Parse(isw, vreader); + + if (ok == false) { + std::stringstream error; + error << "Cannot parse serialized Variant, error: " << rapidjson::GetParseError_En(reader.GetParseErrorCode()) << " at offset: " << reader.GetErrorOffset(); + throw std::runtime_error(error.str()); + } + return vreader.result; +} + +template +void writeVariant(std::ostream& o, Variant const& v) +{ + if constexpr (isArray() || isArray2D() || isLabeledArray()) { + using type = variant_array_element_type_t; + rapidjson::OStreamWrapper osw(o); + rapidjson::Writer w(osw); + + auto writeArray = [&](auto* values, size_t size) { + using T = std::remove_pointer_t; + w.StartArray(); + for (auto i = 0u; i < size; ++i) { + if constexpr (std::is_same_v) { + w.Int(values[i]); + } else if constexpr (std::is_same_v || std::is_same_v) { + w.Double(values[i]); + } else if constexpr (std::is_same_v) { + w.Bool(values[i]); + } else if constexpr (std::is_same_v) { + 
w.String(values[i].c_str()); + } + } + w.EndArray(); + }; + + auto writeVector = [&](auto&& vector) { + return writeArray(vector.data(), vector.size()); + }; + + auto writeArray2D = [&](auto&& array2d) { + using T = typename std::decay_t::element_t; + w.StartArray(); + for (auto i = 0u; i < array2d.rows; ++i) { + w.StartArray(); + for (auto j = 0u; j < array2d.cols; ++j) { + if constexpr (std::is_same_v) { + w.Int(array2d(i, j)); + } else if constexpr (std::is_same_v || std::is_same_v) { + w.Double(array2d(i, j)); + } else if constexpr (std::is_same_v) { + w.String(array2d(i, j).c_str()); + } + } + w.EndArray(); + } + w.EndArray(); + }; + + auto writeLabeledArray = [&](auto&& array) { + w.Key(labels_rows_str); + writeVector(array.getLabelsRows()); + w.Key(labels_cols_str); + writeVector(array.getLabelsCols()); + w.Key("values"); + writeArray2D(array.getData()); + }; + + w.StartObject(); + if constexpr (isArray()) { + w.Key("values"); + writeArray(v.get(), v.size()); + } else if constexpr (isArray2D()) { + w.Key("values"); + writeArray2D(v.get>()); + } else if constexpr (isLabeledArray()) { + writeLabeledArray(v.get>()); + } else if constexpr (V == VariantType::Dict) { + // nothing to do for dicts + } + w.EndObject(); + } +} + +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); +template Variant VariantJSONHelpers::read(std::istream& s); + +void VariantJSONHelpers::write(std::ostream& o, Variant const& v) +{ + switch 
(v.type()) { + case VariantType::ArrayInt: + writeVariant(o, v); + break; + case VariantType::ArrayFloat: + writeVariant(o, v); + break; + case VariantType::ArrayDouble: + writeVariant(o, v); + break; + case VariantType::ArrayBool: + throw std::runtime_error("Bool vectors not implemented yet"); + // writeVariant(o, v); + break; + case VariantType::ArrayString: + writeVariant(o, v); + break; + case VariantType::Array2DInt: + writeVariant(o, v); + break; + case VariantType::Array2DFloat: + writeVariant(o, v); + break; + case VariantType::Array2DDouble: + writeVariant(o, v); + break; + case VariantType::LabeledArrayInt: + writeVariant(o, v); + break; + case VariantType::LabeledArrayFloat: + writeVariant(o, v); + break; + case VariantType::LabeledArrayDouble: + writeVariant(o, v); + break; + case VariantType::LabeledArrayString: + writeVariant(o, v); + break; + case VariantType::Dict: + writeVariant(o, v); + default: + break; + } +} +} // namespace o2::framework From 8de719349f3dc57e670b7ebf6b8206dee1e426b7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 11:43:12 +0200 Subject: [PATCH 0494/1914] GPU: Add GPUCA_RTC_CONSTEXPR macro for constexpr only in RTC --- GPU/Common/GPUCommonDef.h | 4 ++++ GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 1 + GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu | 1 + 3 files changed, 6 insertions(+) diff --git a/GPU/Common/GPUCommonDef.h b/GPU/Common/GPUCommonDef.h index d7e99f53d4ce8..d9a5bdf92b6ac 100644 --- a/GPU/Common/GPUCommonDef.h +++ b/GPU/Common/GPUCommonDef.h @@ -72,6 +72,10 @@ #define GPUCA_RTC_SPECIAL_CODE(...) 
#endif +#ifndef GPUCA_RTC_CONSTEXPR + #define GPUCA_RTC_CONSTEXPR +#endif + #ifndef GPUCA_DETERMINISTIC_CODE #ifdef GPUCA_DETERMINISTIC_MODE #define GPUCA_DETERMINISTIC_CODE(det, indet) det // In deterministic mode, take deterministic code path diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 5706f32e73e96..acc77648d954b 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,6 +38,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + + std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu index 805397c9b430e..66c02d6ed251c 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu @@ -18,6 +18,7 @@ // Keep some preprocessor calls unprocessed #define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__) #define GPUCA_DETERMINISTIC_CODE(...) 
GPUCA_DETERMINISTIC_CODE(__VA_ARGS__) +#define GPUCA_RTC_CONSTEXPR GPUCA_RTC_CONSTEXPR // GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running #include "GPUReconstructionCUDADef.h" From 1dc506884cf82e11378a33ad1621319e11a17402 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 11:43:40 +0200 Subject: [PATCH 0495/1914] GPU TPC: Compute alternative dEdx only if it has different settings than normal dEdx --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 17 +++-- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 70 +++++++++++-------- 3 files changed, 55 insertions(+), 36 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 6e7de7ee48ca6..d2aba503be6a6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -301,7 +301,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9dc6ddc59c2b4..9ead17ea5c7c0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -106,6 +106,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); TrackTPC* outputTracks = merger.OutputTracksTPCO2(); uint32_t* 
clusRefs = merger.OutputClusRefsTPCO2(); + const auto& param = merger.Param(); GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); uint2* GPUrestrict() tmpData = merger.ClusRefTmp(); @@ -130,9 +131,15 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled) { - oTrack.setdEdx(tracksdEdx[i]); - oTrack.setdEdxAlt(tracksdEdxAlt[i]); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled) { + oTrack.setdEdx(tracksdEdx[i]); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + oTrack.setdEdxAlt(tracksdEdxAlt[i]); + } else { + oTrack.setdEdxAlt(tracksdEdx[i]); + } + } } auto snpOut = outerPar.P[2]; @@ -148,9 +155,9 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks outerPar.C[6], outerPar.C[7], outerPar.C[8], outerPar.C[9], outerPar.C[10], outerPar.C[11], outerPar.C[12], outerPar.C[13], outerPar.C[14]})); - if (merger.Param().par.dodEdx && merger.Param().dodEdxEnabled && merger.Param().rec.tpc.enablePID) { + if (param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.enablePID) { PIDResponse pidResponse{}; - auto pid = pidResponse.getMostProbablePID(oTrack, merger.Param().rec.tpc.PID_EKrangeMin, merger.Param().rec.tpc.PID_EKrangeMax, merger.Param().rec.tpc.PID_EPrangeMin, merger.Param().rec.tpc.PID_EPrangeMax, merger.Param().rec.tpc.PID_EDrangeMin, merger.Param().rec.tpc.PID_EDrangeMax, merger.Param().rec.tpc.PID_ETrangeMin, merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); + auto pid = pidResponse.getMostProbablePID(oTrack, param.rec.tpc.PID_EKrangeMin, param.rec.tpc.PID_EKrangeMax, param.rec.tpc.PID_EPrangeMin, param.rec.tpc.PID_EPrangeMax, param.rec.tpc.PID_EDrangeMin, param.rec.tpc.PID_EDrangeMax, param.rec.tpc.PID_ETrangeMin, 
merger.Param().rec.tpc.PID_ETrangeMax, merger.Param().rec.tpc.PID_useNsigma, merger.Param().rec.tpc.PID_sigma); auto pidRemap = merger.Param().rec.tpc.PID_remap[pid]; if (pidRemap >= 0) { pid = pidRemap; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index f5bfbe985fb8c..0d8547263207b 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -216,11 +216,15 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - bool dodEdx = param.par.dodEdx && param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; - dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); - if (dodEdx) { - dEdx.fillSubThreshold(lastRow - wayDirection); - dEdxAlt.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; + dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); + if (dodEdx) { + dEdx.fillSubThreshold(lastRow - wayDirection); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.fillSubThreshold(lastRow - wayDirection); + } + } } } @@ -367,31 +371,35 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, 
prop.GetAlpha()); } - if (param.par.dodEdx && param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters - bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; - if (acc || accAlt) { - float qtot = 0, qmax = 0, pad = 0, relTime = 0; - const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; - for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { - if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { - qtot += clustersXYZ[ihit].amp; - } else { - const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; - qtot += cl.qTot; - qmax = CAMath::Max(qmax, cl.qMax); - pad += cl.getPad(); - relTime += cl.getTime(); + if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters + bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; + if (acc || accAlt) { + float qtot = 0, qmax = 0, pad = 0, relTime = 0; + const int32_t clusterCount = (ihit - ihitMergeFirst) * wayDirection + 1; + for (int32_t iTmp = ihitMergeFirst; iTmp != ihit + wayDirection; iTmp += wayDirection) { + if (merger->GetConstantMem()->ioPtrs.clustersNative == nullptr) { + qtot += clustersXYZ[ihit].amp; + } else { + const ClusterNative& cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num]; + qtot += cl.qTot; + qmax = CAMath::Max(qmax, cl.qMax); + pad += cl.getPad(); + relTime += cl.getTime(); + } + } + qtot /= clusterCount; // TODO: Weighted Average + pad /= clusterCount; + relTime /= clusterCount; + relTime = relTime - 
CAMath::Round(relTime); + if (acc) { + dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if (accAlt) { + dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); + } } - } - qtot /= clusterCount; // TODO: Weighted Average - pad /= clusterCount; - relTime /= clusterCount; - relTime = relTime - CAMath::Round(relTime); - if (acc) { - dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); - } - if (accAlt) { - dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } } } @@ -428,7 +436,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + } } Alpha = prop.GetAlpha(); MoveToReference(prop, param, Alpha); From 760f73e62a3a7898c81eee53e1d67012c58c39be Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 21:19:41 +0200 Subject: [PATCH 0496/1914] GPU CMake: If deterministic mode is set to MaxOptO2, do not impose -O2 when BUILD_TYPE is DEBUG --- GPU/GPUTracking/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 2e26622d05291..52848692e7516 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -16,11 +16,12 @@ 
set(MODULE GPUTracking) if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}") +elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2}) set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O2") + else() + set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() -elseif(NOT CMAKE_BUILD_TYPE_UPPER STREQUAL "DEBUG") - set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O3 -ffast-math") endif() set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_DENORMALS_FLAGS}") From 89b35ba2d75113e60b2045ed01e169b28d860a07 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 12 May 2025 21:22:52 +0200 Subject: [PATCH 0497/1914] GPU: Workaround for Clang Frontend issue This is fixed with Clang >= 20 and C++23 (P2280R4) --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 ++++ GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index acc77648d954b..67ad608c13417 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,7 +38,11 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) 
__VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + +#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + +#else + std::string("#define GPUCA_RTC_CONSTEXPR\n") + +#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..c89ef1769ad81 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,3 +270,8 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() + +set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" +TARGET_DIRECTORY O2::GPUTrackingHIP +PROPERTIES +COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") From 4b72f186ec6e2981cae50338b97855efde70dc8a Mon Sep 17 00:00:00 2001 From: tubagundem Date: Tue, 13 May 2025 11:20:40 +0200 Subject: [PATCH 0498/1914] TPC: Fix digitizer workflow to load GEM params from CCDB before creating Polya file --- .../DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index 75141425f7c49..a04f73a62fbf8 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -286,7 +286,7 @@ int getNumTPCLanes(std::vector const& sectors, ConfigContext const& configc // ------------------------------------------------------------------ -void initTPC() +void 
initTPC(long timestamp) { // We only want to do this for the DPL master // I am not aware of an easy way to query if "I am DPL master" so @@ -308,6 +308,12 @@ void initTPC() auto& cdb = o2::tpc::CDBInterface::instance(); cdb.setUseDefaults(); + + // IMPORTANT: load ParameterGEM from CCDB + auto& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGEM), timestamp); + LOGP(info, "initTPC: TPC GEM param updated for time {}", timestamp); + o2::tpc::ParameterGEM::Instance().printKeyValues(true, true); // by invoking this constructor we make sure that a common file will be created // in future we should take this from OCDB and just forward per message const static auto& ampl = o2::tpc::GEMAmplification::instance(); @@ -592,7 +598,7 @@ WorkflowSpec defineDataProcessing(ConfigContext const& configcontext) if (isEnabled(o2::detectors::DetID::TPC)) { if (!helpasked && ismaster) { - initTPC(); + initTPC(hbfu.startTime); } tpcsectors = o2::RangeTokenizer::tokenize(configcontext.options().get("tpc-sectors")); From 947a1a8cc06cd2e694cb8e5beb561e9392629af5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:12:57 +0200 Subject: [PATCH 0499/1914] GPU TPC: Remove some obsolete code and track members (leftover from Run 2 by Sergey and totally forgotten), which were wasting performance --- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 9 -------- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 21 +------------------ 2 files changed, 1 insertion(+), 29 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 578fe1eeb4ca7..6ef2ed2ede668 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -41,9 +41,6 @@ class GPUTPCGMMergedTrack { return mAlpha; } - GPUd() float LastX() const { return mLastX; } - GPUd() float LastY() const { return mLastY; } - GPUd() float LastZ() const { 
return mLastZ; } GPUd() bool OK() const { return mFlags & 0x01; } GPUd() bool Looper() const { return mFlags & 0x02; } GPUd() bool CSide() const { return mFlags & 0x04; } @@ -55,9 +52,6 @@ class GPUTPCGMMergedTrack GPUd() void SetFirstClusterRef(int32_t v) { mFirstClusterRef = v; } GPUd() void SetParam(const GPUTPCGMTrackParam& v) { mParam = v; } GPUd() void SetAlpha(float v) { mAlpha = v; } - GPUd() void SetLastX(float v) { mLastX = v; } - GPUd() void SetLastY(float v) { mLastY = v; } - GPUd() void SetLastZ(float v) { mLastZ = v; } GPUd() void SetOK(bool v) { if (v) { @@ -110,9 +104,6 @@ class GPUTPCGMMergedTrack gputpcgmmergertypes::GPUTPCOuterParam mOuterParam; //* outer param float mAlpha; //* alpha angle - float mLastX; //* outer X - float mLastY; //* outer Y - float mLastZ; //* outer Z uint32_t mFirstClusterRef; //* index of the first track cluster in corresponding cluster arrays // TODO: Change to 8 bit uint32_t mNClusters; //* number of track clusters diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 0d8547263207b..366f75cb05e56 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -1143,26 +1143,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr track.Param() = t; track.Alpha() = Alpha; - if (track.OK()) { - int32_t ind = track.FirstClusterRef(); - const GPUParam& GPUrestrict() param = merger->Param(); - float alphaa = param.Alpha(merger->Clusters()[ind].sector); - float xx, yy, zz; - if (merger->Param().par.earlyTpcTransform) { - xx = merger->ClustersXYZ()[ind].x; - yy = merger->ClustersXYZ()[ind].y; - zz = merger->ClustersXYZ()[ind].z - track.Param().GetTZOffset(); - } else { - const ClusterNative& GPUrestrict() cl = merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[merger->Clusters()[ind].num]; - 
merger->GetConstantMem()->calibObjects.fastTransformHelper->Transform(merger->Clusters()[ind].sector, merger->Clusters()[ind].row, cl.getPad(), cl.getTime(), xx, yy, zz, track.Param().GetTZOffset()); - } - float sinA, cosA; - CAMath::SinCos(alphaa - track.Alpha(), sinA, cosA); - track.SetLastX(xx * cosA - yy * sinA); - track.SetLastY(xx * sinA + yy * cosA); - track.SetLastZ(zz); - // merger->DebugRefitMergedTrack(track); - } + // if (track.OK()) merger->DebugRefitMergedTrack(track); } GPUd() void GPUTPCGMTrackParam::Rotate(float alpha) From f3f10a25f8abaf9dcf62e764a963f68dfefdc6b4 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:17:56 +0200 Subject: [PATCH 0500/1914] GPU TPC: Rename some variables with misleading name --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 4 +-- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../GPUTPCCompressionKernels.cxx | 10 +++--- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +-- .../Global/GPUChainTrackingMerger.cxx | 12 +++---- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 32 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 4 +-- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 14 ++++---- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 6 ++-- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 4 +-- 12 files changed, 48 insertions(+), 48 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 8796f063abdc5..d3dd561dcea2f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -109,13 +109,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) 
{ - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NOutputTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); } template <> diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index 82834a694d0ba..ec1636dfe7f59 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -125,7 +125,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? 
Could remove GPUConstantMem.h include - mMaxTracks = mRec->GetConstantMem().tpcMerger.NOutputTracks(); + mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); } diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx index bba97e9eace9b..73b195e8f4fe4 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx @@ -201,7 +201,7 @@ GPUdii() void GPUTPCCompressionKernels::ThreadclusterOffset[iSector][iRow]; - const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; + const uint32_t idOffsetOut = clusters->clusterOffset[iSector][iRow] * compressor.mMaxClusterFactorBase1024 / 1024; // 32 bit enough for number of clusters per row * 1024 const uint32_t idOffsetOutMax = ((const uint32_t*)clusters->clusterOffset[iSector])[iRow + 1] * compressor.mMaxClusterFactorBase1024 / 1024; // Array out of bounds access is ok, since it goes to the correct nClustersTotal if (iThread == nThreads - 1) { smem.nCount = 0; @@ -214,7 +214,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread(clusters->nClusters[iSector][iRow]); for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) { const int32_t idx = idOffset + i; - int32_t cidx = 0; + int32_t storeCluster = 0; do { if (i >= clusters->nClusters[iSector][iRow]) { break; @@ -239,13 +239,13 @@ GPUdii() void GPUTPCCompressionKernels::ThreadtpcTrackers[i].NTrackHits(), processors()->tpcTrackers[i].NMaxTrackHits()); } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); - addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NOutputTracks(), processors()->tpcMerger.NMaxTracks()); + addToMap("TPC Tracks", usageMap, 
processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { @@ -181,7 +181,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTracks NCl %d NTrk %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTracks()); GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } - GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTracks()); + GPUInfo("MEMREL Tracks NCl %d NTrk %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); } diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 6e86be03e7950..bd1fa7796dadf 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -220,7 +220,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mOutputQueue.clear(); } - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); + runKernel(doGPU ? GetGrid(Merger.NMergedTracks(), 0) : GetGridAuto(0), mergerSortTracks ? 1 : 0); if (param().rec.tpc.retryRefit == 1) { runKernel(GetGridAuto(0), -1); } @@ -233,7 +233,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); if (param().rec.tpc.mergeLoopersAfterburner) { - runKernel(doGPU ? GetGrid(Merger.NOutputTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); + runKernel(doGPU ? 
GetGrid(Merger.NMergedTracks(), 0, deviceType) : GetGridAuto(0, deviceType)); if (doGPU) { TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0); SynchronizeStream(0); // TODO: could probably synchronize on an event after runKernel @@ -255,10 +255,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) throw std::runtime_error("QA Scratch buffer exceeded"); } } - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); + GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; if (param().dodEdxEnabled) { - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NOutputTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { @@ -326,7 +326,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } mIOPtrs.mergedTracks = Merger.OutputTracks(); - mIOPtrs.nMergedTracks = Merger.NOutputTracks(); + mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); @@ -340,7 +340,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (doGPU) { processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks(); - processorsShadow()->ioPtrs.nMergedTracks = 
Merger.NOutputTracks(); + processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index d2aba503be6a6..e96bbeee774bf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -415,7 +415,7 @@ int32_t GPUTPCGMMerger::CheckSectors() GPUd() void GPUTPCGMMerger::ClearTrackLinks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, bool output) { - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = iBlock * nThreads + iThread; i < n; i += nThreads * nBlocks) { mTrackLinks[i] = -1; } @@ -1271,7 +1271,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const ClusterNative* cls = Param().par.earlyTpcTransform ? 
nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { continue; @@ -1392,7 +1392,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } } - // for (int32_t i = 0;i < mMemory->nOutputTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks + // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks } namespace o2::gpu::internal @@ -1533,7 +1533,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = 0; for (int32_t ipart = 0; ipart < nParts; ipart++) { const GPUTPCGMSectorTrack* t = trackParts[ipart]; - CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nOutputTracks, ipart, t->QPt(), t->DzDs())); + CADEBUG(printf("Collect Track %d Part %d QPt %f DzDs %f\n", mMemory->nMergedTracks, ipart, t->QPt(), t->DzDs())); int32_t nTrackHits = t->NClusters(); trackCluster* c2 = trackClusters + nHits + nTrackHits - 1; for (int32_t i = 0; i < nTrackHits; i++, c2--) { @@ -1678,10 +1678,10 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread cl[i].leg = trackClusters[i].leg; } - uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nOutputTracks, 1u); + uint32_t iOutputTrack = CAMath::AtomicAdd(&mMemory->nMergedTracks, 1u); if (iOutputTrack >= mNMaxTracks) { raiseError(GPUErrors::ERROR_MERGER_TRACK_OVERFLOW, iOutputTrack, mNMaxTracks); - CAMath::AtomicExch(&mMemory->nOutputTracks, mNMaxTracks); + 
CAMath::AtomicExch(&mMemory->nMergedTracks, mNMaxTracks); continue; } @@ -1718,9 +1718,9 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread p1.QPt() = 100.f / Param().rec.bz0Pt10MeV; } - // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nOutputTracks, p1.QPt(), nParts, nHits); + // if (nParts > 1) printf("Merged %d: QPt %f %d parts %d hits\n", mMemory->nMergedTracks, p1.QPt(), nParts, nHits); - /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nOutputTracks)) + /*if (GPUQA::QAAvailable() && mRec->GetQA() && mRec->GetQA()->SuppressTrack(mMemory->nMergedTracks)) { mergedTrack.SetOK(0); mergedTrack.SetNClusters(0); @@ -1742,14 +1742,14 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUd() void GPUTPCGMMerger::SortTracksPrepare(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackOrderProcess[i] = i; } } GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackSort[i] = i; } } @@ -1784,7 +1784,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ ) // clang-format on }; - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nOutputTracks, comp); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, comp); #endif } @@ -1810,13 +1810,13 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int ) // 
clang-format on }; - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nOutputTracks, comp); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, comp); #endif } GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; if (trk.OK()) { @@ -1848,7 +1848,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { @@ -1858,7 +1858,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTracks; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; if (!trk.OK() || trk.NClusters() == 0) { continue; @@ -1893,7 +1893,7 @@ GPUd() void GPUTPCGMMerger::Finalize2(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { const float lowPtThresh = 
Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold - for (uint32_t i = get_global_id(0); i < mMemory->nOutputTracks; i += get_global_size(0)) { + for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { const auto& trk = mOutputTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 6c6e0e02a2dc2..6c9c14b557798 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -69,7 +69,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nRetryRefit; GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; - GPUAtomic(uint32_t) nOutputTracks; + GPUAtomic(uint32_t) nMergedTracks; GPUAtomic(uint32_t) nOutputTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; @@ -103,7 +103,7 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersOutputState(void* mem); void* SetPointersMemory(void* mem); - GPUhdi() int32_t NOutputTracks() const { return mMemory->nOutputTracks; } + GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; } GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } GPUhdi() const GPUdEdxInfo* OutputTracksdEdx() const { return mOutputTracksdEdx; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index ac55f423b1c42..02d0ac98b05b0 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -94,7 +94,7 @@ void GPUTPCGMMerger::DumpMergeRanges(std::ostream& out, int32_t withinSector, in void GPUTPCGMMerger::DumpTrackLinks(std::ostream& out, bool output, const char* type) const { out << "\nTPC Merger Links " << type 
<< "\n"; - const int32_t n = output ? mMemory->nOutputTracks : SectorTrackInfoLocalTotal(); + const int32_t n = output ? mMemory->nMergedTracks : SectorTrackInfoLocalTotal(); for (int32_t i = 0; i < n; i++) { if (mTrackLinks[i] != -1) { out << " " << i << ": " << mTrackLinks[i] << "\n"; @@ -138,7 +138,7 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Collected Tracks\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; const auto& p = trk.GetParam(); out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; @@ -150,7 +150,7 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const { DumpTrackLinks(out, true, " for CE merging"); out << "\nTPC Merger Merge CE\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; if (trk.CCE()) { out << " Track " << i << ": CCE\n"; @@ -162,11 +162,11 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const { out << "\nTPC Merger Refit Prepare\n"; out << " Sort\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { out << " " << i << ": " << mTrackOrderAttach[i] << "\n"; } out << " Clusters\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { const auto& trk = mOutputTracks[j]; out << " Track " << j << ": "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { @@ -195,7 +195,7 @@ void 
GPUTPCGMMerger::DumpRefit(std::ostream& out) const std::streamsize ss = out.precision(); out << std::setprecision(2); out << "\nTPC Merger Refit\n"; - for (uint32_t i = 0; i < mMemory->nOutputTracks; i++) { + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { const auto& trk = mOutputTracks[i]; if (trk.NClusters() == 0) { continue; @@ -212,7 +212,7 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; - for (uint32_t j = 0; j < mMemory->nOutputTracks; j++) { + for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { const auto& trk = mOutputTracks[j]; if (trk.NClusters() == 0) { continue; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index d72d59a6250e7..68763b3549547 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -21,7 +21,7 @@ using namespace o2::gpu; template <> GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger, int32_t mode) { - const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NOutputTracks(); + const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NMergedTracks(); GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? 
merger.TrackOrderProcess()[ii] : ii; GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 9ead17ea5c7c0..72e9f63e5da83 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -38,7 +38,7 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - const uint32_t nTracks = merger.NOutputTracks(); + const uint32_t nTracks = merger.NMergedTracks(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); const GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx index 78eea63edecdd..ebc9d22560524 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx @@ -34,7 +34,7 @@ void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPC } seeds->Clear(); int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; @@ -112,7 +112,7 @@ void GPUTPCGMTracksToTPCSeeds::UpdateParamsOuter(TObjArray* seeds) return; } int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; @@ -134,7 +134,7 @@ void GPUTPCGMTracksToTPCSeeds::UpdateParamsInner(TObjArray* seeds) return; } int32_t index = 0; - for (int32_t i = 0; i < merger->NOutputTracks(); i++) { + for (int32_t i = 
0; i < merger->NMergedTracks(); i++) { const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; if (!track.OK()) { continue; diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index a21593b7ba9e9..e63bb82a9b09e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -100,7 +100,7 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread Date: Tue, 13 May 2025 10:25:11 +0200 Subject: [PATCH 0501/1914] GPU: Add additional optional debbug dumps for validation --- GPU/GPUTracking/Definitions/GPUSettingsList.h | 2 +- .../Global/GPUChainTrackingDebug.h | 45 ++++++++++--------- .../Global/GPUChainTrackingMerger.cxx | 7 +-- .../Global/GPUChainTrackingSectorTracker.cxx | 4 +- GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 1 + GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 15 ++++++- 7 files changed, 47 insertions(+), 28 deletions(-) diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9e0aa32155f0d..9400a429fca81 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -295,7 +295,7 @@ AddOption(trdNCandidates, int32_t, 3, "", 0, "Number of branching track candidat AddOption(trdTrackModelO2, bool, false, "", 0, "Use O2 track model instead of GPU track model for TRD tracking") AddOption(debugLevel, int32_t, -1, "debug", 'd', "Set debug level (-2 = silent, -1 = autoselect (-2 for O2, 0 for standalone))") AddOption(allocDebugLevel, int32_t, 0, "allocDebug", 0, "Some debug output for memory allocations (without messing with normal debug level)") -AddOption(debugMask, uint32_t, 262143, "", 0, "Mask for debug output dumps to file") +AddOption(debugMask, uint32_t, (1 << 18) - 1, "debugMask", 0, "Mask for debug output dumps to file") AddOption(debugLogSuffix, 
std::string, "", "debugSuffix", 0, "Suffix for debug log files with --debug 6") AddOption(serializeGPU, int8_t, 0, "", 0, "Synchronize after each kernel call (bit 1) and DMA transfer (bit 2) and identify failures") AddOption(recoTaskTiming, bool, 0, "", 0, "Perform summary timing after whole reconstruction tasks") diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h index 810f40a1d8654..6c995f65f3dd3 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebug.h +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebug.h @@ -23,28 +23,29 @@ namespace o2::gpu { // NOTE: Values below 262144 are activated by default with --debug 6 in GPUSettingsList.h::debugMask enum GPUChainTrackingDebugFlags : uint32_t { - TPCSectorTrackingData = 1, - TPCPreLinks = 2, - TPCLinks = 4, - TPCStartHits = 8, - TPCTracklets = 16, - TPCSectorTracks = 32, - TPCHitWeights = 256, - TPCCompressedClusters = 512, - TPCDecompressedClusters = 1024, - TPCMergingRanges = 2048, - TPCMergingSectorTracks = 4096, - TPCMergingMergedTracks = 8192, - TPCMergingCollectedTracks = 16384, - TPCMergingCE = 32768, - TPCMergingRefit = 65536, - TPCClustererClusters = 131072, - TPCClusterer = 262144, - TPCClustererDigits = 262144 << 1, - TPCClustererPeaks = 262144 << 2, - TPCClustererSuppressedPeaks = 262144 << 3, - TPCClustererChargeMap = 262144 << 4, - TPCClustererZeroedCharges = 262144 << 5 + TPCSectorTrackingData = 1 << 0, + TPCPreLinks = 1 << 1, + TPCLinks = 1 << 2, + TPCStartHits = 1 << 3, + TPCTracklets = 1 << 4, + TPCSectorTracks = 1 << 5, + TPCHitWeights = 1 << 6, + TPCMergingRanges = 1 << 7, + TPCMergingSectorTracks = 1 << 8, + TPCMergingMatching = 1 << 9, + TPCMergingCollectedTracks = 1 << 10, + TPCMergingCE = 1 << 11, + TPCMergingPrepareFit = 1 << 12, + TPCMergingRefit = 1 << 13, + TPCMergingLoopers = 1 << 14, + TPCCompressedClusters = 1 << 15, + TPCDecompressedClusters = 1 << 16, + TPCClustererClusters = 1 << 17, + TPCClustererDigits = 1 << 18, + 
TPCClustererPeaks = 1 << 19, + TPCClustererSuppressedPeaks = 1 << 20, + TPCClustererChargeMap = 1 << 21, + TPCClustererZeroedCharges = 1 << 22 }; template diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index bd1fa7796dadf..df80eabfb8761 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -143,7 +143,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); RunTPCTrackingMerger_MergeBorderTracks(1, 0, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedWithinSectors, *mDebugFile); runKernel(GetGridAuto(0, deviceType), false); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -158,7 +158,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) runKernel(GetGridBlk(std::max(2u, numBlocks), 0, deviceType), 0, 1, 1); RunTPCTrackingMerger_MergeBorderTracks(0, -1, deviceType); RunTPCTrackingMerger_Resolve(0, 1, deviceType); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMergedTracks, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingMatching, doGPU, Merger, &GPUTPCGMMerger::DumpMergedBetweenSectors, *mDebugFile); runKernel({{1, -WarpSize(), 0, deviceType, RecoStep::TPCMerging}}, MergerShadowAll.TmpCounter(), 2 * NSECTORS * sizeof(*MergerShadowAll.TmpCounter())); @@ -202,7 +202,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool 
synchronizeOutput) runKernel(GetGridAuto(0, deviceType)); runKernel(GetGridAuto(0, deviceType)); - DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingPrepareFit, doGPU, Merger, &GPUTPCGMMerger::DumpFitPrepare, *mDebugFile); if (doGPU) { CondWaitEvent(waitForTransfer, &mEvents->single); @@ -240,6 +240,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } runKernel(GetGridAuto(0, deviceType)); runKernel(doGPU ? GetGrid(Merger.Memory()->nLooperMatchCandidates, 0, deviceType) : GetGridAuto(0, deviceType)); + DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingLoopers, Merger, &GPUTPCGMMerger::DumpLoopers, *mDebugFile); } DoDebugAndDump(RecoStep::TPCMerging, GPUChainTrackingDebugFlags::TPCMergingRefit, doGPU, Merger, &GPUTPCGMMerger::DumpFinal, *mDebugFile); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx index ef38d53173c2b..67ef402961a20 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingSectorTracker.cxx @@ -176,7 +176,9 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() } if (GetProcessingSettings().debugLevel >= 6) { - *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + if ((GetProcessingSettings().debugMask & 63)) { + *mDebugFile << "\n\nReconstruction: Sector " << iSector << "/" << NSECTORS << std::endl; + } if (GetProcessingSettings().debugMask & GPUChainTrackingDebugFlags::TPCSectorTrackingData) { if (doGPU) { TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &trk, -1, true); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h index 6ef2ed2ede668..73b14ba1b2fdf 100644 --- 
a/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergedTrack.h @@ -95,6 +95,7 @@ class GPUTPCGMMergedTrack GPUd() void SetFlags(uint8_t v) { mFlags = v; } GPUd() void SetLegs(uint8_t v) { mLegs = v; } GPUd() uint8_t Legs() const { return mLegs; } + GPUd() uint8_t Flags() const { return mFlags; } GPUd() const gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() const { return mOuterParam; } GPUd() gputpcgmmergertypes::GPUTPCOuterParam& OuterParam() { return mOuterParam; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 6c9c14b557798..ae85f20b17b48 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -201,6 +201,7 @@ class GPUTPCGMMerger : public GPUProcessor void DumpFitPrepare(std::ostream& out) const; void DumpRefit(std::ostream& out) const; void DumpFinal(std::ostream& out) const; + void DumpLoopers(std::ostream& out) const; template void MergedTrackStreamerInternal(const GPUTPCGMBorderTrack& b1, const GPUTPCGMBorderTrack& b2, const char* name, int32_t sector1, int32_t sector2, int32_t mergeMode, float weight, float frac) const; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 02d0ac98b05b0..3be32a2d87610 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -204,11 +204,24 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) - << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << "\n"; + << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] + << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; } out << std::setprecision(ss); } +void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const +{ + out << "\n TPC Merger Looper Afterburner\n"; + for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { + if (i && i % 100 == 0) { + out << "\n"; + } + out << (int)mOutputTracks[i].MergedLooper() << " "; + } + out << "\n"; +} + void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; From d20b540fb0b5693ad6fa39b128d589160c91163f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:25:47 +0200 Subject: [PATCH 0502/1914] GPU TPC: Fix deterministic mode for TPC cluster compression / decompression / looper merging afterburner --- .../Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc | 4 ++-- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index d3dd561dcea2f..1d633eb5e748f 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -82,14 +82,14 @@ struct GPUTPCGMMergerSortTracksQPt_comp { struct GPUTPCGMMergerMergeLoopers_comp { GPUd() bool operator()(const MergeLooperParam& a, const MergeLooperParam& b) { - return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); + return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? 
CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); } }; struct GPUTPCGMO2OutputSort_comp { GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) { - return (a.y > b.y); + return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); } }; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index e96bbeee774bf..99ef548b2d78e 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1948,7 +1948,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, if (iThread || iBlock) { return; } - auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return CAMath::Abs(a.refz) < CAMath::Abs(b.refz); }; + auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); }; GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp); #endif } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 72e9f63e5da83..624c9ab487c8d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -88,7 +88,7 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, return; } GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); - auto comp = [](const auto& a, const auto& b) { return (a.y > b.y); }; + auto comp = [](const auto& a, const auto& b) { return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? 
a.y > b.y : a.x > b.x, a.y > b.y); }; GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, comp); #endif } From 7732f5c426049d1eba9711b31626d4fb86d701b7 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 10:51:13 +0200 Subject: [PATCH 0503/1914] GPU: Deduplicate sort comparisons: Use structs, since both hipcub and rocthrust do not work with lambdas for some reason --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 82 ---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 143 ++++++++++-------- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 20 ++- 3 files changed, 98 insertions(+), 147 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 1d633eb5e748f..44cde3d4ac48a 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -14,88 +14,6 @@ #if defined(GPUCA_SPECIALIZE_THRUST_SORTS) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) -namespace o2::gpu::internal -{ -namespace // anonymous -{ -struct MergeBorderTracks_compMax { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); - } -}; -struct MergeBorderTracks_compMin { - GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) - { - return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); - } -}; - -struct GPUTPCGMMergerSortTracks_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerSortTracksQPt_comp { - const GPUTPCGMMergedTrack* const mCmp; - GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} - GPUd() bool operator()(const int32_t aa, const int32_t bb) - { - const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - } -}; - -struct GPUTPCGMMergerMergeLoopers_comp { - GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) - { - return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); - } -}; - -struct GPUTPCGMO2OutputSort_comp { - GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) - { - return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); - } -}; - -} // anonymous namespace -} // namespace o2::gpu::internal - template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz, GPUTPCGMBorderRange* const& range, int32_t const& N, int32_t const& cmpMax) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 99ef548b2d78e..b12375a10023a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -59,17 +59,13 @@ #include "SimulationDataFormat/MCCompLabel.h" #endif -namespace o2::gpu::internal -{ -} +static constexpr int32_t kMaxParts = 400; +static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; + using namespace o2::gpu; -using namespace o2::gpu::internal; using namespace o2::tpc; using namespace gputpcgmmergertypes; -static constexpr int32_t kMaxParts = 400; -static constexpr int32_t kMaxClusters = GPUCA_MERGER_MAX_TRACK_CLUSTERS; - namespace o2::gpu::internal { struct MergeLooperParam { @@ -78,8 +74,79 @@ struct MergeLooperParam { float y; uint32_t id; }; + +struct MergeBorderTracks_compMax { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); + } +}; +struct MergeBorderTracks_compMin { + GPUd() bool operator()(const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) + { + return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); + } +}; + +struct GPUTPCGMMergerSortTracks_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracks_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + if (a.CCE() != b.CCE()) { + return a.CCE() > b.CCE(); + } + if (a.Legs() != b.Legs()) { + return a.Legs() > b.Legs(); + } + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (a.NClusters() != b.NClusters()) { + return a.NClusters() > b.NClusters(); + } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return aa > bb; + , // !GPUCA_DETERMINISTIC_CODE + return a.NClusters() > b.NClusters(); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerSortTracksQPt_comp { + const GPUTPCGMMergedTrack* const mCmp; + GPUhd() GPUTPCGMMergerSortTracksQPt_comp(GPUTPCGMMergedTrack* cmp) : mCmp(cmp) {} + GPUd() bool operator()(const int32_t aa, const int32_t bb) + { + const GPUTPCGMMergedTrack& GPUrestrict() a = mCmp[aa]; + const GPUTPCGMMergedTrack& GPUrestrict() b = mCmp[bb]; + GPUCA_DETERMINISTIC_CODE( // clang-format off + if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + } if (a.GetParam().GetY() != b.GetParam().GetY()) { + return a.GetParam().GetY() > b.GetParam().GetY(); + } + return a.GetParam().GetZ() > b.GetParam().GetZ(); + , // !GPUCA_DETERMINISTIC_CODE + return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); + ) // clang-format on + } +}; + +struct GPUTPCGMMergerMergeLoopers_comp { + GPUd() bool operator()(const 
MergeLooperParam& a, const MergeLooperParam& b) + { + return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); + } +}; + } // namespace o2::gpu::internal +using namespace o2::gpu::internal; + #ifndef GPUCA_GPUCODE #include "GPUQA.h" @@ -742,11 +809,11 @@ template <> GPUd() void GPUTPCGMMerger::MergeBorderTracks<3>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUTPCGMBorderRange* range, int32_t N, int32_t cmpMax) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread == 0) { + if (iThread == 0 && iBlock == 0) { if (cmpMax) { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMax != b.fMax) ? (a.fMax < b.fMax) : (a.fId < b.fId), a.fMax < b.fMax); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMax()); } else { - GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, [](const GPUTPCGMBorderRange& a, const GPUTPCGMBorderRange& b) { return GPUCA_DETERMINISTIC_CODE((a.fMin != b.fMin) ? 
(a.fMin < b.fMin) : (a.fId < b.fId), a.fMin < b.fMin); }); + GPUCommonAlgorithm::sortDeviceDynamic(range, range + N, MergeBorderTracks_compMin()); } } #endif @@ -1757,60 +1824,18 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit0(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mOutputTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - if (a.CCE() != b.CCE()) { - return a.CCE() > b.CCE(); - } - if (a.Legs() != b.Legs()) { - return a.Legs() > b.Legs(); - } - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (a.NClusters() != b.NClusters()) { - return a.NClusters() > b.NClusters(); - } if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return aa > bb; - , // !GPUCA_DETERMINISTIC_CODE - return a.NClusters() > b.NClusters(); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, comp); #endif } GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort 
+ mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mOutputTracks)); } - // TODO: Fix this: Have to duplicate sort comparison: Thrust cannot use the Lambda but OpenCL cannot use the object - auto comp = [cmp = mOutputTracks](const int32_t aa, const int32_t bb) { - const GPUTPCGMMergedTrack& GPUrestrict() a = cmp[aa]; - const GPUTPCGMMergedTrack& GPUrestrict() b = cmp[bb]; - GPUCA_DETERMINISTIC_CODE( // clang-format off - if (CAMath::Abs(a.GetParam().GetQPt()) != CAMath::Abs(b.GetParam().GetQPt())) { - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - } if (a.GetParam().GetY() != b.GetParam().GetY()) { - return a.GetParam().GetY() > b.GetParam().GetY(); - } - return a.GetParam().GetZ() > b.GetParam().GetZ(); - , // !GPUCA_DETERMINISTIC_CODE - return CAMath::Abs(a.GetParam().GetQPt()) > CAMath::Abs(b.GetParam().GetQPt()); - ) // clang-format on - }; - - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, comp); #endif } @@ -1945,11 +1970,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, GPUd() void GPUTPCGMMerger::MergeLoopersSort(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, GPUTPCGMMergerMergeLoopers_comp()); } - auto comp = [](const MergeLooperParam& a, const MergeLooperParam& b) { return GPUCA_DETERMINISTIC_CODE(CAMath::Abs(a.refz) != CAMath::Abs(b.refz) ? 
CAMath::Abs(a.refz) < CAMath::Abs(b.refz) : a.id < b.id, CAMath::Abs(a.refz) < CAMath::Abs(b.refz)); }; - GPUCommonAlgorithm::sortDeviceDynamic(mLooperCandidates, mLooperCandidates + mMemory->nLooperMatchCandidates, comp); #endif } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 624c9ab487c8d..1e08058fb22dd 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -34,6 +34,18 @@ using namespace o2::tpc::constants; GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit; } GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } +namespace o2::gpu::internal +{ + +struct GPUTPCGMO2OutputSort_comp { + GPUd() bool operator()(const GPUTPCGMMerger::tmpSort& a, const GPUTPCGMMerger::tmpSort& b) + { + return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? 
a.y > b.y : a.x > b.x, a.y > b.y); + } +}; + +} // namespace o2::gpu::internal + template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { @@ -84,12 +96,10 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS - if (iThread || iBlock) { - return; + if (iThread == 0 && iBlock == 0) { + GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); + GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, internal::GPUTPCGMO2OutputSort_comp()); } - GPUTPCGMMerger::tmpSort* GPUrestrict() trackSort = merger.TrackSortO2(); - auto comp = [](const auto& a, const auto& b) { return GPUCA_DETERMINISTIC_CODE(a.y != b.y ? a.y > b.y : a.x > b.x, a.y > b.y); }; - GPUCommonAlgorithm::sortDeviceDynamic(trackSort, trackSort + merger.Memory()->nO2Tracks, comp); #endif } From f75693ddbe0b19eb445da5a9d9972f73fdd86b96 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 11:04:35 +0200 Subject: [PATCH 0504/1914] GPU: Remove obsolete files used for tests in Run 2 --- .../Merger/GPUTPCGMTracksToTPCSeeds.cxx | 149 ------------------ .../Merger/GPUTPCGMTracksToTPCSeeds.h | 29 ---- 2 files changed, 178 deletions(-) delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx delete mode 100644 GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx deleted file mode 100644 index ebc9d22560524..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.cxx +++ /dev/null @@ -1,149 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. 
-// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. - -/// \file GPUTPCGMTracksToTPCSeeds.cxx -/// \author David Rohr - -#include "GPUTPCGMTracksToTPCSeeds.h" -#include "GPUTPCGlobalMergerComponent.h" -#include "GPUTPCGMMergerTypes.h" -#include "GPUTPCGMMerger.h" -#include "GPULogging.h" -#include "AliTPCtracker.h" -#include "AliTPCtrack.h" -#include "AliTPCseed.h" -#include "AliTPCtrackerSector.h" -#include "TObjArray.h" -#include "AliTPCclusterMI.h" - -using namespace o2::gpu; - -void GPUTPCGMTracksToTPCSeeds::CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - seeds->Clear(); - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - - AliTPCtrack tr; - tr.Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - AliTPCseed* seed = new (tpctracker->NextFreeSeed()) AliTPCseed(tr); - for (int32_t j = 0; j < GPUCA_ROW_COUNT; j++) { - seed->SetClusterPointer(j, nullptr); - seed->SetClusterIndex(j, -1); - } - int32_t ncls = 0; - int32_t lastrow = -1; - int32_t lastleg = -1; - for (int32_t j = track.NClusters() - 1; j >= 0; j--) { - const GPUTPCGMMergedTrackHit& cls = merger->Clusters()[track.FirstClusterRef() + j]; - if (cls.state & GPUTPCGMMergedTrackHit::flagReject) { - continue; - } - if (lastrow != -1 && (cls.row < lastrow 
|| cls.leg != lastleg)) { - break; - } - if (cls.row == lastrow) { - continue; - } - - AliTPCtrackerRow& row = tpctracker->GetRow(cls.sector % 18, cls.row); - uint32_t clIndexOffline = 0; - AliTPCclusterMI* clOffline = row.FindNearest2(cls.y, cls.z, 0.01f, 0.01f, clIndexOffline); - if (!clOffline) { - continue; - } - clIndexOffline = row.GetIndex(clIndexOffline); - - clOffline->Use(10); - seed->SetClusterPointer(cls.row, clOffline); - seed->SetClusterIndex2(cls.row, clIndexOffline); - - lastrow = cls.row; - lastleg = cls.leg; - ncls++; - } - - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - seed->SetNumberOfClusters(ncls); - seed->SetNFoundable(ncls); - seed->SetChi2(track.GetParam().GetChi2()); - - float alpha = seed->GetAlpha(); - if (alpha >= 2.f * M_PI) { - alpha -= 2.f * M_PI; - } - if (alpha < 0) { - alpha += 2.f * M_PI; - } - seed->SetRelativeSector(track.GetAlpha() / (M_PI / 9.f)); - - seed->SetPoolID(tpctracker->GetLastSeedId()); - seed->SetIsSeeding(kTRUE); - seed->SetSeed1(GPUCA_ROW_COUNT - 1); - seed->SetSeed2(GPUCA_ROW_COUNT - 2); - seed->SetSeedType(0); - seed->SetFirstPoint(-1); - seed->SetLastPoint(-1); - seeds->AddLast(seed); // note, track is seed, don't free the seed - index++; - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsOuter(TObjArray* seeds) -{ - const GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - const gputpcgmmergertypes::GPUTPCOuterParam& param = track.OuterParam(); - seed->Set(param.X, param.alpha, param.P, param.C); - } -} - -void GPUTPCGMTracksToTPCSeeds::UpdateParamsInner(TObjArray* seeds) -{ - const 
GPUTPCGMMerger* merger = GPUTPCGlobalMergerComponent::GetCurrentMerger(); - if (merger == nullptr) { - return; - } - int32_t index = 0; - for (int32_t i = 0; i < merger->NMergedTracks(); i++) { - const GPUTPCGMMergedTrack& track = merger->OutputTracks()[i]; - if (!track.OK()) { - continue; - } - if (index > seeds->GetEntriesFast()) { - GPUError("Invalid number of offline seeds"); - return; - } - AliTPCseed* seed = (AliTPCseed*)seeds->UncheckedAt(index++); - seed->Set(track.GetParam().GetX(), track.GetAlpha(), track.GetParam().GetPar(), track.GetParam().GetCov()); - } -} diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h b/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h deleted file mode 100644 index 029cb108d4119..0000000000000 --- a/GPU/GPUTracking/Merger/GPUTPCGMTracksToTPCSeeds.h +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2019-2020 CERN and copyright holders of ALICE O2. -// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -// All rights not expressly granted are reserved. -// -// This software is distributed under the terms of the GNU General Public -// License v3 (GPL Version 3), copied verbatim in the file "COPYING". -// -// In applying this license CERN does not waive the privileges and immunities -// granted to it by virtue of its status as an Intergovernmental Organization -// or submit itself to any jurisdiction. 
- -/// \file GPUTPCGMTracksToTPCSeeds.h -/// \author David Rohr - -#ifndef GPUTPCGMTRACKSTOTPCSEEDS_H -#define GPUTPCGMTRACKSTOTPCSEEDS_H - -class TObjArray; -class AliTPCtracker; - -class GPUTPCGMTracksToTPCSeeds -{ - public: - static void CreateSeedsFromHLTTracks(TObjArray* seeds, AliTPCtracker* tpctracker); - static void UpdateParamsOuter(TObjArray* seeds); - static void UpdateParamsInner(TObjArray* seeds); -}; - -#endif From 747fb860184729b2d219e7b0a044d09e15c7a1b5 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 11:04:56 +0200 Subject: [PATCH 0505/1914] GPU TPC: Some more member variable renaming --- ...GPUReconstructionCUDAKernelsSpecialize.inc | 4 +- .../Global/GPUChainTrackingMerger.cxx | 8 ++-- .../Global/GPUChainTrackingRefit.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 38 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 18 ++++----- GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx | 14 +++---- GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 10 ++--- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 12 +++--- .../Merger/GPUTPCGlobalDebugSortKernels.cxx | 10 ++--- 10 files changed, 59 insertions(+), 59 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc index 44cde3d4ac48a..85567d70d70d6 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernelsSpecialize.inc @@ -27,13 +27,13 @@ inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), 
GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackOrderProcess(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracks_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> inline void GPUCA_M_CAT(GPUReconstruction, GPUCA_GPUTYPE)::runKernelBackendTimed(const krnlSetupTime& _xyz) { - GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.OutputTracks())); + GPUCommonAlgorithm::sortOnDevice(this, _xyz.x.stream, mProcessorsShadow->tpcMerger.TrackSort(), processors()->tpcMerger.NMergedTracks(), GPUTPCGMMergerSortTracksQPt_comp(mProcessorsShadow->tpcMerger.MergedTracks())); } template <> diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index df80eabfb8761..2b3d719a27dea 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -256,10 +256,10 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) throw std::runtime_error("QA Scratch buffer exceeded"); } } - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracks(), MergerShadowAll.OutputTracks(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracks()), outputStream, 0, nullptr, waitEvent); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracks(), MergerShadowAll.MergedTracks(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracks()), outputStream, 0, nullptr, waitEvent); waitEvent = nullptr; if (param().dodEdxEnabled) { - GPUMemCpy(RecoStep::TPCMerging, Merger.OutputTracksdEdx(), MergerShadowAll.OutputTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.OutputTracksdEdx()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), 
Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { @@ -326,7 +326,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mRec->ReturnVolatileDeviceMemory(); } - mIOPtrs.mergedTracks = Merger.OutputTracks(); + mIOPtrs.mergedTracks = Merger.MergedTracks(); mIOPtrs.nMergedTracks = Merger.NMergedTracks(); mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); @@ -340,7 +340,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.outputTracksTPCO2MC = Merger.OutputTracksTPCO2MC(); if (doGPU) { - processorsShadow()->ioPtrs.mergedTracks = MergerShadow.OutputTracks(); + processorsShadow()->ioPtrs.mergedTracks = MergerShadow.MergedTracks(); processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx index 4662b5464f710..5ca20a39d0462 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingRefit.cxx @@ -33,7 +33,7 @@ int32_t GPUChainTracking::RunRefit() SetupGPUProcessor(&Refit, false); RefitShadow.SetPtrsFromGPUConstantMem(processorsShadow(), doGPU ? &processorsDevice()->param : nullptr); RefitShadow.SetPropagator(doGPU ? processorsShadow()->calibObjects.o2Propagator : GetO2Propagator()); - RefitShadow.mPTracks = (doGPU ? processorsShadow() : processors())->tpcMerger.OutputTracks(); + RefitShadow.mPTracks = (doGPU ? 
processorsShadow() : processors())->tpcMerger.MergedTracks(); WriteToConstantMemory(RecoStep::Refit, (char*)&processors()->trackingRefit - (char*)processors(), &RefitShadow, sizeof(RefitShadow), 0); // TransferMemoryResourcesToGPU(RecoStep::Refit, &Refit, 0); if (param().rec.trackingRefitGPUModel) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index b12375a10023a..f1a0816529c3a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -365,11 +365,11 @@ void* GPUTPCGMMerger::SetPointersRefitScratch(void* mem) void* GPUTPCGMMerger::SetPointersOutput(void* mem) { - computePointerWithAlignment(mem, mOutputTracks, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracks, mNMaxTracks); if (mRec->GetParam().dodEdxEnabled) { - computePointerWithAlignment(mem, mOutputTracksdEdx, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdx, mNMaxTracks); if (mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMask != mRec->GetParam().rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - computePointerWithAlignment(mem, mOutputTracksdEdxAlt, mNMaxTracks); + computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); @@ -1318,7 +1318,7 @@ GPUd() void GPUTPCGMMerger::MergeCEFill(const GPUTPCGMSectorTrack* track, const const float x0 = GPUTPCGeometry::Row2X(attempt == 0 ? 63 : cls.row); if (track->TransportToX(this, x0, Param().bzCLight, b, GPUCA_MAX_SIN_PHI_LOW)) { b.SetTrackID(itr); - b.SetNClusters(mOutputTracks[itr].NClusters()); + b.SetNClusters(mMergedTracks[itr].NClusters()); if (CAMath::Abs(b.Cov()[4]) >= 0.5f) { b.SetCov(4, 0.5f); // TODO: Is this needed and better than the cut in BorderTrack? } @@ -1339,11 +1339,11 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i { const ClusterNative* cls = Param().par.earlyTpcTransform ? 
nullptr : mConstantMem->ioPtrs.clustersNative->clustersLinear; for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { - if (mOutputTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { + if (mMergedTracks[i].CSide() == 0 && mTrackLinks[i] >= 0) { if (mTrackLinks[mTrackLinks[i]] != (int32_t)i) { continue; } - GPUTPCGMMergedTrack* trk[2] = {&mOutputTracks[i], &mOutputTracks[mTrackLinks[i]]}; + GPUTPCGMMergedTrack* trk[2] = {&mMergedTracks[i], &mMergedTracks[mTrackLinks[i]]}; if (!trk[1]->OK() || trk[1]->CCE()) { continue; @@ -1459,7 +1459,7 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i } } - // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mOutputTracks[i].CCE() == false) {mOutputTracks[i].SetNClusters(0);mOutputTracks[i].SetOK(false);}} //Remove all non-CE tracks + // for (int32_t i = 0;i < mMemory->nMergedTracks;i++) {if (mMergedTracks[i].CCE() == false) {mMergedTracks[i].SetNClusters(0);mMergedTracks[i].SetOK(false);}} //Remove all non-CE tracks } namespace o2::gpu::internal @@ -1752,7 +1752,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread continue; } - GPUTPCGMMergedTrack& mergedTrack = mOutputTracks[iOutputTrack]; + GPUTPCGMMergedTrack& mergedTrack = mMergedTracks[iOutputTrack]; mergedTrack.SetFlags(0); mergedTrack.SetOK(1); @@ -1825,7 +1825,7 @@ GPUd() void GPUTPCGMMerger::SortTracks(int32_t nBlocks, int32_t nThreads, int32_ { #ifndef GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0 && iBlock == 0) { - GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mOutputTracks)); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackOrderProcess, mTrackOrderProcess + mMemory->nMergedTracks, GPUTPCGMMergerSortTracks_comp(mMergedTracks)); } #endif } @@ -1834,7 +1834,7 @@ GPUd() void GPUTPCGMMerger::SortTracksQPt(int32_t nBlocks, int32_t nThreads, int { #ifndef 
GPUCA_SPECIALIZE_THRUST_SORTS if (iThread == 0 && iBlock == 0) { - GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mOutputTracks)); + GPUCommonAlgorithm::sortDeviceDynamic(mTrackSort, mTrackSort + mMemory->nMergedTracks, GPUTPCGMMergerSortTracksQPt_comp(mMergedTracks)); } #endif } @@ -1843,7 +1843,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nBlocks * nThreads) { mTrackOrderAttach[mTrackSort[i]] = i; - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (trk.OK()) { for (uint32_t j = 0; j < trk.NClusters(); j++) { mClusterAttachment[mClusters[trk.FirstClusterRef() + j].num] = attachAttached | attachGood; @@ -1884,7 +1884,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { - const GPUTPCGMMergedTrack& trk = mOutputTracks[i]; + const GPUTPCGMMergedTrack& trk = mMergedTracks[i]; if (!trk.OK() || trk.NClusters() == 0) { continue; } @@ -1919,7 +1919,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersInit(int32_t nBlocks, int32_t nThreads, { const float lowPtThresh = Param().rec.tpc.rejectQPtB5 * 1.1f; // Might need to merge tracks above the threshold with parts below the threshold for (uint32_t i = get_global_id(0); i < mMemory->nMergedTracks; i += get_global_size(0)) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); const float qptabs = CAMath::Abs(p.GetQPt()); if (trk.NClusters() && qptabs * Param().qptB5Scaler > 5.f && qptabs * Param().qptB5Scaler <= lowPtThresh) { @@ -1983,7 +1983,7 @@ GPUd() void 
GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, #if GPUCA_MERGE_LOOPER_MC && !defined(GPUCA_GPUCODE) std::vector paramLabels(mMemory->nLooperMatchCandidates); for (uint32_t i = 0; i < mMemory->nLooperMatchCandidates; i++) { - paramLabels[i] = GetTrackLabel(mOutputTracks[params[i].id]); + paramLabels[i] = GetTrackLabel(mMergedTracks[params[i].id]); } /*std::vector dropped(mMemory->nLooperMatchCandidates); std::vector droppedMC(mMemory->nLooperMatchCandidates); @@ -2005,8 +2005,8 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, // bs |= 1; continue; } - const auto& trk1 = mOutputTracks[params[i].id]; - const auto& trk2 = mOutputTracks[params[j].id]; + const auto& trk1 = mMergedTracks[params[i].id]; + const auto& trk2 = mMergedTracks[params[j].id]; const auto& param1 = trk1.GetParam(); const auto& param2 = trk2.GetParam(); if (CAMath::Abs(param1.GetDzDs()) > 0.03f && CAMath::Abs(param2.GetDzDs()) > 0.03f && param1.GetDzDs() * param2.GetDzDs() * param1.GetQPt() * param2.GetQPt() < 0) { @@ -2045,7 +2045,7 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, const int64_t label2 = paramLabels[j]; bool labelEQ = label1 != -1 && label1 == label2; if (1 || EQ || labelEQ) { - // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mOutputTracks[params[i].id].CSide(), (int32_t)mOutputTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, params[i].y, params[j].y); + // printf("Matching track %d/%d %u-%u (%ld/%ld): dist %f side %d %d, tgl %f %f, qpt %f %f, x %f %f, y %f %f\n", (int32_t)EQ, (int32_t)labelEQ, i, j, label1, label2, d, (int32_t)mMergedTracks[params[i].id].CSide(), (int32_t)mMergedTracks[params[j].id].CSide(), params[i].tgl, params[j].tgl, params[i].qpt, params[j].qpt, params[i].x, params[j].x, 
params[i].y, params[j].y); static auto& tup = GPUROOTDump::get("mergeloopers", "labeleq:sides:d2xy:tgl1:tgl2:qpt1:qpt2:dz:dzcorr:dtgl:dqpt:dznorm:bs"); tup.Fill((float)labelEQ, (trk1.CSide() ? 1 : 0) | (trk2.CSide() ? 2 : 0), d2xy, param1.GetDzDs(), param2.GetDzDs(), param1.GetQPt(), param2.GetQPt(), CAMath::Abs(params[j].refz) - CAMath::Abs(params[i].refz), dzcorr, dtgl, dqpt, dznorm, bs); static auto tup2 = GPUROOTDump::getNew("mergeloopers2", "labeleq:refz1:refz2:tgl1:tgl2:qpt1:qpt2:snp1:snp2:a1:a2:dzn:phasecor:phasedir:dzcorr"); @@ -2063,9 +2063,9 @@ GPUd() void GPUTPCGMMerger::MergeLoopersMain(int32_t nBlocks, int32_t nThreads, }*/ #endif if (EQ) { - mOutputTracks[params[j].id].SetMergedLooper(true); + mMergedTracks[params[j].id].SetMergedLooper(true); if (CAMath::Abs(param2.GetQPt() * Param().qptB5Scaler) >= Param().rec.tpc.rejectQPtB5) { - mOutputTracks[params[i].id].SetMergedLooper(true); + mMergedTracks[params[i].id].SetMergedLooper(true); } } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index ae85f20b17b48..4487b6d937dc2 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -104,12 +104,12 @@ class GPUTPCGMMerger : public GPUProcessor void* SetPointersMemory(void* mem); GPUhdi() int32_t NMergedTracks() const { return mMemory->nMergedTracks; } - GPUhdi() const GPUTPCGMMergedTrack* OutputTracks() const { return mOutputTracks; } - GPUhdi() GPUTPCGMMergedTrack* OutputTracks() { return mOutputTracks; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdx() const { return mOutputTracksdEdx; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdx() { return mOutputTracksdEdx; } - GPUhdi() const GPUdEdxInfo* OutputTracksdEdxAlt() const { return mOutputTracksdEdxAlt; } - GPUhdi() GPUdEdxInfo* OutputTracksdEdxAlt() { return mOutputTracksdEdxAlt; } + GPUhdi() const GPUTPCGMMergedTrack* MergedTracks() const { return mMergedTracks; } + GPUhdi() GPUTPCGMMergedTrack* MergedTracks() { return 
mMergedTracks; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdx() const { return mMergedTracksdEdx; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdx() { return mMergedTracksdEdx; } + GPUhdi() const GPUdEdxInfo* MergedTracksdEdxAlt() const { return mMergedTracksdEdxAlt; } + GPUhdi() GPUdEdxInfo* MergedTracksdEdxAlt() { return mMergedTracksdEdxAlt; } GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t NMaxTracks() const { return mNMaxTracks; } @@ -262,9 +262,9 @@ class GPUTPCGMMerger : public GPUProcessor uint16_t mMemoryResOutputO2Scratch = (uint16_t)-1; int32_t mNClusters = 0; // Total number of incoming clusters (from sector tracks) - GPUTPCGMMergedTrack* mOutputTracks = nullptr; //* array of output merged tracks - GPUdEdxInfo* mOutputTracksdEdx = nullptr; //* dEdx information - GPUdEdxInfo* mOutputTracksdEdxAlt = nullptr; //* dEdx alternative information + GPUTPCGMMergedTrack* mMergedTracks = nullptr; //* array of output merged tracks + GPUdEdxInfo* mMergedTracksdEdx = nullptr; //* dEdx information + GPUdEdxInfo* mMergedTracksdEdxAlt = nullptr; //* dEdx alternative information GPUTPCGMSectorTrack* mSectorTrackInfos = nullptr; //* additional information for sector tracks int32_t* mSectorTrackInfoIndex = nullptr; GPUTPCGMMergedTrackHit* mClusters = nullptr; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx index 3be32a2d87610..9c924e74ec519 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx @@ -139,7 +139,7 @@ void GPUTPCGMMerger::DumpCollected(std::ostream& out) const out << std::setprecision(2); out << "\nTPC Merger Collected Tracks\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; const auto& p = trk.GetParam(); out << " Track " << i << ": Loop " << trk.Looper() << " Alpha " << 
trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << "\n"; } @@ -151,7 +151,7 @@ void GPUTPCGMMerger::DumpMergeCE(std::ostream& out) const DumpTrackLinks(out, true, " for CE merging"); out << "\nTPC Merger Merge CE\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; if (trk.CCE()) { out << " Track " << i << ": CCE\n"; } @@ -167,7 +167,7 @@ void GPUTPCGMMerger::DumpFitPrepare(std::ostream& out) const } out << " Clusters\n"; for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { - const auto& trk = mOutputTracks[j]; + const auto& trk = mMergedTracks[j]; out << " Track " << j << ": "; for (uint32_t i = trk.FirstClusterRef(); i < trk.FirstClusterRef() + trk.NClusters(); i++) { out << j << "/" << (i - trk.FirstClusterRef()) << ": " << mClusters[i].num << "/" << (int32_t)mClusters[i].state << ", "; @@ -196,14 +196,14 @@ void GPUTPCGMMerger::DumpRefit(std::ostream& out) const out << std::setprecision(2); out << "\nTPC Merger Refit\n"; for (uint32_t i = 0; i < mMemory->nMergedTracks; i++) { - const auto& trk = mOutputTracks[i]; + const auto& trk = mMergedTracks[i]; if (trk.NClusters() == 0) { continue; } const auto& p = trk.GetParam(); const auto& po = trk.OuterParam(); out << " Track " << i << ": OK " << trk.OK() << " Alpha " << trk.GetAlpha() << " X " << p.GetX() << " offset " << p.GetTZOffset() << " Y " << p.GetY() << " Z " << p.GetZ() << " SPhi " << p.GetSinPhi() << " Tgl " << p.GetDzDs() << " QPt " << p.GetQPt() << " NCl " << trk.NClusters() << " / " << trk.NClustersFitted() << " Cov " << p.GetErr2Y() << "/" << p.GetErr2Z() - << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mOutputTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? 
mOutputTracksdEdx[i].dEdxMaxTPC : -1.f) + << " dEdx " << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxTotTPC : -1.f) << "/" << (trk.OK() && Param().dodEdxEnabled ? mMergedTracksdEdx[i].dEdxMaxTPC : -1.f) << " Outer " << po.P[0] << "/" << po.P[1] << "/" << po.P[2] << "/" << po.P[3] << "/" << po.P[4] << " NFitted " << trk.NClustersFitted() << " legs " << (int)trk.Legs() << " flags " << (int)trk.Flags() << "\n"; } @@ -217,7 +217,7 @@ void GPUTPCGMMerger::DumpLoopers(std::ostream& out) const if (i && i % 100 == 0) { out << "\n"; } - out << (int)mOutputTracks[i].MergedLooper() << " "; + out << (int)mMergedTracks[i].MergedLooper() << " "; } out << "\n"; } @@ -226,7 +226,7 @@ void GPUTPCGMMerger::DumpFinal(std::ostream& out) const { out << "\nTPC Merger Finalized\n"; for (uint32_t j = 0; j < mMemory->nMergedTracks; j++) { - const auto& trk = mOutputTracks[j]; + const auto& trk = mMergedTracks[j]; if (trk.NClusters() == 0) { continue; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx index 68763b3549547..1631777d80482 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMergerGPU.cxx @@ -24,7 +24,7 @@ GPUdii() void GPUTPCGMMergerTrackFit::Thread<0>(int32_t nBlocks, int32_t nThread const int32_t iEnd = mode == -1 ? merger.Memory()->nRetryRefit : merger.NMergedTracks(); GPUCA_TBB_KERNEL_LOOP(merger.GetRec(), int32_t, ii, iEnd, { const int32_t i = mode == -1 ? merger.RetryRefitIds()[ii] : mode ? 
merger.TrackOrderProcess()[ii] : ii; - GPUTPCGMTrackParam::RefitTrack(merger.OutputTracks()[i], i, &merger, mode == -1); + GPUTPCGMTrackParam::RefitTrack(merger.MergedTracks()[i], i, &merger, mode == -1); }); } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 1e08058fb22dd..eb22ca49e9242 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -49,10 +49,10 @@ struct GPUTPCGMO2OutputSort_comp { template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); const uint32_t nTracks = merger.NMergedTracks(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); - const GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); + const GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); constexpr uint8_t flagsReject = getFlagsReject(); const uint32_t flagsRequired = getFlagsRequired(merger.Param().rec); @@ -107,9 +107,9 @@ template <> GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() merger) { constexpr float MinDelta = 0.1f; - const GPUTPCGMMergedTrack* tracks = merger.OutputTracks(); - GPUdEdxInfo* tracksdEdx = merger.OutputTracksdEdx(); - GPUdEdxInfo* tracksdEdxAlt = merger.OutputTracksdEdxAlt(); + const GPUTPCGMMergedTrack* tracks = merger.MergedTracks(); + GPUdEdxInfo* tracksdEdx = merger.MergedTracksdEdx(); + GPUdEdxInfo* tracksdEdxAlt = merger.MergedTracksdEdxAlt(); const int32_t nTracks = merger.NOutputTracksTPCO2(); const GPUTPCGMMergedTrackHit* trackClusters = merger.Clusters(); constexpr uint8_t flagsReject = getFlagsReject(); diff --git 
a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 366f75cb05e56..4b616fce83f5f 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -94,10 +94,10 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ storeOuter = 0; if (iWay == nWays - 1) { StoreOuter(outerParam, prop, 0); - if (merger->OutputTracks()[iTrk].Looper()) { + if (merger->MergedTracks()[iTrk].Looper()) { storeOuter = 1; } - } else if (iWay == nWays - 2 && merger->OutputTracks()[iTrk].Looper()) { + } else if (iWay == nWays - 2 && merger->MergedTracks()[iTrk].Looper()) { storeOuter = 2; } } @@ -435,9 +435,9 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ // TODO: we have looping tracks here with 0 accepted clusters in the primary leg. In that case we should refit the track using only the primary leg. if (param.par.dodEdx && param.dodEdxEnabled) { - dEdx.computedEdx(merger->OutputTracksdEdx()[iTrk], param); + dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { - dEdxAlt.computedEdx(merger->OutputTracksdEdxAlt()[iTrk], param); + dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } Alpha = prop.GetAlpha(); @@ -596,7 +596,7 @@ GPUd() float GPUTPCGMTrackParam::AttachClusters(const GPUTPCGMMerger* GPUrestric return -1e6f; } - const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->OutputTracks()[iTrack].CSide() ^ (sector >= 18)) ? -mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); + const float zOffset = Merger->Param().par.earlyTpcTransform ? ((Merger->MergedTracks()[iTrack].CSide() ^ (sector >= 18)) ? 
-mTZOffset : mTZOffset) : Merger->GetConstantMem()->calibObjects.fastTransformHelper->getCorrMap()->convVertexTimeToZOffset(sector, mTZOffset, Merger->Param().continuousMaxTimeBin); const float y0 = row.Grid().YMin(); const float stepY = row.HstepY(); const float z0 = row.Grid().ZMin() - zOffset; // We can use our own ZOffset, since this is only used temporarily anyway @@ -1136,7 +1136,7 @@ GPUd() void GPUTPCGMTrackParam::RefitTrack(GPUTPCGMMergedTrack& GPUrestrict() tr t.QPt() = 1.e-4f; } - CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->OutputTracks()[iTrk].Looper()); }); + CADEBUG(if (t.GetX() > 250) { printf("ERROR, Track %d at impossible X %f, Pt %f, Looper %d\n", iTrk, t.GetX(), CAMath::Abs(1.f / t.QPt()), (int32_t)merger->MergedTracks()[iTrk].Looper()); }); track.SetOK(ok); track.SetNClustersFitted(nTrackHits); diff --git a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx index e63bb82a9b09e..5af3ebb51b9d6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGlobalDebugSortKernels.cxx @@ -105,8 +105,8 @@ GPUdii() void GPUTPCGlobalDebugSortKernels::Thread= 0) { int32_t firstIdx = j; - auto firstItem = merger.OutputTracks()[firstIdx]; + auto firstItem = merger.MergedTracks()[firstIdx]; int32_t currIdx = firstIdx; int32_t sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; do { tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = merger.OutputTracks()[sourceIdx]; + merger.MergedTracks()[currIdx] = merger.MergedTracks()[sourceIdx]; currIdx = sourceIdx; sourceIdx = tmp[currIdx]; tmp2[sourceIdx] = currIdx; } while (sourceIdx != firstIdx); tmp[currIdx] = -1; - merger.OutputTracks()[currIdx] = firstItem; + merger.MergedTracks()[currIdx] = firstItem; } } } From b0414adf124ee153273fd9fefc7080690119eb89 Mon Sep 17 00:00:00 2001 From: Giulio 
Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 23:00:50 +0200 Subject: [PATCH 0506/1914] Out of line FairTBuffer destructor (#14265) --- Framework/Core/include/Framework/TMessageSerializer.h | 3 +++ Framework/Core/src/TMessageSerializer.cxx | 3 +++ 2 files changed, 6 insertions(+) diff --git a/Framework/Core/include/Framework/TMessageSerializer.h b/Framework/Core/include/Framework/TMessageSerializer.h index 769d23a7a3427..b6e355638c1e3 100644 --- a/Framework/Core/include/Framework/TMessageSerializer.h +++ b/Framework/Core/include/Framework/TMessageSerializer.h @@ -40,6 +40,8 @@ class FairOutputTBuffer : public TBufferFile : TBufferFile(TBuffer::kWrite, msg.GetSize() - sizeof(char*), embedInItself(msg), false, fairMQrealloc) { } + + ~FairOutputTBuffer() override; // Helper function to keep track of the FairMQ message that holds the data // in the data itself. We can use this to make sure the message can be reallocated // even if we simply have a pointer to the data. Hopefully ROOT will not play dirty @@ -60,6 +62,7 @@ class FairInputTBuffer : public TBufferFile : TBufferFile(TBuffer::kRead, size - sizeof(char*), data + sizeof(char*), false, nullptr) { } + ~FairInputTBuffer() override; }; struct TMessageSerializer { diff --git a/Framework/Core/src/TMessageSerializer.cxx b/Framework/Core/src/TMessageSerializer.cxx index c5da4cc576242..81a1c6e537d09 100644 --- a/Framework/Core/src/TMessageSerializer.cxx +++ b/Framework/Core/src/TMessageSerializer.cxx @@ -15,6 +15,9 @@ using namespace o2::framework; +FairOutputTBuffer::~FairOutputTBuffer() = default; +FairInputTBuffer::~FairInputTBuffer() = default; + void* FairOutputTBuffer::embedInItself(fair::mq::Message& msg) { // The first bytes of the message are used to store the pointer to the message itself From f4a478c778dc6b2672f71096f7915c10d18543d2 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Tue, 13 May 2025 23:02:37 +0200 Subject: [PATCH 0507/1914] DPL: keep 
codechecker happy (#14270) --- Detectors/CTP/reconstruction/src/RawDataDecoder.cxx | 6 ++++-- Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index 74bd08ce943ee..b216f5ec54570 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -615,8 +615,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, continue; } mClassCountersA[i]++; - if (cls->descriptor == nullptr) + if (cls->descriptor == nullptr) { continue; + } uint64_t clsinpmask = cls->descriptor->getInputsMask(); uint64_t diginpmask = digit.CTPInputMask.to_ullong(); if (!((clsinpmask & diginpmask) == clsinpmask)) { @@ -632,8 +633,9 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, // if inps => class mask for (auto const& cls : mCTPConfig.getCTPClasses()) { // cls.printStream(std::cout); - if (cls.descriptor == nullptr) + if (cls.descriptor == nullptr) { continue; + } uint64_t clsinpmask = cls.descriptor->getInputsMask(); // class definition uint64_t diginpmask = digit.CTPInputMask.to_ullong(); uint64_t digclsmask = digit.CTPClassMask.to_ullong(); diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 3f7c729b351a3..2df6bc981ce44 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -71,8 +71,9 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) std::cout << std::endl; LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); LOG(info) << "Number of missing TF:" << nmiss << std::endl; - if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) + if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) { LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << 
mDecoder.getErrorTCR() << std::endl; + } std::array clsA = mDecoder.getClassCountersA(); std::array clsB = mDecoder.getClassCountersB(); std::array clsEA = mDecoder.getClassErrorsA(); @@ -80,8 +81,9 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; - if (clsEA[i]) + if (clsEA[i]) { LOG(error) << " Class without inputs:"; + } LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; } } From 17345d60e5a63f085e86d1064c315d6f88c326b3 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 14 May 2025 19:29:47 +0200 Subject: [PATCH 0508/1914] DPL: Out of line NumericBuilders (#14273) --- Framework/Core/include/Framework/TableBuilder.h | 6 ++++++ Framework/Core/src/TableBuilder.cxx | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 936a8a04d5a5a..8d7601cefc634 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -48,6 +48,12 @@ struct BulkInfo { size_t size; }; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; +extern template class arrow::NumericBuilder; + namespace o2::framework { namespace detail diff --git a/Framework/Core/src/TableBuilder.cxx b/Framework/Core/src/TableBuilder.cxx index eb19f8d3fe642..2169722efa9da 100644 --- a/Framework/Core/src/TableBuilder.cxx +++ b/Framework/Core/src/TableBuilder.cxx @@ -131,3 +131,9 @@ std::shared_ptr spawnerHelper(std::shared_ptr const& } } // namespace o2::framework + +template class arrow::NumericBuilder; +template class arrow::NumericBuilder; 
+template class arrow::NumericBuilder; +template class arrow::NumericBuilder; +template class arrow::NumericBuilder; From f44f2362e789b6e3a43214b5a0f48ba9c40838f7 Mon Sep 17 00:00:00 2001 From: Felix Schlepper Date: Wed, 14 May 2025 14:32:37 +0200 Subject: [PATCH 0509/1914] Update RecoContainer.cxx --- .../Detectors/GlobalTracking/src/RecoContainer.cxx | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx index d4b4e2b89cbb0..dd206ffe3b70d 100644 --- a/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx +++ b/DataFormats/Detectors/GlobalTracking/src/RecoContainer.cxx @@ -1440,8 +1440,7 @@ RecoContainer::GlobalIDSet RecoContainer::getSingleDetectorRefs(GTrackID gidx) c table[GTrackID::TRD] = parent0.getTrackRef(); // there is no standalone TRD track, so use the index for the ITSTPCTRD track array } else if (src == GTrackID::TPCTRDTOF) { const auto& parent0 = getTOFMatch(gidx); // TPCTRD : TOF - const auto& parent1 = getITSTPCTRDTrack(parent0.getTrackRef()); - const auto& parent2 = getTPCITSTrack(parent1.getRefGlobalTrackId()); + const auto& parent1 = getTPCTRDTrack(parent0.getTrackRef()); table[GTrackID::TPCTRD] = parent0.getTrackRef(); table[GTrackID::TPC] = parent1.getRefGlobalTrackId(); table[GTrackID::TOF] = {unsigned(parent0.getIdxTOFCl()), GTrackID::TOF}; @@ -1547,8 +1546,6 @@ const o2::dataformats::MCTruthContainer* RecoContainer::getE void RecoContainer::getTrackTimeITSTPCTRDTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getITSTPCTRDTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC-TRD track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } @@ -1557,8 +1554,6 @@ 
void RecoContainer::getTrackTimeITSTPCTRDTOF(GTrackID gid, float& t, float& tErr void RecoContainer::getTrackTimeTPCTRDTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getTPCTRDTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC-TRD track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } @@ -1567,8 +1562,6 @@ void RecoContainer::getTrackTimeTPCTRDTOF(GTrackID gid, float& t, float& tErr) c void RecoContainer::getTrackTimeITSTPCTOF(GTrackID gid, float& t, float& tErr) const { const auto& match = getITSTPCTOFMatches()[gid]; - auto gidx = match.getTrackRef(); // this should be corresponding ITS-TPC track - // const auto& tofCl = getTOFClusters()[match.getTOFClIndex()]; t = (match.getSignal() - match.getLTIntegralOut().getTOF(o2::track::PID::Pion)) * PS2MUS; // tof time in \mus, FIXME: account for time of flight to R TOF tErr = 0.010f; } From 07096be128091de462d688c88e5f4cf0f5866729 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:56:55 +0200 Subject: [PATCH 0510/1914] GPU: Add some more type trait templates for GPU code --- GPU/Common/GPUCommonTypeTraits.h | 44 +++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index 6d72565d1f1fb..f837019c11875 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -22,7 +22,7 @@ #include #endif #else -// We just reimplement some type traits in std for the GPU +// We just reimplement some type traits in std for the GPU // TODO: Check if meanwhile we can get rid of GPUCommonTypeTraits and GPUCommonArray, and just use the std headers. 
namespace std { template @@ -35,6 +35,7 @@ struct conditional { }; template using contitional_t = typename conditional::type; + template struct is_same { static constexpr bool value = false; @@ -45,6 +46,7 @@ struct is_same { }; template static constexpr bool is_same_v = is_same::value; + template struct enable_if { }; @@ -52,6 +54,7 @@ template struct enable_if { typedef T type; }; + template struct remove_cv { typedef T type; @@ -68,6 +71,9 @@ template struct remove_cv { typedef T type; }; +template +using remove_cv_t = typename remove_cv::type; + template struct remove_const { typedef T type; @@ -76,6 +82,9 @@ template struct remove_const { typedef T type; }; +template +using remove_const_t = typename remove_const::type; + template struct remove_volatile { typedef T type; @@ -84,6 +93,9 @@ template struct remove_volatile { typedef T type; }; +template +using remove_volatile_t = typename remove_volatile::type; + template struct is_pointer_t { static constexpr bool value = false; @@ -95,6 +107,36 @@ struct is_pointer_t { template struct is_pointer : is_pointer_t::type> { }; + +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +struct remove_reference { + typedef T type; +}; +template +using remove_reference_t = typename remove_reference::type; + +template +struct is_member_pointer_helper { + static constexpr bool value = false; +}; +template +struct is_member_pointer_helper { + static constexpr bool value = true; +}; +template +struct is_member_pointer : is_member_pointer_helper::type> { +}; +template +static constexpr bool is_member_pointer_v = is_member_pointer::value; + } // namespace std #endif From 4654958fe006df87ce60aaf48d61184db85e76d3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 15:57:10 +0200 Subject: [PATCH 0511/1914] Revert "GPU: Workaround for Clang Frontend issue" This reverts commit 89b35ba2d75113e60b2045ed01e169b28d860a07. 
--- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx | 4 ---- GPU/GPUTracking/Base/hip/CMakeLists.txt | 5 ----- 2 files changed, 9 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx index 67ad608c13417..acc77648d954b 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx @@ -38,11 +38,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile) { std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") + std::string(GetProcessingSettings().rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + -#ifndef GPUCA_HIP_WORKAROUND_CONSTEXPR // TODO: Fixme, once we have C++ P2280R4 in Clang std::string(GetProcessingSettings().rtc.optConstexpr ? "#define GPUCA_RTC_CONSTEXPR constexpr\n" : "#define GPUCA_RTC_CONSTEXPR\n") + -#else - std::string("#define GPUCA_RTC_CONSTEXPR\n") + -#endif GPUParamRTC::generateRTCCode(param(), GetProcessingSettings().rtc.optConstexpr); if (filename == "") { filename = "/tmp/o2cagpu_rtc_"; diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index c89ef1769ad81..6eded3499e46e 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -270,8 +270,3 @@ add_dependencies(GPUTrackingHIPExternalProvider O2::GPUTracking) # must not depe if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}") add_dependencies(GPUTrackingHIPExternalProvider ${MODULE}_HIPIFIED) endif() - -set_source_files_properties("${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx" -TARGET_DIRECTORY O2::GPUTrackingHIP -PROPERTIES -COMPILE_DEFINITIONS "GPUCA_HIP_WORKAROUND_CONSTEXPR") From 46ef93fdb9436f1b1bcebd01a3458235ed918c80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:29 +0200 
Subject: [PATCH 0512/1914] GPU: Template workaround to get static constexpr values as constexpr from references --- GPU/GPUTracking/Definitions/GPUGetConstexpr.h | 67 +++++++++++++++++++ GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 5 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 11 +-- 3 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 GPU/GPUTracking/Definitions/GPUGetConstexpr.h diff --git a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h new file mode 100644 index 0000000000000..8001b4e98c83f --- /dev/null +++ b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h @@ -0,0 +1,67 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUGetConstexpr.h +/// \author David Rohr + +#ifndef GPUGETCONSTEXPR_H +#define GPUGETCONSTEXPR_H + +#include "GPUCommonDef.h" +#include "GPUCommonTypeTraits.h" + +// This is a temporary workaround required for clang (with c++20), until we can go to C++23 with P2280R4, which allows getting constexpr static values from references + +#if defined(__clang__) && __cplusplus >= 202002L && __cplusplus < 202302L + +namespace o2::gpu::internal +{ + +#define GPUCA_GET_CONSTEXPR(obj, val) ( \ + std::is_member_pointer_v::val)> ? 
o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>(&obj).value) : o2::gpu::internal::getConstexpr(&std::remove_reference_t::val, o2::gpu::internal::getConstexprHelper::val), decltype(&obj)>().value)) + +template +struct getConstexprHelper; + +template + requires(!std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const void* = nullptr) {} + static constexpr const void* value = nullptr; +}; + +template + requires(std::is_member_pointer_v) +struct getConstexprHelper { + GPUdi() constexpr getConstexprHelper(const S& v) : value(v) {} + GPUdDefault() constexpr getConstexprHelper() = default; + const S value = nullptr; +}; + +GPUdi() constexpr auto getConstexpr(const auto* v, const void* = nullptr) +{ + return *v; +} + +GPUdi() constexpr auto getConstexpr(const auto v, const auto w) +{ + return w->*v; +} + +} // namespace o2::gpu::internal + +#else // __clang__ + +#define GPUCA_GET_CONSTEXPR(obj, val) (obj).val + +#endif + +#endif // GPUGETCONSTEXPR_H diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index eb22ca49e9242..7bb28a9f22e31 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -20,6 +20,7 @@ #include "DataFormatsTPC/PIDResponse.h" #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" +#include "GPUGetConstexpr.h" #ifndef GPUCA_GPUCODE #include "SimulationDataFormat/ConstMCTruthContainer.h" @@ -141,10 +142,10 @@ GPUdii() void GPUTPCGMO2Output::Thread(int32_t nBlocks oTrack.setChi2(tracks[i].GetParam().GetChi2()); auto& outerPar = tracks[i].OuterParam(); - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled) { oTrack.setdEdx(tracksdEdx[i]); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != 
param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { oTrack.setdEdxAlt(tracksdEdxAlt[i]); } else { oTrack.setdEdxAlt(tracksdEdx[i]); diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 4b616fce83f5f..1072e4b178bdf 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -39,6 +39,7 @@ #include "GPUTPCConvertImpl.h" #include "GPUTPCGMMergerTypes.h" #include "GPUParam.inc" +#include "GPUGetConstexpr.h" #ifdef GPUCA_CADEBUG_ENABLED #include "../utils/qconfig.h" @@ -216,12 +217,12 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ continue; } } else if (allowModification && lastRow != 255 && CAMath::Abs(cluster.row - lastRow) > 1) { - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { bool dodEdx = param.dodEdxEnabled && param.rec.tpc.adddEdxSubThresholdClusters && iWay == nWays - 1 && CAMath::Abs(cluster.row - lastRow) == 2 && cluster.leg == clusters[maxN - 1].leg; dodEdx = AttachClustersPropagate(merger, cluster.sector, lastRow, cluster.row, iTrk, cluster.leg == clusters[maxN - 1].leg, prop, inFlyDirection, GPUCA_MAX_SIN_PHI, dodEdx); if (dodEdx) { dEdx.fillSubThreshold(lastRow - wayDirection); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.fillSubThreshold(lastRow - wayDirection); } } @@ -371,7 +372,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ CADEBUG(printf("Reinit linearization\n")); prop.SetTrack(this, 
prop.GetAlpha()); } - if GPUCA_RTC_CONSTEXPR (param.par.dodEdx) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.par, dodEdx)) { if (param.dodEdxEnabled && iWay == nWays - 1 && cluster.leg == clusters[maxN - 1].leg) { // TODO: Costimize flag to remove, and option to remove double-clusters bool acc = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMask) == 0, accAlt = (clusterState & param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) == 0; if (acc || accAlt) { @@ -395,7 +396,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (acc) { dEdx.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { if (accAlt) { dEdxAlt.fillCluster(qtot, qmax, cluster.row, cluster.sector, mP[2], mP[3], merger->GetConstantMem()->calibObjects, zz, pad, relTime); } @@ -436,7 +437,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (param.par.dodEdx && param.dodEdxEnabled) { dEdx.computedEdx(merger->MergedTracksdEdx()[iTrk], param); - if GPUCA_RTC_CONSTEXPR (param.rec.tpc.dEdxClusterRejectionFlagMask != param.rec.tpc.dEdxClusterRejectionFlagMaskAlt) { + if GPUCA_RTC_CONSTEXPR (GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMask) != GPUCA_GET_CONSTEXPR(param.rec.tpc, dEdxClusterRejectionFlagMaskAlt)) { dEdxAlt.computedEdx(merger->MergedTracksdEdxAlt()[iTrk], param); } } From 4d647840509e57b890f4ce71fdb062f2edf7b234 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 13 May 2025 18:52:48 +0200 Subject: [PATCH 0513/1914] GPU: Simplify some type_traits use, get rid of ::values and ::type --- .../Base/cuda/GPUReconstructionCUDAInternals.h | 2 +- 
GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 4 ++-- GPU/GPUTracking/qa/GPUQAHelper.h | 8 ++++---- GPU/GPUTracking/utils/bitfield.h | 2 +- GPU/GPUTracking/utils/qconfig.cxx | 8 ++++---- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h index 0813c9d22ea09..493c09e448e5e 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAInternals.h @@ -79,7 +79,7 @@ class GPUDebugTiming bool mDo; }; -static_assert(std::is_convertible::value, "CUDA event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "CUDA event type incompatible to deviceEvent"); } // namespace o2::gpu diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx index 949dd6195b262..ce05e159461e5 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx @@ -18,7 +18,7 @@ #include -static_assert(std::is_convertible::value, "OpenCL event type incompatible to deviceEvent"); +static_assert(std::is_convertible_v, "OpenCL event type incompatible to deviceEvent"); #define GPUErrorReturn(...) 
\ { \ diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index f1a0816529c3a..73ca449252d1d 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -235,7 +235,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const { GPUTPCGMSectorTrack* sectorTrack = nullptr; int32_t nClusters = 0; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { sectorTrack = &mSectorTrackInfos[trk.TrackID()]; nClusters = sectorTrack->OrigTrack()->NHits(); } else { @@ -244,7 +244,7 @@ int64_t GPUTPCGMMerger::GetTrackLabelA(const S& trk) const auto acc = GPUTPCTrkLbl(resolveMCLabels(GetConstantMem()->ioPtrs.clustersNative ? GetConstantMem()->ioPtrs.clustersNative->clustersMCTruth : nullptr, GetConstantMem()->ioPtrs.mcLabelsTPC), 0.5f); for (int32_t i = 0; i < nClusters; i++) { int32_t id; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { const GPUTPCTracker& tracker = GetConstantMem()->tpcTrackers[sectorTrack->Sector()]; const GPUTPCHitId& ic = tracker.TrackHits()[sectorTrack->OrigTrack()->FirstHitID() + i]; id = tracker.Data().ClusterDataIndex(tracker.Data().Row(ic.RowIndex()), ic.HitIndex()) + GetConstantMem()->ioPtrs.clustersNative->clusterOffset[sectorTrack->Sector()][0]; diff --git a/GPU/GPUTracking/qa/GPUQAHelper.h b/GPU/GPUTracking/qa/GPUQAHelper.h index a7811c6fd55ed..a830562119467 100644 --- a/GPU/GPUTracking/qa/GPUQAHelper.h +++ b/GPU/GPUTracking/qa/GPUQAHelper.h @@ -48,7 +48,7 @@ class GPUTPCTrkLbl } inline void addLabel(uint32_t elementId) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { for (uint32_t i = 0; i < sizeof(mClusterLabels[elementId]) / sizeof(mClusterLabels[elementId].fClusterID[0]); i++) { const auto& element = mClusterLabels[elementId].fClusterID[i]; if (element.fMCID >= 0) { @@ -101,7 +101,7 @@ class GPUTPCTrkLbl } } auto& bestLabel = mLabels[bestLabelNum].first; - if constexpr 
(std::is_same::value && WEIGHT) { + if constexpr (std::is_same_v && WEIGHT) { *labelWeight = bestLabel.fWeight; *totalWeight = mTotalWeight; *maxCount = bestLabelCount; @@ -147,7 +147,7 @@ struct GPUTPCTrkLbl_ret { template class S, typename... Args> static inline auto GPUTPCTrkLbl(const S* x, Args... args) { - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl>(x, args...); } else { return internal::GPUTPCTrkLbl, U>(x, args...); @@ -159,7 +159,7 @@ static inline auto GPUTPCTrkLbl(const AliHLTTPCClusterMCLabel* x, Args... args) { using S = AliHLTTPCClusterMCLabel; using T = AliHLTTPCClusterMCWeight; - if constexpr (std::is_same::value) { + if constexpr (std::is_same_v) { return internal::GPUTPCTrkLbl(x, args...); } else { return internal::GPUTPCTrkLbl(x, args...); diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index 9730f6c6c234f..a3a3ac9a5bd95 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -93,7 +93,7 @@ class bitfield } #if !defined(GPUCA_GPUCODE_DEVICE) - static_assert(std::is_integral::value, "Storage type non integral"); + static_assert(std::is_integral_v, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif diff --git a/GPU/GPUTracking/utils/qconfig.cxx b/GPU/GPUTracking/utils/qconfig.cxx index cdb41ec5813f2..839954e52ded3 100644 --- a/GPU/GPUTracking/utils/qconfig.cxx +++ b/GPU/GPUTracking/utils/qconfig.cxx @@ -126,7 +126,7 @@ static inline int32_t qAddOptionMainTupleElem(qConfigSettings settings = settingsTup; return (qAddOptionType(settings, ref, i, argv, argc, def)); } -template ::value> +template > struct qAddOptionMainTupleStruct { static inline int32_t qAddOptionMainTuple(qConfigSettings::settingsType> settings, T& tup, int32_t& i, const char** argv, const int argc) { @@ -157,13 +157,13 @@ struct qConfigType { // Recursive handling of additional settings static 
inline void qProcessSetting(qConfigSettings& settings, qmin_t minval) { - static_assert(!std::is_same::value, "min option not supported for boolean settings"); + static_assert(!std::is_same_v, "min option not supported for boolean settings"); settings.checkMin = true; settings.min = minval.v; } static inline void qProcessSetting(qConfigSettings& settings, qmax_t maxval) { - static_assert(!std::is_same::value, "max option not supported for boolean settings"); + static_assert(!std::is_same_v, "max option not supported for boolean settings"); settings.checkMax = true; settings.max = maxval.v; } @@ -244,7 +244,7 @@ struct qConfigType { static inline void qConfigHelpOption(const char* name, const char* type, const char* def, const char* optname, char optnameshort, const char* preopt, char preoptshort, int32_t optionType, const char* help, Args&&... args) { auto settings = qConfigGetSettings(args...); - const bool boolType = optionType != 1 && std::is_same::value; + const bool boolType = optionType != 1 && std::is_same_v; const char* arguments = settings.doSet ? " (" : (settings.doDefault || optionType == 1 || boolType) ? " [arg] (" : optionType == 2 ? " [...] 
(" : " arg ("; char argBuffer[4] = {0}; uint32_t argBufferPos = 0; From 073cd1697027762311775ec251cea232c701db80 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 14 May 2025 20:43:33 +0200 Subject: [PATCH 0514/1914] GPU CMake: Use CUDA/HIP compilers to preprocess CUDA/HIP RTC files --- GPU/GPUTracking/Base/cuda/CMakeLists.txt | 2 +- GPU/GPUTracking/Base/hip/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index c31dd0c8d3fe2..d9ee132d7c5f5 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -68,7 +68,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_CUDA_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -E -Xcompiler "-nostdinc -P" ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} COMMAND_EXPAND_LISTS diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index 6eded3499e46e..a47c659c8717d 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -115,7 +115,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) add_custom_command( OUTPUT ${GPU_RTC_BIN}.src COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h ${GPU_RTC_BIN}.src - COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x 
c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src + COMMAND ${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} DEPENDS ${MODULE}_HIPIFIED From 895906ee43086663b84232a35e935474a06d6cb7 Mon Sep 17 00:00:00 2001 From: Mario Sitta Date: Thu, 15 May 2025 15:41:14 +0200 Subject: [PATCH 0515/1914] Implementation of MFT rails inside Cage --- .../simulation/include/ITSSimulation/V3Cage.h | 21 +++ .../ITSMFT/ITS/simulation/src/V3Cage.cxx | 146 ++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h b/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h index e90f0cfeb0aed..44d72284112c2 100644 --- a/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h +++ b/Detectors/ITSMFT/ITS/simulation/include/ITSSimulation/V3Cage.h @@ -113,6 +113,16 @@ class V3Cage : public V11Geometry /// \param mgr The GeoManager (used only to get the proper material) TGeoVolume* createCageClosingCross(const TGeoManager* mgr = gGeoManager); + /// Creates and places the MFT rails inside the Cage + /// \param mother The mother volume to place the rails into + /// \param mgr The GeoManager (used only to get the proper material) + void createAndPlaceMFTRailsInsideCage(TGeoVolume* mother, const TGeoManager* mgr = gGeoManager); + + /// Creates a pair of MFT rails inside the Cage + /// \param motmed Medium material of the mother volume + /// \param mgr The GeoManager (used only to get the proper material) + TGeoVolume* createMFTRailsPair(const TGeoMedium* motmed, const TGeoManager* mgr = gGeoManager); + // Parameters static const Double_t sCageYInBarrel; ///< Global Y translation @@ -244,6 +254,17 @@ class V3Cage : public V11Geometry static const Double_t sCageCrossBarThick; ///< Closing cross bar thickness 
static const Double_t sCageCrossBarPhi; ///< Closing cross bar angle + // MFT Rails inside the Cage + static const Double_t sCageMFTRailZLen; ///< Total length of the rail + static const Double_t sCageMFTRailTotWidth; ///< Total width of the rail + static const Double_t sCageMFTRailExtWidth; ///< Width of the external part + static const Double_t sCageMFTRailIntWidth; ///< Width of the internal part + static const Double_t sCageMFTRailBaseWidth; ///< Width of the rail base + static const Double_t sCageMFTRailTotHeight; ///< Total height of the rail + static const Double_t sCageMFTRailExtHeight; ///< Height of the external part + static const Double_t sCageMFTRailIntHeight; ///< Height of the internal part + static const Double_t sCageMFTRailsXDist; ///< X distance between rails + ClassDefOverride(V3Cage, 0); // ITS v3 support geometry }; } // namespace its diff --git a/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx b/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx index 3b17d7afeef3d..528b74dc339f1 100644 --- a/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx +++ b/Detectors/ITSMFT/ITS/simulation/src/V3Cage.cxx @@ -167,6 +167,16 @@ const Double_t V3Cage::sCageCrossZLength = 8 * sMm; const Double_t V3Cage::sCageCrossBarThick = 20 * sMm; const Double_t V3Cage::sCageCrossBarPhi = 25; // Deg +const Double_t V3Cage::sCageMFTRailZLen = 1807 * sMm; +const Double_t V3Cage::sCageMFTRailTotWidth = 27 * sMm; +const Double_t V3Cage::sCageMFTRailExtWidth = 24 * sMm; +const Double_t V3Cage::sCageMFTRailIntWidth = 17.5 * sMm; +const Double_t V3Cage::sCageMFTRailBaseWidth = 22 * sMm; +const Double_t V3Cage::sCageMFTRailTotHeight = 8.9 * sMm; +const Double_t V3Cage::sCageMFTRailExtHeight = 5.9 * sMm; +const Double_t V3Cage::sCageMFTRailIntHeight = 3.5 * sMm; +const Double_t V3Cage::sCageMFTRailsXDist = 44 * sMm; + ClassImp(V3Cage); V3Cage::V3Cage() @@ -251,6 +261,9 @@ void V3Cage::createAndPlaceCage(TGeoVolume* mother, const TGeoManager* mgr) zpos = sBPSuppZPos + sBPSuppCollarBeamWid 
/ 2; mother->AddNode(cageBPSupport, 1, new TGeoTranslation(0, ypos, zpos)); + // The MFT Rails inside the Cage + createAndPlaceMFTRailsInsideCage(mother, mgr); + return; } @@ -1648,3 +1661,136 @@ TGeoVolume* V3Cage::createCageClosingCross(const TGeoManager* mgr) // Finally return the closing cross volume return closCrossVol; } + +void V3Cage::createAndPlaceMFTRailsInsideCage(TGeoVolume* mother, const TGeoManager* mgr) +{ + // + // Creates the MFT Rails located inside the Cage and place them + // + // Input: + // mother : the mother volume to place the rails into + // mgr : the GeoManager (used only to get the proper material) + // + // Output: + // + // Return: + // + // Created: 10 May 2025 Mario Sitta + // + + // Local variables + Double_t rdist, rpos, xpos, ypos, alpha; + Double_t xbox, ybox; + + // Create a pair of rails (a BBox container is returned) + TGeoVolume* cageMFTRails = createMFTRailsPair(mother->GetMedium(), mgr); + + // Now compute the radial distance and the XY position of the box + xbox = (static_cast<TGeoBBox*>(cageMFTRails->GetShape()))->GetDX(); + ybox = (static_cast<TGeoBBox*>(cageMFTRails->GetShape()))->GetDY(); + + rdist = TMath::Sqrt(sCageCoverRint * sCageCoverRint - xbox * xbox); + rpos = rdist - ybox; + + // Finally place the four pairs of rails inside the mother volume + xpos = rpos * TMath::Sin(sCageEndCapCableCutPhi * TMath::DegToRad()); + ypos = rpos * TMath::Cos(sCageEndCapCableCutPhi * TMath::DegToRad()) + sCageYInBarrel; + + alpha = -sCageEndCapCableCutPhi + 180; + mother->AddNode(cageMFTRails, 1, new TGeoCombiTrans(xpos, ypos, 0, new TGeoRotation("", alpha, 0, 0))); + alpha = sCageEndCapCableCutPhi + 180; + mother->AddNode(cageMFTRails, 2, new TGeoCombiTrans(-xpos, ypos, 0, new TGeoRotation("", alpha, 0, 0))); + + ypos = rpos * TMath::Cos(sCageEndCapCableCutPhi * TMath::DegToRad()) - sCageYInBarrel; + + alpha = sCageEndCapCableCutPhi; + mother->AddNode(cageMFTRails, 3, new TGeoCombiTrans(xpos, -ypos, 0, new TGeoRotation("", alpha, 0, 
0))); + alpha = -sCageEndCapCableCutPhi; + mother->AddNode(cageMFTRails, 4, new TGeoCombiTrans(-xpos, -ypos, 0, new TGeoRotation("", alpha, 0, 0))); + + return; +} + +TGeoVolume* V3Cage::createMFTRailsPair(const TGeoMedium* motmed, const TGeoManager* mgr) +{ + // + // Creates a pair of MFT Rails located inside the Cage (from drawings + // ALI-MFT-DF-0057 and elements therein) + // A box containing a pair of rails is returned (a physical box + // is preferred over an Assembly for better performance) + // + // Input: + // motmed : the medium of the mother volume (for the container box) + // mgr : the GeoManager (used only to get the proper material) + // + // Output: + // + // Return: + // A rail pair as a TGeoVolume + // + // Created: 10 May 2025 Mario Sitta + // + + // Local variables + const Int_t nv = 16; + Double_t xv[nv], yv[nv]; + Double_t deltah, xlen, ylen, zlen; + Double_t xpos, ypos; + + // The shape of a single rail: a Xtru + xv[0] = sCageMFTRailBaseWidth / 2; + yv[0] = 0.; + xv[1] = xv[0]; + yv[1] = sCageMFTRailTotHeight - sCageMFTRailExtHeight; + xv[2] = sCageMFTRailTotWidth / 2; + yv[2] = yv[1]; + xv[3] = xv[2]; + yv[3] = sCageMFTRailTotHeight; + xv[4] = sCageMFTRailIntWidth / 2; + yv[4] = yv[3]; + deltah = (sCageMFTRailExtHeight - sCageMFTRailIntHeight) / 2; + xv[5] = xv[4]; + yv[5] = yv[4] - deltah; + xv[6] = sCageMFTRailExtWidth / 2; + yv[6] = yv[5]; + xv[7] = xv[6]; + yv[7] = yv[6] - sCageMFTRailIntHeight; + + for (Int_t i = 8; i < nv; i++) { + xv[i] = -xv[15 - i]; + yv[i] = yv[15 - i]; + } + + zlen = sCageMFTRailZLen / 2; + + TGeoXtru* mftRailSh = new TGeoXtru(2); + mftRailSh->SetName("mftrailshape"); + mftRailSh->DefinePolygon(nv, xv, yv); + mftRailSh->DefineSection(0, -zlen); + mftRailSh->DefineSection(1, zlen); + + // The air container: a BBox + xlen = 2 * sCageMFTRailTotWidth + sCageMFTRailsXDist; + ylen = sCageMFTRailTotHeight / 2; + zlen = sCageMFTRailZLen / 2; + TGeoBBox* mftRailBoxSh = new TGeoBBox(xlen / 2, ylen, zlen); + + // We have 
the shape: now create the real volume + TGeoMedium* medAl = mgr->GetMedium(Form("%s_ALUMINUM$", GetDetName())); + + TGeoVolume* mftRailVol = new TGeoVolume("MFTRailInsideCage", mftRailSh, medAl); + mftRailVol->SetFillColor(kGray); + mftRailVol->SetLineColor(kGray); + + TGeoVolume* mftRailBoxVol = new TGeoVolume("MFTRailPairInsideCage", mftRailBoxSh, motmed); + + // Put the two rails inside the holding box + // (rail Y origin is on its lower face) + xpos = mftRailBoxSh->GetDX() - 0.5 * sCageMFTRailTotWidth; + ypos = mftRailBoxSh->GetDY(); + mftRailBoxVol->AddNode(mftRailVol, 1, new TGeoTranslation(xpos, -ypos, 0)); + mftRailBoxVol->AddNode(mftRailVol, 2, new TGeoTranslation(-xpos, -ypos, 0)); + + // Finally return the rails volume + return mftRailBoxVol; +} From c2cd436aaed5b1e0e21ba831f22b37b7184cd9b3 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:32:10 +0200 Subject: [PATCH 0516/1914] GPU: Some work to prepare using and headers from system for GPU --- .../MathUtils/include/MathUtils/SMatrixGPU.h | 4 ++- .../include/CommonDataFormat/AbstractRef.h | 5 +++- Detectors/Raw/include/DetectorsRaw/RDHUtils.h | 6 +++-- GPU/GPUTracking/Base/GPUStdSystemHeaders.h | 25 +++++++++++++++++++ GPU/GPUTracking/Base/cuda/CMakeLists.txt | 4 ++- .../GPUReconstructionCUDAIncludesSystem.h | 7 +++--- GPU/GPUTracking/Base/hip/CMakeLists.txt | 3 ++- .../hip/GPUReconstructionHIPIncludesSystem.h | 4 +++ GPU/GPUTracking/Base/opencl/CMakeLists.txt | 8 +++++- .../Base/opencl/GPUReconstructionOCL.cl | 2 ++ GPU/GPUTracking/DataTypes/GPUDataTypes.h | 2 +- GPU/GPUTracking/Definitions/GPUGetConstexpr.h | 4 ++- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 5 +++- GPU/GPUTracking/Standalone/cmake/config.cmake | 2 +- .../TRDTracking/GPUTRDTrackerKernels.cxx | 5 +++- GPU/GPUTracking/utils/bitfield.h | 5 +--- 16 files changed, 72 insertions(+), 19 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUStdSystemHeaders.h diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h 
b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 5ecdcd75a9906..675719cfc0751 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -29,7 +29,9 @@ #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" #include "GPUCommonLogger.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif namespace o2::math_utils::detail { diff --git a/DataFormats/common/include/CommonDataFormat/AbstractRef.h b/DataFormats/common/include/CommonDataFormat/AbstractRef.h index 403bab3cbd62f..72c195cfb7bc8 100644 --- a/DataFormats/common/include/CommonDataFormat/AbstractRef.h +++ b/DataFormats/common/include/CommonDataFormat/AbstractRef.h @@ -18,7 +18,10 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + namespace o2::dataformats { diff --git a/Detectors/Raw/include/DetectorsRaw/RDHUtils.h b/Detectors/Raw/include/DetectorsRaw/RDHUtils.h index 2fac6f35d40c4..a5d8cc8615c79 100644 --- a/Detectors/Raw/include/DetectorsRaw/RDHUtils.h +++ b/Detectors/Raw/include/DetectorsRaw/RDHUtils.h @@ -19,13 +19,15 @@ #include "GPUCommonRtypes.h" #include "Headers/RAWDataHeader.h" #include "Headers/RDHAny.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif #if !defined(GPUCA_GPUCODE) #include "CommonDataFormat/InteractionRecord.h" #endif #if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) #include "Headers/DAQID.h" -#endif // GPUCA_GPUCODE / GPUCA_STANDALONE +#endif namespace o2 { diff --git a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h new file mode 100644 index 0000000000000..6598085d309c7 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h @@ -0,0 +1,25 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. 
+// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUStdSystemHeaders.h +/// \author David Rohr + +#ifndef GPUSTDSYSTEMHEADERS_H +#define GPUSTDSYSTEMHEADERS_H + +#include +#include +#include +#include +#include +#include + +#endif diff --git a/GPU/GPUTracking/Base/cuda/CMakeLists.txt b/GPU/GPUTracking/Base/cuda/CMakeLists.txt index d9ee132d7c5f5..97972265b3007 100644 --- a/GPU/GPUTracking/Base/cuda/CMakeLists.txt +++ b/GPU/GPUTracking/Base/cuda/CMakeLists.txt @@ -67,7 +67,9 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/GPUStdSystemHeaders.h ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesSystem.h | grep -v GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src COMMAND ${CMAKE_CUDA_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -E -Xcompiler "-nostdinc -P" ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h index 3f072059a9ad7..263d6939909c8 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h @@ -15,9 +15,10 @@ #ifndef 
O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H #define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H -#include -#include -#include +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUStdSystemHeaders.h" +#endif + #include #include #include diff --git a/GPU/GPUTracking/Base/hip/CMakeLists.txt b/GPU/GPUTracking/Base/hip/CMakeLists.txt index a47c659c8717d..04a65922ad453 100644 --- a/GPU/GPUTracking/Base/hip/CMakeLists.txt +++ b/GPU/GPUTracking/Base/hip/CMakeLists.txt @@ -114,7 +114,8 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionHIPrtc) # cmake-format: off add_custom_command( OUTPUT ${GPU_RTC_BIN}.src - COMMAND cp ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h ${GPU_RTC_BIN}.src + COMMAND cp ${GPUDIR}/Base/GPUStdSystemHeaders.h ${GPU_RTC_BIN}.src + COMMAND cat ${GPUDIR}/Base/hip/GPUReconstructionHIPIncludesSystem.h | grep -v GPUStdSystemHeaders.h >> ${GPU_RTC_BIN}.src COMMAND ${CMAKE_HIP_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_HIP_STANDARD} -D__HIPCC__ -D__HIP_DEVICE_COMPILE__ -x c++ -nostdinc -E -P ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src MAIN_DEPENDENCY ${GPU_RTC_SRC} IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC} diff --git a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h index 1a3a1ff0108af..0228f993aaee3 100644 --- a/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h +++ b/GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h @@ -15,6 +15,10 @@ #ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H #define O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H +#ifndef GPUCA_GPUCODE_GENRTC +#include "GPUStdSystemHeaders.h" +#endif + #include #include #include diff --git a/GPU/GPUTracking/Base/opencl/CMakeLists.txt b/GPU/GPUTracking/Base/opencl/CMakeLists.txt index 99ec36615a1d1..1a8a739adbecf 100644 --- a/GPU/GPUTracking/Base/opencl/CMakeLists.txt +++ b/GPU/GPUTracking/Base/opencl/CMakeLists.txt @@ -23,7 +23,7 @@ endif() set(CL_SRC 
${GPUDIR}/Base/opencl/GPUReconstructionOCL.cl) set(CL_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionOCLCode) -set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) +set(OCL_FLAGS -Dcl_clang_storage_class_specifiers -x cl -cl-std=CLC++2021 ${GPUCA_OCL_DENORMALS_FLAGS}) if(NOT GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH}) set(OCL_FLAGS ${OCL_FLAGS} -cl-mad-enable -cl-no-signed-zeros -cl-fast-relaxed-math) else() @@ -43,6 +43,11 @@ if (NOT DEFINED GPUCA_OCL_SPIRV_VERSION) set(GPUCA_OCL_SPIRV_VERSION 1.2) endif() +# execute_process(COMMAND bash -c "${LLVM_CLANG} -stdlib=libc++ -E -H -x c++ - <<< '#include ' 2>&1 1>/dev/null | grep type_traits | head -n 1 | sed 's/^\\.* *//'" +# OUTPUT_VARIABLE CLANG_STD_INCLUDE_DIR) +# get_filename_component(CLANG_STD_INCLUDE_DIR "${CLANG_STD_INCLUDE_DIR}" DIRECTORY) +# get_filename_component(CLANG_STD_INCLUDE_DIR "${CLANG_STD_INCLUDE_DIR}" ABSOLUTE) # TODO: For using in OpenCL, we would need to add -I${CLANG_STD_INCLUDE_DIR} + if(OPENCL_ENABLED_SPIRV) # BUILD OpenCL intermediate code for SPIR-V target # executes clang to create llvm IL code # Add -fintegrated-objemitter once we switch to clang >= 17 @@ -71,6 +76,7 @@ if(OPENCL_ENABLED) # BUILD OpenCL source code for runtime compilation target add_custom_command( OUTPUT ${CL_BIN}.src COMMAND ${LLVM_CLANG} + -target spir64 -Wno-unused-command-line-argument ${OCL_FLAGS} ${OCL_DEFINECL} diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index ffdc34d6c9881..3f58c0fea75e9 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -70,6 +70,8 @@ typedef signed char int8_t; #endif #define assert(param) +#include "GPUCommonDef.h" +#include "GPUCommonTypeTraits.h" // TODO: Once possible in OpenCL, should use GPUStdSystemHeaders.h here #include "GPUConstantMem.h" #include 
"GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/DataTypes/GPUDataTypes.h b/GPU/GPUTracking/DataTypes/GPUDataTypes.h index 6cc1e7266e722..3e9623e23559b 100644 --- a/GPU/GPUTracking/DataTypes/GPUDataTypes.h +++ b/GPU/GPUTracking/DataTypes/GPUDataTypes.h @@ -20,9 +20,9 @@ // These are basic and non-complex data types, which will also be visible on the GPU. // Please add complex data types required on the host but not GPU to GPUHostDataTypes.h and forward-declare! #ifndef GPUCA_GPUCODE_DEVICE +#include // for bitfield below #include #endif -#include "GPUCommonTypeTraits.h" #include "GPUTRDDef.h" struct AliHLTTPCClusterMCLabel; diff --git a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h index 8001b4e98c83f..b70890738e1b7 100644 --- a/GPU/GPUTracking/Definitions/GPUGetConstexpr.h +++ b/GPU/GPUTracking/Definitions/GPUGetConstexpr.h @@ -16,7 +16,9 @@ #define GPUGETCONSTEXPR_H #include "GPUCommonDef.h" -#include "GPUCommonTypeTraits.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif // This is a temporary workaround required for clang (with c++20), until we can go to C++23 with P2280R4, which allows getting constexpr static values from references diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index 502a70cb57762..a1993ec4a0ffa 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -28,7 +28,10 @@ #include "GPUCommonArray.h" #include "GPUParam.h" #include "GPUTrackParamConvert.h" -#include "GPUCommonTypeTraits.h" + +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif using namespace o2::gpu; using namespace o2::track; diff --git a/GPU/GPUTracking/Standalone/cmake/config.cmake b/GPU/GPUTracking/Standalone/cmake/config.cmake index 1de0cfa27d7ee..0c7544aff7035 100644 --- a/GPU/GPUTracking/Standalone/cmake/config.cmake +++ b/GPU/GPUTracking/Standalone/cmake/config.cmake @@ -30,7 +30,7 @@ set(GPUCA_BUILD_DEBUG 0) 
set(GPUCA_BUILD_DEBUG_SANITIZE 0) set(GPUCA_DETERMINISTIC_MODE 0) # OFF / NO_FAST_MATH / OPTO2 / GPU / WHOLEO2 #set(GPUCA_CUDA_GCCBIN c++-14) -#set(GPUCA_OPENCL_CLANGBIN clang-19) +#set(GPUCA_OPENCL_CLANGBIN clang-20) set(HIP_AMDGPUTARGET "default") # "gfx906;gfx908;gfx90a" set(CUDA_COMPUTETARGET "default") # 86 89 #set(GPUCA_CUDA_COMPILE_MODE perkernel) # onefile / perkernel / rtc diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx index d18f04e554043..dea4cdbca430e 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx +++ b/GPU/GPUTracking/TRDTracking/GPUTRDTrackerKernels.cxx @@ -15,10 +15,13 @@ #include "GPUTRDTrackerKernels.h" #include "GPUTRDGeometry.h" #include "GPUConstantMem.h" -#include "GPUCommonTypeTraits.h" #include "GPUReconstructionThreading.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + using namespace o2::gpu; template diff --git a/GPU/GPUTracking/utils/bitfield.h b/GPU/GPUTracking/utils/bitfield.h index a3a3ac9a5bd95..c5df80f1d6277 100644 --- a/GPU/GPUTracking/utils/bitfield.h +++ b/GPU/GPUTracking/utils/bitfield.h @@ -15,10 +15,6 @@ #ifndef Q_BITFIELD_H #define Q_BITFIELD_H -#if !defined(GPUCA_GPUCODE_DEVICE) && !defined(GPUCA_GPUCODE_COMPILEKERNELS) -#include -#endif - template class bitfield { @@ -93,6 +89,7 @@ class bitfield } #if !defined(GPUCA_GPUCODE_DEVICE) + static_assert(std::is_void_v, "type_traits header missing"); static_assert(std::is_integral_v, "Storage type non integral"); static_assert(sizeof(S) >= sizeof(T), "Storage type has insufficient capacity"); #endif From a9e52c1e0175a24e673c74ec9bf2d8bf03017c0b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:34:58 +0200 Subject: [PATCH 0517/1914] GPU: Rename some misleading flag names --- GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h | 2 +- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx | 2 +- 
GPU/GPUTracking/Merger/GPUTPCGMPropagator.h | 8 ++++---- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 6 +++--- GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h index 3c86dbfcd8d18..4ddd70efb5d1c 100644 --- a/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h +++ b/GPU/GPUTracking/DataTypes/GPUTPCGMMergedTrackHit.h @@ -35,7 +35,7 @@ struct GPUTPCGMMergedTrackHit { flagRejectDistance = 0x20, flagRejectErr = 0x40, flagReject = 0x60, - flagNotFit = 0x80 }; + flagHighIncl = 0x80 }; }; struct GPUTPCGMMergedTrackHitXYZ { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 73ca449252d1d..1d5a7a0b1df47 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1895,7 +1895,7 @@ GPUd() void GPUTPCGMMerger::Finalize1(int32_t nBlocks, int32_t nThreads, int32_t uint8_t clusterState = mClusters[trk.FirstClusterRef() + j].state; if (!(clusterState & GPUTPCGMMergedTrackHit::flagReject)) { weight |= attachGood; - } else if (clusterState & GPUTPCGMMergedTrackHit::flagNotFit) { + } else if (clusterState & GPUTPCGMMergedTrackHit::flagHighIncl) { weight |= attachHighIncl; } if (mClusters[trk.FirstClusterRef() + j].leg == goodLeg) { diff --git a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx index 7bb28a9f22e31..74a8df388d163 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMO2Output.cxx @@ -32,7 +32,7 @@ using namespace o2::gpu; using namespace o2::tpc; using namespace o2::tpc::constants; -GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit; } +GPUdi() static constexpr uint8_t getFlagsReject() { return GPUTPCGMMergedTrackHit::flagReject | 
GPUTPCGMMergedTrackHit::flagHighIncl; } GPUdi() static uint32_t getFlagsRequired(const GPUSettingsRec& rec) { return rec.tpc.dropSecondaryLegsInOutput ? gputpcgmmergertypes::attachGoodLeg : gputpcgmmergertypes::attachZero; } namespace o2::gpu::internal diff --git a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h index db7a3b5884a12..97b307ce7a550 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMPropagator.h @@ -53,9 +53,9 @@ class GPUTPCGMPropagator updateErrorFitFailed = -1, updateErrorClusterRejected = 2, updateErrorClusterRejectedDistance = 2, - updateErrorEdgeCluster = 3, - updateErrorClusterRejectedInInterpolation = 4, - updateErrorClusterRejectedInUpdate = 5 + updateErrorClusterRejectedInInterpolation = 3, + updateErrorClusterRejectedInUpdate = 4, + updateErrorClusterRejectedEdge = 5 }; enum RejectChi2Mode { rejectDirect = 1, @@ -188,7 +188,7 @@ class GPUTPCGMPropagator GPUTPCGMPhysicalTrackModel mT0; MaterialCorrection mMaterial; FieldRegion mFieldRegion = TPC; - bool mSeedingErrors = 0; + bool mSeedingErrors = 0; // TODO: Hide variable in Run3 mode bool mFitInProjections = 1; // fit (Y,SinPhi,QPt) and (Z,DzDs) paramteres separatelly bool mPropagateBzOnly = 0; // Use Bz only in propagation bool mToyMCEvents = 0; // events are simulated with simple home-made simulation diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index 1072e4b178bdf..aed42e4f98f0c 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -299,7 +299,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ if (mC[0] > param.rec.tpc.trackFitCovLimit || mC[2] > param.rec.tpc.trackFitCovLimit) { break; } - MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagNotFit); + MarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, 
GPUTPCGMMergedTrackHit::flagHighIncl); nMissed2++; NTolerated++; CADEBUG(printf(" --- break (%d, %d)\n", err, err2)); @@ -334,7 +334,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ #endif GPUCA_DEBUG_STREAMER_CHECK(GPUTPCGMPropagator::DebugStreamerVals debugVals;); if (param.rec.tpc.rejectEdgeClustersInTrackFit && uncorrectedY > -1e6f && param.rejectEdgeClusterByY(uncorrectedY, cluster.row, CAMath::Sqrt(mC[0]))) { // uncorrectedY > -1e6f implies allowModification - retVal = GPUTPCGMPropagator::updateErrorEdgeCluster; + retVal = GPUTPCGMPropagator::updateErrorClusterRejectedEdge; } else { const float time = merger->GetConstantMem()->ioPtrs.clustersNative ? merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].getTime() : -1.f; const float invSqrtCharge = merger->GetConstantMem()->ioPtrs.clustersNative ? CAMath::InvSqrt(merger->GetConstantMem()->ioPtrs.clustersNative->clustersLinear[cluster.num].qMax) : 0.f; @@ -363,7 +363,7 @@ GPUd() bool GPUTPCGMTrackParam::Fit(GPUTPCGMMerger* GPUrestrict() merger, int32_ lastUpdateX = mX; covYYUpd = mC[0]; nMissed = nMissed2 = 0; - UnmarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagNotFit); + UnmarkClusters(clusters, ihitMergeFirst, ihit, wayDirection, GPUTPCGMMergedTrackHit::flagHighIncl); N++; ihitStart = ihit; float dy = mP[0] - prop.Model().Y(); diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index a1993ec4a0ffa..b74c1bb6a4534 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -268,7 +268,7 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov if constexpr (std::is_same_v) { const auto& hit = mPtrackHits[trkX.FirstClusterRef() + i]; cl = &mPclusterNative->clustersLinear[hit.num]; - if (hit.state & (GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagNotFit)) { + if (hit.state & 
(GPUTPCGMMergedTrackHit::flagReject | GPUTPCGMMergedTrackHit::flagHighIncl)) { cl = nullptr; if (i + direction != stop) { i += direction; From 132943deb4940ec93748ebbc419ccaa90ca29247 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 14:57:54 +0200 Subject: [PATCH 0518/1914] GPU: Use instead of GPUCommonArray for CUDA / ROCm, not yet for OpenCL... --- .../DCAFitter/include/DCAFitter/DCAFitterN.h | 68 +++++++++---------- .../MathUtils/include/MathUtils/SMatrixGPU.h | 8 +-- .../include/MathUtils/detail/basicMath.h | 11 +-- .../include/MathUtils/detail/trigonometric.h | 13 ++-- .../ITS/include/DataFormatsITS/TrackITS.h | 8 +-- .../include/ReconstructionDataFormats/DCA.h | 4 +- .../TrackParametrization.h | 10 +-- .../TrackParametrizationWithError.h | 8 +-- .../ReconstructionDataFormats/TrackUtils.h | 6 +- .../ReconstructionDataFormats/Vertex.h | 12 ++-- .../src/TrackParametrization.cxx | 6 +- .../src/TrackParametrizationWithError.cxx | 10 +-- Detectors/AOD/src/AODProducerWorkflowSpec.cxx | 2 +- Detectors/Align/src/AlignableDetectorTPC.cxx | 4 +- .../Base/include/DetectorsBase/Propagator.h | 6 +- Detectors/Base/src/Propagator.cxx | 8 +-- .../postprocessing/studies/src/Efficiency.cxx | 12 ++-- .../studies/src/ImpactParameter.cxx | 2 +- .../GPU/ITStrackingGPU/VertexingKernels.h | 4 +- .../ITS/tracking/GPU/cuda/VertexingKernels.cu | 4 +- .../tracking/include/ITStracking/Cluster.h | 15 ++-- .../tracking/include/ITStracking/Constants.h | 28 ++++---- Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx | 8 ++- Detectors/TPC/qc/src/Tracks.cxx | 6 +- .../TPC/workflow/src/MIPTrackFilterSpec.cxx | 2 +- Detectors/TPC/workflow/src/TPCRefitter.cxx | 2 +- .../TPC/workflow/src/TPCTimeSeriesSpec.cxx | 10 +-- GPU/Common/GPUCommonArray.h | 12 ++-- GPU/Common/GPUCommonTypeTraits.h | 3 +- GPU/GPUTracking/Base/GPUStdSystemHeaders.h | 1 + .../Base/opencl/GPUReconstructionOCL.cl | 1 + GPU/GPUTracking/Refit/GPUTrackingRefit.cxx | 4 +- .../TRDTracking/GPUTRDInterfaces.h | 8 +-- 33 files 
changed, 156 insertions(+), 150 deletions(-) diff --git a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h index 569b3ea49e515..aac451f1f8978 100644 --- a/Common/DCAFitter/include/DCAFitter/DCAFitterN.h +++ b/Common/DCAFitter/include/DCAFitter/DCAFitterN.h @@ -110,11 +110,11 @@ class DCAFitterN using MatSymND = o2::math_utils::SMatrix>; using MatStdND = o2::math_utils::SMatrix>; using TrackCoefVtx = MatStd3D; - using ArrTrack = o2::gpu::gpustd::array; // container for prongs (tracks) at single vertex cand. - using ArrTrackCovI = o2::gpu::gpustd::array; // container for inv.cov.matrices at single vertex cand. - using ArrTrCoef = o2::gpu::gpustd::array; // container of TrackCoefVtx coefficients at single vertex cand. - using ArrTrDer = o2::gpu::gpustd::array; // container of Track 1st and 2nd derivative over their X param - using ArrTrPos = o2::gpu::gpustd::array; // container of Track positions + using ArrTrack = std::array; // container for prongs (tracks) at single vertex cand. + using ArrTrackCovI = std::array; // container for inv.cov.matrices at single vertex cand. + using ArrTrCoef = std::array; // container of TrackCoefVtx coefficients at single vertex cand. + using ArrTrDer = std::array; // container of Track 1st and 2nd derivative over their X param + using ArrTrPos = std::array; // container of Track positions public: enum BadCovPolicy : uint8_t { // if encountering non-positive defined cov. 
matrix, the choice is: @@ -158,7 +158,7 @@ class DCAFitterN GPUd() const auto getPCACandidatePos(int cand = 0) const { const auto& vd = mPCA[mOrder[cand]]; - return o2::gpu::gpustd::array{static_cast(vd[0]), static_cast(vd[1]), static_cast(vd[2])}; + return std::array{static_cast(vd[0]), static_cast(vd[1]), static_cast(vd[2])}; } ///< return position of quality-ordered candidate in the internal structures @@ -213,7 +213,7 @@ class DCAFitterN GPUd() MatSym3D calcPCACovMatrix(int cand = 0) const; - o2::gpu::gpustd::array calcPCACovMatrixFlat(int cand = 0) const + std::array calcPCACovMatrixFlat(int cand = 0) const { auto m = calcPCACovMatrix(cand); return {static_cast(m(0, 0)), static_cast(m(1, 0)), static_cast(m(1, 1)), static_cast(m(2, 0)), static_cast(m(2, 1)), static_cast(m(2, 2))}; @@ -364,39 +364,39 @@ class DCAFitterN private: // vectors of 1st derivatives of track local residuals over X parameters - o2::gpu::gpustd::array, N> mDResidDx; + std::array, N> mDResidDx; // vectors of 1nd derivatives of track local residuals over X parameters // (cross-derivatives DR/(dx_j*dx_k) = 0 for j!=k, therefore the hessian is diagonal) - o2::gpu::gpustd::array, N> mD2ResidDx2; + std::array, N> mD2ResidDx2; VecND mDChi2Dx; // 1st derivatives of chi2 over tracks X params MatSymND mD2Chi2Dx2; // 2nd derivatives of chi2 over tracks X params (symmetric matrix) MatSymND mCosDif; // matrix with cos(alp_j-alp_i) for j mOrigTrPtr; - o2::gpu::gpustd::array mTrAux; // Aux track info for each track at each cand. vertex - CrossInfo mCrossings; // info on track crossing - - o2::gpu::gpustd::array mTrcEInv; // errors for each track at each cand. vertex - o2::gpu::gpustd::array mCandTr; // tracks at each cond. vertex (Note: Errors are at seed XY point) - o2::gpu::gpustd::array mTrCFVT; // TrackCoefVtx for each track at each cand. 
vertex - o2::gpu::gpustd::array mTrDer; // Track derivativse - o2::gpu::gpustd::array mTrPos; // Track positions - o2::gpu::gpustd::array mTrRes; // Track residuals - o2::gpu::gpustd::array mPCA; // PCA for each vertex candidate - o2::gpu::gpustd::array mChi2 = {0}; // Chi2 at PCA candidate - o2::gpu::gpustd::array mNIters; // number of iterations for each seed - o2::gpu::gpustd::array mTrPropDone{}; // Flag that the tracks are fully propagated to PCA - o2::gpu::gpustd::array mPropFailed{}; // Flag that some propagation failed for this PCA candidate + std::array mOrigTrPtr; + std::array mTrAux; // Aux track info for each track at each cand. vertex + CrossInfo mCrossings; // info on track crossing + + std::array mTrcEInv; // errors for each track at each cand. vertex + std::array mCandTr; // tracks at each cond. vertex (Note: Errors are at seed XY point) + std::array mTrCFVT; // TrackCoefVtx for each track at each cand. vertex + std::array mTrDer; // Track derivativse + std::array mTrPos; // Track positions + std::array mTrRes; // Track residuals + std::array mPCA; // PCA for each vertex candidate + std::array mChi2 = {0}; // Chi2 at PCA candidate + std::array mNIters; // number of iterations for each seed + std::array mTrPropDone{}; // Flag that the tracks are fully propagated to PCA + std::array mPropFailed{}; // Flag that some propagation failed for this PCA candidate LogLogThrottler mLoggerBadCov{}; LogLogThrottler mLoggerBadInv{}; LogLogThrottler mLoggerBadProp{}; MatSym3D mWeightInv; // inverse weight of single track, [sum{M^T E M}]^-1 in EQ.T - o2::gpu::gpustd::array mOrder{0}; + std::array mOrder{0}; int mCurHyp = 0; int mCrossIDCur = 0; int mCrossIDAlt = -1; BadCovPolicy mBadCovPolicy{BadCovPolicy::Discard}; // what to do in case of non-pos-def. cov. 
matrix, see BadCovPolicy enum - o2::gpu::gpustd::array mFitStatus{}; // fit status of each hypothesis fit + std::array mFitStatus{}; // fit status of each hypothesis fit bool mAllowAltPreference = true; // if the fit converges to alternative PCA seed, abandon the current one bool mUseAbsDCA = false; // use abs. distance minimization rather than chi2 bool mWeightedFinalPCA = false; // recalculate PCA as a cov-matrix weighted mean, even if absDCA method was used @@ -657,7 +657,7 @@ template GPUd() void DCAFitterN::calcChi2Derivatives() { //< calculate 1st and 2nd derivatives of wighted DCA (chi2) over track parameters X, see EQ.Chi2 in the ref - o2::gpu::gpustd::array, N> covIDrDx; // tempory vectors of covI_j * dres_j/dx_i + std::array, N> covIDrDx; // tempory vectors of covI_j * dres_j/dx_i // chi2 1st derivative for (int i = N; i--;) { @@ -1175,13 +1175,13 @@ GPUd() o2::track::TrackParCov DCAFitterN::createParentTrackParCov(in { const auto& trP = getTrack(0, cand); const auto& trN = getTrack(1, cand); - o2::gpu::gpustd::array covV = {0.}; - o2::gpu::gpustd::array pvecV = {0.}; + std::array covV = {0.}; + std::array pvecV = {0.}; int q = 0; for (int it = 0; it < N; it++) { const auto& trc = getTrack(it, cand); - o2::gpu::gpustd::array pvecT = {0.}; - o2::gpu::gpustd::array covT = {0.}; + std::array pvecT = {0.}; + std::array covT = {0.}; trc.getPxPyPzGlo(pvecT); trc.getCovXYZPxPyPzGlo(covT); constexpr int MomInd[6] = {9, 13, 14, 18, 19, 20}; // cov matrix elements for momentum component @@ -1210,18 +1210,18 @@ GPUd() o2::track::TrackPar DCAFitterN::createParentTrackPar(int cand const auto& trP = getTrack(0, cand); const auto& trN = getTrack(1, cand); const auto& wvtx = getPCACandidate(cand); - o2::gpu::gpustd::array pvecV = {0.}; + std::array pvecV = {0.}; int q = 0; for (int it = 0; it < N; it++) { const auto& trc = getTrack(it, cand); - o2::gpu::gpustd::array pvecT = {0.}; + std::array pvecT = {0.}; trc.getPxPyPzGlo(pvecT); for (int i = 0; i < 3; i++) { pvecV[i] 
+= pvecT[i]; } q += trc.getCharge(); } - const o2::gpu::gpustd::array vertex = {(float)wvtx[0], (float)wvtx[1], (float)wvtx[2]}; + const std::array vertex = {(float)wvtx[0], (float)wvtx[1], (float)wvtx[2]}; return o2::track::TrackPar(vertex, pvecV, q, sectorAlpha); } diff --git a/Common/MathUtils/include/MathUtils/SMatrixGPU.h b/Common/MathUtils/include/MathUtils/SMatrixGPU.h index 675719cfc0751..8158a93666a92 100644 --- a/Common/MathUtils/include/MathUtils/SMatrixGPU.h +++ b/Common/MathUtils/include/MathUtils/SMatrixGPU.h @@ -25,12 +25,12 @@ #define ALICEO2_SMATRIX_GPU_H #include "GPUCommonDef.h" -#include "GPUCommonArray.h" #include "GPUCommonMath.h" #include "GPUCommonAlgorithm.h" #include "GPUCommonLogger.h" #ifndef GPUCA_GPUCODE_DEVICE #include +#include #endif namespace o2::math_utils::detail @@ -283,14 +283,14 @@ struct make_indices : make_indices_impl<0, indices<>, N> { }; template -constexpr auto do_make(F f, indices) -> gpu::gpustd::array +constexpr auto do_make(F f, indices) -> std::array { - gpu::gpustd::array retarr = {f(I0 + I)...}; + std::array retarr = {f(I0 + I)...}; return retarr; } template -constexpr auto make(F f) -> gpu::gpustd::array +constexpr auto make(F f) -> std::array { return do_make(f, typename make_indices::type()); } diff --git a/Common/MathUtils/include/MathUtils/detail/basicMath.h b/Common/MathUtils/include/MathUtils/detail/basicMath.h index 3565764435a68..1abe6ee878c39 100644 --- a/Common/MathUtils/include/MathUtils/detail/basicMath.h +++ b/Common/MathUtils/include/MathUtils/detail/basicMath.h @@ -16,14 +16,15 @@ #ifndef MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ #define MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ +#include "GPUCommonDef.h" +#include "GPUCommonMath.h" +#include "CommonConstants/MathConstants.h" + #ifndef GPUCA_GPUCODE_DEVICE #include #include +#include #endif -#include "GPUCommonArray.h" -#include "GPUCommonDef.h" -#include "GPUCommonMath.h" -#include "CommonConstants/MathConstants.h" namespace o2 { @@ 
-130,4 +131,4 @@ GPUdi() double log(double x) } // namespace math_utils } // namespace o2 -#endif /* MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ */ \ No newline at end of file +#endif /* MATHUTILS_INCLUDE_MATHUTILS_DETAIL_BASICMATH_H_ */ diff --git a/Common/MathUtils/include/MathUtils/detail/trigonometric.h b/Common/MathUtils/include/MathUtils/detail/trigonometric.h index 462affdceb17f..457210202ca54 100644 --- a/Common/MathUtils/include/MathUtils/detail/trigonometric.h +++ b/Common/MathUtils/include/MathUtils/detail/trigonometric.h @@ -16,16 +16,17 @@ #ifndef MATHUTILS_INCLUDE_MATHUTILS_DETAIL_TRIGONOMETRIC_H_ #define MATHUTILS_INCLUDE_MATHUTILS_DETAIL_TRIGONOMETRIC_H_ -#ifndef GPUCA_GPUCODE_DEVICE -#include -#include -#endif -#include "GPUCommonArray.h" #include "GPUCommonDef.h" #include "GPUCommonMath.h" #include "CommonConstants/MathConstants.h" #include "MathUtils/detail/basicMath.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#include +#include +#endif + namespace o2 { namespace math_utils @@ -156,7 +157,7 @@ GPUhdi() std::tuple rotateZInv(T xG, T yG, T snAlp, T csAlp) #endif template -GPUhdi() void rotateZ(gpu::gpustd::array& xy, T alpha) +GPUhdi() void rotateZ(std::array& xy, T alpha) { // transforms vector in tracking frame alpha to global frame T sin, cos; diff --git a/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h b/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h index e9931b89ecd4a..06d4fba51bd54 100644 --- a/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h +++ b/DataFormats/Detectors/ITSMFT/ITS/include/DataFormatsITS/TrackITS.h @@ -170,14 +170,14 @@ class TrackITSExt : public TrackITS using TrackITS::TrackITS; // inherit base constructors GPUh() TrackITSExt(o2::track::TrackParCov&& parCov, short ncl, float chi2, - o2::track::TrackParCov&& outer, o2::gpu::gpustd::array cls) + o2::track::TrackParCov&& outer, std::array cls) : TrackITS(parCov, chi2, outer), mIndex{cls} { setNumberOfClusters(ncl); } 
GPUh() TrackITSExt(o2::track::TrackParCov& parCov, short ncl, float chi2, std::uint32_t rof, - o2::track::TrackParCov& outer, o2::gpu::gpustd::array cls) + o2::track::TrackParCov& outer, std::array cls) : TrackITS(parCov, chi2, outer), mIndex{cls} { setNumberOfClusters(ncl); @@ -205,13 +205,13 @@ class TrackITSExt : public TrackITS mIndex[layer] = idx; } - GPUh() o2::gpu::gpustd::array& getClusterIndexes() + GPUh() std::array& getClusterIndexes() { return mIndex; } private: - o2::gpu::gpustd::array mIndex = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; ///< Indices of associated clusters + std::array mIndex = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; ///< Indices of associated clusters ClassDefNV(TrackITSExt, 2); }; } // namespace its diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h index 922470f8992f5..6eb41b798e101 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/DCA.h @@ -14,10 +14,10 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #ifndef GPUCA_GPUCODE_DEVICE #include +#include #endif /// \author ruben.shahoyan@cern.ch @@ -67,7 +67,7 @@ class DCA private: float mY = 0.f; float mZ = 0.f; - gpu::gpustd::array mCov; ///< s2y, syz, s2z + std::array mCov; ///< s2y, syz, s2z ClassDefNV(DCA, 1); }; diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index bfd56eb8f024f..f240e34861eeb 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,7 +29,6 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include 
"GPUCommonMath.h" -#include "GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE @@ -39,6 +38,7 @@ #include #include #include +#include #endif #ifndef GPUCA_ALIGPUCODE // Used only by functions that are hidden on the GPU @@ -128,9 +128,9 @@ class TrackParametrization public: using value_t = value_T; - using dim2_t = gpu::gpustd::array; - using dim3_t = gpu::gpustd::array; - using params_t = gpu::gpustd::array; + using dim2_t = std::array; + using dim3_t = std::array; + using params_t = std::array; struct yzerr_t { // 2 measurement with error dim2_t yz; @@ -209,7 +209,7 @@ class TrackParametrization GPUd() math_utils::Point3D getXYZGlo() const; GPUd() void getXYZGlo(dim3_t& xyz) const; GPUd() bool getPxPyPzGlo(dim3_t& pxyz) const; - GPUd() bool getPosDirGlo(gpu::gpustd::array& posdirp) const; + GPUd() bool getPosDirGlo(std::array& posdirp) const; // methods for track params estimate at other point GPUd() bool getYZAt(value_t xk, value_t b, value_t& y, value_t& z) const; diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h index cc783298e14cd..cd9d1517a81b1 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrizationWithError.h @@ -38,14 +38,14 @@ class TrackParametrizationWithError : public TrackParametrization static_assert(std::is_floating_point_v); #endif - using covMat_t = gpu::gpustd::array; + using covMat_t = std::array; using MatrixDSym5 = o2::math_utils::SMatrix>; using MatrixD5 = o2::math_utils::SMatrix>; GPUhd() TrackParametrizationWithError(); GPUd() TrackParametrizationWithError(value_t x, value_t alpha, const params_t& par, const covMat_t& cov, int charge = 1, const PID pid = PID::Pion); GPUd() TrackParametrizationWithError(const dim3_t& xyz, const 
dim3_t& pxpypz, - const gpu::gpustd::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); + const std::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); GPUhdDefault() TrackParametrizationWithError(const TrackParametrizationWithError& src) = default; GPUdDefault() TrackParametrizationWithError(TrackParametrizationWithError&& src) = default; @@ -57,7 +57,7 @@ class TrackParametrizationWithError : public TrackParametrization using TrackParametrization::set; GPUd() void set(value_t x, value_t alpha, const params_t& par, const covMat_t& cov, int charge = 1, const PID pid = PID::Pion); GPUd() void set(value_t x, value_t alpha, const value_t* par, const value_t* cov, int charge = 1, const PID pid = PID::Pion); - GPUd() void set(const dim3_t& xyz, const dim3_t& pxpypz, const gpu::gpustd::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); + GPUd() void set(const dim3_t& xyz, const dim3_t& pxpypz, const std::array& cv, int sign, bool sectorAlpha = true, const PID pid = PID::Pion); GPUd() const covMat_t& getCov() const; GPUd() value_t getSigmaY2() const; GPUd() value_t getSigmaZY() const; @@ -77,7 +77,7 @@ class TrackParametrizationWithError : public TrackParametrization GPUd() value_t getCovarElem(int i, int j) const; GPUd() value_t getDiagError2(int i) const; - GPUd() bool getCovXYZPxPyPzGlo(gpu::gpustd::array& c) const; + GPUd() bool getCovXYZPxPyPzGlo(std::array& c) const; GPUd() void print() const; GPUd() void printHexadecimal(); diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h index 0ee0ca4461ab0..8a79130d64eda 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackUtils.h @@ -18,9 +18,9 @@ #define INCLUDE_RECONSTRUCTIONDATAFORMATS_TRACKUTILS_H_ #include "GPUCommonRtypes.h" -#include 
"GPUCommonArray.h" #ifndef GPUCA_GPUCODE_DEVICE +#include #include #endif @@ -39,11 +39,11 @@ template GPUd() value_T BetheBlochSolidOpt(value_T bg); template -GPUd() void g3helx3(value_T qfield, value_T step, gpu::gpustd::array& vect); +GPUd() void g3helx3(value_T qfield, value_T step, std::array& vect); //____________________________________________________ template -GPUd() void g3helx3(value_T qfield, value_T step, gpu::gpustd::array& vect) +GPUd() void g3helx3(value_T qfield, value_T step, std::array& vect) { /****************************************************************** * * diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h index d14bc7ac786c8..fc89f162a0727 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/Vertex.h @@ -14,7 +14,6 @@ #include "GPUCommonDef.h" #include "GPUCommonMath.h" -#include "GPUCommonArray.h" #include #include "CommonDataFormat/TimeStamp.h" @@ -22,6 +21,7 @@ #include #include #include +#include #endif namespace o2 @@ -42,7 +42,7 @@ class VertexBase static constexpr int kNCov = 6; GPUhdDefault() VertexBase() = default; GPUhdDefault() ~VertexBase() = default; - GPUhd() VertexBase(const math_utils::Point3D& pos, const gpu::gpustd::array& cov) : mPos(pos), mCov(cov) + GPUhd() VertexBase(const math_utils::Point3D& pos, const std::array& cov) : mPos(pos), mCov(cov) { } @@ -65,7 +65,7 @@ class VertexBase GPUd() float getSigmaY() const { return gpu::CAMath::Sqrt(getSigmaY2()); } GPUd() float getSigmaZ() const { return gpu::CAMath::Sqrt(getSigmaZ2()); } - GPUd() const gpu::gpustd::array& getCov() const { return mCov; } + GPUd() const std::array& getCov() const { return mCov; } GPUd() math_utils::Point3D getXYZ() const { return mPos; } GPUd() math_utils::Point3D& getXYZ() { return mPos; } @@ -101,14 +101,14 @@ class VertexBase setSigmaXZ(sxz); 
setSigmaYZ(syz); } - GPUd() void setCov(const gpu::gpustd::array& cov) { mCov = cov; } + GPUd() void setCov(const std::array& cov) { mCov = cov; } bool operator==(const VertexBase& other) const; bool operator!=(const VertexBase& other) const { return !(*this == other); } protected: math_utils::Point3D mPos{0., 0., 0.}; ///< cartesian position - gpu::gpustd::array mCov{}; ///< errors, see CovElems enum + std::array mCov{}; ///< errors, see CovElems enum ClassDefNV(VertexBase, 1); }; @@ -130,7 +130,7 @@ class Vertex : public VertexBase GPUhdDefault() Vertex() = default; GPUhdDefault() ~Vertex() = default; - GPUhd() Vertex(const math_utils::Point3D& pos, const gpu::gpustd::array& cov, ushort nCont, float chi2) + GPUhd() Vertex(const math_utils::Point3D& pos, const std::array& cov, ushort nCont, float chi2) : VertexBase(pos, cov), mChi2(chi2), mNContributors(nCont) { } diff --git a/DataFormats/Reconstruction/src/TrackParametrization.cxx b/DataFormats/Reconstruction/src/TrackParametrization.cxx index 4b68ea425bfbd..1bdf9b55a60a0 100644 --- a/DataFormats/Reconstruction/src/TrackParametrization.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrization.cxx @@ -130,7 +130,7 @@ GPUd() bool TrackParametrization::getPxPyPzGlo(dim3_t& pxyz) const //____________________________________________________ template -GPUd() bool TrackParametrization::getPosDirGlo(gpu::gpustd::array& posdirp) const +GPUd() bool TrackParametrization::getPosDirGlo(std::array& posdirp) const { // fill vector with lab x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha value_t ptI = getPtInv(); @@ -231,7 +231,7 @@ GPUd() bool TrackParametrization::propagateParamTo(value_t xk, const di step *= gpu::CAMath::Sqrt(1.f + getTgl() * getTgl()); // // get the track x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha in the Global System - gpu::gpustd::array vecLab{0.f}; + std::array vecLab{0.f}; if (!getPosDirGlo(vecLab)) { return false; } @@ -250,7 +250,7 @@ GPUd() bool TrackParametrization::propagateParamTo(value_t xk, const di 
costet = b[2] / bb; sintet = bt / bb; } - gpu::gpustd::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], + std::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], -sinphi * vecLab[0] + cosphi * vecLab[1], sintet * cosphi * vecLab[0] + sintet * sinphi * vecLab[1] + costet * vecLab[2], costet * cosphi * vecLab[3] + costet * sinphi * vecLab[4] - sintet * vecLab[5], diff --git a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx index 81963adf79938..aee24238f1247 100644 --- a/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx +++ b/DataFormats/Reconstruction/src/TrackParametrizationWithError.cxx @@ -259,7 +259,7 @@ GPUd() bool TrackParametrizationWithError::propagateToDCA(const o2::dat //______________________________________________________________ template GPUd() TrackParametrizationWithError::TrackParametrizationWithError(const dim3_t& xyz, const dim3_t& pxpypz, - const gpu::gpustd::array& cv, int charge, bool sectorAlpha, const PID pid) + const std::array& cv, int charge, bool sectorAlpha, const PID pid) { // construct track param and covariance from kinematics and lab errors set(xyz, pxpypz, cv, charge, sectorAlpha, pid); @@ -268,7 +268,7 @@ GPUd() TrackParametrizationWithError::TrackParametrizationWithError(con //______________________________________________________________ template GPUd() void TrackParametrizationWithError::set(const dim3_t& xyz, const dim3_t& pxpypz, - const gpu::gpustd::array& cv, int charge, bool sectorAlpha, const PID pid) + const std::array& cv, int charge, bool sectorAlpha, const PID pid) { // set track param and covariance from kinematics and lab errors @@ -475,7 +475,7 @@ GPUd() bool TrackParametrizationWithError::propagateTo(value_t xk, cons step *= gpu::CAMath::Sqrt(1.f + this->getTgl() * this->getTgl()); // // get the track 
x,y,z,px/p,py/p,pz/p,p,sinAlpha,cosAlpha in the Global System - gpu::gpustd::array vecLab{0.f}; + std::array vecLab{0.f}; if (!this->getPosDirGlo(vecLab)) { return false; } @@ -542,7 +542,7 @@ GPUd() bool TrackParametrizationWithError::propagateTo(value_t xk, cons costet = b[2] / bb; sintet = bt / bb; } - gpu::gpustd::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], + std::array vect{costet * cosphi * vecLab[0] + costet * sinphi * vecLab[1] - sintet * vecLab[2], -sinphi * vecLab[0] + cosphi * vecLab[1], sintet * cosphi * vecLab[0] + sintet * sinphi * vecLab[1] + costet * vecLab[2], costet * cosphi * vecLab[3] + costet * sinphi * vecLab[4] - sintet * vecLab[5], @@ -1115,7 +1115,7 @@ GPUd() bool TrackParametrizationWithError::correctForMaterial(value_t x //______________________________________________________________ template -GPUd() bool TrackParametrizationWithError::getCovXYZPxPyPzGlo(gpu::gpustd::array& cv) const +GPUd() bool TrackParametrizationWithError::getCovXYZPxPyPzGlo(std::array& cv) const { //--------------------------------------------------------------------- // This function returns the global covariance matrix of the track params diff --git a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx index 2b133770357fc..8fb2db9c3c9a6 100644 --- a/Detectors/AOD/src/AODProducerWorkflowSpec.cxx +++ b/Detectors/AOD/src/AODProducerWorkflowSpec.cxx @@ -2615,7 +2615,7 @@ AODProducerWorkflowDPL::TrackQA AODProducerWorkflowDPL::processBarrelTrackQA(int o2::track::TrackParametrization tpcTMP = tpcOrig; /// get backup of the track const o2::base::Propagator::MatCorrType mMatType = o2::base::Propagator::MatCorrType::USEMatCorrLUT; /// should be parameterized const o2::dataformats::VertexBase v = mVtx.getMeanVertex(collisionID < 0 ? 
0.f : data.getPrimaryVertex(collisionID).getZ()); - o2::gpu::gpustd::array dcaInfo{-999., -999.}; + std::array dcaInfo{-999., -999.}; if (prop->propagateToDCABxByBz({v.getX(), v.getY(), v.getZ()}, tpcTMP, 2.f, mMatType, &dcaInfo)) { trackQAHolder.tpcdcaR = 100. * dcaInfo[0] / sqrt(1. + trackPar.getQ2Pt() * trackPar.getQ2Pt()); trackQAHolder.tpcdcaZ = 100. * dcaInfo[1] / sqrt(1. + trackPar.getQ2Pt() * trackPar.getQ2Pt()); diff --git a/Detectors/Align/src/AlignableDetectorTPC.cxx b/Detectors/Align/src/AlignableDetectorTPC.cxx index f66d9e3f3ab95..b3d2102559974 100644 --- a/Detectors/Align/src/AlignableDetectorTPC.cxx +++ b/Detectors/Align/src/AlignableDetectorTPC.cxx @@ -214,8 +214,8 @@ int AlignableDetectorTPC::processPoints(GIndex gid, int npntCut, bool inv) auto* sectSensor = (AlignableSensorTPC*)getSensor(currentSector); const auto* sysE = sectSensor->getAddError(); // additional syst error - gpu::gpustd::array p = {y, z}; - gpu::gpustd::array c = {0, 0, 0}; + std::array p = {y, z}; + std::array c = {0, 0, 0}; mController->getTPCParam()->GetClusterErrors2(sector, currentRow, z, trkParam.getSnp(), trkParam.getTgl(), -1.f, 0.f, 0.f, c[0], c[2]); // TODO: Note this disables occupancy / charge components of the error estimation mController->getTPCParam()->UpdateClusterError2ByState(clusterState, c[0], c[2]); int nrComb = std::abs(row - currentRow) + 1; diff --git a/Detectors/Base/include/DetectorsBase/Propagator.h b/Detectors/Base/include/DetectorsBase/Propagator.h index a9e2ce6e0383d..dbdef47e4edc0 100644 --- a/Detectors/Base/include/DetectorsBase/Propagator.h +++ b/Detectors/Base/include/DetectorsBase/Propagator.h @@ -17,7 +17,6 @@ #define ALICEO2_BASE_PROPAGATOR_ #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #include "CommonConstants/PhysicsConstants.h" #include "ReconstructionDataFormats/Track.h" #include "ReconstructionDataFormats/DCA.h" @@ -25,6 +24,7 @@ #include "DetectorsBase/MatLayerCylSet.h" #ifndef GPUCA_GPUCODE +#include #include #endif @@ 
-111,12 +111,12 @@ class PropagatorImpl GPUd() bool propagateToDCA(const o2::math_utils::Point3D& vtx, o2::track::TrackParametrization& track, value_type bZ, value_type maxStep = MAX_STEP, MatCorrType matCorr = MatCorrType::USEMatCorrLUT, - gpu::gpustd::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, + std::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, int signCorr = 0, value_type maxD = 999.f) const; GPUd() bool propagateToDCABxByBz(const o2::math_utils::Point3D& vtx, o2::track::TrackParametrization& track, value_type maxStep = MAX_STEP, MatCorrType matCorr = MatCorrType::USEMatCorrLUT, - gpu::gpustd::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, + std::array* dca = nullptr, track::TrackLTIntegral* tofInfo = nullptr, int signCorr = 0, value_type maxD = 999.f) const; PropagatorImpl(PropagatorImpl const&) = delete; diff --git a/Detectors/Base/src/Propagator.cxx b/Detectors/Base/src/Propagator.cxx index 754c0c14e6f60..1c44cea65c69c 100644 --- a/Detectors/Base/src/Propagator.cxx +++ b/Detectors/Base/src/Propagator.cxx @@ -170,7 +170,7 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackParCov_t& track, va signCorr = -dir; // sign of eloss correction is not imposed } - gpu::gpustd::array b{}; + std::array b{}; while (math_utils::detail::abs(dx) > Epsilon) { auto step = math_utils::detail::min(math_utils::detail::abs(dx), maxStep); if (dir < 0) { @@ -239,7 +239,7 @@ GPUd() bool PropagatorImpl::PropagateToXBxByBz(TrackPar_t& track, value signCorr = -dir; // sign of eloss correction is not imposed } - gpu::gpustd::array b{}; + std::array b{}; while (math_utils::detail::abs(dx) > Epsilon) { auto step = math_utils::detail::min(math_utils::detail::abs(dx), maxStep); if (dir < 0) { @@ -553,7 +553,7 @@ GPUd() bool PropagatorImpl::propagateToDCABxByBz(const o2::dataformats: template GPUd() bool PropagatorImpl::propagateToDCA(const math_utils::Point3D& vtx, TrackPar_t& track, value_type bZ, value_type maxStep, 
PropagatorImpl::MatCorrType matCorr, - gpu::gpustd::array* dca, track::TrackLTIntegral* tofInfo, + std::array* dca, track::TrackLTIntegral* tofInfo, int signCorr, value_type maxD) const { // propagate track to DCA to the vertex @@ -601,7 +601,7 @@ GPUd() bool PropagatorImpl::propagateToDCA(const math_utils::Point3D GPUd() bool PropagatorImpl::propagateToDCABxByBz(const math_utils::Point3D& vtx, TrackPar_t& track, value_type maxStep, PropagatorImpl::MatCorrType matCorr, - gpu::gpustd::array* dca, track::TrackLTIntegral* tofInfo, + std::array* dca, track::TrackLTIntegral* tofInfo, int signCorr, value_type maxD) const { // propagate track to DCA to the vertex diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx index bca1ec1e85001..494603641cde5 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/Efficiency.cxx @@ -593,7 +593,7 @@ int EfficiencyStudy::getDCAClusterTrackMC(int countDuplicated = 0) LOGP(info, "--------------- getDCAClusterTrackMC"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); auto bz = o2::base::Propagator::Instance()->getNominalBz(); @@ -833,7 +833,7 @@ void EfficiencyStudy::countDuplicatedAfterCuts() LOGP(info, "--------------- countDuplicatedAfterCuts"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -1019,7 +1019,7 @@ void EfficiencyStudy::studyDCAcutsMC() // if not, keep it as a fake match -> increase the fake match counter // the 
efficiency of each one will be match counter / total of the duplicated clusters o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -1346,7 +1346,7 @@ void EfficiencyStudy::studyClusterSelectionMC() } o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -2176,7 +2176,7 @@ void EfficiencyStudy::getEfficiency(bool isMC) LOGP(info, "getEfficiency()"); o2::base::Propagator::MatCorrType matCorr = o2::base::Propagator::MatCorrType::USEMatCorrLUT; - o2::gpu::gpustd::array clusOriginalDCA, clusDuplicatedDCA; + std::array clusOriginalDCA, clusDuplicatedDCA; auto propagator = o2::base::Propagator::Instance(); unsigned int rofIndexTrack = 0; @@ -2860,4 +2860,4 @@ DataProcessorSpec getEfficiencyStudy(mask_t srcTracksMask, mask_t srcClustersMas Options{}}; } -} // namespace o2::its::study \ No newline at end of file +} // namespace o2::its::study diff --git a/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx b/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx index 5ca1bf2bd5c8f..c0aaabddaca1b 100644 --- a/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx +++ b/Detectors/ITSMFT/ITS/postprocessing/studies/src/ImpactParameter.cxx @@ -358,7 +358,7 @@ void ImpactParameterStudy::process(o2::globaltracking::RecoContainer& recoData) auto trueID = trueVec_globID_contr[it]; const o2::track::TrackParCov& trc = recoData.getTrackParam(trueID); auto pt = trc.getPt(); - o2::gpu::gpustd::array dcaInfo{-999., -999.}; + std::array dcaInfo{-999., -999.}; // LOGP(info, " ---> 
Bz={}", o2::base::Propagator::Instance()->getNominalBz()); o2::track::TrackPar trcTmp{trc}; if (o2::base::Propagator::Instance()->propagateToDCABxByBz({Pvtx_refitted.getX(), Pvtx_refitted.getY(), Pvtx_refitted.getZ()}, trcTmp, 2.f, matCorr, &dcaInfo)) { diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h index 6ae042d081688..059b1cdc29082 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h +++ b/Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexingKernels.h @@ -22,8 +22,6 @@ #include "ITStrackingGPU/VertexerTraitsGPU.h" #include "ITStrackingGPU/TracerGPU.h" -#include "GPUCommonArray.h" - namespace o2::its::gpu { #ifdef GPUCA_GPUCODE // GPUg() global kernels must only when compiled by GPU compiler @@ -56,4 +54,4 @@ void trackletFinderHandler(const Cluster* clustersNextLayer, // 0 2 const float phiCut, const size_t maxTrackletsPerCluster = 1e2); } // namespace o2::its::gpu -#endif \ No newline at end of file +#endif diff --git a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu index 3aab0624ef556..acbd77585df37 100644 --- a/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu +++ b/Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexingKernels.cu @@ -564,7 +564,7 @@ GPUg() void computeVertexKernel( histZ[iBin] = 0; } if (sumWZ > minContributors || vertIndex == 0) { - new (vertices + vertIndex) Vertex{o2::math_utils::Point3D(beamPosition[0], beamPosition[1], wZ / sumWZ), o2::gpu::gpustd::array{ex, 0, ey, 0, 0, ez}, static_cast(sumWZ), 0}; + new (vertices + vertIndex) Vertex{o2::math_utils::Point3D(beamPosition[0], beamPosition[1], wZ / sumWZ), std::array{ex, 0, ey, 0, 0, ez}, static_cast(sumWZ), 0}; } else { new (vertices + vertIndex) Vertex{}; } @@ -577,4 +577,4 @@ GPUg() void computeVertexKernel( */ } // namespace gpu } // namespace its -} // namespace o2 \ No 
newline at end of file +} // namespace o2 diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h index 0f136edfebfb3..2bf1316470316 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Cluster.h @@ -16,15 +16,14 @@ #ifndef TRACKINGITSU_INCLUDE_CACLUSTER_H_ #define TRACKINGITSU_INCLUDE_CACLUSTER_H_ -#ifndef GPUCA_GPUCODE_DEVICE -#include -#endif - #include "GPUCommonRtypes.h" -#include "GPUCommonArray.h" #include "ITStracking/Definitions.h" #include "ITStracking/MathUtils.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif + namespace o2 { namespace its @@ -61,15 +60,15 @@ GPUhdi() void Cluster::print() const struct TrackingFrameInfo { TrackingFrameInfo() = default; - TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, o2::gpu::gpustd::array&& posTF, o2::gpu::gpustd::array&& covTF); + TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, std::array&& posTF, std::array&& covTF); float xCoordinate; float yCoordinate; float zCoordinate; float xTrackingFrame; float alphaTrackingFrame; - o2::gpu::gpustd::array positionTrackingFrame = {-1., -1.}; - o2::gpu::gpustd::array covarianceTrackingFrame = {999., 999., 999.}; + std::array positionTrackingFrame = {-1., -1.}; + std::array covarianceTrackingFrame = {999., 999., 999.}; GPUdi() void print() const { #if !defined(GPUCA_GPUCODE_DEVICE) || (!defined(__OPENCL__) && defined(GPUCA_GPU_DEBUG_PRINT)) diff --git a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h index da02149fbc432..ec075b0f10d04 100644 --- a/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h +++ b/Detectors/ITSMFT/ITS/tracking/include/ITStracking/Constants.h @@ -16,17 +16,17 @@ #ifndef TRACKINGITSU_INCLUDE_CONSTANTS_H_ #define TRACKINGITSU_INCLUDE_CONSTANTS_H_ -#ifndef GPUCA_GPUCODE_DEVICE 
-#include -#include -#endif - #include "ITStracking/Definitions.h" #include "CommonConstants/MathConstants.h" #include "GPUCommonMath.h" #include "GPUCommonDef.h" -#include "GPUCommonArray.h" + +#ifndef GPUCA_GPUCODE_DEVICE +#include +#include +#include +#endif namespace o2 { @@ -54,9 +54,9 @@ constexpr int ClustersPerCell{3}; constexpr int UnusedIndex{-1}; constexpr float Resolution{0.0005f}; -GPUhdi() constexpr o2::gpu::gpustd::array VertexerHistogramVolume() +GPUhdi() constexpr std::array VertexerHistogramVolume() { - return o2::gpu::gpustd::array{{1.98, 1.98, 40.f}}; + return std::array{{1.98, 1.98, 40.f}}; } } // namespace its @@ -66,24 +66,24 @@ constexpr int LayersNumber{7}; constexpr int TrackletsPerRoad{LayersNumber - 1}; constexpr int CellsPerRoad{LayersNumber - 2}; -GPUhdi() constexpr o2::gpu::gpustd::array LayersZCoordinate() +GPUhdi() constexpr std::array LayersZCoordinate() { constexpr double s = 1.; // safety margin - return o2::gpu::gpustd::array{16.333f + s, 16.333f + s, 16.333f + s, 42.140f + s, 42.140f + s, 73.745f + s, 73.745f + s}; + return std::array{16.333f + s, 16.333f + s, 16.333f + s, 42.140f + s, 42.140f + s, 73.745f + s, 73.745f + s}; } -GPUhdi() constexpr o2::gpu::gpustd::array LayersRCoordinate() +GPUhdi() constexpr std::array LayersRCoordinate() { - return o2::gpu::gpustd::array{{2.33959f, 3.14076f, 3.91924f, 19.6213f, 24.5597f, 34.388f, 39.3329f}}; + return std::array{{2.33959f, 3.14076f, 3.91924f, 19.6213f, 24.5597f, 34.388f, 39.3329f}}; } constexpr int ZBins{256}; constexpr int PhiBins{128}; constexpr float InversePhiBinSize{PhiBins / constants::math::TwoPi}; -GPUhdi() constexpr o2::gpu::gpustd::array InverseZBinSize() +GPUhdi() constexpr std::array InverseZBinSize() { constexpr auto zSize = LayersZCoordinate(); - return o2::gpu::gpustd::array{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]), + return std::array{0.5f * ZBins / (zSize[0]), 0.5f * ZBins / (zSize[1]), 0.5f * ZBins / (zSize[2]), 0.5f * 
ZBins / (zSize[3]), 0.5f * ZBins / (zSize[4]), 0.5f * ZBins / (zSize[5]), 0.5f * ZBins / (zSize[6])}; } diff --git a/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx b/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx index 630ad9acf59d2..1557c636e2345 100644 --- a/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx +++ b/Detectors/ITSMFT/ITS/tracking/src/Cluster.cxx @@ -17,7 +17,9 @@ #include "ITStracking/MathUtils.h" #include "ITStracking/IndexTableUtils.h" -#include "GPUCommonArray.h" +#ifndef GPUCA_GPUCODE_DEVICE +#include +#endif namespace o2 { @@ -92,8 +94,8 @@ bool Cluster::operator==(const Cluster& rhs) const this->indexTableBinIndex == rhs.indexTableBinIndex; } -TrackingFrameInfo::TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, o2::gpu::gpustd::array&& posTF, - o2::gpu::gpustd::array&& covTF) +TrackingFrameInfo::TrackingFrameInfo(float x, float y, float z, float xTF, float alpha, std::array&& posTF, + std::array&& covTF) : xCoordinate{x}, yCoordinate{y}, zCoordinate{z}, xTrackingFrame{xTF}, alphaTrackingFrame{alpha}, positionTrackingFrame{posTF}, covarianceTrackingFrame{covTF} { // Nothing to do diff --git a/Detectors/TPC/qc/src/Tracks.cxx b/Detectors/TPC/qc/src/Tracks.cxx index 8e6f0d702df1b..5f29e80c89d2e 100644 --- a/Detectors/TPC/qc/src/Tracks.cxx +++ b/Detectors/TPC/qc/src/Tracks.cxx @@ -13,6 +13,7 @@ #include #include +#include // root includes #include "TFile.h" @@ -21,7 +22,6 @@ // o2 includes #include "DataFormatsTPC/TrackTPC.h" #include "DataFormatsTPC/dEdxInfo.h" -#include "GPUCommonArray.h" #include "DetectorsBase/Propagator.h" #include "TPCQC/Tracks.h" #include "TPCQC/Helpers.h" @@ -179,7 +179,7 @@ bool Tracks::processTrack(const o2::tpc::TrackTPC& track) if (propagator->getMatLUT() && propagator->hasMagFieldSet()) { // ---| fill DCA histos |--- - o2::gpu::gpustd::array dca; + std::array dca; o2::track::TrackPar propTrack(track); if (propagator->propagateToDCABxByBz(mPositionOfPV, propTrack, 2.f, 
o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca)) { const auto phi = o2::math_utils::to02PiGen(track.getPhi()); @@ -348,4 +348,4 @@ void Tracks::dumpToFile(std::string_view filename) arr.Write(arr.GetName(), TObject::kSingleKey); } f->Close(); -} \ No newline at end of file +} diff --git a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx index 1329dea236b1f..33b9039298264 100644 --- a/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx +++ b/Detectors/TPC/workflow/src/MIPTrackFilterSpec.cxx @@ -178,7 +178,7 @@ bool MIPTrackFilterDevice::acceptDCA(const TrackTPC& track) } auto propagator = o2::base::Propagator::Instance(); - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); const auto ok = propagator->propagateToDCABxByBz(refPoint, propTrack, 2., o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca); diff --git a/Detectors/TPC/workflow/src/TPCRefitter.cxx b/Detectors/TPC/workflow/src/TPCRefitter.cxx index 3ebe32d12ddb8..b2e41c8e808da 100644 --- a/Detectors/TPC/workflow/src/TPCRefitter.cxx +++ b/Detectors/TPC/workflow/src/TPCRefitter.cxx @@ -421,7 +421,7 @@ void TPCRefitterSpec::finaliseCCDB(ConcreteDataMatcher& matcher, void* obj) bool TPCRefitterSpec::getDCAs(const o2::track::TrackPar& track, float& dcar, float& dcaz) { auto propagator = o2::base::Propagator::Instance(); - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; o2::track::TrackPar propTrack(track); const auto ok = propagator->propagateToDCABxByBz(refPoint, propTrack, 2., o2::base::Propagator::MatCorrType::USEMatCorrLUT, &dca); diff --git a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx index 4d20654d07c83..a9f1e7d71da8e 100644 --- a/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx +++ b/Detectors/TPC/workflow/src/TPCTimeSeriesSpec.cxx @@ -1143,7 +1143,7 @@ class 
TPCTimeSeries : public Task auto propagator = o2::base::Propagator::Instance(); // propagate track to DCA - o2::gpu::gpustd::array dca; + std::array dca; const o2::math_utils::Point3D refPoint{0, 0, 0}; // coarse propagation @@ -1252,7 +1252,7 @@ class TPCTimeSeries : public Task // make propagation for ITS-TPC Track // check if the track was assigned to ITS track - o2::gpu::gpustd::array dcaITSTPC{0, 0}; + std::array dcaITSTPC{0, 0}; float deltaP0 = -999; float deltaP1 = -999; float deltaP2 = -999; @@ -1270,7 +1270,7 @@ class TPCTimeSeries : public Task // store TPC only DCAs // propagate to vertex in case the track belongs to vertex const bool contributeToVertex = (idxITSTPC.back() != -1); - o2::gpu::gpustd::array dcaITSTPCTmp{-1, -1}; + std::array dcaITSTPCTmp{-1, -1}; if (contributeToVertex) { if (propagator->propagateToDCA(vertex.getXYZ(), trackITSTPCTmp, propagator->getNominalBz(), mFineStep, mMatType, &dcaITSTPCTmp)) { @@ -1279,7 +1279,7 @@ class TPCTimeSeries : public Task } // propagate TPC track to vertex - o2::gpu::gpustd::array dcaTPCTmp{-1, -1}; + std::array dcaTPCTmp{-1, -1}; if (propagator->propagateToDCA(vertex.getXYZ(), track, propagator->getNominalBz(), mFineStep, mMatType, &dcaTPCTmp)) { dcaTPCAtVertex = dcaTPCTmp[0]; } @@ -1401,7 +1401,7 @@ class TPCTimeSeries : public Task const bool contributeToVertex = (idxITSTPC.back() != -1); if (hasITSTPC && contributeToVertex) { o2::track::TrackParCov trackITSTPCTmp = tracksITSTPC[idxITSTPC.front()]; - o2::gpu::gpustd::array dcaITSTPCTmp{-1, -1}; + std::array dcaITSTPCTmp{-1, -1}; if (propagator->propagateToDCA(vertex.getXYZ(), trackITSTPCTmp, propagator->getNominalBz(), mFineStep, mMatType, &dcaITSTPCTmp)) { o2::track::TrackParCov trackTPC = tracksTPC[iTrk]; if (trackTPC.rotate(trackITSTPCTmp.getAlpha()) && propagator->propagateTo(trackTPC, trackITSTPCTmp.getX(), false, mMaxSnp, mFineStep, mMatType)) { diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index 
c9babbf5548b4..e83ca8c4a69fc 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -15,12 +15,14 @@ #ifndef GPUCOMMONARRAY_H #define GPUCOMMONARRAY_H -#ifndef GPUCA_GPUCODE_DEVICE +#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) // TODO: Get rid of GPUCommonArray once OpenCL supports +#ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif +#else #include "GPUCommonDef.h" -namespace o2::gpu::gpustd +namespace std { #ifdef GPUCA_GPUCODE_DEVICE template @@ -43,5 +45,7 @@ GPUd() array(T, E...)->array; template using array = std::array; #endif -} // namespace o2::gpu::gpustd -#endif \ No newline at end of file +} // namespace std +#endif + +#endif // GPUCOMMONARRAY_H diff --git a/GPU/Common/GPUCommonTypeTraits.h b/GPU/Common/GPUCommonTypeTraits.h index f837019c11875..a51a4ac50683f 100644 --- a/GPU/Common/GPUCommonTypeTraits.h +++ b/GPU/Common/GPUCommonTypeTraits.h @@ -17,12 +17,11 @@ #include "GPUCommonDef.h" -#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) +#if !defined(GPUCA_GPUCODE_DEVICE) || defined(__CUDACC__) || defined(__HIPCC__) // TODO: Get rid of GPUCommonTypeTraits once OpenCL supports #ifndef GPUCA_GPUCODE_COMPILEKERNELS #include #endif #else -// We just reimplement some type traits in std for the GPU // TODO: Check if meanwhile we can get rid of GPUCommonTypeTraits and GPUCommonArray, and just use the std headers. 
namespace std { template diff --git a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h index 6598085d309c7..08f9be7d98380 100644 --- a/GPU/GPUTracking/Base/GPUStdSystemHeaders.h +++ b/GPU/GPUTracking/Base/GPUStdSystemHeaders.h @@ -21,5 +21,6 @@ #include #include #include +#include #endif diff --git a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl index 3f58c0fea75e9..ea74c43703597 100644 --- a/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl +++ b/GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cl @@ -72,6 +72,7 @@ typedef signed char int8_t; #include "GPUCommonDef.h" #include "GPUCommonTypeTraits.h" // TODO: Once possible in OpenCL, should use GPUStdSystemHeaders.h here +#include "GPUCommonArray.h" // TODO: Same #include "GPUConstantMem.h" #include "GPUReconstructionIncludesDeviceAll.h" diff --git a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx index b74c1bb6a4534..7c7ce8d07ffb9 100644 --- a/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx +++ b/GPU/GPUTracking/Refit/GPUTrackingRefit.cxx @@ -387,8 +387,8 @@ GPUd() int32_t GPUTrackingRefit::RefitTrack(T& trkX, bool outward, bool resetCov TrackParCovChi2 = 0.f; } CADEBUG(printf("\t%21sPropaga Alpha %8.3f , X %8.3f - Y %8.3f, Z %8.3f - QPt %7.2f (%7.2f), SP %5.2f (%5.2f) --- Res %8.3f %8.3f --- Cov sY %8.3f sZ %8.3f sSP %8.3f sPt %8.3f - YPt %8.3f\n", "", trk.getAlpha(), x, trk.getParams()[0], trk.getParams()[1], trk.getParams()[4], trk.getParams()[4], trk.getParams()[2], trk.getParams()[2], trk.getParams()[0] - y, trk.getParams()[1] - z, sqrtf(trk.getCov()[0]), sqrtf(trk.getCov()[2]), sqrtf(trk.getCov()[5]), sqrtf(trk.getCov()[14]), trk.getCov()[10])); - gpu::gpustd::array p = {y, z}; - gpu::gpustd::array c = {0, 0, 0}; + std::array p = {y, z}; + std::array c = {0, 0, 0}; GPUTPCGMPropagator::GetErr2(c[0], c[2], *mPparam, getPar(trk)[2], getPar(trk)[3], z, x, y, currentRow, 
clusterState, sector, time, invAvgCharge, invCharge, false); TrackParCovChi2 += trk.getPredictedChi2(p, c); if (!trk.update(p, c)) { diff --git a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h index f6b8bea29822a..d26d46495069b 100644 --- a/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h +++ b/GPU/GPUTracking/TRDTracking/GPUTRDInterfaces.h @@ -59,8 +59,8 @@ class propagatorInterface GPUdi() bool update(const float p[2], const float cov[3]) { if (mParam) { - gpustd::array pTmp = {p[0], p[1]}; - gpustd::array covTmp = {cov[0], cov[1], cov[2]}; + std::array pTmp = {p[0], p[1]}; + std::array covTmp = {cov[0], cov[1], cov[2]}; return mParam->update(pTmp, covTmp); } else { return false; @@ -69,8 +69,8 @@ class propagatorInterface GPUdi() float getPredictedChi2(const float p[2], const float cov[3]) { if (mParam) { - gpustd::array pTmp = {p[0], p[1]}; - gpustd::array covTmp = {cov[0], cov[1], cov[2]}; + std::array pTmp = {p[0], p[1]}; + std::array covTmp = {cov[0], cov[1], cov[2]}; return mParam->getPredictedChi2(pTmp, covTmp); } else { return 99999.f; From a850e9eb3e6a634a1e87a70170c05ad6d8bce3af Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 15 May 2025 22:18:30 +0200 Subject: [PATCH 0519/1914] GPU Common: Workaround for removing gpustd::array, temporary alias for O2Physics --- .../ReconstructionDataFormats/TrackParametrization.h | 1 + GPU/Common/GPUCommonArray.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index f240e34861eeb..1d0a5f1a9f1fd 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,6 +29,7 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include "GPUCommonMath.h" +#include 
"GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index e83ca8c4a69fc..fa86d7bb4a021 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -48,4 +48,10 @@ using array = std::array; } // namespace std #endif +namespace o2::gpu::gpustd +{ +template +using array = ::std::array; // temporary alias, to remove dependent types +} // o2::gpu::gpustd + #endif // GPUCOMMONARRAY_H From b3c66e8a48c77a932a914a5adcb3562ebef3487a Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 16 May 2025 09:53:57 +0200 Subject: [PATCH 0520/1914] DPL: Drop obsolete API (#14279) The new plugin based mechanism does not need the bulk insertion anymore. --- .../Core/include/Framework/TableBuilder.h | 106 +----------------- .../Core/test/benchmark_TableBuilder.cxx | 33 ------ Framework/Core/test/test_TableBuilder.cxx | 34 ------ 3 files changed, 4 insertions(+), 169 deletions(-) diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 8d7601cefc634..0b35d5be083e4 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -42,12 +42,6 @@ class Table; class Array; } // namespace arrow -template -struct BulkInfo { - const T ptr; - size_t size; -}; - extern template class arrow::NumericBuilder; extern template class arrow::NumericBuilder; extern template class arrow::NumericBuilder; @@ -200,34 +194,6 @@ struct BuilderUtils { } } - template - static arrow::Status bulkAppend(HolderType& holder, size_t bulkSize, const PTR ptr) - { - return holder.builder->AppendValues(ptr, bulkSize, nullptr); - } - - template - static arrow::Status bulkAppendChunked(HolderType& holder, BulkInfo info) - { - // Appending nullptr is a no-op. 
- if (info.ptr == nullptr) { - return arrow::Status::OK(); - } - if constexpr (std::is_same_v>) { - if (appendToList>(holder.builder, info.ptr, info.size).ok() == false) { - throw runtime_error("Unable to append to column"); - } else { - return arrow::Status::OK(); - } - } else { - if (holder.builder->AppendValues(info.ptr, info.size, nullptr).ok() == false) { - throw runtime_error("Unable to append to column"); - } else { - return arrow::Status::OK(); - } - } - } - template static arrow::Status append(HolderType& holder, std::pair ip) { @@ -518,14 +484,6 @@ struct TableBuilderHelpers { return {BuilderTraits::make_datatype()...}; } - template - static std::vector> makeFields(std::array const& names) - { - char const* const* names_ptr = names.data(); - return { - std::make_shared(*names_ptr++, BuilderMaker::make_datatype(), true, nullptr)...}; - } - /// Invokes the append method for each entry in the tuple template static bool append(std::tuple& holders, VALUES&& values) @@ -542,19 +500,6 @@ struct TableBuilderHelpers { (BuilderUtils::unsafeAppend(std::get(holders), std::get(values)), ...); } - template - static bool bulkAppend(std::tuple& holders, size_t bulkSize, PTRS ptrs) - { - return (BuilderUtils::bulkAppend(std::get(holders), bulkSize, std::get(ptrs)).ok() && ...); - } - - /// Return true if all columns are done. 
- template - static bool bulkAppendChunked(std::tuple& holders, INFOS infos) - { - return (BuilderUtils::bulkAppendChunked(std::get(holders), std::get(infos)).ok() && ...); - } - /// Invokes the append method for each entry in the tuple template static bool finalize(std::vector>& arrays, std::tuple& holders) @@ -575,15 +520,9 @@ constexpr auto tuple_to_pack(std::tuple&&) return framework::pack{}; } -template -concept BulkInsertable = (std::integral> && !std::same_as>); - template struct InsertionTrait { - static consteval DirectInsertion policy() - requires(!BulkInsertable); - static consteval CachedInsertion policy() - requires(BulkInsertable); + static consteval DirectInsertion policy(); using Policy = decltype(policy()); }; @@ -658,7 +597,9 @@ class TableBuilder template auto makeBuilders(std::array const& columnNames, size_t nRows) { - mSchema = std::make_shared(TableBuilderHelpers::makeFields(columnNames)); + char const* const* names_ptr = columnNames.data(); + mSchema = std::make_shared( + std::vector>({std::make_shared(*names_ptr++, BuilderMaker::make_datatype(), true, nullptr)...})); mHolders = makeHolders(mMemoryPool, nRows); mFinalizer = [](std::vector>& arrays, void* holders) -> bool { @@ -768,45 +709,6 @@ class TableBuilder }(typename T::table_t::persistent_columns_t{}); } - template - auto preallocatedPersist(std::array const& columnNames, int nRows) - { - constexpr size_t nColumns = NCOLUMNS; - validate(); - mArrays.resize(nColumns); - makeBuilders(columnNames, nRows); - - // Callback used to fill the builders - return [holders = mHolders](unsigned int /*slot*/, typename BuilderMaker::FillType... 
args) -> void { - TableBuilderHelpers::unsafeAppend(*(HoldersTupleIndexed*)holders, std::forward_as_tuple(args...)); - }; - } - - template - auto bulkPersist(std::array const& columnNames, size_t nRows) - { - validate(); - // Should not be called more than once - mArrays.resize(NCOLUMNS); - makeBuilders(columnNames, nRows); - - return [holders = mHolders](unsigned int /*slot*/, size_t batchSize, typename BuilderMaker::FillType const*... args) -> void { - TableBuilderHelpers::bulkAppend(*(HoldersTupleIndexed*)holders, batchSize, std::forward_as_tuple(args...)); - }; - } - - template - auto bulkPersistChunked(std::array const& columnNames, size_t nRows) - { - validate(); - mArrays.resize(NCOLUMNS); - makeBuilders(columnNames, nRows); - - return [holders = mHolders](unsigned int /*slot*/, BulkInfo::STLValueType const*>... args) -> bool { - return TableBuilderHelpers::bulkAppendChunked(*(HoldersTupleIndexed*)holders, std::forward_as_tuple(args...)); - }; - } - /// Reserve method to expand the columns as needed. 
template auto reserveArrays(std::tuple& holders, int s) diff --git a/Framework/Core/test/benchmark_TableBuilder.cxx b/Framework/Core/test/benchmark_TableBuilder.cxx index 59d1450e895bd..5b9dee866c8a3 100644 --- a/Framework/Core/test/benchmark_TableBuilder.cxx +++ b/Framework/Core/test/benchmark_TableBuilder.cxx @@ -62,39 +62,6 @@ static void BM_TableBuilderScalarReserved(benchmark::State& state) BENCHMARK(BM_TableBuilderScalarReserved)->Arg(1 << 21); BENCHMARK(BM_TableBuilderScalarReserved)->Range(8, 8 << 16); -static void BM_TableBuilderScalarPresized(benchmark::State& state) -{ - using namespace o2::framework; - for (auto _ : state) { - TableBuilder builder; - auto rowWriter = builder.preallocatedPersist({"x"}, state.range(0)); - for (auto i = 0; i < state.range(0); ++i) { - rowWriter(0, 0.f); - } - auto table = builder.finalize(); - } -} - -BENCHMARK(BM_TableBuilderScalarPresized)->Arg(1 << 20); -BENCHMARK(BM_TableBuilderScalarPresized)->Range(8, 8 << 16); - -static void BM_TableBuilderScalarBulk(benchmark::State& state) -{ - using namespace o2::framework; - auto chunkSize = state.range(0) / 256; - std::vector buffer(chunkSize, 0.); // We assume data is chunked in blocks 256th of the total size - for (auto _ : state) { - TableBuilder builder; - auto bulkWriter = builder.bulkPersist({"x"}, state.range(0)); - for (auto i = 0; i < state.range(0) / chunkSize; ++i) { - bulkWriter(0, chunkSize, buffer.data()); - } - auto table = builder.finalize(); - } -} - -BENCHMARK(BM_TableBuilderScalarBulk)->Range(256, 1 << 20); - static void BM_TableBuilderSimple(benchmark::State& state) { using namespace o2::framework; diff --git a/Framework/Core/test/test_TableBuilder.cxx b/Framework/Core/test/test_TableBuilder.cxx index b08fee5ad4e6a..00cbbbc59b725 100644 --- a/Framework/Core/test/test_TableBuilder.cxx +++ b/Framework/Core/test/test_TableBuilder.cxx @@ -162,30 +162,6 @@ TEST_CASE("TestTableBuilderStruct") } } -TEST_CASE("TestTableBuilderBulk") -{ - using namespace 
o2::framework; - TableBuilder builder; - auto bulkWriter = builder.bulkPersist({"x", "y"}, 10); - int x[] = {0, 1, 2, 3, 4, 5, 6, 7}; - int y[] = {0, 1, 2, 3, 4, 5, 6, 7}; - - bulkWriter(0, 8, x, y); - - auto table = builder.finalize(); - REQUIRE(table->num_columns() == 2); - REQUIRE(table->num_rows() == 8); - REQUIRE(table->schema()->field(0)->name() == "x"); - REQUIRE(table->schema()->field(1)->name() == "y"); - REQUIRE(table->schema()->field(0)->type()->id() == arrow::int32()->id()); - REQUIRE(table->schema()->field(1)->type()->id() == arrow::int32()->id()); - - for (int64_t i = 0; i < 8; ++i) { - auto p = std::dynamic_pointer_cast>(table->column(0)->chunk(0)); - REQUIRE(p->Value(i) == i); - } -} - TEST_CASE("TestTableBuilderMore") { using namespace o2::framework; @@ -288,13 +264,3 @@ TEST_CASE("TestColumnCount") int count2 = TableBuilder::countColumns(); REQUIRE(count2 == 3); } - -TEST_CASE("TestMakeFields") -{ - auto fields = TableBuilderHelpers::makeFields({"i", "f"}); - REQUIRE(fields.size() == 2); - REQUIRE(fields[0]->name() == "i"); - REQUIRE(fields[1]->name() == "f"); - REQUIRE(fields[0]->type()->name() == "int32"); - REQUIRE(fields[1]->type()->name() == "float"); -} From d89ef683194da14326ccc2915810cf402d6921d7 Mon Sep 17 00:00:00 2001 From: shahoian Date: Thu, 15 May 2025 23:29:48 +0200 Subject: [PATCH 0521/1914] Promote ITS/MFT wrong orbit/ROF count messages to critical --- Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx | 2 +- Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx b/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx index a9ed2748ec004..187a1bc114ee7 100644 --- a/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx +++ b/Detectors/ITSMFT/common/reconstruction/src/RUDecodeData.cxx @@ -125,7 +125,7 @@ bool RUDecodeData::checkLinkInSync(int icab, const o2::InteractionRecord ir) 
link->statistics.errorCounts[GBTLinkDecodingStat::ErrOldROF]++; linkHBFToDump[(uint64_t(link->subSpec) << 32) + link->hbfEntry] = link->irHBF.orbit; if (link->needToPrintError(link->statistics.errorCounts[GBTLinkDecodingStat::ErrOldROF]) && !ROFRampUpStage) { - LOGP(error, "{} (cable {}) has IR={} for current majority IR={} -> {}", link->describe(), + LOGP(critical, "{} (cable {}) has IR={} for current majority IR={} -> {}", link->describe(), cableHWID[icab], link->ir.asString(), ir.asString(), link->statistics.ErrNames[GBTLinkDecodingStat::ErrOldROF]); } #endif diff --git a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx index 7042cb7433ac5..da1af34376ff1 100644 --- a/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx +++ b/Detectors/ITSMFT/common/workflow/src/STFDecoderSpec.cxx @@ -202,7 +202,7 @@ void STFDecoder::run(ProcessingContext& pc) if ((expectedTFSize != nTriggersProcessed) && mROFErrRepIntervalMS > 0 && mTFCounter > 1 && nTriggersProcessed > 0) { long currTS = std::chrono::time_point_cast(std::chrono::system_clock::now()).time_since_epoch().count(); if (currTS - lastErrReportTS > mROFErrRepIntervalMS) { - LOGP(error, "Inconsistent number of ROF per TF. From parameters: {} from readout: {} (muting further reporting for {} ms)", expectedTFSize, nTriggersProcessed, mROFErrRepIntervalMS); + LOGP(critical, "Inconsistent number of ROF per TF. 
From parameters: {} from readout: {} (muting further reporting for {} ms)", expectedTFSize, nTriggersProcessed, mROFErrRepIntervalMS); lastErrReportTS = currTS; } } From 5b5f1d96e60af52e9bf70796b23e1a4f5f0c544c Mon Sep 17 00:00:00 2001 From: Marco Giacalone Date: Fri, 16 May 2025 16:09:42 +0200 Subject: [PATCH 0522/1914] Include/adapt QED for special runs (#14242) * Include QED for special runs --- Generators/include/Generators/QEDGenParam.h | 11 ++++++----- Generators/share/external/QEDepem.C | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Generators/include/Generators/QEDGenParam.h b/Generators/include/Generators/QEDGenParam.h index 1c78b14cfc516..0a79f113fc4ac 100644 --- a/Generators/include/Generators/QEDGenParam.h +++ b/Generators/include/Generators/QEDGenParam.h @@ -24,11 +24,12 @@ namespace o2 namespace eventgen { struct QEDGenParam : public o2::conf::ConfigurableParamHelper { - - float yMin = -6.f; ///< min Y - float yMax = 6.f; ///< max Y - float ptMin = 0.4e-3; ///< min pT - float ptMax = 10.f; ///< min pT + float yMin = -6.f; ///< min Y + float yMax = 6.f; ///< max Y + float ptMin = 0.4e-3; ///< min pT + float ptMax = 10.f; ///< min pT + float cmEnergy = 5360.f; ///< center of mass energy per nucleon pair in GeV + float Z = 82.f; ///< atomic number of the projectile/target (only symmetric systems are compatible for now) // float xSectionQED = -1; ///< estimated QED x-section in barns float xSectionHad = 8.; ///< reference hadronic x-section for the same system diff --git a/Generators/share/external/QEDepem.C b/Generators/share/external/QEDepem.C index 1e464ec69be00..d9103d1476df0 100644 --- a/Generators/share/external/QEDepem.C +++ b/Generators/share/external/QEDepem.C @@ -35,6 +35,8 @@ o2::eventgen::GeneratorTGenerator* QEDepem() genBg->SetPtRange(qedParam.ptMin, qedParam.ptMax); // Set pt limits (GeV) for e+-: 1MeV corresponds to max R=13.3mm at 5kGaus genBg->SetOrigin(diamond.position[0], diamond.position[1], diamond.position[2]); 
// vertex position in space genBg->SetSigma(diamond.width[0], diamond.width[1], diamond.width[2]); // vertex sigma + genBg->SetCMEnergy(qedParam.cmEnergy); // center of mass energy per nucleon pair in GeV + genBg->SetZ(qedParam.Z); // atomic number of the projectile/target (only symmetric systems are compatible for now) genBg->SetTimeOrigin(0.); // vertex position in time initialized = genBg->Init(); if (!initialized) { From 64315adcaddfc72a231bc8c15427b03d6f239e87 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Fri, 16 May 2025 15:43:44 +0200 Subject: [PATCH 0523/1914] GPU: Check that we do not optimize RTC when we have 2 different GPUReconstruction instances --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 9 ++++++++- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index 3ef995b9f9561..c76bf11c3e25d 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -347,15 +347,22 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings->nTPCClustererLanes = GPUCA_NSECTORS; } + if (GetProcessingSettings().doublePipeline) { + mProcessingSettings->rtctech.allowOptimizedSlaveReconstruction = true; + } if (GetProcessingSettings().doublePipeline && (mChains.size() != 1 || mChains[0]->SupportsDoublePipeline() == false || !IsGPU() || GetProcessingSettings().memoryAllocationStrategy != GPUMemoryResource::ALLOCATION_GLOBAL)) { GPUError("Must use double pipeline mode only with exactly one chain that must support it"); return 1; } - if (mMaster == nullptr && GetProcessingSettings().doublePipeline) { mPipelineContext.reset(new GPUReconstructionPipelineContext); } + if (mMaster && GetProcessingSettings().rtc.enable && (GetProcessingSettings().rtc.optConstexpr || GetProcessingSettings().rtc.optSpecialCode) && 
!GetProcessingSettings().rtctech.allowOptimizedSlaveReconstruction) { + GPUError("Not allowed to create optimized RTC code with more than one GPUReconstruction instances"); + return 1; + } + mDeviceMemorySize = mHostMemorySize = 0; for (uint32_t i = 0; i < mChains.size(); i++) { if (mChains[i]->EarlyConfigure()) { diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 9400a429fca81..b9be1db881816 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -228,6 +228,7 @@ AddOption(runTest, int32_t, 0, "", 0, "Do not run the actual benchmark, but just AddOption(cacheMutex, bool, true, "", 0, "Use a file lock to serialize access to the cache folder") AddOption(ignoreCacheValid, bool, false, "", 0, "If set, allows to use RTC cached code files even if they are not valid for the current source code / parameters") AddOption(printLaunchBounds, bool, false, "", 0, "Print launch bounds used for RTC code as debugging option") +AddOption(allowOptimizedSlaveReconstruction, bool, false, "", 0, "Allow RTC with slave GPUReconstruction instances with optConstexpr and optSpecialcode") AddOption(cacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored") AddOption(prependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(overrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") // Part of cmdLine, so checked against the cache From dc60e3535f333042ef6dcb665cf4c3cfb5d95cfa Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 16 May 2025 13:22:19 +0200 Subject: [PATCH 0524/1914] By default process K0s only in the sync. 
svertexer To rever to the full mode use define SECVTXK0ONLY=0 --- .../include/DetectorsVertexing/SVertexHypothesis.h | 4 ++-- prodtests/full-system-test/dpl-workflow.sh | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h b/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h index 1450e0c15e98c..c3fd74aa7eeff 100644 --- a/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h +++ b/Detectors/Vertexing/include/DetectorsVertexing/SVertexHypothesis.h @@ -60,7 +60,7 @@ class SVertexHypothesis bool check(float p2Pos, float p2Neg, float p2V0, float ptV0) const { // check if given mass and pt is matching to hypothesis - return check(calcMass(p2Pos, p2Neg, p2V0), ptV0); + return mPars[SigmaM] > 0 && check(calcMass(p2Pos, p2Neg, p2V0), ptV0); } bool check(float mass, float pt) const { // check if given mass and pt is matching to hypothesis @@ -151,7 +151,7 @@ class SVertex3Hypothesis bool check(float p2Pos, float p2Neg, float p2Bach, float p2Tot, float ptV0) const { // check if given mass and pt is matching to hypothesis - return check(calcMass(p2Pos, p2Neg, p2Bach, p2Tot), ptV0); + return mPars[SigmaM] > 0 && check(calcMass(p2Pos, p2Neg, p2Bach, p2Tot), ptV0); } bool check(float mass, float pt) const diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index b51594115154c..4e6cbbebe7db7 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -94,6 +94,7 @@ TPC_CORR_OPT= TPC_CORR_KEY= INTERACTION_TAG_CONFIG_KEY= EVE_OPT=" --jsons-folder $EDJSONS_DIR" +: ${SECVTXK0ONLY:=} : ${EVE_CONFIG:=} : ${STRTRACKING:=} : ${ITSEXTRAERR:=} @@ -591,7 +592,14 @@ has_detector_reco ITS && has_detector_gpu ITS TPC && [[ -z "$DISABLE_ROOT_OUTPUT has_detector_matching PRIMVTX && [[ ! 
-z "$VERTEXING_SOURCES" ]] && [[ $GLOBAL_READER_NEEDS_PV != 1 ]] && add_W o2-primary-vertexing-workflow "$DISABLE_MC $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $PVERTEX_CONFIG --pipeline $(get_N primary-vertexing MATCH REST 1 PRIMVTX),$(get_N pvertex-track-matching MATCH REST 1 PRIMVTXMATCH)" "${PVERTEXING_CONFIG_KEY};${INTERACTION_TAG_CONFIG_KEY};" if [[ $BEAMTYPE != "cosmic" ]] && has_detectors_reco ITS && has_detector_matching SECVTX && [[ ! -z "$SVERTEXING_SOURCES" ]]; then - [[ $GLOBAL_READER_NEEDS_SV != 1 ]] && add_W o2-secondary-vertexing-workflow "$DISABLE_MC $STRTRACKING $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $TPC_CORR_OPT --vertexing-sources $SVERTEXING_SOURCES --threads $SVERTEX_THREADS --pipeline $(get_N secondary-vertexing MATCH REST $SVERTEX_THREADS SECVTX)" "$TPC_CORR_KEY" + : ${REDUCESV_OPT:=} + : ${REDUCESV_CONF:=} + if [[ $SYNCMODE == 1 ]] && [[ $SECVTXK0ONLY != 0 ]] ; then + : ${STRTRACKING:=" --disable-strangeness-tracker "} + : ${REDUCESV_OPT:=" --disable-cascade-finder --disable-3body-finder "} + : ${REDUCESV_CONF:="svertexer.pidCutsPhoton[0]=-1;svertexer.pidCutsLambda[0]=-1;svertexer.pidCutsHTriton[0]=-1;svertexer.pidCutsHhydrog4[0]=-1;"} + fi + [[ $GLOBAL_READER_NEEDS_SV != 1 ]] && add_W o2-secondary-vertexing-workflow "$DISABLE_MC $STRTRACKING $REDUCESV_OPT $DISABLE_ROOT_INPUT $DISABLE_ROOT_OUTPUT $TPC_CORR_OPT --vertexing-sources $SVERTEXING_SOURCES --threads $SVERTEX_THREADS --pipeline $(get_N secondary-vertexing MATCH REST $SVERTEX_THREADS SECVTX)" "$TPC_CORR_KEY;$REDUCESV_CONF" SECTVTX_ON="1" else SECTVTX_ON="0" From 966097b751dced62af09972912f8711dac66b7e4 Mon Sep 17 00:00:00 2001 From: shahoian Date: Fri, 16 May 2025 17:37:22 +0200 Subject: [PATCH 0525/1914] Possibility to define AlignParam as Local Delta The constructors got extra argument convertLocalToGlobal (by default true) which tells that if the provided parameters correspond to the local delta (i.e. argument global == false), then it should be converted to global delta. 
Otherwise, the delta being local or global is fully defined by the value of the global argument. The AlignParam::applyToGeometry() accounts for the type of persistent type of delta and avoids extra global -> local conversion if mIsGlobalDelta is false. --- .../DetectorsCommonDataFormats/AlignParam.h | 17 +++++-- .../Detectors/Common/src/AlignParam.cxx | 46 +++++++++++-------- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h b/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h index c4e702c6ae27e..a93a37c1e36ab 100644 --- a/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h +++ b/DataFormats/Detectors/Common/include/DetectorsCommonDataFormats/AlignParam.h @@ -37,9 +37,12 @@ class AlignParam AlignParam(const char* symname, int algID, // volume symbolic name and its alignable ID double x, double y, double z, // delta translation double psi, double theta, double phi, // delta rotation - bool global = true); // global (preferable) or local delta definition + bool global = true, // global (preferable) or local delta definition + bool convertLocalToGlobal = true); // if local is provided, convert it to global - AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global = true); + AlignParam(const char* symname, int algID, TGeoMatrix& m, + bool global = true, // global (preferable) or local delta definition + bool convertLocalToGlobal = true); // if local is provided, convert it to global /// return symbolic name of the volume const std::string& getSymName() const { return mSymName; } @@ -70,6 +73,9 @@ class AlignParam void setAlignableID(int id) { mAlignableID = id; } /// ================ methods for direct setting of delta params + /// set parameters + void setParams(double x, double y, double z, double psi, double theta, double phi); + /// set parameters of global delta void setGlobalParams(double x, double y, double 
z, double psi, double theta, double phi); @@ -114,6 +120,9 @@ class AlignParam int rectify(double zero = 1e-13); + bool isGlobal() const { return mIsGlobal; } + void setIsGlobal(bool v) { mIsGlobal = v; } + protected: bool matrixToAngles(const double* rot, double& psi, double& theta, double& phi) const; void anglesToMatrix(double psi, double theta, double phi, double* rot) const; @@ -123,8 +132,8 @@ class AlignParam private: std::string mSymName{}; + bool mIsGlobal = true; /// is this global delta? int mAlignableID = -1; /// alignable ID (set for sensors only) - double mX = 0.; ///< X translation of global delta double mY = 0.; ///< Y translation of global delta double mZ = 0.; ///< Z translation of global delta @@ -133,7 +142,7 @@ class AlignParam double mTheta = 0.; ///< "roll" : Euler angle of rotation around Y axis after 1st rotation (radians) double mPhi = 0.; ///< "yaw" : Euler angle of rotation around Z axis (radians) - ClassDefNV(AlignParam, 1); + ClassDefNV(AlignParam, 2); }; } // namespace detectors diff --git a/DataFormats/Detectors/Common/src/AlignParam.cxx b/DataFormats/Detectors/Common/src/AlignParam.cxx index 90f2a349607a1..f20cf3dac4971 100644 --- a/DataFormats/Detectors/Common/src/AlignParam.cxx +++ b/DataFormats/Detectors/Common/src/AlignParam.cxx @@ -26,8 +26,9 @@ using namespace o2::detectors; AlignParam::AlignParam(const char* symname, int algID, // volume symbolic name and its alignable ID double x, double y, double z, // delta translation double psi, double theta, double phi, // delta rotation - bool global) // global (preferable) or local delta definition - : mSymName(symname), mAlignableID(algID) + bool global, // global (preferable) or local delta definition + bool convertLocalToGlobal) // if local is provided, convert it to global + : mSymName(symname), mIsGlobal(global || convertLocalToGlobal), mAlignableID(algID) { /// standard constructor with 3 translation + 3 rotation parameters /// If the user explicitly sets the global variable to 
false then the @@ -35,23 +36,22 @@ AlignParam::AlignParam(const char* symname, int algID, // volume symbolic /// This requires to have a gGeoMenager active instance, otherwise the /// constructor will fail (no object created) - if (global) { - setGlobalParams(x, y, z, psi, theta, phi); - } else { + setParams(x, y, z, psi, theta, phi); + if (!global && convertLocalToGlobal) { setLocalParams(x, y, z, psi, theta, phi); } } //___________________________________________________ -AlignParam::AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global) - : mSymName(symname), mAlignableID(algID) +AlignParam::AlignParam(const char* symname, int algID, TGeoMatrix& m, bool global, bool convertLocalToGlobal) + : mSymName(symname), mIsGlobal(global || convertLocalToGlobal), mAlignableID(algID) { setTranslation(m); if (!setRotation(m)) { const double* rot = m.GetRotationMatrix(); throw std::runtime_error(fmt::format("Failed to extract roll-pitch-yall angles from [[{},{},{}], [{},{},{}], [{},{},{}] for {}", rot[0], rot[1], rot[2], rot[3], rot[4], rot[5], rot[6], rot[7], rot[8], symname)); } - if (!global && !setLocalParams(mX, mY, mZ, mPsi, mTheta, mPhi)) { + if (!global && convertLocalToGlobal && !setLocalParams(mX, mY, mZ, mPsi, mTheta, mPhi)) { throw std::runtime_error(fmt::format("Alignment creation for {} failed: geomManager is absent", symname)); } } @@ -223,6 +223,10 @@ bool AlignParam::createLocalMatrix(TGeoHMatrix& m) const // In case that the TGeo was not initialized or not closed, // returns false and the object parameters are not set. // + m = createMatrix(); + if (!mIsGlobal) { + return true; + } if (!gGeoManager || !gGeoManager->IsClosed()) { LOG(error) << "Can't get the local alignment object parameters! 
gGeoManager doesn't exist or it is still open!"; return false; @@ -247,7 +251,6 @@ bool AlignParam::createLocalMatrix(TGeoHMatrix& m) const LOG(error) << "Volume name or path " << symname << " is not valid!"; return false; } - m = createMatrix(); TGeoHMatrix gprime, gprimeinv; gprime = *node->GetMatrix(); gprimeinv = gprime.Inverse(); @@ -302,18 +305,15 @@ bool AlignParam::applyToGeometry() const } // double threshold = 0.001; - - TGeoHMatrix gprime = *node->GetMatrix(); - TGeoHMatrix align = createMatrix(); - gprime.MultiplyLeft(&align); - TGeoHMatrix* ginv = new TGeoHMatrix; // TGeoPhysicalNode takes and manages raw pointer, need naked new! - TGeoHMatrix* g = node->GetMatrix(node->GetLevel() - 1); - *ginv = g->Inverse(); - *ginv *= gprime; - + TGeoHMatrix* align = new TGeoHMatrix(createMatrix()); + if (mIsGlobal) { + align->Multiply(node->GetMatrix()); + TGeoHMatrix* g = node->GetMatrix(node->GetLevel() - 1); + align->MultiplyLeft(node->GetMatrix(node->GetLevel() - 1)->Inverse()); + } LOG(debug) << "Aligning volume " << symname; - node->Align(ginv); + node->Align(align); return true; } @@ -359,6 +359,14 @@ void AlignParam::setGlobalParams(double x, double y, double z, double psi, doubl setRotation(psi, theta, phi); } +//_____________________________________________________________________________ +void AlignParam::setParams(double x, double y, double z, double psi, double theta, double phi) +{ + /// set parameters of global delta + setTranslation(x, y, z); + setRotation(psi, theta, phi); +} + //_____________________________________________________________________________ void AlignParam::setRotation(double psi, double theta, double phi) { From a6af3174a7011e8a0620b71a14fee134275f3815 Mon Sep 17 00:00:00 2001 From: swenzel Date: Fri, 16 May 2025 22:13:05 +0200 Subject: [PATCH 0526/1914] TPC init: load more params from CCDB for GEMAmpl --- Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff 
--git a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx index a04f73a62fbf8..b111d8f372967 100644 --- a/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx +++ b/Steer/DigitizerWorkflow/src/SimpleDigitizerWorkflow.cxx @@ -309,11 +309,16 @@ void initTPC(long timestamp) auto& cdb = o2::tpc::CDBInterface::instance(); cdb.setUseDefaults(); - // IMPORTANT: load ParameterGEM from CCDB + // IMPORTANT: load ParameterGEM, ParameterGas and CalPadGainFull from CCDB to correctly init GEMAmplification auto& ccdbManager = o2::ccdb::BasicCCDBManager::instance(); ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGEM), timestamp); - LOGP(info, "initTPC: TPC GEM param updated for time {}", timestamp); + LOGP(info, "initTPC: TPC GEM param, Gas param + CalPadGainFull updated for time {}", timestamp); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::CalPadGainFull), timestamp); + ccdbManager.getSpecific(o2::tpc::CDBTypeMap.at(o2::tpc::CDBType::ParGas), timestamp); + o2::tpc::ParameterGEM::Instance().printKeyValues(true, true); + o2::tpc::ParameterGas::Instance().printKeyValues(true, true); + // by invoking this constructor we make sure that a common file will be created // in future we should take this from OCDB and just forward per message const static auto& ampl = o2::tpc::GEMAmplification::instance(); From 88baea9a23ba9f20083204b9ae46a63eaa19038b Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:06:23 +0200 Subject: [PATCH 0527/1914] DPL Analysis: `as(cfg)` for adjusting the types of configurables in expressions (#14276) --- .../Core/include/Framework/Expressions.h | 22 +++++++++++++------ Framework/Core/test/test_Expressions.cxx | 9 ++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 9e00388ee5df8..18c930700a91d 100644 --- 
a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -12,13 +12,11 @@ #define O2_FRAMEWORK_EXPRESSIONS_H_ #include "Framework/BasicOps.h" -#include "Framework/CompilerBuiltins.h" #include "Framework/Pack.h" #include "Framework/Configurable.h" #include "Framework/Variant.h" #include "Framework/InitContext.h" #include "Framework/ConfigParamRegistry.h" -#include "Framework/RuntimeError.h" #include #include #include @@ -143,13 +141,17 @@ struct OpNode { /// A placeholder node for simple type configurable struct PlaceholderNode : LiteralNode { template + requires(variant_trait_v::type> != VariantType::Unknown) PlaceholderNode(Configurable const& v) : LiteralNode{v.value}, name{v.name} { - if constexpr (variant_trait_v::type> != VariantType::Unknown) { - retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; - } else { - unknownParameterUsed(name.c_str()); - } + retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{context.options().get(name)}; }; + } + + template + requires((std::convertible_to) && (variant_trait_v::type> != VariantType::Unknown)) + PlaceholderNode(Configurable const& v, AT*) : LiteralNode{static_cast(v.value)}, name{v.name} + { + retrieve = [](InitContext& context, char const* name) { return LiteralNode::var_t{static_cast(context.options().get(name))}; }; } PlaceholderNode(PlaceholderNode const& other) = default; @@ -163,6 +165,12 @@ struct PlaceholderNode : LiteralNode { LiteralNode::var_t (*retrieve)(InitContext&, char const*); }; +template +PlaceholderNode as(Configurable const& v) +{ + return PlaceholderNode(v, (AT*)nullptr); +} + /// A placeholder node for parameters taken from an array struct ParameterNode : LiteralNode { ParameterNode(int index_ = -1) diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 2296b5dcbfbc4..6faa2fc352232 100644 --- 
a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -146,6 +146,15 @@ TEST_CASE("TestTreeParsing") REQUIRE(ptfilterspecs2[0].left == (DatumSpec{std::string{"fPt"}, typeid(o2::aod::track::Pt).hash_code(), atype::FLOAT})); REQUIRE(ptfilterspecs2[0].right == (DatumSpec{LiteralNode::var_t{1.0f}, atype::FLOAT})); REQUIRE(ptfilterspecs2[0].result == (DatumSpec{0u, atype::BOOL})); + + Configurable cvalue{"cvalue", 1, "test value"}; + Filter testFilter = o2::aod::track::tpcNClsShared < as(cvalue); + REQUIRE(testFilter.node->self.index() == 2); + REQUIRE(testFilter.node->left->self.index() == 1); + REQUIRE(testFilter.node->right->self.index() == 3); + REQUIRE(std::get(testFilter.node->right->self).name == "cvalue"); + auto testSpecs = createOperations(testFilter); + REQUIRE(testSpecs[0].right == (DatumSpec{LiteralNode::var_t{(uint8_t)1}, atype::UINT8})); } TEST_CASE("TestGandivaTreeCreation") From 4d20c8d9087a28bbd16984c36abef7b58b1f2802 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:06:56 +0200 Subject: [PATCH 0528/1914] DPL Analysis: improve/fix join error by not relying on metadata (#14249) --- Framework/Core/include/Framework/ASoA.h | 10 ++++++---- .../Core/include/Framework/AnalysisManagers.h | 8 ++++---- Framework/Core/include/Framework/AnalysisTask.h | 4 ++-- Framework/Core/include/Framework/TableBuilder.h | 7 ++++--- Framework/Core/src/ASoA.cxx | 7 ++----- Framework/Core/test/test_ASoA.cxx | 15 +++++++++++++++ 6 files changed, 33 insertions(+), 18 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index e098cd89f6d5d..c13cfe912c814 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1245,7 +1245,7 @@ struct TableIterator : IP, C... 
{ }; struct ArrowHelpers { - static std::shared_ptr joinTables(std::vector>&& tables); + static std::shared_ptr joinTables(std::vector>&& tables, std::span labels); static std::shared_ptr concatTables(std::vector>&& tables); }; @@ -1683,6 +1683,7 @@ class Table using table_t = self_t; static constexpr const auto originals = computeOriginals(); + static constexpr const auto originalLabels = [] refs, size_t... Is>(std::index_sequence) { return std::array{o2::aod::label()...}; }.template operator()(std::make_index_sequence()); template bindings> requires(ref.origin_hash == "CONC"_h) @@ -1931,7 +1932,7 @@ class Table Table(std::vector>&& tables, uint64_t offset = 0) requires(ref.origin_hash != "CONC"_h) - : Table(ArrowHelpers::joinTables(std::move(tables)), offset) + : Table(ArrowHelpers::joinTables(std::move(tables), std::span{originalLabels}), offset) { } @@ -3213,7 +3214,7 @@ struct JoinFull : Table, D, o2::aod::Hash<"JOIN"_h>, Ts. bindInternalIndicesTo(this); } JoinFull(std::vector>&& tables, uint64_t offset = 0) - : base{ArrowHelpers::joinTables(std::move(tables)), offset} + : base{ArrowHelpers::joinTables(std::move(tables), std::span{base::originalLabels}), offset} { bindInternalIndicesTo(this); } @@ -3223,6 +3224,7 @@ struct JoinFull : Table, D, o2::aod::Hash<"JOIN"_h>, Ts. using self_t = JoinFull; using table_t = base; static constexpr const auto originals = base::originals; + static constexpr const auto originalLabels = base::originalLabels; using columns_t = typename table_t::columns_t; using persistent_columns_t = typename table_t::persistent_columns_t; using iterator = table_t::template iterator_template; @@ -3293,7 +3295,7 @@ using Join = JoinFull, Ts...>; template constexpr auto join(Ts const&... 
t) { - return Join(ArrowHelpers::joinTables({t.asArrowTable()...})); + return Join(ArrowHelpers::joinTables({t.asArrowTable()...}, std::span{Join::base::originalLabels})); } template diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 330eaf01f0be4..b76d88ea5ee66 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -282,14 +282,14 @@ template bool prepareOutput(ProcessingContext& context, T& spawns) { using metadata = o2::aod::MetadataTrait>::metadata; - auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context)); + auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); if (originalTable->schema()->fields().empty() == true) { using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } spawns.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), spawns.projector)); - spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable})); + spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } @@ -304,14 +304,14 @@ template bool prepareOutput(ProcessingContext& context, T& defines) { using metadata = o2::aod::MetadataTrait>::metadata; - auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context)); + auto originalTable = soa::ArrowHelpers::joinTables(extractOriginals(context), std::span{metadata::base_table_t::originalLabels}); if (originalTable->schema()->fields().empty() == true) { using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } defines.extension = std::make_shared(o2::framework::spawner>(originalTable, 
o2::aod::label(), defines.projectors.data(), defines.projector)); - defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable})); + defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index c7f3da1948c62..257a5358463c6 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -201,9 +201,9 @@ struct AnalysisDataProcessorBuilder { std::shared_ptr table = nullptr; auto joiner = [&record] refs, size_t... Is>(std::index_sequence) { return std::vector{extractTableFromRecord(record)...}; }; if constexpr (soa::is_iterator) { - table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence())); + table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence()), std::span{T::parent_t::originalLabels}); } else { - table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence())); + table = o2::soa::ArrowHelpers::joinTables(joiner.template operator()(std::make_index_sequence()), std::span{T::originalLabels}); } expressions::updateFilterInfo(info, table); if constexpr (!o2::soa::is_smallgroups>) { diff --git a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index 0b35d5be083e4..e2d12789ef922 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -771,7 +771,7 @@ template auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) { using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - auto fullTable = 
soa::ArrowHelpers::joinTables(std::move(tables)); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } @@ -798,7 +798,7 @@ template auto spawner(std::vector>&& tables, const char* name, std::shared_ptr& projector) { using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables)); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } @@ -834,7 +834,8 @@ auto spawner(std::shared_ptr const& fullTable, const char* name, s template auto spawner(framework::pack columns, std::vector>&& tables, const char* name, std::shared_ptr& projector) { - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables)); + std::array labels{"original"}; + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index 810398747de88..bd6ca551d24ec 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -64,7 +64,7 @@ SelectionVector sliceSelection(gsl::span const& mSelectedRows, in return slicedSelection; } -std::shared_ptr ArrowHelpers::joinTables(std::vector>&& tables) +std::shared_ptr ArrowHelpers::joinTables(std::vector>&& tables, std::span labels) { if (tables.size() == 1) { return tables[0]; @@ -72,10 +72,7 @@ std::shared_ptr ArrowHelpers::joinTables(std::vectornum_rows() != tables[i + 1]->num_rows()) { throw o2::framework::runtime_error_f("Tables %s and %s have different sizes (%d vs %d) and cannot be joined!", - 
tables[i]->schema()->metadata()->Get("label").ValueOrDie().c_str(), - tables[i + 1]->schema()->metadata()->Get("label").ValueOrDie().c_str(), - tables[i]->num_rows(), - tables[i + 1]->num_rows()); + labels[i], labels[i + 1], tables[i]->num_rows(), tables[i + 1]->num_rows()); } } std::vector> fields; diff --git a/Framework/Core/test/test_ASoA.cxx b/Framework/Core/test/test_ASoA.cxx index 6d9ee16fec84d..80519aebc9ee7 100644 --- a/Framework/Core/test/test_ASoA.cxx +++ b/Framework/Core/test/test_ASoA.cxx @@ -31,6 +31,7 @@ namespace test DECLARE_SOA_COLUMN(X, x, int); DECLARE_SOA_COLUMN(Y, y, int); DECLARE_SOA_COLUMN(Z, z, int); +DECLARE_SOA_COLUMN(W, w, int); DECLARE_SOA_DYNAMIC_COLUMN(Sum, sum, [](int x, int y) { return x + y; }); DECLARE_SOA_EXPRESSION_COLUMN(ESum, esum, int, test::x + test::y); } // namespace test @@ -268,9 +269,17 @@ TEST_CASE("TestJoinedTables") rowWriterZ(0, 8); auto tableZ = builderZ.finalize(); + TableBuilder builderW; + auto rowWriterW = builderW.persist({"fW"}); + rowWriterW(0, 8); + rowWriterW(0, 8); + rowWriterW(0, 8); + auto tableW = builderW.finalize(); + using TestX = InPlaceTable<"A0"_h, o2::aod::test::X>; using TestY = InPlaceTable<"A1"_h, o2::aod::test::Y>; using TestZ = InPlaceTable<"A2"_h, o2::aod::test::Z>; + using TestW = InPlaceTable<"A3"_h, o2::aod::test::W>; using Test = Join; REQUIRE(Test::contains()); @@ -303,6 +312,12 @@ TEST_CASE("TestJoinedTables") for (auto& test : tests4) { REQUIRE(15 == test.x() + test.y() + test.z()); } + + try { + auto testF = join(TestZ{tableZ}, TestW{tableW}); + } catch (RuntimeErrorRef ref) { + REQUIRE(std::string{error_from_ref(ref).what} == "Tables TEST and TEST have different sizes (8 vs 3) and cannot be joined!"); + } } TEST_CASE("TestConcatTables") From 35e208b13fb4c30c38ff4b3d05535fb370b53c0a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Mon, 19 May 2025 11:07:42 +0200 Subject: [PATCH 0529/1914] DPL Analysis: prevent slice cache from updating unnecessarily (#14257) * Cache setup now only 
happens after init when process configurables' values are final * Add inline contrained functions to avoid using "overloaded" * add error messages for unexpected situations --- Framework/Core/include/Framework/ASoA.h | 12 +-- .../Core/include/Framework/AnalysisManagers.h | 14 ++- .../Core/include/Framework/AnalysisTask.h | 102 ++++++++++++------ .../Framework/ArrowTableSlicingCache.h | 45 +++++--- .../Core/include/Framework/GroupSlicer.h | 2 +- Framework/Core/src/ASoA.cxx | 2 +- Framework/Core/src/ArrowSupport.cxx | 19 ++-- Framework/Core/src/ArrowTableSlicingCache.cxx | 60 +++++++---- Framework/Core/test/test_GroupSlicer.cxx | 4 +- 9 files changed, 167 insertions(+), 93 deletions(-) diff --git a/Framework/Core/include/Framework/ASoA.h b/Framework/Core/include/Framework/ASoA.h index c13cfe912c814..4ed8e830fe137 100644 --- a/Framework/Core/include/Framework/ASoA.h +++ b/Framework/Core/include/Framework/ASoA.h @@ -1400,10 +1400,10 @@ namespace o2::framework struct PreslicePolicyBase { const std::string binding; - StringPair bindingKey; + Entry bindingKey; bool isMissing() const; - StringPair const& getBindingKey() const; + Entry const& getBindingKey() const; }; struct PreslicePolicySorted : public PreslicePolicyBase { @@ -1428,7 +1428,7 @@ struct PresliceBase : public Policy { const std::string binding; PresliceBase(expressions::BindingNode index_) - : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, std::make_pair(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} + : Policy{PreslicePolicyBase{{o2::soa::getLabelFromTypeForKey(std::string{index_.name})}, Entry(o2::soa::getLabelFromTypeForKey(std::string{index_.name}), std::string{index_.name})}, {}} { } @@ -1508,7 +1508,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + 
missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; @@ -1545,7 +1545,7 @@ auto doSliceBy(T const* table, o2::framework::PresliceBase const { if constexpr (OPT) { if (container.isMissing()) { - missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType>().data(), container.bindingKey.key.c_str()); } } auto selection = container.getSliceFor(value); @@ -1574,7 +1574,7 @@ auto doFilteredSliceBy(T const* table, o2::framework::PresliceBase().data(), container.bindingKey.second.c_str()); + missingOptionalPreslice(getLabelFromType().data(), container.bindingKey.key.c_str()); } } uint64_t offset = 0; diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index b76d88ea5ee66..1d894b2b67948 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -534,39 +534,43 @@ static void setGroupedCombination(C& comb, TG& grouping, std::tuple& asso /// Preslice handling template requires(!is_preslice) -bool registerCache(T&, std::vector&, std::vector&) +bool registerCache(T&, Cache&, Cache&) { return false; } template requires std::same_as -bool registerCache(T& preslice, std::vector& bsks, std::vector&) +bool registerCache(T& preslice, Cache& bsks, Cache&) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsks.begin(), bsks.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsks.end()) { bsks.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + 
locate->enabled = true; } return true; } template requires std::same_as -bool registerCache(T& preslice, std::vector&, std::vector& bsksU) +bool registerCache(T& preslice, Cache&, Cache& bsksU) { if constexpr (T::optional) { if (preslice.binding == "[MISSING]") { return true; } } - auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.first == preslice.bindingKey.first) && (entry.second == preslice.bindingKey.second); }); + auto locate = std::find_if(bsksU.begin(), bsksU.end(), [&](auto const& entry) { return (entry.binding == preslice.bindingKey.binding) && (entry.key == preslice.bindingKey.key); }); if (locate == bsksU.end()) { bsksU.emplace_back(preslice.getBindingKey()); + } else if (locate->enabled == false) { + locate->enabled = true; } return true; } diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index 257a5358463c6..b53929f326712 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -65,21 +65,18 @@ concept is_enumeration = is_enumeration_v>; // the contents of an AnalysisTask... 
namespace { struct AnalysisDataProcessorBuilder { - template - static void addGroupingCandidates(std::vector& bk, std::vector& bku) + template + static void addGroupingCandidates(Cache& bk, Cache& bku, bool enabled) { - [&bk, &bku](framework::pack) mutable { - std::string key; - if constexpr (soa::is_iterator>) { - key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); - } - ([&bk, &bku, &key]() mutable { + [&bk, &bku, enabled](framework::pack) mutable { + auto key = std::string{"fIndex"} + o2::framework::cutString(soa::getLabelFromType>()); + ([&bk, &bku, &key, enabled]() mutable { if constexpr (soa::relatedByIndex, std::decay_t>()) { auto binding = soa::getLabelFromTypeForKey>(key); if constexpr (o2::soa::is_smallgroups>) { - framework::updatePairList(bku, binding, key); + framework::updatePairList(bku, binding, key, enabled); } else { - framework::updatePairList(bk, binding, key); + framework::updatePairList(bk, binding, key, enabled); } } }(), @@ -145,34 +142,72 @@ struct AnalysisDataProcessorBuilder { } /// helper to parse the process arguments + template + inline static bool requestInputsFromArgs(T&, std::string const&, std::vector&, std::vector&) + { + return false; + } + template + inline static bool requestInputsFromArgs(T& pc, std::string const& name, std::vector& inputs, std::vector& eis) + { + AnalysisDataProcessorBuilder::inputsFromArgs(pc.process, (name + "/" + pc.name).c_str(), pc.value, inputs, eis); + return true; + } + template + inline static bool requestCacheFromArgs(T&, Cache&, Cache&) + { + return false; + } + template + inline static bool requestCacheFromArgs(T& pc, Cache& bk, Cache& bku) + { + AnalysisDataProcessorBuilder::cacheFromArgs(pc.process, pc.value, bk, bku); + return true; + } /// 1. 
enumeration (must be the only argument) template - static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(A), const char* /*name*/, bool /*value*/, std::vector& inputs, std::vector&) //, Cache&, Cache&) { std::vector inputMetadata; // FIXME: for the moment we do not support begin, end and step. DataSpecUtils::updateInputList(inputs, InputSpec{"enumeration", "DPL", "ENUM", 0, Lifetime::Enumeration, inputMetadata}); } - /// 2. grouping case - 1st argument is an iterator + /// 2. 1st argument is an iterator template - static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector& bk, std::vector& bku) + static void inputsFromArgs(R (C::*)(A, Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos) //, Cache& bk, Cache& bku) requires(std::is_lvalue_reference_v && (std::is_lvalue_reference_v && ...)) { - addGroupingCandidates(bk, bku); constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions::parent_t, Args...>(hash, name, value, inputs, eInfos); } /// 3. generic case template - static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos, std::vector&, std::vector&) + static void inputsFromArgs(R (C::*)(Args...), const char* name, bool value, std::vector& inputs, std::vector& eInfos) //, Cache&, Cache&) requires(std::is_lvalue_reference_v && ...) { constexpr auto hash = o2::framework::TypeIdHelpers::uniqueId(); addInputsAndExpressions(hash, name, value, inputs, eInfos); } + /// 1. enumeration (no grouping) + template + static void cacheFromArgs(R (C::*)(A), bool, Cache&, Cache&) + { + } + /// 2. 
iterator (the only grouping case) + template + static void cacheFromArgs(R (C::*)(A, Args...), bool value, Cache& bk, Cache& bku) + { + addGroupingCandidates(bk, bku, value); + } + /// 3. generic case (no grouping) + template + static void cacheFromArgs(R (C::*)(A, Args...), bool, Cache&, Cache&) + { + } + template static auto extractTableFromRecord(InputRecord& record) { @@ -480,8 +515,6 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) std::vector inputs; std::vector options; std::vector expressionInfos; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; /// make sure options and configurables are set before expression infos are created homogeneous_apply_refs([&options, &hash](auto& element) { return analysis_task_parsers::appendOption(options, element); }, *task.get()); @@ -490,23 +523,15 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) /// parse process functions defined by corresponding configurables if constexpr (requires { &T::process; }) { - AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); + AnalysisDataProcessorBuilder::inputsFromArgs(&T::process, "default", true, inputs, expressionInfos); } homogeneous_apply_refs( - overloaded{ - [name = name_str, &expressionInfos, &inputs, &bindingsKeys, &bindingsKeysUnsorted](framework::is_process_configurable auto& x) mutable { - // this pushes (argumentIndex,processHash,schemaPtr,nullptr) into expressionInfos for arguments that are Filtered/filtered_iterators - AnalysisDataProcessorBuilder::inputsFromArgs(x.process, (name + "/" + x.name).c_str(), x.value, inputs, expressionInfos, bindingsKeys, bindingsKeysUnsorted); - return true; - }, - [](auto&) { - return false; - }}, + [name = name_str, &expressionInfos, &inputs](auto& x) mutable { + // this pushes (argumentIndex, processHash, schemaPtr, nullptr) into expressionInfos for arguments that are 
Filtered/filtered_iterators + return AnalysisDataProcessorBuilder::requestInputsFromArgs(x, name, inputs, expressionInfos); + }, *task.get()); - // add preslice declarations to slicing cache definition - homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& element) { return analysis_task_parsers::registerCache(element, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); - // request base tables for spawnable extended tables and indices to be built // this checks for duplications homogeneous_apply_refs([&inputs](auto& element) { @@ -526,7 +551,12 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... args) requiredServices.insert(requiredServices.end(), arrowServices.begin(), arrowServices.end()); homogeneous_apply_refs([&requiredServices](auto& element) { return analysis_task_parsers::addService(requiredServices, element); }, *task.get()); - auto algo = AlgorithmSpec::InitCallback{[task = task, expressionInfos, bindingsKeys, bindingsKeysUnsorted](InitContext& ic) mutable { + auto algo = AlgorithmSpec::InitCallback{[task = task, expressionInfos](InitContext& ic) mutable { + Cache bindingsKeys; + Cache bindingsKeysUnsorted; + // add preslice declarations to slicing cache definition + homogeneous_apply_refs([&bindingsKeys, &bindingsKeysUnsorted](auto& element) { return analysis_task_parsers::registerCache(element, bindingsKeys, bindingsKeysUnsorted); }, *task.get()); + homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareOption(ic, element); }, *task.get()); homogeneous_apply_refs([&ic](auto&& element) { return analysis_task_parsers::prepareService(ic, element); }, *task.get()); @@ -556,6 +586,16 @@ DataProcessorSpec adaptAnalysisTask(ConfigContext const& ctx, Args&&... 
args) task->init(ic); } + /// parse process functions to enable requested grouping caches - note that at this state process configurables have their final values + if constexpr (requires { &T::process; }) { + AnalysisDataProcessorBuilder::cacheFromArgs(&T::process, true, bindingsKeys, bindingsKeysUnsorted); + } + homogeneous_apply_refs( + [&bindingsKeys, &bindingsKeysUnsorted](auto& x) mutable { + return AnalysisDataProcessorBuilder::requestCacheFromArgs(x, bindingsKeys, bindingsKeysUnsorted); + }, + *task.get()); + ic.services().get().setCaches(std::move(bindingsKeys)); ic.services().get().setCachesUnsorted(std::move(bindingsKeysUnsorted)); // initialize global caches diff --git a/Framework/Core/include/Framework/ArrowTableSlicingCache.h b/Framework/Core/include/Framework/ArrowTableSlicingCache.h index 2edc23a63ce76..292a67023fc5e 100644 --- a/Framework/Core/include/Framework/ArrowTableSlicingCache.h +++ b/Framework/Core/include/Framework/ArrowTableSlicingCache.h @@ -34,51 +34,64 @@ struct SliceInfoUnsortedPtr { gsl::span getSliceFor(int value) const; }; -using StringPair = std::pair; +struct Entry { + std::string binding; + std::string key; + bool enabled; + + Entry(std::string b, std::string k, bool e = true) + : binding{b}, + key{k}, + enabled{e} + { + } +}; + +using Cache = std::vector; -void updatePairList(std::vector& list, std::string const& binding, std::string const& key); +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled); struct ArrowTableSlicingCacheDef { constexpr static ServiceKind service_kind = ServiceKind::Global; - std::vector bindingsKeys; - std::vector bindingsKeysUnsorted; + Cache bindingsKeys; + Cache bindingsKeysUnsorted; - void setCaches(std::vector&& bsks); - void setCachesUnsorted(std::vector&& bsks); + void setCaches(Cache&& bsks); + void setCachesUnsorted(Cache&& bsks); }; struct ArrowTableSlicingCache { constexpr static ServiceKind service_kind = ServiceKind::Stream; - std::vector 
bindingsKeys; + Cache bindingsKeys; std::vector>> values; std::vector>> counts; - std::vector bindingsKeysUnsorted; + Cache bindingsKeysUnsorted; std::vector> valuesUnsorted; std::vector groups; - ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted = {}); // set caching information externally - void setCaches(std::vector&& bsks, std::vector&& bsksUnsorted = {}); + void setCaches(Cache&& bsks, Cache&& bsksUnsorted = {}); // update slicing info cache entry (assumes it is already present) arrow::Status updateCacheEntry(int pos, std::shared_ptr const& table); arrow::Status updateCacheEntryUnsorted(int pos, std::shared_ptr const& table); // helper to locate cache position - std::pair getCachePos(StringPair const& bindingKey) const; - int getCachePosSortedFor(StringPair const& bindingKey) const; - int getCachePosUnsortedFor(StringPair const& bindingKey) const; + std::pair getCachePos(Entry const& bindingKey) const; + int getCachePosSortedFor(Entry const& bindingKey) const; + int getCachePosUnsortedFor(Entry const& bindingKey) const; // get slice from cache for a given value - SliceInfoPtr getCacheFor(StringPair const& bindingKey) const; - SliceInfoUnsortedPtr getCacheUnsortedFor(StringPair const& bindingKey) const; + SliceInfoPtr getCacheFor(Entry const& bindingKey) const; + SliceInfoUnsortedPtr getCacheUnsortedFor(Entry const& bindingKey) const; SliceInfoPtr getCacheForPos(int pos) const; SliceInfoUnsortedPtr getCacheUnsortedForPos(int pos) const; - static void validateOrder(StringPair const& bindingKey, std::shared_ptr const& input); + static void validateOrder(Entry const& bindingKey, std::shared_ptr const& input); }; } // namespace o2::framework diff --git a/Framework/Core/include/Framework/GroupSlicer.h b/Framework/Core/include/Framework/GroupSlicer.h index 64b1d863c59e6..b8436314b057e 100644 --- a/Framework/Core/include/Framework/GroupSlicer.h +++ 
b/Framework/Core/include/Framework/GroupSlicer.h @@ -55,7 +55,7 @@ struct GroupSlicer { { constexpr auto index = framework::has_type_at_v>(associated_pack_t{}); auto binding = o2::soa::getLabelFromTypeForKey>(mIndexColumnName); - auto bk = std::make_pair(binding, mIndexColumnName); + auto bk = Entry(binding, mIndexColumnName); if constexpr (!o2::soa::is_smallgroups>) { if (table.size() == 0) { return; diff --git a/Framework/Core/src/ASoA.cxx b/Framework/Core/src/ASoA.cxx index bd6ca551d24ec..3a681ee931a2b 100644 --- a/Framework/Core/src/ASoA.cxx +++ b/Framework/Core/src/ASoA.cxx @@ -194,7 +194,7 @@ bool PreslicePolicyBase::isMissing() const return binding == "[MISSING]"; } -StringPair const& PreslicePolicyBase::getBindingKey() const +Entry const& PreslicePolicyBase::getBindingKey() const { return bindingKey; } diff --git a/Framework/Core/src/ArrowSupport.cxx b/Framework/Core/src/ArrowSupport.cxx index 12a4c7131e828..3b13e30581f70 100644 --- a/Framework/Core/src/ArrowSupport.cxx +++ b/Framework/Core/src/ArrowSupport.cxx @@ -567,26 +567,27 @@ o2::framework::ServiceSpec ArrowSupport::arrowTableSlicingCacheSpec() .name = "arrow-slicing-cache", .uniqueId = CommonServices::simpleServiceId(), .init = [](ServiceRegistryRef services, DeviceState&, fair::mq::ProgOptions&) { return ServiceHandle{TypeIdHelpers::uniqueId(), - new ArrowTableSlicingCache(std::vector>{services.get().bindingsKeys}, std::vector{services.get().bindingsKeysUnsorted}), + new ArrowTableSlicingCache(Cache{services.get().bindingsKeys}, + Cache{services.get().bindingsKeysUnsorted}), ServiceKind::Stream, typeid(ArrowTableSlicingCache).name()}; }, .configure = CommonServices::noConfiguration(), .preProcessing = [](ProcessingContext& pc, void* service_ptr) { auto* service = static_cast(service_ptr); auto& caches = service->bindingsKeys; - for (auto i = 0; i < caches.size(); ++i) { - if (pc.inputs().getPos(caches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntry(i, 
pc.inputs().get(caches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < caches.size(); ++i) { + if (caches[i].enabled && pc.inputs().getPos(caches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntry(i, pc.inputs().get(caches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].first.c_str(), caches[i].second.c_str()); + throw runtime_error_f("Failed to update slice cache for %s/%s", caches[i].binding.c_str(), caches[i].key.c_str()); } } } auto& unsortedCaches = service->bindingsKeysUnsorted; - for (auto i = 0; i < unsortedCaches.size(); ++i) { - if (pc.inputs().getPos(unsortedCaches[i].first.c_str()) >= 0) { - auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].first.c_str())->asArrowTable()); + for (auto i = 0u; i < unsortedCaches.size(); ++i) { + if (unsortedCaches[i].enabled && pc.inputs().getPos(unsortedCaches[i].binding.c_str()) >= 0) { + auto status = service->updateCacheEntryUnsorted(i, pc.inputs().get(unsortedCaches[i].binding.c_str())->asArrowTable()); if (!status.ok()) { - throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].first.c_str(), unsortedCaches[i].second.c_str()); + throw runtime_error_f("failed to update slice cache (unsorted) for %s/%s", unsortedCaches[i].binding.c_str(), unsortedCaches[i].key.c_str()); } } } }, diff --git a/Framework/Core/src/ArrowTableSlicingCache.cxx b/Framework/Core/src/ArrowTableSlicingCache.cxx index 4b31f96e32fba..e001e293c4733 100644 --- a/Framework/Core/src/ArrowTableSlicingCache.cxx +++ b/Framework/Core/src/ArrowTableSlicingCache.cxx @@ -19,10 +19,13 @@ namespace o2::framework { -void updatePairList(std::vector& list, std::string const& binding, std::string const& key) +void updatePairList(Cache& list, std::string const& binding, std::string const& key, bool enabled = true) { - if (std::find_if(list.begin(), list.end(), [&binding, 
&key](auto const& entry) { return (entry.first == binding) && (entry.second == key); }) == list.end()) { - list.emplace_back(binding, key); + auto locate = std::find_if(list.begin(), list.end(), [&binding, &key](auto const& entry) { return (entry.binding == binding) && (entry.key == key); }); + if (locate == list.end()) { + list.emplace_back(binding, key, enabled); + } else if (!locate->enabled && enabled) { + locate->enabled = true; } } @@ -65,17 +68,17 @@ gsl::span SliceInfoUnsortedPtr::getSliceFor(int value) const return {(*groups)[value].data(), (*groups)[value].size()}; } -void ArrowTableSlicingCacheDef::setCaches(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCaches(Cache&& bsks) { bindingsKeys = bsks; } -void ArrowTableSlicingCacheDef::setCachesUnsorted(std::vector&& bsks) +void ArrowTableSlicingCacheDef::setCachesUnsorted(Cache&& bsks) { bindingsKeysUnsorted = bsks; } -ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, std::vector&& bsksUnsorted) +ArrowTableSlicingCache::ArrowTableSlicingCache(Cache&& bsks, Cache&& bsksUnsorted) : bindingsKeys{bsks}, bindingsKeysUnsorted{bsksUnsorted} { @@ -86,7 +89,7 @@ ArrowTableSlicingCache::ArrowTableSlicingCache(std::vector&& bsks, s groups.resize(bindingsKeysUnsorted.size()); } -void ArrowTableSlicingCache::setCaches(std::vector&& bsks, std::vector&& bsksUnsorted) +void ArrowTableSlicingCache::setCaches(Cache&& bsks, Cache&& bsksUnsorted) { bindingsKeys = bsks; bindingsKeysUnsorted = bsksUnsorted; @@ -107,11 +110,15 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntry(int pos, std::shared_ptr< counts[pos].reset(); return arrow::Status::OK(); } + auto& [b, k, e] = bindingsKeys[pos]; + if (!e) { + throw runtime_error_f("Disabled cache %s/%s update requested", b.c_str(), k.c_str()); + } validateOrder(bindingsKeys[pos], table); arrow::Datum value_counts; auto options = arrow::compute::ScalarAggregateOptions::Defaults(); ARROW_ASSIGN_OR_RAISE(value_counts, - 
arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].second)}, + arrow::compute::CallFunction("value_counts", {table->GetColumnByName(bindingsKeys[pos].key)}, &options)); auto pair = static_cast(value_counts.array()); values[pos].reset(); @@ -128,7 +135,10 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (table->num_rows() == 0) { return arrow::Status::OK(); } - auto& [b, k] = bindingsKeysUnsorted[pos]; + auto& [b, k, e] = bindingsKeysUnsorted[pos]; + if (!e) { + throw runtime_error_f("Disabled unsorted cache %s/%s update requested", b.c_str(), k.c_str()); + } auto column = table->GetColumnByName(k); auto row = 0; for (auto iChunk = 0; iChunk < column->num_chunks(); ++iChunk) { @@ -139,7 +149,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st if (std::find(valuesUnsorted[pos].begin(), valuesUnsorted[pos].end(), v) == valuesUnsorted[pos].end()) { valuesUnsorted[pos].push_back(v); } - if (groups[pos].size() <= v) { + if ((int)groups[pos].size() <= v) { groups[pos].resize(v + 1); } (groups[pos])[v].push_back(row); @@ -151,7 +161,7 @@ arrow::Status ArrowTableSlicingCache::updateCacheEntryUnsorted(int pos, const st return arrow::Status::OK(); } -std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindingKey) const +std::pair ArrowTableSlicingCache::getCachePos(const Entry& bindingKey) const { auto pos = getCachePosSortedFor(bindingKey); if (pos != -1) { @@ -161,41 +171,47 @@ std::pair ArrowTableSlicingCache::getCachePos(const StringPair& bindi if (pos != -1) { return {pos, false}; } - throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s not found neither in sorted or unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); } -int ArrowTableSlicingCache::getCachePosSortedFor(StringPair const& bindingKey) const +int 
ArrowTableSlicingCache::getCachePosSortedFor(Entry const& bindingKey) const { - auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate = std::find_if(bindingsKeys.begin(), bindingsKeys.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate != bindingsKeys.end()) { return std::distance(bindingsKeys.begin(), locate); } return -1; } -int ArrowTableSlicingCache::getCachePosUnsortedFor(StringPair const& bindingKey) const +int ArrowTableSlicingCache::getCachePosUnsortedFor(Entry const& bindingKey) const { - auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](StringPair const& bk) { return (bindingKey.first == bk.first) && (bindingKey.second == bk.second); }); + auto locate_unsorted = std::find_if(bindingsKeysUnsorted.begin(), bindingsKeysUnsorted.end(), [&](Entry const& bk) { return (bindingKey.binding == bk.binding) && (bindingKey.key == bk.key); }); if (locate_unsorted != bindingsKeysUnsorted.end()) { return std::distance(bindingsKeysUnsorted.begin(), locate_unsorted); } return -1; } -SliceInfoPtr ArrowTableSlicingCache::getCacheFor(StringPair const& bindingKey) const +SliceInfoPtr ArrowTableSlicingCache::getCacheFor(Entry const& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (!s) { - throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in unsorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + } + if (!bindingsKeys[p].enabled) { + throw runtime_error_f("Disabled cache %s/%s is requested", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheForPos(p); } -SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedFor(const StringPair& bindingKey) const +SliceInfoUnsortedPtr 
ArrowTableSlicingCache::getCacheUnsortedFor(const Entry& bindingKey) const { auto [p, s] = getCachePos(bindingKey); if (s) { - throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.first.c_str(), bindingKey.second.c_str()); + throw runtime_error_f("%s/%s is found in sorted cache", bindingKey.binding.c_str(), bindingKey.key.c_str()); + } + if (!bindingsKeysUnsorted[p].enabled) { + throw runtime_error_f("Disabled unsorted cache %s/%s is requested", bindingKey.binding.c_str(), bindingKey.key.c_str()); } return getCacheUnsortedForPos(p); @@ -224,9 +240,9 @@ SliceInfoUnsortedPtr ArrowTableSlicingCache::getCacheUnsortedForPos(int pos) con }; } -void ArrowTableSlicingCache::validateOrder(StringPair const& bindingKey, const std::shared_ptr& input) +void ArrowTableSlicingCache::validateOrder(Entry const& bindingKey, const std::shared_ptr& input) { - auto const& [target, key] = bindingKey; + auto const& [target, key, enabled] = bindingKey; auto column = input->GetColumnByName(key); auto array0 = static_cast>(column->chunk(0)->data()); int32_t prev = 0; diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 161939141e790..091c21eeae229 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -683,7 +683,7 @@ TEST_CASE("ArrowDirectSlicing") std::vector slices; std::vector offsts; - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); auto s = cache.updateCacheEntry(0, {evtTable}); auto lcache = cache.getCacheFor(bk); @@ -741,7 +741,7 @@ TEST_CASE("TestSlicingException") } auto evtTable = builderE.finalize(); - auto bk = std::make_pair(soa::getLabelFromType(), "fID"); + auto bk = Entry(soa::getLabelFromType(), "fID"); ArrowTableSlicingCache cache({bk}); try { From 0c5140edf08d83042e2b8362eb152db6e01e3177 Mon Sep 17 00:00:00 2001 From: Christian Sonnabend Date: Mon, 19 May 2025 
12:48:54 +0200 Subject: [PATCH 0530/1914] NN clustering: VRAM memory leak fix + (u)int -> (u)int32_t (#14272) * VRAM memory leak fix + (u)int -> (u)int32_t * Please consider the following formatting changes * Fixing my own debug messages * Making shared pointer for releasing * Bug-fix * Adding Davids patch --------- Co-authored-by: ALICE Action Bot --- Common/ML/include/ML/OrtInterface.h | 14 +-- Common/ML/src/OrtInterface.cxx | 42 ++++--- .../Global/GPUChainTrackingClusterizer.cxx | 33 ++++-- .../GPUTPCNNClusterizerHost.cxx | 23 ++-- .../GPUTPCNNClusterizerHost.h | 2 +- .../GPUTPCNNClusterizerKernels.cxx | 108 +++++++++--------- .../GPUTPCNNClusterizerKernels.h | 7 +- 7 files changed, 128 insertions(+), 101 deletions(-) diff --git a/Common/ML/include/ML/OrtInterface.h b/Common/ML/include/ML/OrtInterface.h index b4f40f3f5c694..7224645425856 100644 --- a/Common/ML/include/ML/OrtInterface.h +++ b/Common/ML/include/ML/OrtInterface.h @@ -45,14 +45,10 @@ class OrtModel public: // Constructors & destructors - OrtModel() = default; - OrtModel(std::unordered_map optionsMap) { init(optionsMap); } - void init(std::unordered_map optionsMap) - { - initOptions(optionsMap); - initEnvironment(); - } - virtual ~OrtModel() = default; + OrtModel(); + OrtModel(std::unordered_map optionsMap); + void init(std::unordered_map optionsMap); + virtual ~OrtModel(); // General purpose void initOptions(std::unordered_map optionsMap); @@ -113,7 +109,7 @@ class OrtModel private: // ORT variables -> need to be hidden as pImpl struct OrtVariables; - OrtVariables* mPImplOrt; + std::unique_ptr mPImplOrt; // Input & Output specifications of the loaded network std::vector mInputNamesChar, mOutputNamesChar; diff --git a/Common/ML/src/OrtInterface.cxx b/Common/ML/src/OrtInterface.cxx index df7f0a2deba82..8f31761489997 100644 --- a/Common/ML/src/OrtInterface.cxx +++ b/Common/ML/src/OrtInterface.cxx @@ -27,11 +27,20 @@ namespace o2 namespace ml { +OrtModel::OrtModel() = default; 
+OrtModel::OrtModel(std::unordered_map optionsMap) { init(optionsMap); } +OrtModel::~OrtModel() = default; +void OrtModel::init(std::unordered_map optionsMap) +{ + initOptions(optionsMap); + initEnvironment(); +} + struct OrtModel::OrtVariables { // The actual implementation is hidden in the .cxx file // ORT runtime objects Ort::RunOptions runOptions; - std::shared_ptr env = nullptr; - std::shared_ptr session = nullptr; ///< ONNX session + std::unique_ptr env = nullptr; + std::unique_ptr session = nullptr; ///< ONNX session Ort::SessionOptions sessionOptions; Ort::AllocatorWithDefaultOptions allocator; Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault); @@ -41,7 +50,7 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c // General purpose void OrtModel::initOptions(std::unordered_map optionsMap) { - mPImplOrt = new OrtVariables(); + mPImplOrt = std::make_unique(); // Load from options map if (!optionsMap.contains("model-path")) { @@ -101,7 +110,7 @@ void OrtModel::initOptions(std::unordered_map optionsM void OrtModel::initEnvironment() { - mPImplOrt->env = std::make_shared( + mPImplOrt->env = std::make_unique( OrtLoggingLevel(mLoggingLevel), (mEnvName.empty() ? 
"ORT" : mEnvName.c_str()), // Integrate ORT logging into Fairlogger @@ -129,7 +138,7 @@ void OrtModel::initSession() if (mAllocateDeviceMemory) { memoryOnDevice(mDeviceId); } - mPImplOrt->session = std::make_shared(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->session = std::make_unique(*mPImplOrt->env, mModelPath.c_str(), mPImplOrt->sessionOptions); mPImplOrt->ioBinding = std::make_unique(*mPImplOrt->session); setIO(); @@ -147,12 +156,12 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) (mPImplOrt->sessionOptions).AddConfigEntry("session.use_env_allocators", "1"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time (mPImplOrt->sessionOptions).AddConfigEntry("session_options.enable_cpu_mem_arena", "0"); // This should enable to use the volatile memory allocation defined in O2/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx; not working yet: ONNX still assigns new memory at init time // Arena memory shrinkage comes at performance cost - /// For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; - // (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 + // For now prefer to use single allocation, enabled by O2/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu -> SetONNXGPUStream -> rocm_options.arena_extend_strategy = 0; + (mPImplOrt->runOptions).AddConfigEntry("memory.enable_memory_arena_shrinkage", ("gpu:" + std::to_string(deviceIndex)).c_str()); // See 
kOrtRunOptionsConfigEnableMemoryArenaShrinkage, https://github.com/microsoft/onnxruntime/blob/90c263f471bbce724e77d8e62831d3a9fa838b2f/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h#L27 std::string dev_mem_str = ""; if (mDeviceType == "ROCM") { - dev_mem_str = "Hip"; + dev_mem_str = "HipPinned"; } if (mDeviceType == "CUDA") { dev_mem_str = "Cuda"; @@ -166,7 +175,7 @@ void OrtModel::memoryOnDevice(int32_t deviceIndex) void OrtModel::resetSession() { - mPImplOrt->session = std::make_shared(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); + mPImplOrt->session = std::make_unique(*(mPImplOrt->env), mModelPath.c_str(), mPImplOrt->sessionOptions); } // Getters @@ -252,7 +261,7 @@ void OrtModel::setIO() void OrtModel::setEnv(Ort::Env* env) { - mPImplOrt->env = std::shared_ptr(env); + mPImplOrt->env.reset(env); } // Inference @@ -308,6 +317,14 @@ void OrtModel::inference(I* input, int64_t input_size, O* output) (mPImplOrt->ioBinding)->BindOutput(mOutputNames[0].c_str(), outputTensor); (mPImplOrt->session)->Run(mPImplOrt->runOptions, *mPImplOrt->ioBinding); + // mPImplOrt->session->Run( + // mPImplOrt->runOptions, + // mInputNamesChar.data(), + // &inputTensor, + // mInputNamesChar.size(), + // mOutputNamesChar.data(), + // &outputTensor, + // mOutputNamesChar.size()); } template void OrtModel::inference(OrtDataType::Float16_t*, int64_t, OrtDataType::Float16_t*); @@ -427,10 +444,7 @@ template std::vector OrtModel::inferencesession->EndProfiling(); - // } - LOG(info) << "(ORT) Size of mPImplOrt: " << sizeof(*mPImplOrt) << " bytes"; + mPImplOrt.reset(); } // private diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 2cdd1bb76bf00..6c4e60a6025e1 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -659,7 +659,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) 
// But environment must be valid, so we init the model environment first and use it here afterwards. // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator. // TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + // if (lane == 0) { + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); + // } // recreateMemoryAllocator = true; (nnApplications[lane].mModelClass).initSession(); } @@ -671,7 +673,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } // (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv()); (nnApplications[lane].mModelReg1).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator); (nnApplications[lane].mModelReg1).initSession(); } if (nnApplications[lane].mModelsUsed[2]) { @@ -680,8 +682,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (nnApplications[lane].mModelReg2.getIntraOpNumThreads() > maxThreads) { nnApplications[lane].mModelReg2.setIntraOpNumThreads(maxThreads); } + // (nnApplications[lane].mModelReg2).setEnv((nnApplications[lane].mModelClass).getEnv()); (nnApplications[lane].mModelReg2).initEnvironment(); - // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 
recreateMemoryAllocator); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator); (nnApplications[lane].mModelReg2).initSession(); } if (nn_settings.nnClusterizerVerbosity < 3) { @@ -707,8 +710,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) if (doGPU) { WriteToConstantMemory(RecoStep::TPCClusterFinding, (char*)&processors()->tpcNNClusterer - (char*)processors(), &processorsShadow()->tpcNNClusterer, sizeof(GPUTPCNNClusterizer) * NSECTORS, mRec->NStreams() - 1, &mEvents->init); } - LOG(info) << "Size of nnApplications[lane]: " << sizeof(nnApplications[0]) << " bytes"; - LOG(info) << "Size of nnApplications: " << sizeof(GPUTPCNNClusterizerHost) * GetProcessingSettings().nTPCClustererLanes << " bytes"; } #endif @@ -976,6 +977,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN; GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane]; + // // bool recreateMemoryAllocator = false; + // if (lane == 0) { + // (nnApplications[lane].mModelClass).initEnvironment(); + // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 0); + // } + // // recreateMemoryAllocator = true; + // (nnApplications[lane].mModelClass).initSession(); + // (nnApplications[lane].mModelReg1).initSession(); + int withMC = (doGPU && propagateMCLabels); if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) { @@ -1188,12 +1198,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { - // if (GetProcessingSettings().nn.applyNNclusterizer) { - // GPUTPCNNClusterizerHost& nnApplication = 
nnApplications[i]; - // nnApplication.mModelClass.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.mModelReg1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // nnApplication.mModelReg2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling); - // } + if (GetProcessingSettings().nn.applyNNclusterizer) { + LOG(info) << "(ORT) Environment releasing..."; + GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; + nnApplication.mModelClass.release(true); + nnApplication.mModelReg1.release(true); + nnApplication.mModelReg2.release(true); + } if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx index ca2deec60601c..90f1d6e27246f 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx @@ -136,8 +136,8 @@ struct MockedOrtAllocator : OrtAllocator { std::atomic memory_inuse{0}; std::atomic num_allocations{0}; std::atomic num_reserve_allocations{0}; - OrtMemoryInfo* memory_info; - GPUReconstruction* rec; + OrtMemoryInfo* mMemoryInfoInternal; + GPUReconstruction* mRecInternal; }; MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info) @@ -147,37 +147,36 @@ MockedOrtAllocator::MockedOrtAllocator(GPUReconstruction* r, OrtMemoryInfo* info OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast(this_)->Free(p); }; OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast(this_)->Info(); }; OrtAllocator::Reserve = [](OrtAllocator* this_, size_t size) { return static_cast(this_)->Reserve(size); }; - rec = r; - memory_info = info; + mRecInternal = r; + mMemoryInfoInternal = info; } MockedOrtAllocator::~MockedOrtAllocator() { - // Ort::GetApi().ReleaseMemoryInfo(memory_info); + // Ort::GetApi().ReleaseMemoryInfo(mMemoryInfoInternal); (void)0; // 
Suppress warning for empty destructor } void* MockedOrtAllocator::Alloc(size_t size) { - // LOG(info) << "(ORT) Allocating volatile memory of size " << size << " bytes"; - return rec->AllocateVolatileDeviceMemory(size); + LOG(info) << "(ORT) Allocating direct memory of size " << size << " bytes"; + return mRecInternal->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK); } void* MockedOrtAllocator::Reserve(size_t size) { - // LOG(info) << "(ORT) Reserving volatile memory of size " << size << " bytes"; - return rec->AllocateVolatileDeviceMemory(size); + LOG(info) << "(ORT) Reserving direct memory of size " << size << " bytes"; + return mRecInternal->AllocateDirectMemory(size, GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_STACK); } void MockedOrtAllocator::Free(void* p) { // LOG(info) << "(ORT) Freeing volatile memory " << p; - rec->ReturnVolatileDeviceMemory(); } const OrtMemoryInfo* MockedOrtAllocator::Info() const { - return memory_info; + return mMemoryInfoInternal; } size_t MockedOrtAllocator::NumAllocations() const @@ -197,7 +196,7 @@ void MockedOrtAllocator::LeakCheck() } } -void GPUTPCNNClusterizerHost::volatileOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) +void GPUTPCNNClusterizerHost::directOrtAllocator(Ort::Env* env, Ort::MemoryInfo* memInfo, GPUReconstruction* rec, bool recreate) { mMockedAlloc = std::make_shared(rec, (OrtMemoryInfo*)(*memInfo)); if (recreate) { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h index e659753f21d7d..4334c3418eb09 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h @@ -53,7 +53,7 @@ class GPUTPCNNClusterizerHost void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&); // ONNX - void volatileOrtAllocator(Ort::Env*, Ort::MemoryInfo*, 
GPUReconstruction*, bool = false); + void directOrtAllocator(Ort::Env*, Ort::MemoryInfo*, GPUReconstruction*, bool = false); MockedOrtAllocator* getMockedAllocator(); const OrtMemoryInfo* getMockedMemoryInfo(); diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx index 47bc5e8da80ca..8ca61602ab4e9 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx @@ -35,9 +35,9 @@ using namespace o2::gpu::tpccf; // Defining individual thread functions for data filling, determining the class label and running the CF clusterizer template <> -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; if (clustererNN.mOutputDataClass[glo_idx] == 0) { // default clusterizer should not be called in batched mode due to mess-up with thread indices @@ -51,29 +51,29 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t 
dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - uint write_idx = glo_idx * clustererNN.mNnClusterizerElementSize; // Potential optimization: Either choose mNnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId + uint32_t write_idx = glo_idx * clustererNN.mNnClusterizerElementSize; // Potential optimization: Either choose mNnClusterizerBatchedMode as a power of 2 or calculate from threadId and blockId CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; - int row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; + int32_t row = static_cast(peak.row()), pad = static_cast(peak.pad()), time = static_cast(peak.time()); // Explicit casting to avoid conversion errors float central_charge = static_cast(chargeMap[peak].unpack()); - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); + int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); #ifndef GPUCA_GPUCODE GPUCA_UNROLL(U(), U()); #endif - for (int r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { + for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) { bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); - int pad_offset = is_row_boundary ? 
0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); - for (int p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { + int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r); + for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) { bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); - for (int t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { + for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) { if (!is_boundary) { CfChargePos tmp_pos(row + r, pad + p, time + t); if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization @@ -111,21 +111,21 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; - uint base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize); - uint transient_index = glo_idx % clustererNN.mNnClusterizerElementSize; + uint32_t base_idx = CAMath::Floor(glo_idx / clustererNN.mNnClusterizerElementSize); 
+ uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerElementSize); CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); CfArray2D isPeakMap(clusterer.mPpeakMap); - CfChargePos peak = clusterer.mPfilteredPeakPositions[base_idx + batchStart]; - int row = static_cast(peak.row()), pad = static_cast(peak.pad()); + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; + int32_t row = static_cast(peak.row()), pad = static_cast(peak.pad()); if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) { - uint top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; + uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize; for (uint16_t i = 0; i < 8; i++) { Delta2 d = cfconsts::InnerNeighbors[i]; CfChargePos tmp_pos = peak.delta(d); @@ -142,8 +142,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(pad) / GPUTPCGeometry::NPads(row); } } else if ((int32_t)transient_index < (clustererNN.mNnClusterizerElementSize - 3)) { - int time = static_cast(peak.time()); - int r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; + int32_t time = static_cast(peak.time()); + int32_t r = CAMath::Floor(transient_index / ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1))) - clustererNN.mNnClusterizerSizeInputRow; bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0); if (is_row_boundary) { if (dtype == 0) { @@ -152,15 +152,16 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread(clustererNN.mNnClusterizerBoundaryFillValue); } } else { - int row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); - int pad_offset = 
GPUTPCNNClusterizerKernels::padOffset(row, row + r); - int rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); - int p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; - bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow); + int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow); + int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r); + int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1)); + int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset; + int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; + + bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) && (t < 0 || t >= TPC_MAX_FRAGMENT_LEN_GPU); if (!is_boundary) { float central_charge = static_cast(chargeMap[peak].unpack()); - int t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime; CfChargePos tmp_pos(row + r, pad + p, time + t); if (dtype == 0) { clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast(chargeMap[tmp_pos].unpack()) / central_charge); @@ -179,9 +180,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void 
GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); if (dtype == 0) { processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int)((processors.tpcNNClusterer[sector].mModelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].mNnClassThreshold); } else if (dtype == 1) { @@ -190,14 +191,14 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart) { auto& clustererNN = processors.tpcNNClusterer[sector]; - uint glo_idx = get_global_id(0); - uint elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes; + uint32_t glo_idx = get_global_id(0); + uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes; float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty] - uint class_label = 0; - for (uint pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) { + uint32_t class_label = 0; + for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) { if (pIdx == elem_iterator) { if (dtype == 0) { current_max_prob = static_cast(clustererNN.mModelProbabilities_16[pIdx]); @@ -212,7 +213,7 @@ GPUdii() void 
GPUTPCNNClusterizerKernels::Thread 1) { clustererNN.mClusterFlags[2 * glo_idx] = 1; @@ -221,25 +222,30 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; + uint32_t maxClusterNum = clusterer.mPmemory->counters.nClusters; + uint32_t full_glo_idx = glo_idx + batchStart; + if (full_glo_idx >= maxClusterNum) { + return; + } + int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes; + CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(full_glo_idx, maxClusterNum - 1)]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; - uint full_glo_idx = glo_idx + batchStart; - int model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes; // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.mNnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size(); - if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes == -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { + if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) { ClusterAccumulator pc; @@ -291,7 +297,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread -GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart) +GPUdii() void GPUTPCNNClusterizerKernels::Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart) { - uint glo_idx = get_global_id(0); + uint32_t glo_idx = get_global_id(0); auto& clusterer = processors.tpcClusterer[sector]; auto& clustererNN = processors.tpcNNClusterer[sector]; CfArray2D chargeMap(reinterpret_cast(clusterer.mPchargeMap)); - CfChargePos peak = clusterer.mPfilteredPeakPositions[glo_idx + batchStart]; + CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))]; float central_charge = static_cast(chargeMap[peak].unpack()); CPU_ONLY(MCLabelAccumulator labelAccElem(clusterer)); MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem); tpc::ClusterNative* clusterOut = (withMC) ? 
nullptr : clusterer.mPclusterByRow; - uint full_glo_idx = glo_idx + batchStart; - int model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes; + uint32_t full_glo_idx = glo_idx + batchStart; + uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes; if (clustererNN.mOutputDataClass[full_glo_idx] > 0) { @@ -384,7 +390,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread 62 ? global_shift : 0); } -GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int row, int pad, int global_shift) +GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t global_shift) { if (pad < 0 || row < 0) { // Faster short-circuit return true; diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h index dc7f537c6c1e8..dac2bf9554849 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h @@ -73,11 +73,12 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate private: static GPUd() void fillInputData(int32_t, int32_t, int32_t, int32_t, processorType&, uint8_t, int8_t, uint); static GPUd() void publishClustersReg1(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); + static GPUd() uint32_t sortIntoBuckets(GPUTPCClusterFinder&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t); static GPUd() void publishClustersReg2(uint, GPUSharedMemory&, processorType&, uint8_t, int8_t, int8_t, uint); - static GPUd() int padOffset(int, int); - static GPUd() int rowOffset(int, int); - static GPUd() bool isBoundary(int, int, int); + static GPUd() int32_t padOffset(int32_t, int32_t); + static GPUd() int32_t rowOffset(int32_t, int32_t); + static GPUd() bool isBoundary(int32_t, int32_t, int32_t); }; } // namespace o2::gpu From e7d217af61b67a12b508451c2f2ea925fda92f6e Mon Sep 17 00:00:00 2001 From: 
Marvin Hemmer <53471402+mhemmer-cern@users.noreply.github.com> Date: Mon, 19 May 2025 14:29:21 +0200 Subject: [PATCH 0531/1914] [EMCAL-688] EMCAL: Add fCross to the AnalysisCluster (#14282) - Add fCross the exoticity parameter to the AnalysisCluster so we can easily access it later in the emcalCorrectionTask in O2Physics --- .../EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h | 8 ++++++-- .../EMCAL/base/include/EMCALBase/ClusterFactory.h | 5 +++-- Detectors/EMCAL/base/src/ClusterFactory.cxx | 12 ++++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h b/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h index 758e0a1fa0b47..e19fd17dea2ce 100644 --- a/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h +++ b/DataFormats/Detectors/EMCAL/include/DataFormatsEMCAL/AnalysisCluster.h @@ -182,6 +182,9 @@ class AnalysisCluster float getCoreEnergy() const { return mCoreEnergy; } void setCoreEnergy(float energy) { mCoreEnergy = energy; } + float getFCross() const { return mFCross; } + void setFCross(float fCross) { mFCross = fCross; } + /// /// Returns TLorentzVector with momentum of the cluster. 
Only valid for clusters /// identified as photons or pi0 (overlapped gamma) produced on the vertex @@ -223,12 +226,13 @@ class AnalysisCluster float mTime = 0.; ///< Time of the digit/cell with maximal energy deposition bool mIsExotic = false; //!::buildCluster(int clusterIn float exoticTime = mInputsContainer[inputIndMax].getTimeStamp(); + float fCross = 0.; + try { - clusterAnalysis.setIsExotic(isExoticCell(towerId, inputEnergyMax, exoticTime)); + clusterAnalysis.setIsExotic(isExoticCell(towerId, inputEnergyMax, exoticTime, fCross)); + clusterAnalysis.setFCross(fCross); } catch (UninitLookUpTableException& e) { LOG(error) << e.what(); } @@ -253,7 +256,7 @@ void ClusterFactory::evalLocalPosition(gsl::span inputsInd clRmsXYZ[i] += (w * xyzi[i] * xyzi[i]); } } // w > 0 - } // dig loop + } // dig loop // cout << " wtot " << wtot << endl; @@ -600,7 +603,7 @@ std::tuple ClusterFactory::getMaximalEnergyI /// Look to cell neighbourhood and reject if it seems exotic //____________________________________________________________________________ template -bool ClusterFactory::isExoticCell(short towerId, float ecell, float const exoticTime) const +bool ClusterFactory::isExoticCell(short towerId, float ecell, float const exoticTime, float& fCross) const { if (ecell < mExoticCellMinAmplitude) { return false; // do not reject low energy cells @@ -612,8 +615,9 @@ bool ClusterFactory::isExoticCell(short towerId, float ecell, float c } float eCross = getECross(towerId, ecell, exoticTime); + fCross = 1.f - eCross / ecell; - if (1 - eCross / ecell > mExoticCellFraction) { + if (fCross > mExoticCellFraction) { LOG(debug) << "EXOTIC CELL id " << towerId << ", eCell " << ecell << ", eCross " << eCross << ", 1-eCross/eCell " << 1 - eCross / ecell; return true; } From 762cef75f0f7ce4c23c81e78efb439c72bdc7970 Mon Sep 17 00:00:00 2001 From: Marvin Hemmer <53471402+mhemmer-cern@users.noreply.github.com> Date: Mon, 19 May 2025 14:30:02 +0200 Subject: [PATCH 0532/1914] [EMCAL-689] 
EMCAL/Geometry: Add function to load alignment matrix from CCDB (#14237) - Add function `SetMisalMatrixFromCcdb` to set the misalignment matrices for the EMCal via the CCDB. This way they are not loaded from the GeoManager. The function expects a path inside the CCDB, which by default is set to `"Users/m/mhemmer/EMCAL/Config/GeometryAligned"`, and the timestamp. Currently in the CCDB we only have the old Run 2 alignment object. Once we have the new alignment objects we can upload them and load the matching one via the correct timestamp. --- .../EMCAL/base/include/EMCALBase/Geometry.h | 9 +++++- Detectors/EMCAL/base/src/Geometry.cxx | 31 +++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/Detectors/EMCAL/base/include/EMCALBase/Geometry.h b/Detectors/EMCAL/base/include/EMCALBase/Geometry.h index 04dcaa3b802de..4d4a947de88ca 100644 --- a/Detectors/EMCAL/base/include/EMCALBase/Geometry.h +++ b/Detectors/EMCAL/base/include/EMCALBase/Geometry.h @@ -22,7 +22,9 @@ #include #include #include +#include +#include "CCDB/BasicCCDBManager.h" #include "DataFormatsEMCAL/Constants.h" #include "EMCALBase/GeometryBase.h" #include "MathUtils/Cartesian.h" @@ -57,7 +59,7 @@ class Geometry /// | EMCAL_COMPLETE12SMV1_DCAL | Full EMCAL, 10 DCAL Supermodules (not used in practice) | /// | EMCAL_COMPLETE12SMV1_DCAL_8SM | Full EMCAL, 8 DCAL Supermodules (run2) | /// | EMCAL_COMPLETE12SMV1_DCAL_DEV | Full EMCAL, DCAL development geometry (not used) | - Geometry(const std::string_view name, const std::string_view mcname = "", const std::string_view mctitle = ""); + explicit Geometry(const std::string_view name, const std::string_view mcname = "", const std::string_view mctitle = ""); /// \brief Copy constructor. 
Geometry(const Geometry& geom); @@ -564,6 +566,11 @@ class Geometry /// void SetMisalMatrix(const TGeoHMatrix* m, Int_t smod) const; + /// + /// Method to set shift-rotational matrixes from CCDB + /// + void SetMisalMatrixFromCcdb(const char* path = "Users/m/mhemmer/EMCAL/Config/GeometryAligned", int timestamp = 10000) const; + /// /// Transform clusters cell position into global with alternative method, taking into account the depth calculation. /// Input are: diff --git a/Detectors/EMCAL/base/src/Geometry.cxx b/Detectors/EMCAL/base/src/Geometry.cxx index 920dc24823e83..6eff6c161f2a1 100644 --- a/Detectors/EMCAL/base/src/Geometry.cxx +++ b/Detectors/EMCAL/base/src/Geometry.cxx @@ -8,16 +8,21 @@ // In applying this license CERN does not waive the privileges and immunities // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. +#include "EMCALBase/Geometry.h" + +#include + #include +#include +#include +#include +#include #include #include #include #include -#include - -#include "EMCALBase/Geometry.h" #include "EMCALBase/ShishKebabTrd1Module.h" #include @@ -1557,6 +1562,7 @@ const TGeoHMatrix* Geometry::GetMatrixForSuperModule(Int_t smod) const if (!SMODULEMATRIX[smod]) { if (gGeoManager) { + LOG(info) << "Loading EMCAL misalignment matrix for SM " << smod << " from GeoManager."; SetMisalMatrix(GetMatrixForSuperModuleFromGeoManager(smod), smod); } else { LOG(fatal) << "Cannot find EMCAL misalignment matrices! 
Recover them either: \n" @@ -1762,6 +1768,25 @@ void Geometry::SetMisalMatrix(const TGeoHMatrix* m, Int_t smod) const } } +void Geometry::SetMisalMatrixFromCcdb(const char* path, int timestamp) const +{ + LOG(info) << "Using CCDB to obtain EMCal alignment."; + o2::ccdb::CcdbApi api; + map metadata; // can be empty + api.init("http://alice-ccdb.cern.ch"); + TObjArray* matrices = api.retrieveFromTFileAny(path, metadata, timestamp); + + for (int iSM = 0; iSM < mNumberOfSuperModules; ++iSM) { + TGeoHMatrix* mat = reinterpret_cast(matrices->At(iSM)); + if (mat) { + + SetMisalMatrix(mat, iSM); + } else { + LOG(info) << "Could not obtain Alignment Matrix for SM " << iSM; + } + } +} + Bool_t Geometry::IsDCALSM(Int_t iSupMod) const { if (mEMCSMSystem[iSupMod] == DCAL_STANDARD || mEMCSMSystem[iSupMod] == DCAL_EXT) { From e2f0fa3f1da3ecba8e1d446de4fd456fdd61b359 Mon Sep 17 00:00:00 2001 From: Roman Lietava Date: Mon, 19 May 2025 18:55:40 +0200 Subject: [PATCH 0533/1914] fix: suppressing excesive output from raw decoder (#14291) * fix: suppressing excesive output from raw decoder * clang * fix * fix --- .../include/DataFormatsCTP/Configuration.h | 2 +- .../Detectors/CTP/src/Configuration.cxx | 12 ++++- .../CTPReconstruction/RawDataDecoder.h | 10 ++-- .../CTP/reconstruction/src/RawDataDecoder.cxx | 32 ++++++------ .../include/CTPWorkflow/RawDecoderSpec.h | 11 ++++ Detectors/CTP/workflow/src/RawDecoderSpec.cxx | 50 +++++++++++++------ 6 files changed, 79 insertions(+), 38 deletions(-) diff --git a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h index fdd73986f1eaf..e9464089d71fc 100644 --- a/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h +++ b/DataFormats/Detectors/CTP/include/DataFormatsCTP/Configuration.h @@ -160,7 +160,7 @@ class CTPConfiguration const std::vector& getCTPClasses() const { return mCTPClasses; } // Read-only interface uint64_t getInputMask(const std::string& 
name) const; int getInputIndex(const std::string& name) const; - std::string getClassNameFromIndex(int index) { return mCTPClasses[index].name; }; + std::string getClassNameFromIndex(int index); std::string getClassNameFromHWIndex(int index); const CTPClass* getCTPClassFromHWIndex(const int index) const; bool isMaskInInputs(const uint64_t& mask) const; diff --git a/DataFormats/Detectors/CTP/src/Configuration.cxx b/DataFormats/Detectors/CTP/src/Configuration.cxx index 38a49132db3d1..61e51bcb20d91 100644 --- a/DataFormats/Detectors/CTP/src/Configuration.cxx +++ b/DataFormats/Detectors/CTP/src/Configuration.cxx @@ -780,6 +780,15 @@ int CTPConfiguration::getInputIndex(const std::string& name) const LOG(info) << "input:" << name << " index:" << index; return index; } +std::string CTPConfiguration::getClassNameFromIndex(int index) +{ + if (index < (int)mCTPClasses.size()) { + return mCTPClasses[index].name; + } else { + std::string name = "Cls" + std::to_string(index); + return name; + } +}; std::string CTPConfiguration::getClassNameFromHWIndex(int index) { for (auto& cls : mCTPClasses) { @@ -924,8 +933,9 @@ uint64_t CTPConfiguration::getTriggerClassMaskWInputsNoTrgDets() const exclude += cls.name.find("EMC") != std::string::npos; exclude += cls.name.find("TRD") != std::string::npos; exclude += cls.name.find("HMP") != std::string::npos; - if (!exclude) + if (!exclude) { clsmask |= cls.classMask; + } } return clsmask; } diff --git a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h index 8ebc7e0304561..53addf32c538f 100644 --- a/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h +++ b/Detectors/CTP/reconstruction/include/CTPReconstruction/RawDataDecoder.h @@ -60,7 +60,8 @@ class RawDataDecoder std::array getClassErrorsB() { return mClassErrorsB; } std::array getClassCountersA() { return mClassCountersA; } std::array getClassCountersB() { return 
mClassCountersB; } - int getLostDueToShift() { return mLostDueToShift; } + int getLostDueToShiftCls() { return mLostDueToShiftCC; } + int getLostDueToShiftInp() { return mLostDueToShiftInps; } private: static constexpr uint32_t TF_TRIGGERTYPE_MASK = 0x800; @@ -80,8 +81,8 @@ class RawDataDecoder gbtword80_t mTVXMask = 0x4; // TVX is 3rd input gbtword80_t mVBAMask = 0x20; // VBA is 6 th input bool mVerbose = false; - uint32_t mIRRejected = 0; - uint32_t mTCRRejected = 0; + int mIRRejected = 0; + int mTCRRejected = 0; bool mPadding = true; uint32_t mTFOrbit = 0; std::vector mTFOrbits; @@ -94,7 +95,8 @@ class RawDataDecoder std::array mClassErrorsB{}; // from inputs std::array mClassCountersA{}; std::array mClassCountersB{}; // from inputs - int mLostDueToShift = 0; + int mLostDueToShiftCC = 0; + int mLostDueToShiftInps = 0; CTPConfiguration mCTPConfig; }; } // namespace ctp diff --git a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx index b216f5ec54570..a062a262acf62 100644 --- a/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx +++ b/Detectors/CTP/reconstruction/src/RawDataDecoder.cxx @@ -296,12 +296,12 @@ int RawDataDecoder::decodeRaw(o2::framework::InputRecord& inputs, std::vector& digitsMap, o2::pmr::vector& digits, uint32_t TFOrbit, uint64_t trgclassmask) { // int nClasswoInp = 0; // counting classes without input which should never happen + int lost = 0; std::map digitsMapShifted; auto L0shift = o2::ctp::TriggerOffsetsParam::Instance().LM_L0; auto L1shift = L0shift + o2::ctp::TriggerOffsetsParam::Instance().L0_L1; @@ -551,7 +554,7 @@ int RawDataDecoder::shiftInputs(std::map& digit if (lut == 0 || lut == 1) { // no inps or LM digitsMapShifted[dig.first] = dig.second; } else if (lut == 2) { // L0 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); if (dig.second.CTPClassMask.count()) { // LOG(error) << 
"Adding class mask without input ?"; // This is not needed as it can happen; Full checj done below - see next LOG(error) @@ -559,30 +562,30 @@ int RawDataDecoder::shiftInputs(std::map& digit digitsMapShifted[dig.first] = digi; } } else if (lut == 4) { // L1 - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); if (dig.second.CTPClassMask.count()) { CTPDigit digi = {dig.first, 0, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } } else if (lut == 6) { // L0 and L1 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); if (dig.second.CTPClassMask.count()) { CTPDigit digi = {dig.first, 0, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } } else if (lut == 3) { // LM and L0 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); CTPDigit digi = {dig.first, inpmask & (~L0MASKInputs), dig.second.CTPClassMask}; // if LM level do not need to add class as LM is not shifted; digitsMapShifted[dig.first] = digi; } else if (lut == 5) { // LM and L1 - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); CTPDigit digi = {dig.first, inpmask & (~L1MASKInputs), dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } else if (lut == 7) { // LM and L0 and L1 - shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); - shiftNew(dig.first, TFOrbit, inpmask, L1shift, 1, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, L0shift, 0, digitsMapShifted); + lost += shiftNew(dig.first, TFOrbit, inpmask, 
L1shift, 1, digitsMapShifted); CTPDigit digi = {dig.first, inpmaskLM, dig.second.CTPClassMask}; digitsMapShifted[dig.first] = digi; } else { @@ -592,7 +595,7 @@ int RawDataDecoder::shiftInputs(std::map& digit for (auto const& dig : digitsMapShifted) { digits.push_back(dig.second); } - return 0; + return lost; } // int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, uint64_t trgclassmask, uint64_t trgclassmaskNoTrgDet) @@ -654,16 +657,13 @@ int RawDataDecoder::checkReadoutConsistentncy(o2::pmr::vector& digits, mClassErrorsB[cls.getIndex()]++; ret = 256; } else { - mLostDueToShift++; + mLostDueToShiftCC++; } } } } } } - if (mLostDueToShift) { - LOG(debug) << "LOST classes because of shift:" << mLostDueToShift; - } return ret; } // diff --git a/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h b/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h index a5a1a75a0b594..3198e5c33e219 100644 --- a/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h +++ b/Detectors/CTP/workflow/include/CTPWorkflow/RawDecoderSpec.h @@ -74,6 +74,17 @@ class RawDecoderSpec : public framework::Task std::deque mHistoryT; std::deque mHistoryV; RawDataDecoder mDecoder; + // Errors + int mLostDueToShiftInps = 0; + int mErrorIR = 0; + int mErrorTCR = 0; + int mIRRejected = 0; + int mTCRRejected = 0; + std::array mClsEA{}; + std::array mClsEB{}; // from inputs + std::array mClsA{}; + std::array mClsB{}; // from inputs + bool mCheckConsistency = false; }; /// \brief Creating DataProcessorSpec for the CTP diff --git a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx index 2df6bc981ce44..041e6cb472ebb 100644 --- a/Detectors/CTP/workflow/src/RawDecoderSpec.cxx +++ b/Detectors/CTP/workflow/src/RawDecoderSpec.cxx @@ -26,6 +26,8 @@ using namespace o2::ctp::reco_workflow; void RawDecoderSpec::init(framework::InitContext& ctx) { + mCheckConsistency = ctx.options().get("check-consistency"); + 
mDecoder.setCheckConsistency(mCheckConsistency); mDecodeinputs = ctx.options().get("ctpinputs-decoding"); mDecoder.setDecodeInps(mDecodeinputs); mNTFToIntegrate = ctx.options().get("ntf-to-average"); @@ -43,7 +45,7 @@ void RawDecoderSpec::init(framework::InitContext& ctx) mOutputLumiInfo.inp2 = inp2; mMaxInputSize = ctx.options().get("max-input-size"); mMaxInputSizeFatal = ctx.options().get("max-input-size-fatal"); - LOG(info) << "CTP reco init done. Inputs decoding here:" << mDecodeinputs << " DoLumi:" << mDoLumi << " DoDigits:" << mDoDigits << " NTF:" << mNTFToIntegrate << " Lumi inputs:" << lumiinp1 << ":" << inp1 << " " << lumiinp2 << ":" << inp2 << " Max errors:" << maxerrors << " Max input size:" << mMaxInputSize << " MaxInputSizeFatal:" << mMaxInputSizeFatal; + LOG(info) << "CTP reco init done. Inputs decoding here:" << mDecodeinputs << " DoLumi:" << mDoLumi << " DoDigits:" << mDoDigits << " NTF:" << mNTFToIntegrate << " Lumi inputs:" << lumiinp1 << ":" << inp1 << " " << lumiinp2 << ":" << inp2 << " Max errors:" << maxerrors << " Max input size:" << mMaxInputSize << " MaxInputSizeFatal:" << mMaxInputSizeFatal << " CheckConsistency:" << mCheckConsistency; // mOutputLumiInfo.printInputs(); } void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) @@ -69,22 +71,22 @@ void RawDecoderSpec::endOfStream(framework::EndOfStreamContext& ec) o0 = TFOrbits[i]; } std::cout << std::endl; - LOG(info) << " Lost due to the shift:" << mDecoder.getLostDueToShift(); - LOG(info) << "Number of missing TF:" << nmiss << std::endl; - if (mDecoder.getErrorIR() || mDecoder.getErrorTCR()) { - LOG(error) << "# of IR errors:" << mDecoder.getErrorIR() << " TCR errors:" << mDecoder.getErrorTCR() << std::endl; + LOG(info) << "Number of non continous TF:" << nmiss << std::endl; + LOG(info) << "Lost in shiftInputs:" << mLostDueToShiftInps; + LOG(info) << "Lost in addDigit Inputs:" << mIRRejected << " Classes:" << mTCRRejected; + if (mErrorIR || mErrorTCR) { + LOG(error) << "# of 
IR errors:" << mErrorIR << " TCR errors:" << mErrorTCR << std::endl; } - std::array clsA = mDecoder.getClassCountersA(); - std::array clsB = mDecoder.getClassCountersB(); - std::array clsEA = mDecoder.getClassErrorsA(); - std::array clsEB = mDecoder.getClassErrorsB(); - - for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { - bool print = clsA[i] > 0 || clsB[i] > 0 || clsEA[i] > 0 || clsEB[i] > 0; - if (clsEA[i]) { - LOG(error) << " Class without inputs:"; + if (mCheckConsistency) { + LOG(info) << "Lost due to the shift Consistency Checker:" << mDecoder.getLostDueToShiftCls(); + auto ctpcfg = mDecoder.getCTPConfig(); + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + std::string name = ctpcfg.getClassNameFromIndex(i); + if (mClsEA[i]) { + LOG(error) << " Class without inputs:"; + } + LOG(important) << "CLASS:" << name << ":" << i << " Cls=>Inp:" << mClsA[i] << " Inp=>Cls:" << mClsB[i] << " ErrorsCls=>Inps:" << mClsEA[i] << " MissingInps=>Cls:" << mClsEB[i]; } - LOG(important) << "CLASS:" << i << " Cls=>Inp:" << clsA[i] << " Inp=>Cls:" << clsB[i] << " ErrorsCls=>Inps:" << clsEA[i] << " MissingInps=>Cls:" << clsEB[i]; } } void RawDecoderSpec::run(framework::ProcessingContext& ctx) @@ -161,6 +163,21 @@ void RawDecoderSpec::run(framework::ProcessingContext& ctx) if (mDoDigits) { LOG(info) << "[CTPRawToDigitConverter - run] Writing " << mOutputDigits.size() << " digits. 
IR rejected:" << mDecoder.getIRRejected() << " TCR rejected:" << mDecoder.getTCRRejected(); ctx.outputs().snapshot(o2::framework::Output{"CTP", "DIGITS", 0}, mOutputDigits); + mLostDueToShiftInps += mDecoder.getLostDueToShiftInp(); + mErrorIR += mDecoder.getErrorIR(); + mErrorTCR += mDecoder.getErrorTCR(); + mIRRejected += mDecoder.getIRRejected(); + mTCRRejected += mDecoder.getTCRRejected(); + auto clsEA = mDecoder.getClassErrorsA(); + auto clsEB = mDecoder.getClassErrorsB(); + auto cntCA = mDecoder.getClassCountersA(); + auto cntCB = mDecoder.getClassCountersB(); + for (int i = 0; i < o2::ctp::CTP_NCLASSES; i++) { + mClsEA[i] += clsEA[i]; + mClsEB[i] += clsEB[i]; + mClsA[i] += cntCA[i]; + mClsB[i] += cntCB[i]; + } } if (mDoLumi) { uint32_t tfCountsT = 0; @@ -236,7 +253,8 @@ o2::framework::DataProcessorSpec o2::ctp::reco_workflow::getRawDecoderSpec(bool {"lumi-inp2", o2::framework::VariantType::String, "VBA", {"The second input used for online lumi. Name in capital."}}, {"use-verbose-mode", o2::framework::VariantType::Bool, false, {"Verbose logging"}}, {"max-input-size", o2::framework::VariantType::Int, 0, {"Do not process input if bigger than max size, 0 - do not check"}}, - {"max-input-size-fatal", o2::framework::VariantType::Bool, false, {"If true issue fatal error otherwise error on;y"}}, + {"max-input-size-fatal", o2::framework::VariantType::Bool, false, {"If true issue fatal error otherwise error only"}}, + {"check-consistency", o2::framework::VariantType::Bool, false, {"If true checks digits consistency using ctp config"}}, {"ctpinputs-decoding", o2::framework::VariantType::Bool, false, {"Inputs alignment: true - raw decoder - has to be compatible with CTF decoder: allowed options: 10,01,00"}}}}; } void RawDecoderSpec::updateTimeDependentParams(framework::ProcessingContext& pc) From 769ba3364776be99859990b6523814297d207aec Mon Sep 17 00:00:00 2001 From: swenzel Date: Mon, 19 May 2025 17:58:22 +0200 Subject: [PATCH 0534/1914] Fix units for 
GeneratorFromEventPool --- Generators/src/GeneratorFromFile.cxx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Generators/src/GeneratorFromFile.cxx b/Generators/src/GeneratorFromFile.cxx index e37a3886c24e1..66f7e03a4cf15 100644 --- a/Generators/src/GeneratorFromFile.cxx +++ b/Generators/src/GeneratorFromFile.cxx @@ -385,6 +385,11 @@ GeneratorFromEventPool::GeneratorFromEventPool(EventPoolGenConfig const& pars) : bool GeneratorFromEventPool::Init() { + // this simply passes tracks trough. Leave units intact. + setTimeUnit(1.); + setPositionUnit(1.); + setEnergyUnit(1.); + // initialize the event pool if (mConfig.rngseed > 0) { mRandomEngine.seed(mConfig.rngseed); @@ -588,4 +593,4 @@ std::vector GeneratorFromEventPool::setupFileUniverse(std::string c ClassImp(o2::eventgen::GeneratorFromEventPool); ClassImp(o2::eventgen::GeneratorFromFile); -ClassImp(o2::eventgen::GeneratorFromO2Kine); \ No newline at end of file +ClassImp(o2::eventgen::GeneratorFromO2Kine); From 0a9fbfa7809b174632895e1e804ab0ae42c0e2f3 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 20 May 2025 10:39:14 +0200 Subject: [PATCH 0535/1914] DPL Analysis: fix ineffective function for Builds<> (#14297) --- Framework/Core/include/Framework/AnalysisManagers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index 1d894b2b67948..dfec2256875c9 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -294,7 +294,7 @@ bool prepareOutput(ProcessingContext& context, T& spawns) } template -bool prepareOuput(ProcessingContext& context, T& builds) +bool prepareOutput(ProcessingContext& context, T& builds) { using metadata = o2::aod::MetadataTrait>::metadata; return builds.template build(builds.pack(), extractOriginals(context)); From 128a030847822127c42ba6e92f606d2f87b55409 Mon Sep 17 00:00:00 2001 
From: Sergio Garcia <47090312+singiamtel@users.noreply.github.com> Date: Tue, 20 May 2025 11:19:39 +0200 Subject: [PATCH 0536/1914] Github Actions: Setup dependabot (#14292) Related: https://github.com/AliceO2Group/O2Physics/pull/10660 --- .github/dependabot.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000000..30ad6d8f005b3 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +--- +# Dependabot configuration +# Reference: https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "github-actions" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" From dbf8b73a42164b608f91ed2095f8550788672dea Mon Sep 17 00:00:00 2001 From: Daniel Battistini <60930860+danielbattistini@users.noreply.github.com> Date: Tue, 20 May 2025 18:06:53 +0200 Subject: [PATCH 0537/1914] Fix the thickness of logical volumes for kTurboStaves and kStaggered configurations of the tracker (#14268) --- .../TRK/simulation/include/TRKSimulation/TRKLayer.h | 3 +++ Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h index ef355ec36ce2f..ba894f6d7a92b 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/include/TRKSimulation/TRKLayer.h @@ -45,6 +45,9 @@ class TRKLayer void createLayer(TGeoVolume* motherVolume); private: + // TGeo objects outside logical volumes can cause errors. 
Only used in case of kStaggered and kTurboStaves layouts + static constexpr float mLogicalVolumeThickness = 1; + int mLayerNumber; std::string mLayerName; float mInnerRadius; diff --git a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx index e6b00f6e96425..a95418afbba25 100644 --- a/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx +++ b/Detectors/Upgrades/ALICE3/TRK/simulation/src/TRKLayer.cxx @@ -120,7 +120,7 @@ TGeoVolume* TRKLayer::createStave(std::string type, double width) staveVol->AddNode(chipVol, 1, nullptr); } else if (type == "staggered") { double width = mModuleWidth * 2; // Each stave has two modules (based on the LOI design) - stave = new TGeoBBox(width / 2, mChipThickness / 2, mZ / 2); + stave = new TGeoBBox(width / 2, mLogicalVolumeThickness / 2, mZ / 2); TGeoVolume* chipVolLeft = createChip("flat", mModuleWidth); TGeoVolume* chipVolRight = createChip("flat", mModuleWidth); staveVol = new TGeoVolume(staveName.c_str(), stave, medAir); @@ -152,7 +152,11 @@ void TRKLayer::createLayer(TGeoVolume* motherVolume) chipName = o2::trk::GeometryTGeo::getTRKChipPattern() + std::to_string(mLayerNumber), sensName = Form("%s%d", GeometryTGeo::getTRKSensorPattern(), mLayerNumber); - TGeoTube* layer = new TGeoTube(mInnerRadius, mInnerRadius + mChipThickness, mZ / 2); + double layerThickness = mChipThickness; + if (mLayout != eLayout::kCylinder) { + layerThickness = mLogicalVolumeThickness; + } + TGeoTube* layer = new TGeoTube(mInnerRadius - 0.333 * layerThickness, mInnerRadius + 0.667 * layerThickness, mZ / 2); TGeoVolume* layerVol = new TGeoVolume(mLayerName.c_str(), layer, medAir); layerVol->SetLineColor(kYellow); From 930d83707083b9916ee60c18fa1680e75f8ff5f8 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Tue, 20 May 2025 19:27:52 +0200 Subject: [PATCH 0538/1914] DPL Analysis: move spawner caches outside of the spawner function (#14281) --- .../Core/include/Framework/AnalysisHelpers.h 
| 12 ++++- .../Core/include/Framework/AnalysisManagers.h | 14 ++++- .../Core/include/Framework/TableBuilder.h | 53 +++++-------------- Framework/Core/src/AODReaderHelpers.cxx | 8 ++- Framework/Core/test/test_TableSpawner.cxx | 7 ++- 5 files changed, 45 insertions(+), 49 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisHelpers.h b/Framework/Core/include/Framework/AnalysisHelpers.h index 55d2490dff1bc..95be6c7e407b3 100644 --- a/Framework/Core/include/Framework/AnalysisHelpers.h +++ b/Framework/Core/include/Framework/AnalysisHelpers.h @@ -297,6 +297,7 @@ struct Spawns : decltype(transformBase()) { using extension_t = typename metadata::extension_table_t; using base_table_t = typename metadata::base_table_t; using expression_pack_t = typename metadata::expression_pack_t; + static constexpr size_t N = framework::pack_size(expression_pack_t{}); constexpr auto pack() { @@ -318,7 +319,13 @@ struct Spawns : decltype(transformBase()) { } std::shared_ptr table = nullptr; std::shared_ptr extension = nullptr; + std::array projectors = [](framework::pack) -> std::array + { + return {{std::move(C::Projector())...}}; + } + (expression_pack_t{}); std::shared_ptr projector = nullptr; + std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); }; template @@ -365,6 +372,7 @@ struct Defines : decltype(transformBase()) { std::array projectors; std::shared_ptr projector = nullptr; + std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); }; template @@ -828,8 +836,10 @@ template auto Extend(T const& table) { using output_t = Join, o2::aod::Hash<"JOIN/0"_h>, o2::aod::Hash<"JOIN"_h>, Cs...>>; + static std::array projectors{{std::move(Cs::Projector())...}}; static std::shared_ptr projector = nullptr; - return output_t{{o2::framework::spawner(framework::pack{}, {table.asArrowTable()}, "dynamicExtension", projector), table.asArrowTable()}, 0}; + static auto schema = 
std::make_shared(o2::soa::createFieldsFromColumns(framework::pack{})); + return output_t{{o2::framework::spawner(framework::pack{}, {table.asArrowTable()}, "dynamicExtension", projectors.data(), projector, schema), table.asArrowTable()}, 0}; } /// Template function to attach dynamic columns on-the-fly (e.g. inside diff --git a/Framework/Core/include/Framework/AnalysisManagers.h b/Framework/Core/include/Framework/AnalysisManagers.h index dfec2256875c9..2a052c0b07218 100644 --- a/Framework/Core/include/Framework/AnalysisManagers.h +++ b/Framework/Core/include/Framework/AnalysisManagers.h @@ -287,8 +287,13 @@ bool prepareOutput(ProcessingContext& context, T& spawns) using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } + using D = o2::aod::Hash; - spawns.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), spawns.projector)); + spawns.extension = std::make_shared(o2::framework::spawner(originalTable, + o2::aod::label(), + spawns.projectors.data(), + spawns.projector, + spawns.schema)); spawns.table = std::make_shared(soa::ArrowHelpers::joinTables({spawns.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } @@ -309,8 +314,13 @@ bool prepareOutput(ProcessingContext& context, T& defines) using base_table_t = typename T::base_table_t::table_t; originalTable = makeEmptyTable(o2::aod::label()); } + using D = o2::aod::Hash; - defines.extension = std::make_shared(o2::framework::spawner>(originalTable, o2::aod::label(), defines.projectors.data(), defines.projector)); + defines.extension = std::make_shared(o2::framework::spawner(originalTable, + o2::aod::label(), + defines.projectors.data(), + defines.projector, + defines.schema)); defines.table = std::make_shared(soa::ArrowHelpers::joinTables({defines.extension->asArrowTable(), originalTable}, std::span{T::spawnable_t::table_t::originalLabels})); return true; } diff --git 
a/Framework/Core/include/Framework/TableBuilder.h b/Framework/Core/include/Framework/TableBuilder.h index e2d12789ef922..f941bf29bd8c8 100644 --- a/Framework/Core/include/Framework/TableBuilder.h +++ b/Framework/Core/include/Framework/TableBuilder.h @@ -768,80 +768,51 @@ std::shared_ptr spawnerHelper(std::shared_ptr const& /// Expression-based column generator to materialize columns template requires(soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) +auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, placeholders_pack_t{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); + return spawnerHelper(fullTable, schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); } template requires(soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector) +auto spawner(std::vector>&& tables, const char* name, o2::framework::expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { - using placeholders_pack_t = typename o2::aod::MetadataTrait::metadata::placeholders_pack_t; - if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, placeholders_pack_t{}); - } - static 
auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(placeholders_pack_t{})); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(placeholders_pack_t{}), projectors, name, projector); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); + return spawner(fullTable, name, projectors, projector, schema); } template requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::vector>&& tables, const char* name, std::shared_ptr& projector) +auto spawner(std::shared_ptr const& fullTable, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, expression_pack_t{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); - - auto projectors = [](framework::pack) -> std::array - { - return {{std::move(C::Projector())...}}; - } - (expression_pack_t{}); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); + return spawnerHelper(fullTable, schema, framework::pack_size(expression_pack_t{}), projectors, name, projector); } template requires(soa::has_extension::metadata> && !soa::has_configurable_extension::metadata>) -auto spawner(std::shared_ptr const& fullTable, const char* name, std::shared_ptr& projector) +auto spawner(std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { - using expression_pack_t = typename o2::aod::MetadataTrait::metadata::expression_pack_t; - 
if (fullTable->num_rows() == 0) { - return makeEmptyTable(name, expression_pack_t{}); - } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(expression_pack_t{})); - auto projectors = [](framework::pack) -> std::array - { - return {{std::move(C::Projector())...}}; - } - (expression_pack_t{}); - - return spawnerHelper(fullTable, new_schema, framework::pack_size(expression_pack_t{}), projectors.data(), name, projector); + auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{o2::aod::MetadataTrait::metadata::base_table_t::originalLabels}); + return spawner(fullTable, name, projectors, projector, schema); } template -auto spawner(framework::pack columns, std::vector>&& tables, const char* name, std::shared_ptr& projector) +auto spawner(framework::pack, std::vector>&& tables, const char* name, expressions::Projector* projectors, std::shared_ptr& projector, std::shared_ptr const& schema) { std::array labels{"original"}; auto fullTable = soa::ArrowHelpers::joinTables(std::move(tables), std::span{labels}); if (fullTable->num_rows() == 0) { return makeEmptyTable(name, framework::pack{}); } - static auto new_schema = std::make_shared(o2::soa::createFieldsFromColumns(columns)); - std::array projectors{{std::move(C::Projector())...}}; - return spawnerHelper(fullTable, new_schema, sizeof...(C), projectors.data(), name, projector); + return spawnerHelper(fullTable, schema, sizeof...(C), projectors, name, projector); } template diff --git a/Framework/Core/src/AODReaderHelpers.cxx b/Framework/Core/src/AODReaderHelpers.cxx index c413f2520919d..4dbd2877476be 100644 --- a/Framework/Core/src/AODReaderHelpers.cxx +++ b/Framework/Core/src/AODReaderHelpers.cxx @@ -158,7 +158,13 @@ auto make_spawn(InputSpec const& input, ProcessingContext& pc) using metadata_t = o2::aod::MetadataTrait::metadata; constexpr auto sources = metadata_t::sources; static std::shared_ptr projector = nullptr; - return o2::framework::spawner(extractOriginals(pc), 
input.binding.c_str(), projector); + static std::shared_ptr schema = std::make_shared(o2::soa::createFieldsFromColumns(typename metadata_t::expression_pack_t{})); + static auto projectors = [](framework::pack) -> std::array + { + return {{std::move(C::Projector())...}}; + } + (typename metadata_t::expression_pack_t{}); + return o2::framework::spawner(extractOriginals(pc), input.binding.c_str(), projectors.data(), projector, schema); } } // namespace diff --git a/Framework/Core/test/test_TableSpawner.cxx b/Framework/Core/test/test_TableSpawner.cxx index 2291ba5f4f787..e200adf37ccb4 100644 --- a/Framework/Core/test/test_TableSpawner.cxx +++ b/Framework/Core/test/test_TableSpawner.cxx @@ -50,10 +50,9 @@ TEST_CASE("TestTableSpawner") auto t1 = b1.finalize(); Points st1{t1}; - std::shared_ptr projector = nullptr; - auto expoints_a = o2::soa::Extend(st1); - auto extension = ExPointsExtension{o2::framework::spawner>(t1, o2::aod::Hash<"ExPoints"_h>::str, projector)}; + Spawns s; + auto extension = ExPointsExtension{o2::framework::spawner>(t1, o2::aod::Hash<"ExPoints"_h>::str, s.projectors.data(), s.projector, s.schema)}; auto expoints = ExPoints{{t1, extension.asArrowTable()}, 0}; REQUIRE(expoints_a.size() == 9); @@ -81,7 +80,7 @@ TEST_CASE("TestTableSpawner") Defines excpts; excpts.projectors[0] = test::x * test::x + test::y * test::y + test::z * test::z; - auto extension_2 = ExcPointsCfgExtension{o2::framework::spawner>({t1}, o2::aod::Hash<"ExcPoints"_h>::str, excpts.projectors.data(), excpts.projector)}; + auto extension_2 = ExcPointsCfgExtension{o2::framework::spawner>({t1}, o2::aod::Hash<"ExcPoints"_h>::str, excpts.projectors.data(), excpts.projector, excpts.schema)}; auto excpoints = ExcPoints{{t1, extension_2.asArrowTable()}, 0}; rex = extension.begin(); From 23781677b66c802d8f8ea8e2dbb390b425d91bec Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 20 May 2025 23:59:09 +0200 Subject: [PATCH 0539/1914] GPU TPC NN Clusterizer: Fix compilation without ONNX --- 
GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx index 6c4e60a6025e1..64e6f5a31aaa7 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx @@ -1198,6 +1198,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) } } for (int32_t i = 0; i < GetProcessingSettings().nTPCClustererLanes; i++) { +#ifdef GPUCA_HAS_ONNX if (GetProcessingSettings().nn.applyNNclusterizer) { LOG(info) << "(ORT) Environment releasing..."; GPUTPCNNClusterizerHost& nnApplication = nnApplications[i]; @@ -1205,6 +1206,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput) nnApplication.mModelReg1.release(true); nnApplication.mModelReg2.release(true); } +#endif if (transferRunning[i]) { ReleaseEvent(mEvents->stream[i], doGPU); } From 546f79341f0407ed993b08046b97ef3f7a34e8cd Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 21 May 2025 16:06:32 +0200 Subject: [PATCH 0540/1914] DPL: fix error reporting (#14306) Any oldest possible timeframe message was accounted as error. Maybe we should simply drop the metric... 
--- Framework/Core/src/DataProcessingDevice.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/src/DataProcessingDevice.cxx b/Framework/Core/src/DataProcessingDevice.cxx index ae25d8d3a915c..c303af4858234 100644 --- a/Framework/Core/src/DataProcessingDevice.cxx +++ b/Framework/Core/src/DataProcessingDevice.cxx @@ -2107,7 +2107,7 @@ void DataProcessingDevice::handleData(ServiceRegistryRef ref, InputChannelInfo& LOGP(debug, "Got DomainInfoHeader, new oldestPossibleTimeslice {} on channel {}", oldestPossibleTimeslice, info.id.value); parts.At(headerIndex).reset(nullptr); parts.At(payloadIndex).reset(nullptr); - } + } break; case InputType::Invalid: { reportError("Invalid part found."); } break; From 981cd40881706e1cf56a2e99d2778c683835153f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Tue, 20 May 2025 16:07:57 +0200 Subject: [PATCH 0541/1914] GPU: Add possibility to dump raw data in case of error --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 + GPU/GPUTracking/Base/GPUReconstruction.h | 11 + .../Base/GPUReconstructionDebug.cxx | 188 ++++++++++++++++++ .../Base/GPUReconstructionLibrary.cxx | 2 +- GPU/GPUTracking/CMakeLists.txt | 1 + GPU/GPUTracking/Definitions/GPUSettingsList.h | 8 + GPU/GPUTracking/Global/GPUChainTracking.cxx | 28 ++- GPU/GPUTracking/Global/GPUChainTracking.h | 7 +- .../GPUChainTrackingDebugAndProfiling.cxx | 24 ++- GPU/GPUTracking/Global/GPUChainTrackingIO.cxx | 81 ++++---- GPU/GPUTracking/Global/GPUErrorCodes.h | 1 + GPU/GPUTracking/Global/GPUErrors.cxx | 8 +- GPU/GPUTracking/Global/GPUErrors.h | 2 +- 13 files changed, 313 insertions(+), 50 deletions(-) create mode 100644 GPU/GPUTracking/Base/GPUReconstructionDebug.cxx diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index c76bf11c3e25d..a4e5d5e1189f5 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -193,6 +193,7 @@ int32_t GPUReconstruction::Init() } 
mSlaves[i]->ClearAllocatedMemory(); } + debugInit(); return 0; } @@ -469,6 +470,7 @@ int32_t GPUReconstruction::Exit() if (mInitialized) { ExitDevice(); } + debugExit(); mInitialized = false; return 0; } diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index d5c0b8e828087..e0c866fd9421b 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -239,6 +240,9 @@ class GPUReconstruction virtual void PrintKernelOccupancies() {} double GetStatKernelTime() { return mStatKernelTime; } double GetStatWallTime() { return mStatWallTime; } + void setDebugDumpCallback(std::function&& callback = std::function(nullptr)); + bool triggerDebugDump(); + std::string getDebugFolder(const std::string& prefix = ""); // empty string = no debug // Threading std::shared_ptr mThreading; @@ -407,6 +411,13 @@ class GPUReconstruction }; static std::shared_ptr sLibCUDA, sLibHIP, sLibOCL; + // Debugging + struct debugInternal; + static std::unique_ptr mDebugData; + bool mDebugEnabled = false; + void debugInit(); + void debugExit(); + static GPUReconstruction* GPUReconstruction_Create_CPU(const GPUSettingsDeviceBackend& cfg); }; diff --git a/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx b/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx new file mode 100644 index 0000000000000..c1c31eedde1b2 --- /dev/null +++ b/GPU/GPUTracking/Base/GPUReconstructionDebug.cxx @@ -0,0 +1,188 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". 
+// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +/// \file GPUReconstructionDebug.cxx +/// \author David Rohr + +#include "GPUReconstruction.h" +#include "GPULogging.h" +#include "GPUSettings.h" + +#include +#include +#include +#include +#include +#include +#include + +using namespace o2::gpu; + +struct GPUReconstruction::debugInternal { + std::function signalCallback; + std::function debugCallback = nullptr; + std::function reinstallCallback = nullptr; + std::unordered_map oldActions; + size_t debugCount = 0; + static void globalCallback(int32_t signal, siginfo_t* info, void* ucontext) + { + GPUReconstruction::mDebugData->signalCallback(signal, info, ucontext); + } +}; + +std::unique_ptr GPUReconstruction::mDebugData; + +void GPUReconstruction::debugInit() +{ + if (GetProcessingSettings().debugOnFailure) { + static std::mutex initMutex; + { + std::lock_guard guard(initMutex); + if (mDebugData) { + GPUFatal("Error handlers for debug dumps already set, cannot set them again"); + } + mDebugData = std::make_unique(); + } + mDebugEnabled = true; + if ((GetProcessingSettings().debugOnFailure & 1) || (GetProcessingSettings().debugOnFailure & 2)) { + struct sigaction sa, oldsa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = GPUReconstruction::debugInternal::globalCallback; + sa.sa_flags = SA_SIGINFO; + uint32_t mask = GetProcessingSettings().debugOnFailureSignalMask == (uint32_t)-1 ? 
((1 << SIGINT) | (1 << SIGABRT) | (1 << SIGBUS) | (1 << SIGTERM) | (1 << SIGSEGV)) : GetProcessingSettings().debugOnFailureSignalMask; + if (mask) { + for (uint32_t i = 0; i < sizeof(mask) * 8; i++) { + if (mask & (1 << i)) { + if (sigaction(i, &sa, &oldsa)) { + GPUFatal("Error installing signal handler for error dump on signal %d", i); + } + mDebugData->oldActions.emplace(i, oldsa); + } + } + } + + mDebugData->signalCallback = [this, &oldActions = mDebugData->oldActions, myAction = std::move(sa)](int32_t signal, siginfo_t* info, void* ucontext) { + static std::mutex callbackMutex; + std::lock_guard guard(callbackMutex); + if (mDebugData->debugCallback) { + GPUInfo("Running debug callback for signal %d", signal); + mDebugData->debugCallback(); + mDebugData->debugCount++; + } + mDebugData->debugCallback = nullptr; + if (!GetProcessingSettings().debugOnFailureNoForwardSignal) { + sigaction(signal, &oldActions[signal], nullptr); + raise(signal); + mDebugData->reinstallCallback = [signal, myAction]() { sigaction(signal, &myAction, nullptr); }; + } + }; + } + } +} + +void GPUReconstruction::debugExit() +{ + if (!mDebugEnabled) { + return; + } + if (mDebugData) { + for (auto& it : mDebugData->oldActions) { + if (sigaction(it.first, &it.second, nullptr)) { + GPUFatal("Error restoring signal handler for signal %d", it.first); + } + } + } + mDebugEnabled = false; +} + +void GPUReconstruction::setDebugDumpCallback(std::function&& callback) +{ + if (mMaster) { + if (mDebugData->reinstallCallback) { + mDebugData->reinstallCallback(); + mDebugData->reinstallCallback = nullptr; + } + mMaster->setDebugDumpCallback(std::move(callback)); + } else if (mDebugEnabled && mDebugData) { + mDebugData->debugCallback = callback; + } +} + +std::string GPUReconstruction::getDebugFolder(const std::string& prefix) +{ + const std::filesystem::path target_dir = GetProcessingSettings().debugOnFailureDirectory; + + std::size_t total_size = 0; + std::size_t subfolder_count = 0; + + if 
(!std::filesystem::exists(target_dir) || !std::filesystem::is_directory(target_dir)) { + GPUError("Invalid debugOnFailureDirectory %s", GetProcessingSettings().debugOnFailureDirectory.c_str()); + return ""; + } + + for (const auto& entry : std::filesystem::directory_iterator(target_dir)) { + if (entry.is_directory()) { + subfolder_count++; + + for (const auto& subentry : std::filesystem::directory_iterator(entry.path())) { + if (subentry.is_regular_file()) { + std::error_code ec; + auto size = std::filesystem::file_size(subentry.path(), ec); + if (!ec) { + total_size += size; + } + } + } + } + } + + if ((GetProcessingSettings().debugOnFailureMaxFiles && subfolder_count >= GetProcessingSettings().debugOnFailureMaxFiles) || (GetProcessingSettings().debugOnFailureMaxSize && (total_size >> 30) >= GetProcessingSettings().debugOnFailureMaxSize)) { + GPUError("Cannot store debug dump files, target storage exceeded: %zu dumps, %zu bytes", subfolder_count, total_size); + return ""; + } + + auto currentTime = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + std::ostringstream dateTime; + dateTime << std::put_time(std::localtime(&currentTime), "%Y-%m-%d_%H-%M-%S"); + + int32_t attempt = 0; + std::string outname; + while (true) { + if (attempt++ >= 512) { + GPUError("Error creating debug dump folder"); + return ""; + } + + outname = GetProcessingSettings().debugOnFailureDirectory + "/debug_" + prefix + (prefix == "" ? 
"" : "_") + dateTime.str() + "_" + std::to_string(attempt); + std::error_code ec; + bool created = std::filesystem::create_directory(outname, ec); + if (!ec && created) { + break; + } + } + + GPUInfo("Debug dump to %s", outname.c_str()); + return outname; +} + +bool GPUReconstruction::triggerDebugDump() +{ + if (mMaster) { + return mMaster->triggerDebugDump(); + } else if (mDebugEnabled && mDebugData && mDebugData->debugCallback) { + GPUInfo("Running triggered debug callback"); + mDebugData->debugCallback(); + mDebugData->debugCount++; + mDebugData->debugCallback = nullptr; + return true; + } + return false; +} diff --git a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx index 89517c612403b..64184dd724acd 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionLibrary.cxx @@ -9,7 +9,7 @@ // granted to it by virtue of its status as an Intergovernmental Organization // or submit itself to any jurisdiction. 
-/// \file GPUReconstruction.cxx +/// \file GPUReconstructionLibrary.cxx /// \author David Rohr #ifdef _WIN32 diff --git a/GPU/GPUTracking/CMakeLists.txt b/GPU/GPUTracking/CMakeLists.txt index 52848692e7516..1b108bc74190d 100644 --- a/GPU/GPUTracking/CMakeLists.txt +++ b/GPU/GPUTracking/CMakeLists.txt @@ -95,6 +95,7 @@ set(SRCS_NO_CINT set(SRCS_NO_H SectorTracker/GPUTPCTrackerDump.cxx Merger/GPUTPCGMMergerDump.cxx Base/GPUReconstructionLibrary.cxx + Base/GPUReconstructionDebug.cxx Global/GPUChainTrackingClusterizer.cxx Global/GPUChainTrackingTransformation.cxx Global/GPUChainTrackingTRD.cxx diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index b9be1db881816..12f40cda4c398 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -360,6 +360,14 @@ AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options") AddOption(printSettings, bool, false, "", 0, "Print all settings when initializing") AddOption(tpcFreeAllocatedMemoryAfterProcessing, bool, false, "", 0, "Clean all memory allocated by TPC when TPC processing done, only data written to external output resources will remain") +AddOption(debugOnFailure, int32_t, 0, "", 0, "Dump raw data in case an error occured, bit 1 enables all dumps, otherwise bitmask for: 2 = signal, 3 = GPUErrorCode", def(1)) +AddOption(debugOnFailureSignalMask, uint32_t, (uint32_t)-1, "", 0, "Mask of signals that trigger debug / dump") +AddOption(debugOnFailureErrorMask, uint64_t, (uint64_t)-1, "", 0, "Mask of GPUCA_ERRORS that trigger debug / dump") +AddOption(debugOnFailureNoForwardSignal, bool, false, "", 0, "Do not forward signal to original signal handler") +AddOption(debugOnFailureMaxN, uint32_t, 1, "", 0, "Max number of times to run the debug / dump") 
+AddOption(debugOnFailureMaxFiles, uint32_t, 0, "", 0, "Max number of files to have in the target folder") +AddOption(debugOnFailureMaxSize, uint32_t, 0, "", 0, "Max size of existing dumps in the target folder in GB") +AddOption(debugOnFailureDirectory, std::string, ".", "", 0, "Target folder for debug / dump") AddVariable(eventDisplay, o2::gpu::GPUDisplayFrontendInterface*, nullptr) AddSubConfig(GPUSettingsProcessingRTC, rtc) AddSubConfig(GPUSettingsProcessingRTCtechnical, rtctech) diff --git a/GPU/GPUTracking/Global/GPUChainTracking.cxx b/GPU/GPUTracking/Global/GPUChainTracking.cxx index c1c3e368ce90c..db84050772312 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.cxx +++ b/GPU/GPUTracking/Global/GPUChainTracking.cxx @@ -705,10 +705,14 @@ int32_t GPUChainTracking::RunChain() } mRec->getGeneralStepTimer(GeneralStep::Prepare).Stop(); - PrepareDebugOutput(); + PrepareKernelDebugOutput(); SynchronizeStream(0); // Synchronize all init copies that might be ongoing + if (GetProcessingSettings().debugOnFailure) { + mRec->setDebugDumpCallback([this]() { DoDebugRawDump(); }); + } + if (mIOPtrs.tpcCompressedClusters) { if (runRecoStep(RecoStep::TPCDecompression, &GPUChainTracking::RunTPCDecompression)) { return 1; @@ -775,7 +779,7 @@ int32_t GPUChainTracking::RunChain() } int32_t retVal = 0; - if (CheckErrorCodes(false, false, mRec->getErrorCodeOutput())) { + if (CheckErrorCodes(false, false, mRec->getErrorCodeOutput())) { // TODO: Eventually, we should use GPUReconstruction::CheckErrorCodes retVal = 3; if (!GetProcessingSettings().ignoreNonFatalGPUErrors) { return retVal; @@ -815,7 +819,7 @@ int32_t GPUChainTracking::RunChainFinalize() PrintOutputStat(); } - PrintDebugOutput(); + PrintKernelDebugOutput(); // PrintMemoryRelations(); @@ -884,6 +888,7 @@ int32_t GPUChainTracking::FinalizePipelinedProcessing() int32_t GPUChainTracking::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, std::vector>* fillErrors) { int32_t retVal = 0; + bool hasDebugError = false; for 
(int32_t i = 0; i < 1 + (!cpuOnly && mRec->IsGPU()); i++) { if (i) { const auto& threadContext = GetThreadContext(); @@ -925,9 +930,26 @@ int32_t GPUChainTracking::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, st fillErrors->emplace_back(std::array{pErrors[4 * j], pErrors[4 * j + 1], pErrors[4 * j + 2], pErrors[4 * j + 3]}); } } + if ((GetProcessingSettings().debugOnFailure & 1) || (GetProcessingSettings().debugOnFailure & 4)) { + if (GetProcessingSettings().debugOnFailureErrorMask == (uint64_t)-1) { + hasDebugError = true; + } else { + uint32_t nErrors = processors()->errorCodes.getNErrors(); + const uint32_t* pErrors = processors()->errorCodes.getErrorPtr(); + for (uint32_t j = 0; j < nErrors; j++) { + if (GetProcessingSettings().debugOnFailureErrorMask & (1 << pErrors[4 * j])) { + hasDebugError = true; + break; + } + } + } + } } } ClearErrorCodes(cpuOnly); + if (hasDebugError) { + mRec->triggerDebugDump(); + } return retVal; } diff --git a/GPU/GPUTracking/Global/GPUChainTracking.h b/GPU/GPUTracking/Global/GPUChainTracking.h index 2a2996895dbcf..7d4adcd70af7f 100644 --- a/GPU/GPUTracking/Global/GPUChainTracking.h +++ b/GPU/GPUTracking/Global/GPUChainTracking.h @@ -134,7 +134,7 @@ class GPUChainTracking : public GPUChain void ClearIOPointers(); void AllocateIOMemory(); using GPUChain::DumpData; - void DumpData(const char* filename); + void DumpData(const char* filename, const GPUTrackingInOutPointers* ioPtrs = nullptr); using GPUChain::ReadData; int32_t ReadData(const char* filename); void DumpSettings(const char* dir = "") override; @@ -231,11 +231,12 @@ class GPUChainTracking : public GPUChain int32_t DoProfile(); void PrintMemoryRelations(); void PrintMemoryStatistics() override; - void PrepareDebugOutput(); - void PrintDebugOutput(); + void PrepareKernelDebugOutput(); + void PrintKernelDebugOutput(); void PrintOutputStat(); static void DumpClusters(std::ostream& out, const o2::tpc::ClusterNativeAccess* clusters); static void 
DebugSortCompressedClusters(o2::tpc::CompressedClustersFlat* cls); + void DoDebugRawDump(); bool ValidateSteps(); bool ValidateSettings(); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index f72943e6bcd5a..e9721ec9d12bf 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -185,7 +185,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); } -void GPUChainTracking::PrepareDebugOutput() +void GPUChainTracking::PrepareKernelDebugOutput() { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT const auto& threadContext = GetThreadContext(); @@ -198,7 +198,7 @@ void GPUChainTracking::PrepareDebugOutput() #endif } -void GPUChainTracking::PrintDebugOutput() +void GPUChainTracking::PrintKernelDebugOutput() { #ifdef GPUCA_KERNEL_DEBUGGER_OUTPUT const auto& threadContext = GetThreadContext(); @@ -390,3 +390,23 @@ void GPUChainTracking::DebugSortCompressedClusters(o2::tpc::CompressedClustersFl sortMultiple(c.nAttachedClustersReduced, getReducedOffset, getN1, c.rowDiffA, c.sliceLegDiffA, c.padResA, c.timeResA); sortMultiple(c.nTracks, getIndex, get1, c.qPtA, c.rowA, c.sliceA, c.timeA, c.padA, c.nTrackClusters); // NOTE: This must be last, since nTrackClusters is used for handling the arrays above! 
} + +void GPUChainTracking::DoDebugRawDump() +{ + std::string dirName = mRec->getDebugFolder("tpc_raw"); + if (dirName == "") { + return; + } + GPUTrackingInOutPointers ioPtrs; + if (mIOPtrs.tpcZS) { + ioPtrs.tpcZS = mIOPtrs.tpcZS; + } else if (mIOPtrs.tpcPackedDigits) { + ioPtrs.tpcPackedDigits = mIOPtrs.tpcPackedDigits; + } else if (mIOPtrs.clustersNative) { + ioPtrs.clustersNative = mIOPtrs.clustersNative; + } + + GPUInfo("Doing debug raw dump"); + mRec->DumpSettings((dirName + "/").c_str()); + DumpData((dirName + "/event.0.dump").c_str(), &ioPtrs); +} diff --git a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx index 035e257ca7952..5a141cd08eb65 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingIO.cxx @@ -63,33 +63,36 @@ GPUChainTracking::InOutMemory::~InOutMemory() = default; GPUChainTracking::InOutMemory::InOutMemory(GPUChainTracking::InOutMemory&&) = default; GPUChainTracking::InOutMemory& GPUChainTracking::InOutMemory::operator=(GPUChainTracking::InOutMemory&&) = default; // NOLINT: False positive in clang-tidy -void GPUChainTracking::DumpData(const char* filename) +void GPUChainTracking::DumpData(const char* filename, const GPUTrackingInOutPointers* ioPtrs) { FILE* fp = fopen(filename, "w+b"); if (fp == nullptr) { return; } + if (ioPtrs == nullptr) { + ioPtrs = &mIOPtrs; + } fwrite(DUMP_HEADER, 1, DUMP_HEADER_SIZE, fp); fwrite(&GPUReconstruction::geometryType, sizeof(GPUReconstruction::geometryType), 1, fp); - DumpData(fp, mIOPtrs.clusterData, mIOPtrs.nClusterData, InOutPointerType::CLUSTER_DATA); - DumpData(fp, mIOPtrs.rawClusters, mIOPtrs.nRawClusters, InOutPointerType::RAW_CLUSTERS); - if (mIOPtrs.clustersNative) { - if (DumpData(fp, &mIOPtrs.clustersNative->clustersLinear, &mIOPtrs.clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { - fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), 
NSECTORS * GPUCA_ROW_COUNT, fp); - if (mIOPtrs.clustersNative->clustersMCTruth) { - const auto& buffer = mIOPtrs.clustersNative->clustersMCTruth->getBuffer(); + DumpData(fp, ioPtrs->clusterData, ioPtrs->nClusterData, InOutPointerType::CLUSTER_DATA); + DumpData(fp, ioPtrs->rawClusters, ioPtrs->nRawClusters, InOutPointerType::RAW_CLUSTERS); + if (ioPtrs->clustersNative) { + if (DumpData(fp, &ioPtrs->clustersNative->clustersLinear, &ioPtrs->clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE)) { + fwrite(&ioPtrs->clustersNative->nClusters[0][0], sizeof(ioPtrs->clustersNative->nClusters[0][0]), NSECTORS * GPUCA_ROW_COUNT, fp); + if (ioPtrs->clustersNative->clustersMCTruth) { + const auto& buffer = ioPtrs->clustersNative->clustersMCTruth->getBuffer(); std::pair tmp = {buffer.data(), buffer.size()}; DumpData(fp, &tmp.first, &tmp.second, InOutPointerType::CLUSTER_NATIVE_MC); } } } - if (mIOPtrs.tpcPackedDigits) { - if (DumpData(fp, mIOPtrs.tpcPackedDigits->tpcDigits, mIOPtrs.tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && mIOPtrs.tpcPackedDigits->tpcDigitsMC) { + if (ioPtrs->tpcPackedDigits) { + if (DumpData(fp, ioPtrs->tpcPackedDigits->tpcDigits, ioPtrs->tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT) && ioPtrs->tpcPackedDigits->tpcDigitsMC) { const char* ptrs[NSECTORS]; size_t sizes[NSECTORS]; for (uint32_t i = 0; i < NSECTORS; i++) { - if (mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]) { - const auto& buffer = mIOPtrs.tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); + if (ioPtrs->tpcPackedDigits->tpcDigitsMC->v[i]) { + const auto& buffer = ioPtrs->tpcPackedDigits->tpcDigitsMC->v[i]->getBuffer(); ptrs[i] = buffer.data(); sizes[i] = buffer.size(); } else { @@ -100,12 +103,12 @@ void GPUChainTracking::DumpData(const char* filename) DumpData(fp, ptrs, sizes, InOutPointerType::TPC_DIGIT_MC); } } - if (mIOPtrs.tpcZS) { + if (ioPtrs->tpcZS) { size_t total = 0; for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < 
GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { - total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < ioPtrs->tpcZS->sector[i].count[j]; k++) { + total += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -115,10 +118,10 @@ void GPUChainTracking::DumpData(const char* filename) total = 0; for (int32_t i = 0; i < NSECTORS; i++) { for (uint32_t j = 0; j < GPUTrackingInOutZS::NENDPOINTS; j++) { - for (uint32_t k = 0; k < mIOPtrs.tpcZS->sector[i].count[j]; k++) { - memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], mIOPtrs.tpcZS->sector[i].zsPtr[j][k], mIOPtrs.tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); - counts.count[i][j] += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; - total += mIOPtrs.tpcZS->sector[i].nZSPtr[j][k]; + for (uint32_t k = 0; k < ioPtrs->tpcZS->sector[i].count[j]; k++) { + memcpy(&ptr[total * TPCZSHDR::TPC_ZS_PAGE_SIZE], ioPtrs->tpcZS->sector[i].zsPtr[j][k], ioPtrs->tpcZS->sector[i].nZSPtr[j][k] * TPCZSHDR::TPC_ZS_PAGE_SIZE); + counts.count[i][j] += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; + total += ioPtrs->tpcZS->sector[i].nZSPtr[j][k]; } } } @@ -127,33 +130,33 @@ void GPUChainTracking::DumpData(const char* filename) fwrite(&counts, sizeof(counts), 1, fp); } } - if (mIOPtrs.tpcCompressedClusters) { - if (mIOPtrs.tpcCompressedClusters->ptrForward) { + if (ioPtrs->tpcCompressedClusters) { + if (ioPtrs->tpcCompressedClusters->ptrForward) { throw std::runtime_error("Cannot dump non-flat compressed clusters"); } - char* ptr = (char*)mIOPtrs.tpcCompressedClusters; - size_t size = mIOPtrs.tpcCompressedClusters->totalDataSize; + char* ptr = (char*)ioPtrs->tpcCompressedClusters; + size_t size = ioPtrs->tpcCompressedClusters->totalDataSize; DumpData(fp, &ptr, &size, InOutPointerType::TPC_COMPRESSED_CL); } - if (mIOPtrs.settingsTF) { + if (ioPtrs->settingsTF) { uint32_t n = 1; - DumpData(fp, &mIOPtrs.settingsTF, &n, InOutPointerType::TF_SETTINGS); + DumpData(fp, 
&ioPtrs->settingsTF, &n, InOutPointerType::TF_SETTINGS); } - DumpData(fp, mIOPtrs.sectorTracks, mIOPtrs.nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); - DumpData(fp, mIOPtrs.sectorClusters, mIOPtrs.nSectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); - DumpData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); - DumpData(fp, &mIOPtrs.mcInfosTPC, &mIOPtrs.nMCInfosTPC, InOutPointerType::MC_INFO_TPC); - DumpData(fp, &mIOPtrs.mcInfosTPCCol, &mIOPtrs.nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); - DumpData(fp, &mIOPtrs.mergedTracks, &mIOPtrs.nMergedTracks, InOutPointerType::MERGED_TRACK); - DumpData(fp, &mIOPtrs.mergedTrackHits, &mIOPtrs.nMergedTrackHits, InOutPointerType::MERGED_TRACK_HIT); - DumpData(fp, &mIOPtrs.trdTracks, &mIOPtrs.nTRDTracks, InOutPointerType::TRD_TRACK); - DumpData(fp, &mIOPtrs.trdTracklets, &mIOPtrs.nTRDTracklets, InOutPointerType::TRD_TRACKLET); - if (mIOPtrs.trdSpacePoints) { - DumpData(fp, &mIOPtrs.trdSpacePoints, &mIOPtrs.nTRDTracklets, InOutPointerType::TRD_SPACEPOINT); + DumpData(fp, ioPtrs->sectorTracks, ioPtrs->nSectorTracks, InOutPointerType::SECTOR_OUT_TRACK); + DumpData(fp, ioPtrs->sectorClusters, ioPtrs->nSectorClusters, InOutPointerType::SECTOR_OUT_CLUSTER); + DumpData(fp, &ioPtrs->mcLabelsTPC, &ioPtrs->nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC); + DumpData(fp, &ioPtrs->mcInfosTPC, &ioPtrs->nMCInfosTPC, InOutPointerType::MC_INFO_TPC); + DumpData(fp, &ioPtrs->mcInfosTPCCol, &ioPtrs->nMCInfosTPCCol, InOutPointerType::MC_INFO_TPC); + DumpData(fp, &ioPtrs->mergedTracks, &ioPtrs->nMergedTracks, InOutPointerType::MERGED_TRACK); + DumpData(fp, &ioPtrs->mergedTrackHits, &ioPtrs->nMergedTrackHits, InOutPointerType::MERGED_TRACK_HIT); + DumpData(fp, &ioPtrs->trdTracks, &ioPtrs->nTRDTracks, InOutPointerType::TRD_TRACK); + DumpData(fp, &ioPtrs->trdTracklets, &ioPtrs->nTRDTracklets, InOutPointerType::TRD_TRACKLET); + if (ioPtrs->trdSpacePoints) { + DumpData(fp, &ioPtrs->trdSpacePoints, 
&ioPtrs->nTRDTracklets, InOutPointerType::TRD_SPACEPOINT); } - DumpData(fp, &mIOPtrs.trdTriggerTimes, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); - DumpData(fp, &mIOPtrs.trdTrackletIdxFirst, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); - DumpData(fp, &mIOPtrs.trdTrigRecMask, &mIOPtrs.nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTriggerTimes, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTrackletIdxFirst, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); + DumpData(fp, &ioPtrs->trdTrigRecMask, &ioPtrs->nTRDTriggerRecords, InOutPointerType::TRD_TRIGGERRECORDS); fclose(fp); } diff --git a/GPU/GPUTracking/Global/GPUErrorCodes.h b/GPU/GPUTracking/Global/GPUErrorCodes.h index f35f5fc81a382..8fec23be00a09 100644 --- a/GPU/GPUTracking/Global/GPUErrorCodes.h +++ b/GPU/GPUTracking/Global/GPUErrorCodes.h @@ -47,5 +47,6 @@ GPUCA_ERROR_CODE(26, ERROR_TPCZS_INVALID_ROW, SectorRow) GPUCA_ERROR_CODE(27, ERROR_TPCZS_INVALID_NADC, SectorCRU, SamplesInPage, SamplesWritten) // Invalid number of ADC samples in header, existing samples were decoded GPUCA_ERROR_CODE(28, ERROR_TPCZS_INCOMPLETE_HBF, SectorCRU, PacketCount, NextPacketCount) // Part of HBF is missing, decoding incomplete GPUCA_ERROR_CODE(29, ERROR_TPCZS_INVALID_OFFSET, SectorEndpoint, Value, Expected) // Raw page is skipped since it contains invalid payload offset +GPUCA_ERROR_CODE(29, MAX_GPUCA_ERROR_NUMBER) // #define GPUCA_CHECK_TPCZS_CORRUPTION diff --git a/GPU/GPUTracking/Global/GPUErrors.cxx b/GPU/GPUTracking/Global/GPUErrors.cxx index 7f3ed1d8206d9..e9d5a74c6567a 100644 --- a/GPU/GPUTracking/Global/GPUErrors.cxx +++ b/GPU/GPUTracking/Global/GPUErrors.cxx @@ -54,12 +54,17 @@ static std::unordered_map errorNames = { #undef GPUCA_ERROR_CODE }; -void GPUErrors::printErrors(bool silent) +bool GPUErrors::printErrors(bool silent, uint64_t mask) { + bool retVal = 0; for 
(uint32_t i = 0; i < std::min(*mErrors, GPUCA_MAX_ERRORS); i++) { uint32_t errorCode = mErrors[4 * i + 1]; const auto& it = errorNames.find(errorCode); const char* errorName = it == errorNames.end() ? "INVALID ERROR CODE" : it->second; + static_assert(MAX_GPUCA_ERROR_NUMBER <= sizeof(mask) * 8); + if (mask & (1 << errorCode)) { + retVal = 1; + } if (silent && i) { GPUWarning("GPU Error Code (%u:%u) %s : %u / %u / %u", i, errorCode, errorName, mErrors[4 * i + 2], mErrors[4 * i + 3], mErrors[4 * i + 4]); } else if (silent) { @@ -75,6 +80,7 @@ void GPUErrors::printErrors(bool silent) GPUError("Additional errors occured (codes not stored)"); } } + return retVal; } uint32_t GPUErrors::getNErrors() const diff --git a/GPU/GPUTracking/Global/GPUErrors.h b/GPU/GPUTracking/Global/GPUErrors.h index cd86390bc1b01..1cbc4a019601d 100644 --- a/GPU/GPUTracking/Global/GPUErrors.h +++ b/GPU/GPUTracking/Global/GPUErrors.h @@ -33,7 +33,7 @@ class GPUErrors GPUd() bool hasError() { return *mErrors > 0; } void setMemory(GPUglobalref() uint32_t* m) { mErrors = m; } void clear(); - void printErrors(bool silent = false); + bool printErrors(bool silent = false, uint64_t mask = 0); uint32_t getNErrors() const; const uint32_t* getErrorPtr() const; static uint32_t getMaxErrors(); From bb048efab7be5df04ad93a974abef167f4c6e88a Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 21 May 2025 19:41:52 +0200 Subject: [PATCH 0542/1914] DPL Analysis: add `clamp` expression node to constrain a result of an expression between two values (#14305) --- Framework/Core/include/Framework/Expressions.h | 8 ++++++++ Framework/Core/test/test_Expressions.cxx | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/Framework/Core/include/Framework/Expressions.h b/Framework/Core/include/Framework/Expressions.h index 18c930700a91d..9d6c3cfb7c66e 100644 --- a/Framework/Core/include/Framework/Expressions.h +++ b/Framework/Core/include/Framework/Expressions.h @@ -546,6 +546,14 @@ inline Node updateParameters(Node 
const& pexp, int bins, std::vector const& p return result; } +/// clamping functional +template +inline Node clamp(Node&& expr, T low, T hi) +{ + auto copy = expr; + return ifnode(Node{copy} < LiteralNode{low}, LiteralNode{low}, ifnode(Node{copy} > LiteralNode{hi}, LiteralNode{hi}, Node{copy})); +} + /// A struct, containing the root of the expression tree struct Filter { Filter() = default; diff --git a/Framework/Core/test/test_Expressions.cxx b/Framework/Core/test/test_Expressions.cxx index 6faa2fc352232..e8cf43e03e11d 100644 --- a/Framework/Core/test/test_Expressions.cxx +++ b/Framework/Core/test/test_Expressions.cxx @@ -290,6 +290,13 @@ TEST_CASE("TestConditionalExpressions") auto gandiva_condition2 = makeCondition(gandiva_tree2); auto gandiva_filter2 = createFilter(schema2, gandiva_condition2); REQUIRE(gandiva_tree2->ToString() == "bool greater_than((float) fSigned1Pt, (const float) 0 raw(0)) && if (bool less_than(float absf((float) fEta), (const float) 1 raw(3f800000)) && if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { bool greater_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) } else { bool less_than((float) fPhi, (const float) 1.5708 raw(3fc90fdb)) }) { bool greater_than(float absf((float) fX), (const float) 1 raw(3f800000)) } else { bool greater_than(float absf((float) fY), (const float) 1 raw(3f800000)) }"); + + // clamp + Projector clp = clamp(o2::aod::track::pt, 1.0f, 10.f); + auto clpspecs = createOperations(clp); + auto schemaclp = std::make_shared(std::vector{o2::aod::track::Pt::asArrowField()}); + auto gandiva_tree_clp = createExpressionTree(clpspecs, schemaclp); + REQUIRE(gandiva_tree_clp->ToString() == "if (bool less_than((float) fPt, (const float) 1 raw(3f800000))) { (const float) 1 raw(3f800000) } else { if (bool greater_than((float) fPt, (const float) 10 raw(41200000))) { (const float) 10 raw(41200000) } else { (float) fPt } }"); } TEST_CASE("TestBinnedExpressions") From 35ca22b3bde5014b40ebe1d823c6dc88a313ddbf Mon Sep 
17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Wed, 21 May 2025 19:54:16 +0200 Subject: [PATCH 0543/1914] DPL: fix merging of pipelined devices (#14307) Sometimes we are just too smart. Multiple messages with the same signature are coalesced in the same input if they are processed at the same time. This explains why the sleep was improving behavior: it merely staggers arrival, so that the optimisation cannot happen anymore. --- .../AnalysisSupport/src/AODWriterHelpers.cxx | 254 +++++++++--------- 1 file changed, 129 insertions(+), 125 deletions(-) diff --git a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx index 2b1b4f880d1ee..40d2189ea96d0 100644 --- a/Framework/AnalysisSupport/src/AODWriterHelpers.cxx +++ b/Framework/AnalysisSupport/src/AODWriterHelpers.cxx @@ -269,145 +269,149 @@ AlgorithmSpec AODWriterHelpers::getOutputObjHistWriter(ConfigContext const& ctx) callbacks.set(endofdatacb); return [inputObjects, objmap, tskmap](ProcessingContext& pc) mutable -> void { - auto const& ref = pc.inputs().get("x"); - if (!ref.header) { - LOG(error) << "Header not found"; - return; - } - auto datah = o2::header::get(ref.header); - if (!datah) { - LOG(error) << "No data header in stack"; - return; - } + auto mergePart = [&inputObjects, &objmap, &tskmap](DataRef const& ref) { + if (!ref.header) { + LOG(error) << "Header not found"; + return; + } + auto datah = o2::header::get(ref.header); + if (!datah) { + LOG(error) << "No data header in stack"; + return; + } - if (!ref.payload) { - LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + if (!ref.payload) { + LOGP(error, "Payload not found for {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - auto objh = o2::header::get(ref.header); - if (!objh) { - LOGP(error, "No output object header in stack 
of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + auto objh = o2::header::get(ref.header); + if (!objh) { + LOGP(error, "No output object header in stack of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - InputObject obj; - FairInputTBuffer tm(const_cast(ref.payload), static_cast(datah->payloadSize)); - tm.InitMap(); - obj.kind = tm.ReadClass(); - tm.SetBufferOffset(0); - tm.ResetMap(); - if (obj.kind == nullptr) { - LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); - return; - } + InputObject obj; + FairInputTBuffer tm(const_cast(ref.payload), static_cast(datah->payloadSize)); + tm.InitMap(); + obj.kind = tm.ReadClass(); + tm.SetBufferOffset(0); + tm.ResetMap(); + if (obj.kind == nullptr) { + LOGP(error, "Cannot read class info from buffer of {}/{}/{}", datah->dataOrigin.as(), datah->dataDescription.as(), datah->subSpecification); + return; + } - auto policy = objh->mPolicy; - auto sourceType = objh->mSourceType; - auto hash = objh->mTaskHash; + auto policy = objh->mPolicy; + auto sourceType = objh->mSourceType; + auto hash = objh->mTaskHash; - obj.obj = tm.ReadObjectAny(obj.kind); - auto* named = static_cast(obj.obj); - obj.name = named->GetName(); - auto hpos = std::find_if(tskmap.begin(), tskmap.end(), [&](auto&& x) { return x.id == hash; }); - if (hpos == tskmap.end()) { - LOG(error) << "No task found for hash " << hash; - return; - } - auto taskname = hpos->name; - auto opos = std::find_if(objmap.begin(), objmap.end(), [&](auto&& x) { return x.id == hash; }); - if (opos == objmap.end()) { - LOG(error) << "No object list found for task " << taskname << " (hash=" << hash << ")"; - return; - } - auto objects = opos->bindings; - if (std::find(objects.begin(), objects.end(), obj.name) == objects.end()) { - LOG(error) << "No object " << obj.name << " in map for 
task " << taskname; - return; - } - auto nameHash = runtime_hash(obj.name.c_str()); - InputObjectRoute key{obj.name, nameHash, taskname, hash, policy, sourceType}; - auto existing = std::find_if(inputObjects->begin(), inputObjects->end(), [&](auto&& x) { return (x.first.uniqueId == nameHash) && (x.first.taskHash == hash); }); - // If it's the first one, we just add it to the list. - if (existing == inputObjects->end()) { - obj.count = objh->mPipelineSize; - inputObjects->push_back(std::make_pair(key, obj)); - existing = inputObjects->end() - 1; - } else { - obj.count = existing->second.count; - // Otherwise, we merge it with the existing one. - auto merger = existing->second.kind->GetMerge(); - if (!merger) { - LOG(error) << "Already one unmergeable object found for " << obj.name; + obj.obj = tm.ReadObjectAny(obj.kind); + auto* named = static_cast(obj.obj); + obj.name = named->GetName(); + auto hpos = std::find_if(tskmap.begin(), tskmap.end(), [&](auto&& x) { return x.id == hash; }); + if (hpos == tskmap.end()) { + LOG(error) << "No task found for hash " << hash; return; } - TList coll; - coll.Add(static_cast(obj.obj)); - merger(existing->second.obj, &coll, nullptr); - } - // We expect as many objects as the pipeline size, for - // a given object name and task hash. - existing->second.count -= 1; - - if (existing->second.count != 0) { - return; - } - // Write the object here. 
- auto route = existing->first; - auto entry = existing->second; - auto file = ROOTfileNames.find(route.policy); - if (file == ROOTfileNames.end()) { - return; - } - auto filename = file->second; - if (f[route.policy] == nullptr) { - f[route.policy] = TFile::Open(filename.c_str(), "RECREATE"); - } - auto nextDirectory = route.directory; - if ((nextDirectory != currentDirectory) || (filename != currentFile)) { - if (!f[route.policy]->FindKey(nextDirectory.c_str())) { - f[route.policy]->mkdir(nextDirectory.c_str()); + auto taskname = hpos->name; + auto opos = std::find_if(objmap.begin(), objmap.end(), [&](auto&& x) { return x.id == hash; }); + if (opos == objmap.end()) { + LOG(error) << "No object list found for task " << taskname << " (hash=" << hash << ")"; + return; } - currentDirectory = nextDirectory; - currentFile = filename; - } + auto objects = opos->bindings; + if (std::find(objects.begin(), objects.end(), obj.name) == objects.end()) { + LOG(error) << "No object " << obj.name << " in map for task " << taskname; + return; + } + auto nameHash = runtime_hash(obj.name.c_str()); + InputObjectRoute key{obj.name, nameHash, taskname, hash, policy, sourceType}; + auto existing = std::find_if(inputObjects->begin(), inputObjects->end(), [&](auto&& x) { return (x.first.uniqueId == nameHash) && (x.first.taskHash == hash); }); + // If it's the first one, we just add it to the list. + if (existing == inputObjects->end()) { + obj.count = objh->mPipelineSize; + inputObjects->push_back(std::make_pair(key, obj)); + existing = inputObjects->end() - 1; + } else { + obj.count = existing->second.count; + // Otherwise, we merge it with the existing one. 
+ auto merger = existing->second.kind->GetMerge(); + if (!merger) { + LOG(error) << "Already one unmergeable object found for " << obj.name; + return; + } + TList coll; + coll.Add(static_cast(obj.obj)); + merger(existing->second.obj, &coll, nullptr); + } + // We expect as many objects as the pipeline size, for + // a given object name and task hash. + existing->second.count -= 1; - // translate the list-structure created by the registry into a directory structure within the file - std::function writeListToFile; - writeListToFile = [&](TList* list, TDirectory* parentDir) { - TIter next(list); - TObject* object = nullptr; - while ((object = next())) { - if (object->InheritsFrom(TList::Class())) { - writeListToFile(static_cast(object), parentDir->mkdir(object->GetName(), object->GetName(), true)); - } else { - parentDir->WriteObjectAny(object, object->Class(), object->GetName()); - auto* written = list->Remove(object); - delete written; + if (existing->second.count != 0) { + return; + } + // Write the object here. 
+ auto route = existing->first; + auto entry = existing->second; + auto file = ROOTfileNames.find(route.policy); + if (file == ROOTfileNames.end()) { + return; + } + auto filename = file->second; + if (f[route.policy] == nullptr) { + f[route.policy] = TFile::Open(filename.c_str(), "RECREATE"); + } + auto nextDirectory = route.directory; + if ((nextDirectory != currentDirectory) || (filename != currentFile)) { + if (!f[route.policy]->FindKey(nextDirectory.c_str())) { + f[route.policy]->mkdir(nextDirectory.c_str()); } + currentDirectory = nextDirectory; + currentFile = filename; } - }; - TDirectory* currentDir = f[route.policy]->GetDirectory(currentDirectory.c_str()); - if (route.sourceType == OutputObjSourceType::HistogramRegistrySource) { - auto* outputList = static_cast(entry.obj); - outputList->SetOwner(false); + // translate the list-structure created by the registry into a directory structure within the file + std::function writeListToFile; + writeListToFile = [&](TList* list, TDirectory* parentDir) { + TIter next(list); + TObject* object = nullptr; + while ((object = next())) { + if (object->InheritsFrom(TList::Class())) { + writeListToFile(static_cast(object), parentDir->mkdir(object->GetName(), object->GetName(), true)); + } else { + parentDir->WriteObjectAny(object, object->Class(), object->GetName()); + auto* written = list->Remove(object); + delete written; + } + } + }; + + TDirectory* currentDir = f[route.policy]->GetDirectory(currentDirectory.c_str()); + if (route.sourceType == OutputObjSourceType::HistogramRegistrySource) { + auto* outputList = static_cast(entry.obj); + outputList->SetOwner(false); + + // if registry should live in dedicated folder a TNamed object is appended to the list + if (outputList->Last() && outputList->Last()->IsA() == TNamed::Class()) { + delete outputList->Last(); + outputList->RemoveLast(); + currentDir = currentDir->mkdir(outputList->GetName(), outputList->GetName(), true); + } - // if registry should live in dedicated 
folder a TNamed object is appended to the list - if (outputList->Last() && outputList->Last()->IsA() == TNamed::Class()) { - delete outputList->Last(); - outputList->RemoveLast(); - currentDir = currentDir->mkdir(outputList->GetName(), outputList->GetName(), true); + writeListToFile(outputList, currentDir); + outputList->SetOwner(); + delete outputList; + entry.obj = nullptr; + } else { + currentDir->WriteObjectAny(entry.obj, entry.kind, entry.name.c_str()); + delete (TObject*)entry.obj; + entry.obj = nullptr; } - - writeListToFile(outputList, currentDir); - outputList->SetOwner(); - delete outputList; - entry.obj = nullptr; - } else { - currentDir->WriteObjectAny(entry.obj, entry.kind, entry.name.c_str()); - delete (TObject*)entry.obj; - entry.obj = nullptr; + }; + for (int pi = 0; pi < pc.inputs().getNofParts(0); ++pi) { + mergePart(pc.inputs().get("x", pi)); } }; }}; From 0386f65567a1fcda173c9ae39304bbd284677774 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Mon, 19 May 2025 10:49:44 +0200 Subject: [PATCH 0544/1914] Revert "GPU Common: Workaround for removing gpustd::array, temporary alias for O2Physics" This reverts commit a850e9eb3e6a634a1e87a70170c05ad6d8bce3af. 
--- .../ReconstructionDataFormats/TrackParametrization.h | 1 - GPU/Common/GPUCommonArray.h | 6 ------ 2 files changed, 7 deletions(-) diff --git a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h index 1d0a5f1a9f1fd..f240e34861eeb 100644 --- a/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h +++ b/DataFormats/Reconstruction/include/ReconstructionDataFormats/TrackParametrization.h @@ -29,7 +29,6 @@ #include "GPUCommonDef.h" #include "GPUCommonRtypes.h" #include "GPUCommonMath.h" -#include "GPUCommonArray.h" #include "GPUROOTCartesianFwd.h" #ifndef GPUCA_GPUCODE_DEVICE diff --git a/GPU/Common/GPUCommonArray.h b/GPU/Common/GPUCommonArray.h index fa86d7bb4a021..e83ca8c4a69fc 100644 --- a/GPU/Common/GPUCommonArray.h +++ b/GPU/Common/GPUCommonArray.h @@ -48,10 +48,4 @@ using array = std::array; } // namespace std #endif -namespace o2::gpu::gpustd -{ -template -using array = ::std::array; // temporary alias, to remove dependent types -} // o2::gpu::gpustd - #endif // GPUCOMMONARRAY_H From efff2f780c2b35603cdd6614f868100e94ba41e7 Mon Sep 17 00:00:00 2001 From: shahor02 Date: Wed, 21 May 2025 23:04:18 +0200 Subject: [PATCH 0545/1914] Add TPC cluster selector helper for tracking studies (#14308) --- .../study/CMakeLists.txt | 7 ++ .../GlobalTrackingStudy/TPCClusSelector.h | 92 ++++++++++++++ .../study/src/GlobalTrackingStudyLinkDef.h | 1 + .../study/src/TPCClusSelector.cxx | 117 ++++++++++++++++++ 4 files changed, 217 insertions(+) create mode 100644 Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h create mode 100644 Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx diff --git a/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt b/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt index 398e7eb215f2e..776d3946283c3 100644 --- 
a/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt +++ b/Detectors/GlobalTrackingWorkflow/study/CMakeLists.txt @@ -12,6 +12,7 @@ #add_compile_options(-O0 -g -fPIC) o2_add_library(GlobalTrackingStudy + TARGETVARNAME targetName SOURCES src/TPCTrackStudy.cxx src/TrackingStudy.cxx src/SVStudy.cxx @@ -23,6 +24,7 @@ o2_add_library(GlobalTrackingStudy src/TrackInfoExt.cxx src/TrackMCStudyConfig.cxx src/TrackMCStudyTypes.cxx + src/TPCClusSelector.cxx PUBLIC_LINK_LIBRARIES O2::GlobalTracking O2::GlobalTrackingWorkflowReaders O2::GlobalTrackingWorkflowHelpers @@ -73,3 +75,8 @@ o2_add_executable(dump-workfow COMPONENT_NAME bc-tracks SOURCES src/track-dump-workflow.cxx PUBLIC_LINK_LIBRARIES O2::GlobalTrackingStudy) + +if (OpenMP_CXX_FOUND) + target_compile_definitions(${targetName} PRIVATE WITH_OPENMP) + target_link_libraries(${targetName} PRIVATE OpenMP::OpenMP_CXX) +endif() diff --git a/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h new file mode 100644 index 0000000000000..c1765558458c2 --- /dev/null +++ b/Detectors/GlobalTrackingWorkflow/study/include/GlobalTrackingStudy/TPCClusSelector.h @@ -0,0 +1,92 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+ +// helper class for TPC clusters selection + +#ifndef ALICEO2_TPCCLUSSELECTOR_H +#define ALICEO2_TPCCLUSSELECTOR_H + +#include +#include +#include + +namespace o2::tpc +{ +class ClusterNativeAccess; + +class TPCClusSelector +{ + // helper to select TPC cluster matching to certain timebin and optionally pads range + // example of usage: + /* + TPCClusSelector clSel; + o2::tpc::ClusterNativeHelper::Reader tcpClusterReader; + tcpClusterReader.init(native_clusters_file.c_str()); + o2::tpc::ClusterNativeAccess tpcClusterIdxStruct; + std::unique_ptr tpcClusterBuffer; ///< buffer for clusters in tpcClusterIdxStruct + o2::tpc::ClusterNativeHelper::ConstMCLabelContainerViewWithBuffer tpcClusterMCBuffer; ///< buffer for mc labels + + tcpClusterReader.read(iTF); + tcpClusterReader.fillIndex(tpcClusterIdxStruct, tpcClusterBuffer, tpcClusterMCBuffer); + + clSel.fill(tpcClusterIdxStruct); // Create sorted index + // to get i-th cluster in orderer timebins: + const auto& clus = tpcClusterIdxStruct.clusters[sector][row][ clSel.getIndex(sector, row, i)]; + + // to get sorted indices range of clusters in the tbmin:tbmax range + auto rng = clSel.findClustersRange(sector, row, tbmin, tbmax, tpcClusterIdxStruct); + if (rng.first>rng.second) { // nothing is found } + const auto& cln = tpcClusterIdxStruct.clusters[sector][row][clSel.getIndex(sector, row, rng.first )]; /... 
+ + // to get number of clusters in tbmin:tbmax, padmin:padmax range (and optionally get the list) + std::vector cllist; // optional list + int nfnd = clSel.findClustersEntries(sector, row, tbmin, tbmax, padmin, padmax, tpcClusterIdxStruct, &cllist); + for (int i=0;i findClustersRange(int sec, int row, float tbmin, float tbmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct); + int findClustersEntries(int sec, int row, float tbmin, float tbmax, float padmin, float padmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct, std::vector* clIDDirect = nullptr); + void fill(const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct); + + int getNThreads() const { return mNThreads; } + void setNThreads(int n); + + private: + struct Sector { + static constexpr int NRows = 152; + std::array, NRows> rows; + void clear() + { + for (auto& r : rows) + r.clear(); + } + }; + + static constexpr int NSectors = 36; + std::array mSectors{}; + int mNThreads = 1; + + ClassDefNV(TPCClusSelector, 1); +}; + +} // namespace o2::tpc + +#endif diff --git a/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h b/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h index f666132c9c1cf..f0d3e7d4d0b4e 100644 --- a/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h +++ b/Detectors/GlobalTrackingWorkflow/study/src/GlobalTrackingStudyLinkDef.h @@ -38,5 +38,6 @@ #pragma link C++ class std::vector < o2::trackstudy::ClResTPCCont> + ; #pragma link C++ class o2::trackstudy::TrackPairInfo + ; #pragma link C++ class std::vector < o2::trackstudy::TrackPairInfo> + ; +#pragma link C++ class o2::tpc::TPCClusSelector + ; #endif diff --git a/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx b/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx new file mode 100644 index 0000000000000..e5b28fb0fd62b --- /dev/null +++ b/Detectors/GlobalTrackingWorkflow/study/src/TPCClusSelector.cxx @@ -0,0 +1,117 @@ +// Copyright
2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. + +// helper class for TPC clusters selection +#include "GlobalTrackingStudy/TPCClusSelector.h" +#include "DataFormatsTPC/ClusterNativeHelper.h" +#include "Framework/Logger.h" +#include +#ifdef WITH_OPENMP +#include +#endif + +using namespace o2::tpc; + +void TPCClusSelector::setNThreads(int n) +{ +#ifndef WITH_OPENMP + if (n > 1) { + LOGP(warn, "No OpenMP"); + } + n = 1; +#endif + mNThreads = n; +} + +std::pair TPCClusSelector::findClustersRange(int sec, int row, float tbmin, float tbmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct) +{ + // find sorted indices of clusters in the [tbmin:tbmax] range, if not found, return {-1,-2} + const auto& vidx = mSectors[sec].rows[row]; + const auto* clarr = tpcClusterIdxStruct.clusters[sec][row]; + // use binary search to find 1st cluster with time >= tbmin + int ncl = vidx.size(), left = 0, right = ncl; + while (left < right) { + int mid = left + (right - left) / 2; + if (clarr[vidx[mid]].getTime() < tbmin) { + left = mid + 1; + } else { + right = mid; + } + } + if (left == ncl || clarr[vidx[left]].getTime() > tbmax) { + return {-1, -2}; // all clusters have time < tbmin or no clusters in the range [tbmin:tbmax] + } + int idmin = left, idmax = left, idtst = idmin; + // scan forward over larger times while still <= tbmax + while (++idtst < ncl && clarr[vidx[idtst]].getTime() <= tbmax) { + idmax = idtst; + } + return {idmin, idmax}; +} + +int TPCClusSelector::findClustersEntries(int sec, int row, float tbmin, float
tbmax, float padmin, float padmax, const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct, std::vector* clIDDirect) +{ + // find direct cluster indices for tbmin:tbmax / padmin:padmax range, fill clIDDirect vector if provided + const auto& vidx = mSectors[sec].rows[row]; + const auto* clarr = tpcClusterIdxStruct.clusters[sec][row]; + // use binary search to find 1st cluster with time >= tbmin + int ncl = vidx.size(), left = 0, right = ncl; + if (clIDDirect) { + clIDDirect->clear(); + } + while (left < right) { + int mid = left + (right - left) / 2; + if (clarr[vidx[mid]].getTime() < tbmin) { + left = mid + 1; + } else { + right = mid; + } + } + if (left == ncl || clarr[vidx[left]].getTime() > tbmax) { + return 0; // all clusters have time < tbmin or no clusters in the range [tbmin:tbmax] + } + int nclf = 0; + for (; left < ncl; left++) { // advance over time-sorted indices; the original while-loop never incremented left + const auto& cl = clarr[vidx[left]]; + if (cl.getTime() > tbmax) { + break; + } + if (cl.getPad() >= padmin && cl.getPad() <= padmax) { + nclf++; + if (clIDDirect) { + clIDDirect->push_back(vidx[left]); + } + } + } + return nclf; +} + +void TPCClusSelector::fill(const o2::tpc::ClusterNativeAccess& tpcClusterIdxStruct) +{ + for (int is = 0; is < NSectors; is++) { + auto& sect = mSectors[is]; +#ifdef WITH_OPENMP +#pragma omp parallel for schedule(dynamic) num_threads(mNThreads) +#endif + for (int ir = 0; ir < Sector::NRows; ir++) { + size_t ncl = tpcClusterIdxStruct.nClusters[is][ir]; + if (ncl >= 0xffff) { + LOGP(error, "Row {} of sector {} has {} clusters, truncating to {}", ir, is, ncl, int(0xffff)); + ncl = 0xffff; + } + auto& rowidx = sect.rows[ir]; + rowidx.resize(ncl); + std::iota(rowidx.begin(), rowidx.end(), 0); + const auto* clus = tpcClusterIdxStruct.clusters[is][ir]; // C array of clusters + std::sort(rowidx.begin(), rowidx.end(), [&](size_t a, size_t b) { return clus[a].getTime() < clus[b].getTime(); }); + } + } +} From 81b7a64680531129657f5a9eb2a222b3d0c779c1 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025
19:06:16 +0200 Subject: [PATCH 0546/1914] GPU: Fix allocator / deallocator mismatch --- GPU/GPUTracking/Base/GPUReconstruction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.h b/GPU/GPUTracking/Base/GPUReconstruction.h index e0c866fd9421b..06f1c27fb6c06 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.h +++ b/GPU/GPUTracking/Base/GPUReconstruction.h @@ -378,7 +378,7 @@ class GPUReconstruction std::vector res; }; struct alignedDeleter { - void operator()(void* ptr) { ::operator delete(ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; + void operator()(void* ptr) { ::operator delete[](ptr, std::align_val_t(GPUCA_BUFFER_ALIGNMENT)); }; }; std::unordered_map mMemoryReuse1to1; std::vector> mNonPersistentMemoryStack; // hostPoolAddress, devicePoolAddress, individualAllocationCount, directIndividualAllocationCound, tag From b8cacf6b25dbb22b30160821d70992cfac594f8b Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:16:30 +0200 Subject: [PATCH 0547/1914] GPU TPC Merger: Clarify more variable names --- .../DataCompression/GPUTPCCompression.cxx | 2 +- .../GPUChainTrackingDebugAndProfiling.cxx | 4 +- .../Global/GPUChainTrackingMerger.cxx | 10 ++--- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 40 +++++++++---------- GPU/GPUTracking/Merger/GPUTPCGMMerger.h | 8 ++-- GPU/GPUTracking/SectorTracker/GPUTPCTrack.h | 3 -- 6 files changed, 32 insertions(+), 35 deletions(-) diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx index ec1636dfe7f59..61f8a614fbe6f 100644 --- a/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx +++ b/GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx @@ -124,7 +124,7 @@ void GPUTPCCompression::SetMaxData(const GPUTrackingInOutPointers& io) mMaxClusters = io.clustersNative->nClustersTotal; mMaxClusterFactorBase1024 = mMaxClusters > 100000000 ? 
mRec->MemoryScalers()->NTPCUnattachedHitsBase1024(mRec->GetParam().rec.tpc.rejectionStrategy) : 1024; mMaxClustersInCache = mMaxClusters * mMaxClusterFactorBase1024 / 1024; - mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NOutputTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include + mMaxTrackClusters = mRec->GetConstantMem().tpcMerger.NMergedTrackClusters(); // TODO: Why is this not using ioPtrs? Could remove GPUConstantMem.h include mMaxTracks = mRec->GetConstantMem().tpcMerger.NMergedTracks(); if (mMaxClusters % 16) { mMaxClusters += 16 - (mMaxClusters % 16); diff --git a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx index e9721ec9d12bf..173d2fb916239 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx @@ -154,7 +154,7 @@ void GPUChainTracking::PrintMemoryStatistics() } addToMap("TPC Clusterer Clusters", usageMap, mRec->MemoryScalers()->nTPCHits, mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits)); addToMap("TPC Tracks", usageMap, processors()->tpcMerger.NMergedTracks(), processors()->tpcMerger.NMaxTracks()); - addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NOutputTrackClusters(), processors()->tpcMerger.NMaxOutputTrackClusters()); + addToMap("TPC TrackHits", usageMap, processors()->tpcMerger.NMergedTrackClusters(), processors()->tpcMerger.NMaxMergedTrackClusters()); if (mRec->GetProcessingSettings().createO2Output) { addToMap("TPC O2 Tracks", usageMap, processors()->tpcMerger.NOutputTracksTPCO2(), processors()->tpcMerger.NOutputTracksTPCO2()); @@ -182,7 +182,7 @@ void GPUChainTracking::PrintMemoryRelations() GPUInfo("MEMREL SectorTrackHits NCl %d NTrkH %d", processors()->tpcTrackers[i].NHitsTotal(), *processors()->tpcTrackers[i].NTrackHits()); } GPUInfo("MEMREL Tracks NCl %d NTrk %d", 
processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTracks()); - GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NOutputTrackClusters()); + GPUInfo("MEMREL TrackHitss NCl %d NTrkH %d", processors()->tpcMerger.NMaxClusters(), processors()->tpcMerger.NMergedTrackClusters()); } void GPUChainTracking::PrepareKernelDebugOutput() diff --git a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx index 2b3d719a27dea..118f0bf73a845 100644 --- a/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx +++ b/GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx @@ -261,9 +261,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) if (param().dodEdxEnabled) { GPUMemCpy(RecoStep::TPCMerging, Merger.MergedTracksdEdx(), MergerShadowAll.MergedTracksdEdx(), Merger.NMergedTracks() * sizeof(*Merger.MergedTracksdEdx()), outputStream, 0); } - GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NOutputTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.Clusters(), MergerShadowAll.Clusters(), Merger.NMergedTrackClusters() * sizeof(*Merger.Clusters()), outputStream, 0); if (param().par.earlyTpcTransform) { - GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NOutputTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); + GPUMemCpy(RecoStep::TPCMerging, Merger.ClustersXYZ(), MergerShadowAll.ClustersXYZ(), Merger.NMergedTrackClusters() * sizeof(*Merger.ClustersXYZ()), outputStream, 0); } GPUMemCpy(RecoStep::TPCMerging, Merger.ClusterAttachment(), MergerShadowAll.ClusterAttachment(), Merger.NMaxClusters() * sizeof(*Merger.ClusterAttachment()), outputStream, 0); } @@ -330,7 +330,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) mIOPtrs.nMergedTracks = Merger.NMergedTracks(); 
mIOPtrs.mergedTrackHits = Merger.Clusters(); mIOPtrs.mergedTrackHitsXYZ = Merger.ClustersXYZ(); - mIOPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + mIOPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); mIOPtrs.mergedTrackHitAttachment = Merger.ClusterAttachment(); mIOPtrs.mergedTrackHitStates = Merger.ClusterStateExt(); mIOPtrs.outputTracksTPCO2 = Merger.OutputTracksTPCO2(); @@ -344,7 +344,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) processorsShadow()->ioPtrs.nMergedTracks = Merger.NMergedTracks(); processorsShadow()->ioPtrs.mergedTrackHits = MergerShadow.Clusters(); processorsShadow()->ioPtrs.mergedTrackHitsXYZ = MergerShadow.ClustersXYZ(); - processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NOutputTrackClusters(); + processorsShadow()->ioPtrs.nMergedTrackHits = Merger.NMergedTrackClusters(); processorsShadow()->ioPtrs.mergedTrackHitAttachment = MergerShadow.ClusterAttachment(); processorsShadow()->ioPtrs.mergedTrackHitStates = MergerShadow.ClusterStateExt(); processorsShadow()->ioPtrs.outputTracksTPCO2 = MergerShadow.OutputTracksTPCO2(); @@ -355,7 +355,7 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput) } if (GetProcessingSettings().debugLevel >= 2) { - GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NOutputTrackClusters(), Merger.NClusters()); + GPUInfo("TPC Merger Finished (output clusters %d / input clusters %d)", Merger.NMergedTrackClusters(), Merger.NClusters()); } return 0; } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 1d5a7a0b1df47..16182464c12fe 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -372,9 +372,9 @@ void* GPUTPCGMMerger::SetPointersOutput(void* mem) computePointerWithAlignment(mem, mMergedTracksdEdxAlt, mNMaxTracks); } } - computePointerWithAlignment(mem, mClusters, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, 
mClusters, mNMaxMergedTrackClusters); if (mRec->GetParam().par.earlyTpcTransform) { - computePointerWithAlignment(mem, mClustersXYZ, mNMaxOutputTrackClusters); + computePointerWithAlignment(mem, mClustersXYZ, mNMaxMergedTrackClusters); } computePointerWithAlignment(mem, mClusterAttachment, mNMaxClusters); return mem; @@ -446,7 +446,7 @@ void GPUTPCGMMerger::SetMaxData(const GPUTrackingInOutPointers& io) mNMaxSingleSectorTracks = ntrk; } } - mNMaxOutputTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); + mNMaxMergedTrackClusters = mRec->MemoryScalers()->NTPCMergedTrackHits(mNClusters); if (CAMath::Abs(Param().polynomialField.GetNominalBz()) < (gpu_common_constants::kZeroFieldCut * gpu_common_constants::kCLight)) { mNMaxTracks = mRec->MemoryScalers()->getValue(mNTotalSectorTracks, mNTotalSectorTracks); // 0 magnetic field } else { @@ -1354,14 +1354,14 @@ GPUd() void GPUTPCGMMerger::MergeCE(int32_t nBlocks, int32_t nThreads, int32_t i continue; } - uint32_t newRef = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); - if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxOutputTrackClusters); - for (uint32_t k = newRef; k < mNMaxOutputTrackClusters; k++) { + uint32_t newRef = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, trk[0]->NClusters() + trk[1]->NClusters()); + if (newRef + trk[0]->NClusters() + trk[1]->NClusters() >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_CE_HIT_OVERFLOW, newRef + trk[0]->NClusters() + trk[1]->NClusters(), mNMaxMergedTrackClusters); + for (uint32_t k = newRef; k < mNMaxMergedTrackClusters; k++) { mClusters[k].num = 0; mClusters[k].state = 0; } - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); return; } @@ 
-1711,20 +1711,20 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread nHits = nFilteredHits; } - const uint32_t iOutTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nOutputTrackClusters, (uint32_t)nHits); - if (iOutTrackFirstCluster >= mNMaxOutputTrackClusters) { - raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iOutTrackFirstCluster, mNMaxOutputTrackClusters); - CAMath::AtomicExch(&mMemory->nOutputTrackClusters, mNMaxOutputTrackClusters); + const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); + if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); + CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue; } - GPUTPCGMMergedTrackHit* const cl = mClusters + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHit* const cl = mClusters + iMergedTrackFirstCluster; for (int32_t i = 0; i < nHits; i++) { uint8_t state; if (Param().par.earlyTpcTransform) { const GPUTPCClusterData& c = GetConstantMem()->tpcTrackers[trackClusters[i].sector].ClusterData()[trackClusters[i].id - GetConstantMem()->tpcTrackers[trackClusters[i].sector].Data().ClusterIdOffset()]; - GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; clXYZ[i].x = c.x; clXYZ[i].y = c.y; clXYZ[i].z = c.z; @@ -1759,13 +1759,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread mergedTrack.SetLooper(leg > 0); mergedTrack.SetLegs(leg); mergedTrack.SetNClusters(nHits); - mergedTrack.SetFirstClusterRef(iOutTrackFirstCluster); + mergedTrack.SetFirstClusterRef(iMergedTrackFirstCluster); GPUTPCGMTrackParam& p1 = mergedTrack.Param(); const GPUTPCGMSectorTrack& p2 = *trackParts[firstTrackIndex]; mergedTrack.SetCSide(p2.CSide()); GPUTPCGMBorderTrack b; - const float toX 
= Param().par.earlyTpcTransform ? mClustersXYZ[iOutTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); + const float toX = Param().par.earlyTpcTransform ? mClustersXYZ[iMergedTrackFirstCluster].x : GPUTPCGeometry::Row2X(cl[0].row); if (p2.TransportToX(this, toX, Param().bzCLight, b, GPUCA_MAX_SIN_PHI, false)) { p1.X() = toX; p1.Y() = b.Par()[0]; @@ -1796,13 +1796,13 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread if (Param().rec.tpc.mergeCE) { bool CEside; if (Param().par.earlyTpcTransform) { - const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iOutTrackFirstCluster; + const GPUTPCGMMergedTrackHitXYZ* const clXYZ = mClustersXYZ + iMergedTrackFirstCluster; CEside = (mergedTrack.CSide() != 0) ^ (clXYZ[0].z > clXYZ[nHits - 1].z); } else { auto& cls = mConstantMem->ioPtrs.clustersNative->clustersLinear; CEside = cls[cl[0].num].getTime() < cls[cl[nHits - 1].num].getTime(); } - MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iOutTrackFirstCluster)[CEside ? (nHits - 1) : 0] : nullptr, iOutputTrack); + MergeCEFill(trackParts[CEside ? lastTrackIndex : firstTrackIndex], cl[CEside ? (nHits - 1) : 0], Param().par.earlyTpcTransform ? &(mClustersXYZ + iMergedTrackFirstCluster)[CEside ? 
(nHits - 1) : 0] : nullptr, iOutputTrack); } } // itr } @@ -1855,7 +1855,7 @@ GPUd() void GPUTPCGMMerger::PrepareClustersForFit1(int32_t nBlocks, int32_t nThr GPUd() void GPUTPCGMMerger::PrepareClustersForFit2(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread) { - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nBlocks * nThreads) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nBlocks * nThreads) { if (mSharedCount[mClusters[i].num] > 1) { mClusters[i].state |= GPUTPCGMMergedTrackHit::flagShared; } @@ -1876,7 +1876,7 @@ GPUd() void GPUTPCGMMerger::Finalize0(int32_t nBlocks, int32_t nThreads, int32_t for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTracks; i += nThreads * nBlocks) { mTrackSort[mTrackOrderAttach[i]] = i; } - for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nOutputTrackClusters; i += nThreads * nBlocks) { + for (uint32_t i = iBlock * nThreads + iThread; i < mMemory->nMergedTrackClusters; i += nThreads * nBlocks) { mClusterAttachment[mClusters[i].num] = 0; // Reset adjacent attachment for attached clusters, set correctly below } } diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h index 4487b6d937dc2..54a541ebe0fd6 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.h +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.h @@ -70,7 +70,7 @@ class GPUTPCGMMerger : public GPUProcessor GPUAtomic(uint32_t) nLoopData; GPUAtomic(uint32_t) nUnpackedTracks; GPUAtomic(uint32_t) nMergedTracks; - GPUAtomic(uint32_t) nOutputTrackClusters; + GPUAtomic(uint32_t) nMergedTrackClusters; GPUAtomic(uint32_t) nO2Tracks; GPUAtomic(uint32_t) nO2ClusRefs; const GPUTPCTrack* firstExtrapolatedTracks[NSECTORS]; @@ -113,8 +113,8 @@ class GPUTPCGMMerger : public GPUProcessor GPUhdi() uint32_t NClusters() const { return mNClusters; } GPUhdi() uint32_t NMaxClusters() const { return mNMaxClusters; } GPUhdi() uint32_t 
NMaxTracks() const { return mNMaxTracks; } - GPUhdi() uint32_t NMaxOutputTrackClusters() const { return mNMaxOutputTrackClusters; } - GPUhdi() uint32_t NOutputTrackClusters() const { return mMemory->nOutputTrackClusters; } + GPUhdi() uint32_t NMaxMergedTrackClusters() const { return mNMaxMergedTrackClusters; } + GPUhdi() uint32_t NMergedTrackClusters() const { return mMemory->nMergedTrackClusters; } GPUhdi() const GPUTPCGMMergedTrackHit* Clusters() const { return mClusters; } GPUhdi() GPUTPCGMMergedTrackHit* Clusters() { return (mClusters); } GPUhdi() const GPUTPCGMMergedTrackHitXYZ* ClustersXYZ() const { return mClustersXYZ; } @@ -249,7 +249,7 @@ class GPUTPCGMMerger : public GPUProcessor uint32_t mNTotalSectorTracks = 0; // maximum number of incoming sector tracks uint32_t mNMaxTracks = 0; // maximum number of output tracks uint32_t mNMaxSingleSectorTracks = 0; // max N tracks in one sector - uint32_t mNMaxOutputTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) + uint32_t mNMaxMergedTrackClusters = 0; // max number of clusters in output tracks (double-counting shared clusters) uint32_t mNMaxClusters = 0; // max total unique clusters (in event) uint32_t mNMaxLooperMatches = 0; // Maximum number of candidate pairs for looper matching diff --git a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h index 225f5f0e2c7ad..7306c84cf949c 100644 --- a/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h +++ b/GPU/GPUTracking/SectorTracker/GPUTPCTrack.h @@ -53,9 +53,6 @@ class GPUTPCTrack GPUhd() static int32_t GetSize(int32_t nClust) { return sizeof(GPUTPCTrack) + nClust * sizeof(GPUTPCSectorOutCluster); } GPUhd() const GPUTPCTrack* GetNextTrack() const { return (const GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } GPUhd() GPUTPCTrack* NextTrack() { return (GPUTPCTrack*)(((char*)this) + GetSize(mNHits)); } - GPUhd() void SetOutTrackCluster(int32_t i, const GPUTPCSectorOutCluster& v) { 
((GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)))[i] = v; } - GPUhd() const GPUTPCSectorOutCluster* OutTrackClusters() const { return (const GPUTPCSectorOutCluster*)((char*)this + sizeof(*this)); } - GPUhd() const GPUTPCSectorOutCluster& OutTrackCluster(int32_t i) const { return OutTrackClusters()[i]; } private: int32_t mFirstHitID; // index of the first track cell in the track->cell pointer array From c5498af7e0ed421d066b22fc36d34530bcd6c478 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 19:43:59 +0200 Subject: [PATCH 0548/1914] GPU TPC Merger: Fix out of bounds check --- GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx index 16182464c12fe..533e697cc5852 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMMerger.cxx @@ -1513,7 +1513,6 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread GPUTPCGMSectorTrack* trackParts[kMaxParts]; for (int32_t itr = iBlock * nThreads + iThread; itr < SectorTrackInfoLocalTotal(); itr += nThreads * nBlocks) { - GPUTPCGMSectorTrack& track = mSectorTrackInfos[itr]; if (track.PrevSegmentNeighbour() >= 0) { @@ -1712,7 +1711,7 @@ GPUd() void GPUTPCGMMerger::CollectMergedTracks(int32_t nBlocks, int32_t nThread } const uint32_t iMergedTrackFirstCluster = CAMath::AtomicAdd(&mMemory->nMergedTrackClusters, (uint32_t)nHits); - if (iMergedTrackFirstCluster >= mNMaxMergedTrackClusters) { + if (iMergedTrackFirstCluster + nHits > mNMaxMergedTrackClusters) { raiseError(GPUErrors::ERROR_MERGER_HIT_OVERFLOW, iMergedTrackFirstCluster, mNMaxMergedTrackClusters); CAMath::AtomicExch(&mMemory->nMergedTrackClusters, mNMaxMergedTrackClusters); continue; From 0a7f3df8bf7e24304a1d2882061f8589e746598f Mon Sep 17 00:00:00 2001 From: David Rohr Date: Wed, 21 May 2025 21:06:40 +0200 Subject: [PATCH 0549/1914] GPU: Add 
memoryScaling fuzzing debug option --- GPU/GPUTracking/Base/GPUReconstruction.cxx | 2 +- GPU/GPUTracking/Base/GPUReconstructionCPU.cxx | 8 ++++++ .../DataTypes/GPUMemorySizeScalers.cxx | 26 +++++++++++++++++++ .../DataTypes/GPUMemorySizeScalers.h | 11 +++++--- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + GPU/GPUTracking/Interface/GPUO2Interface.cxx | 2 +- 6 files changed, 45 insertions(+), 5 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstruction.cxx b/GPU/GPUTracking/Base/GPUReconstruction.cxx index a4e5d5e1189f5..ad7a31cbd7470 100644 --- a/GPU/GPUTracking/Base/GPUReconstruction.cxx +++ b/GPU/GPUTracking/Base/GPUReconstruction.cxx @@ -304,7 +304,7 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice() mProcessingSettings->rtc.optConstexpr = false; } - mMemoryScalers->factor = GetProcessingSettings().memoryScalingFactor; + mMemoryScalers->scalingFactor = GetProcessingSettings().memoryScalingFactor; mMemoryScalers->conservative = GetProcessingSettings().conservativeMemoryEstimate; mMemoryScalers->returnMaxVal = GetProcessingSettings().forceMaxMemScalers != 0; if (GetProcessingSettings().forceMaxMemScalers > 1) { diff --git a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx index 5f80a56e9e64e..2d1061616d907 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionCPU.cxx @@ -216,6 +216,14 @@ int32_t GPUReconstructionCPU::ExitDevice() int32_t GPUReconstructionCPU::RunChains() { mMemoryScalers->temporaryFactor = 1.; + if (GetProcessingSettings().memoryScalingFuzz) { + static std::mt19937 rng; + static std::uniform_int_distribution dist(0, 1000000); + uint64_t fuzzFactor = GetProcessingSettings().memoryScalingFuzz == 1 ? 
dist(rng) : GetProcessingSettings().memoryScalingFuzz; + GPUInfo("Fuzzing memory scaling factor with %lu", fuzzFactor); + mMemoryScalers->fuzzScalingFactor(fuzzFactor); + } + mStatNEvents++; mNEventsProcessed++; diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx index 8b8fbc3ecae20..42ac2e8015f45 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.cxx @@ -15,6 +15,8 @@ #include "GPUMemorySizeScalers.h" #include "GPULogging.h" +#include + using namespace o2::gpu; void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory) @@ -36,3 +38,27 @@ void GPUMemorySizeScalers::rescaleMaxMem(size_t newAvailableMemory) tpcMaxMergedTrackHits = (double)tmp.tpcMaxMergedTrackHits * scaleFactor; availableMemory = newAvailableMemory; } + +double GPUMemorySizeScalers::getScalingFactor() +{ + if (!doFuzzing) { + return scalingFactor; + } + static std::uniform_int_distribution dist(0, 1000000); + static std::mt19937 rng; + if (fuzzSeed) { + rng = std::mt19937(fuzzSeed); + fuzzLimit = dist(rng) / 10; + fuzzSeed = 0; + } + if (dist(rng) > fuzzLimit) { + return scalingFactor; + } + return scalingFactor * 0.000001 * dist(rng); +} + +void GPUMemorySizeScalers::fuzzScalingFactor(uint64_t seed) +{ + fuzzSeed = seed; + doFuzzing = true; +} diff --git a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h index e5012d86742f8..164ecb32c26c7 100644 --- a/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h +++ b/GPU/GPUTracking/DataTypes/GPUMemorySizeScalers.h @@ -28,7 +28,9 @@ struct GPUMemorySizeScalers { size_t nITSTracks = 0; // General scaling factor - double factor = 1; + double scalingFactor = 1; + uint64_t fuzzSeed = 0; + uint64_t fuzzLimit = 0; double temporaryFactor = 1; bool conservative = 0; @@ -64,11 +66,14 @@ struct GPUMemorySizeScalers { size_t tpcMaxMergedTrackHits = 200000000; size_t availableMemory = 
20500000000; bool returnMaxVal = false; + bool doFuzzing = false; void rescaleMaxMem(size_t newAvailableMemory); + double getScalingFactor(); + void fuzzScalingFactor(uint64_t seed); inline size_t getValue(size_t maxVal, size_t val) { - return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * factor * temporaryFactor); + return returnMaxVal ? maxVal : (std::min(maxVal, offset + val) * (doFuzzing == 0 ? scalingFactor : getScalingFactor()) * temporaryFactor); } inline size_t NTPCPeaks(size_t tpcDigits, bool perSector = false) { return getValue(perSector ? tpcMaxPeaks : (GPUCA_NSECTORS * tpcMaxPeaks), hitOffset + tpcDigits * tpcPeaksPerDigit); } @@ -81,7 +86,7 @@ struct GPUMemorySizeScalers { inline size_t NTPCSectorTrackHits(size_t tpcHits, uint8_t withRejection = 0) { return getValue(tpcMaxSectorTrackHits, tpcHits * (withRejection ? tpcSectorTrackHitsPerHitWithRejection : tpcSectorTrackHitsPerHit)); } inline size_t NTPCMergedTracks(size_t tpcSectorTracks) { return getValue(tpcMaxMergedTracks, tpcSectorTracks * (conservative ? 1.0 : tpcMergedTrackPerSectorTrack)); } inline size_t NTPCMergedTrackHits(size_t tpcSectorTrackHitss) { return getValue(tpcMaxMergedTrackHits, tpcSectorTrackHitss * tpcMergedTrackHitPerSectorHit); } - inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * factor * temporaryFactor); } + inline size_t NTPCUnattachedHitsBase1024(int32_t type) { return (returnMaxVal || conservative) ? 1024 : std::min(1024, tpcCompressedUnattachedHitsBase1024[type] * (doFuzzing == 0 ? 
scalingFactor : getScalingFactor()) * temporaryFactor); } }; } // namespace o2::gpu diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 12f40cda4c398..238994ee53af5 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -312,6 +312,7 @@ AddOption(memoryAllocationStrategy, int8_t, 0, "", 0, "Memory Allocation Strageg AddOption(forceMemoryPoolSize, uint64_t, 1, "memSize", 0, "Force size of allocated GPU / page locked host memory", min(0ul)) AddOption(forceHostMemoryPoolSize, uint64_t, 0, "hostMemSize", 0, "Force size of allocated host page locked host memory (overriding memSize)", min(0ul)) AddOption(memoryScalingFactor, float, 1.f, "", 0, "Factor to apply to all memory scalers") +AddOption(memoryScalingFuzz, uint64_t, 0, "", 0, "Fuzz the memoryScalingFactor (0 disable, 1 enable, >1 set seed", def(1)) AddOption(conservativeMemoryEstimate, bool, false, "", 0, "Use some more conservative defaults for larger buffers during TPC processing") AddOption(tpcInputWithClusterRejection, uint8_t, 0, "", 0, "Indicate whether the TPC input is CTF data with cluster rejection, to tune buffer estimations") AddOption(forceMaxMemScalers, uint64_t, 0, "", 0, "Force using the maximum values for all buffers, Set a value n > 1 to rescale all maximums to a memory size of n") diff --git a/GPU/GPUTracking/Interface/GPUO2Interface.cxx b/GPU/GPUTracking/Interface/GPUO2Interface.cxx index 81eb2c285192b..f7e972315a739 100644 --- a/GPU/GPUTracking/Interface/GPUO2Interface.cxx +++ b/GPU/GPUTracking/Interface/GPUO2Interface.cxx @@ -110,7 +110,7 @@ int32_t GPUO2Interface::Initialize(const GPUO2InterfaceConfiguration& config) return (1); } if (!mCtx[i].mRec->IsGPU() && mCtx[i].mRec->GetProcessingSettings().memoryAllocationStrategy == GPUMemoryResource::ALLOCATION_INDIVIDUAL) { - mCtx[i].mRec->MemoryScalers()->factor *= 2; + mCtx[i].mRec->MemoryScalers()->scalingFactor *= 2; } } if 
(mConfig->configProcessing.doublePipeline) { From 2673d512ffe9e1e1f658ace2ccf93ac799501b56 Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Thu, 22 May 2025 11:17:08 +0200 Subject: [PATCH 0550/1914] Drop obsolete documentation (#14309) --- .cmake-format.py | 6 -- Algorithm/CMakeLists.txt | 3 - Algorithm/doc/Algorithm.3.in | 12 --- Algorithm/doc/algorithm_parser.3.in | 135 ---------------------------- CMakeLists.txt | 5 -- Examples/Ex5/CMakeLists.txt | 2 - Examples/Ex5/doc/ex5.7.in | 62 ------------- cmake/O2TargetManPage.cmake | 79 ---------------- doc/CMakeInstructions.md | 18 ---- doc/FairMQDevice.1.in | 64 ------------- doc/ManPages.md | 23 ----- doc/o2-timeframe-file-format.1.in | 27 ------ doc/o2.1.in | 19 ---- 13 files changed, 455 deletions(-) delete mode 100644 Algorithm/doc/Algorithm.3.in delete mode 100644 Algorithm/doc/algorithm_parser.3.in delete mode 100644 Examples/Ex5/doc/ex5.7.in delete mode 100644 cmake/O2TargetManPage.cmake delete mode 100644 doc/FairMQDevice.1.in delete mode 100644 doc/ManPages.md delete mode 100644 doc/o2-timeframe-file-format.1.in delete mode 100644 doc/o2.1.in diff --git a/.cmake-format.py b/.cmake-format.py index 9827eecd329c4..ae092bc09f363 100644 --- a/.cmake-format.py +++ b/.cmake-format.py @@ -66,12 +66,6 @@ "HEADERS": '*', } }, - "o2_target_man_page": { - "kwargs": { - "NAME": '+', - "SECTION": '*', - } - }, "add_root_dictionary": { "kwargs": { "LINKDEF": '+', diff --git a/Algorithm/CMakeLists.txt b/Algorithm/CMakeLists.txt index b245562c7cc93..ed7a42a96e528 100644 --- a/Algorithm/CMakeLists.txt +++ b/Algorithm/CMakeLists.txt @@ -11,9 +11,6 @@ o2_add_header_only_library(Algorithm INTERFACE_LINK_LIBRARIES O2::Headers) -o2_target_man_page(Algorithm NAME Algorithm SECTION 3) -o2_target_man_page(Algorithm NAME algorithm_parser SECTION 3) - o2_add_test(o2formatparser SOURCES test/o2formatparser.cxx COMPONENT_NAME Algorithm diff --git a/Algorithm/doc/Algorithm.3.in 
b/Algorithm/doc/Algorithm.3.in deleted file mode 100644 index eaf618ee68da2..0000000000000 --- a/Algorithm/doc/Algorithm.3.in +++ /dev/null @@ -1,12 +0,0 @@ -.\" Alice O2 manpage for module Algorithm -.TH "AliceO2" 3 "17 Jan 2017" "1.0" "Algorithm man page" - -.SH NAME -AliceO2 - module -.B Algorithm - -.SH DESCRIPTION -A collection of generic algorithms for Alice O2 - -.SH SEE ALSO -algorithm_parser(3) diff --git a/Algorithm/doc/algorithm_parser.3.in b/Algorithm/doc/algorithm_parser.3.in deleted file mode 100644 index 98f45df279669..0000000000000 --- a/Algorithm/doc/algorithm_parser.3.in +++ /dev/null @@ -1,135 +0,0 @@ -.\" Alice O2 manpage for parser algorithms -.TH "AliceO2" 3 "17 Jan 2017" "1.0" "Algorithm Parser man page" - -.SH NAME -AliceO2 - module -.B Algorithm -- data parsers - -.SH SYNOPSIS -.B ForwardParser< -.I SomeHeaderType -, -.I SomeTrailerType -.B > - -.B ReverseParser< -.I SomeHeaderType -, -.I SomeTrailerType -.B > - -.SS Public types -.TP 2 -// a compound of header, data, and trailer -.B struct FrameInfo { - using PtrT = const PayloadType*; - const HeaderType* header = nullptr; - const TrailerType* trailer = nullptr; - PtrT payload = nullptr; - size_t length = 0; - -.B }; - -.TP 2 -.B using CheckHeaderFct = std::function; -alias for callback checking the header, return true if the object is a valid header -.TP 2 -.B using CheckTrailerFct = std::function; -alias for callback checking the trailer -.TP 2 -.B using GetFrameSizeFct = std::function; -alias for callback to get the complete frame size including header, trailer and the data -.TP 2 -.B using InsertFct = std::function; -function callback to insert/handle one frame into, sequentially called for all frames if the whole block has a valid format - -.SS Public member functions -.TP 2 -.B template -.B int parse(const InputType* \fIbuffer\fB, size_t \fIbufferSize\fB, CheckHeaderFct \fIcheckHeader\fB, CheckTrailerFct \fIcheckTrailer\fB, GetFrameSizeFct \fIgetFrameSize\fB, InsertFct \fIinsert\fB) 
- -.SS Public member variables -.TP 2 -.B static const size_t headOffset = typesize::size; -the length offset due to header -.TP 2 -.B static const size_t tailOffset = typesize::size; -the length offset due to trailer -.TP 2 -.B static const size_t totalOffset = headOffset + tailOffset; -total length offset due to header and trailer - -.SH DESCRIPTION -Template utilities for parsing of data sequences. Each entry in the sequence consist of a header, variable payload, and optionally a trailer. The three parts are collected in the FrameInfo structure for every entry. - -Callback functions for checking header and trailer integrity, getting length of the current frame and handling of a frame. - -.SS ForwardParser -The size is expected to be part of the header, parsing starts at beginning of buffer. -Trailer type can be void, which is also the default template parameter. That -allows to define a frame consisting of only header and data. - -.SS ReverseParser -The size is expected to be part of the trailer, the parsing is thus in reverse direction. Also the insert callback is called with the entries starting form the end of the buffer. -An easy extension can be to reverse the order of the inserts, meaning that the entries are read from the beginning. - -.SH EXAMPLES -.SS ReverseParser example -.EX -using SomeParser = ReverseParser; -SomeParser parser; -std::vector frames; -parser.parse(ptr, size, - [] (const typename SomeParser::HeaderType& h) { - // check the header - return true; - }, - [] (const typename SomeParser::TrailerType& t) { - // check the trailer - return true; - }, - [] (const typename SomeParser::TrailerType& t) { - // get the size of the frame including payload - // and header and trailer size, e.g. 
payload size - // from a trailer member - return t.payloadSize + SomeParser::totalOffset; - }, - [&frames] (typename SomeParser::FrameInfo& info) { - frames.emplace_back(info); - return true; - } - ) -.EE - -.SS ForwardParser example with frame consisting of header and payload -.EX -using SomeParser = ForwardParser; -SomeParser parser; -std::vector frames; -parser.parse(ptr, size, - [] (const typename SomeParser::HeaderType& h) { - // check the header - return true; - }, - [] (const typename SomeParser::HeaderType& h) { - // get the size of the frame including payload - // and header and trailer size, e.g. payload size - // from a header member - return h.payloadSize + SomeParser::totalOffset; - }, - [&frames] (typename SomeParser::FrameInfo& info) { - frames.emplace_back(info); - return true; - } - ) -.EE - -.SH BUGS, CONTRIBUTIONS -Please add an issue to -.UR https://github.com/AliceO2Group/AliceO2/issues -.UE - -.SH SEE ALSO -.UR https://github.com/AliceO2Group/AliceO2/blob/dev/Algorithm/include/Algorithm/Parser.h -.UE diff --git a/CMakeLists.txt b/CMakeLists.txt index b71d05175e9e9..adecffc0f4dbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -86,7 +86,6 @@ include(O2AddTestRootMacro) include(O2ReportNonTestedMacros) include(O2TargetRootDictionary) include(O2DataFile) -include(O2TargetManPage) include(O2AddWorkflow) include(O2SetROOTPCMDependencies) include(O2AddHipifiedExecutable) @@ -117,10 +116,6 @@ endif() add_subdirectory(config) -add_custom_target(man ALL) -o2_target_man_page(man NAME o2) -o2_target_man_page(man NAME FairMQDevice) - # Testing and packaging only needed if we are the top level directory if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) # Documentation diff --git a/Examples/Ex5/CMakeLists.txt b/Examples/Ex5/CMakeLists.txt index 3951709a8a050..f6216bcebdad2 100644 --- a/Examples/Ex5/CMakeLists.txt +++ b/Examples/Ex5/CMakeLists.txt @@ -12,5 +12,3 @@ o2_add_executable(ex5 SOURCES src/run.cxx COMPONENT_NAME example TARGETVARNAME 
targetName) - -o2_target_man_page(${targetName} NAME ex5 SECTION 7) diff --git a/Examples/Ex5/doc/ex5.7.in b/Examples/Ex5/doc/ex5.7.in deleted file mode 100644 index 5d346f7f4798d..0000000000000 --- a/Examples/Ex5/doc/ex5.7.in +++ /dev/null @@ -1,62 +0,0 @@ -.\" Manpage for ex5. - -.\" this file gives some basic introduction on how to use the -.\" roff format to write man pages -.\" NOTE: all formatting commands start with a dot and must be -.\" at the beginning of the line - -.\" the header section -.TH AliceO2 1 "07 July 2019" "1.0" "ex5 man page" - -.\" .SH starts a new section, NAME is the first section -.SH NAME - -ex5 - A simple example for AliceO2 submodules - -.\" next is the SYNOPSIS section -.SH SYNOPSIS - -.\" some bold formatted text -.B ex5 -.\" alternate between roman and bold font, separated by blank, i.e. the -.\" square backets in roman and the option in bold -.RB [ --someoption ] -.\" same here, in addition, the dots indicating the argument string are in -.\" italic. Note: the quoted " [" makes sure there is a blank -.RB [ --debug " [" --gdb=\fI...\fR ]] -.\" italic formatting (underline in man) -.I mandatory_argument -.\" alternate roman and italic -.RI [ further_arguments... ] - - -.SH DESCRIPTION - -ex5 is an example to demonstrate the AliceO2 cmake setup of -modules. This document illustrates creation of man pages. All options and -arument are pure fictive. - -.SH OPTIONS - -.\" indented paragraph with label, indentation is set to the optional number -.TP 5 -.B --someoption -This is a fancy option of the example. 
- -.TP 5 -.B --debug -Run everything with debugging options - -.TP 5 -.B --gdb=\fI...\fR -Add additional information to run with gdb - -.SH SEE ALSO - -ex5(1) - -http://gnustep.made-it.com/man-groff.html - -.SH BUGS - -No known bugs diff --git a/cmake/O2TargetManPage.cmake b/cmake/O2TargetManPage.cmake deleted file mode 100644 index 5d29447c52536..0000000000000 --- a/cmake/O2TargetManPage.cmake +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2019-2020 CERN and copyright holders of ALICE O2. -# See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. -# All rights not expressly granted are reserved. -# -# This software is distributed under the terms of the GNU General Public -# License v3 (GPL Version 3), copied verbatim in the file "COPYING". -# -# In applying this license CERN does not waive the privileges and immunities -# granted to it by virtue of its status as an Intergovernmental Organization -# or submit itself to any jurisdiction. - -include_guard() - -# Generate a man page -# -# Make sure we have nroff. If that is not the case we will not generate man -# pages -find_program(NROFF_FOUND nroff) - -function(o2_target_man_page target) - if(NOT NROFF_FOUND) - return() - endif() - cmake_parse_arguments(PARSE_ARGV - 1 - A - "" - "NAME;SECTION" - "") - - # check the target exists - if(NOT TARGET ${target}) - # try with out naming conventions - set(baseTargetName ${target}) - o2_name_target(${baseTargetName} NAME target) - if(NOT TARGET ${target}) - # not a library, maybe an executable ? - o2_name_target(${baseTargetName} NAME target IS_EXE) - if(NOT TARGET ${target}) - message(FATAL_ERROR "Target ${target} does not exist") - endif() - endif() - endif() - - if(NOT A_SECTION) - set(A_SECTION 1) - endif() - if(NOT A_NAME) - message( - FATAL_ERROR - "You must provide the name of the input man file in doc/.
.in" - ) - endif() - if(NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in) - message( - FATAL_ERROR - "Input file ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in does not exist" - ) - endif() - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in - COMMAND nroff - -Tascii - -man - ${CMAKE_CURRENT_SOURCE_DIR}/doc/${A_NAME}.${A_SECTION}.in - > - ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - VERBATIM) - # the prefix man. for the target name avoids circular dependencies for the man - # pages added at top level. Simply droping the dependency for those does not - # invoke the custom command on all systems. - set(CUSTOM_TARGET_NAME man.${A_NAME}.${A_SECTION}) - add_custom_target(${CUSTOM_TARGET_NAME} - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION}) - add_dependencies(${target} ${CUSTOM_TARGET_NAME}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${A_NAME}.${A_SECTION} - DESTINATION ${CMAKE_INSTALL_DATADIR}/man/man${A_SECTION}) -endfunction() diff --git a/doc/CMakeInstructions.md b/doc/CMakeInstructions.md index c4f55140f611c..e0438c985c41d 100644 --- a/doc/CMakeInstructions.md +++ b/doc/CMakeInstructions.md @@ -349,24 +349,6 @@ Note as well that some (very few) tests are ran only for some configurations ctest -C RelWithDebInfo ``` -#### [Ex5](../Examples/Ex5) Adding a man page - -If a module provides one or more executables, it might be of interest for the users of those executables to have access to a man page for them. Ex5 illustates that use case. - - . 
- ├── CMakeLists.txt - ├── README.md - ├── doc - │   └── ex5.7.in - └── src - └── run.cxx - -The [man page](ManPages.md) is created using : - - o2_target_man_page([targetName] NAME ex5 SECTION 7) - -where `NAME xx` refers to a file `doc/xx.[SECTION].in`, and the actual `targetName` can be found from the base target name (ex5 in that case) using the [o2_name_target](../cmake/O2NameTarget.cmake) function. - ## CTest In the build directory of O2, if you launch the `ctest` command, all the O2 tests will be ran, which is not always what you want/need, in particular during development. diff --git a/doc/FairMQDevice.1.in b/doc/FairMQDevice.1.in deleted file mode 100644 index 4e33e3379ae82..0000000000000 --- a/doc/FairMQDevice.1.in +++ /dev/null @@ -1,64 +0,0 @@ -.\" Manpage for general FairMQ device properties and options. -.TH AliceO2 1 "10 Apr 2022" "1.1" "FairMQ device man page" -.SH NAME -fair::mq::Device - The basis for software devices in O2 - -.SH SYNOPSIS -.I deviceapp -.BI --id deviceid -.BI --mq-config " configfile" -| -.BI --channel-config " config" -| -.BI --config-json-string " arg" -| -.BI --config-xml-string " arg" - -.SH DESCRIPTION -All AliceO2 devices derive from fair::mq::Device which provides the -transport functionality. - -.SH OPTIONS -.SS Common FairMQ device options -.TP 2 -.BI --id " id" -A unique identifier of the device -.TP 2 -.BI --verbosity " arg " \fR(=DEBUG) -.RS -Verbosity level : -.B TRACE -.B DEBUG -.B RESULTS -.B INFO -.B WARN -.B ERROR -.B STATE -.B NOLOG -.RE - -.SS FairMQ device channel configuration -.TP 2 -.BI --config-xml-string " arg " -XML input as command line string. -.TP 2 -.BI --config-json-string " arg " -JSON input as command line string. -.TP 2 -.BI --mq-config " arg " -JSON/XML input as file. 
The configuration object will check xml or -json file extention and will call the json or xml parser accordingly -.TP 2 -.BI --channel-config " args " -channel configuration as comma separated key=\fIvalue\fR pairs -.RS -Valid Keys: -.B name -.B type -.B method -.B address -.B property -.RE - -.SH MORE OPTIONS -Use '\fIdeviceapp\fR --help' to get a full list of options. diff --git a/doc/ManPages.md b/doc/ManPages.md deleted file mode 100644 index 8fb7b48b07beb..0000000000000 --- a/doc/ManPages.md +++ /dev/null @@ -1,23 +0,0 @@ - - -# Man Pages - -You can create man pages in nroff format under: - - Subsystem/Module/docs/.
.in - -and it will create a man page for you in: - - ${CMAKE_BINARY_DIR}/stage/share/man/man
- -if you add: - - o2_target_man_page(target NAME SECTION
) - -to your `CMakeLists.txt`. Note the man page is "attached" to a given target. -If `SECTION` is omitted it will default to 1 -(executables). For more informantion about nroff format you can look at: - - http://www.linuxjournal.com/article/1158 diff --git a/doc/o2-timeframe-file-format.1.in b/doc/o2-timeframe-file-format.1.in deleted file mode 100644 index df36ff7256e33..0000000000000 --- a/doc/o2-timeframe-file-format.1.in +++ /dev/null @@ -1,27 +0,0 @@ -.\" Manpage for O2. -.TH man 1 "19 May 2017" "1.0" "Alice O2 Timeframe Format" - -.SH DESCRIPTION - -O2 is Alice next generation software framework to be used for RUN3. This is a -quick desctiption of the timeframe file format as dumped by -o2-timeframe-writer-device and read by the o2-timeframe-reader-device. - -The file format is simply a dump of the timeframe on disk. Multiple timeframes -can be concatenated resulting in a valid file. The format is as follow: - -o2tf: Timeframe [Timeframe [..]] -Timeframe: Subtimeframe [Subtimeframe [...]] TimeframeIndex -Subtimeframe: Header Payload -Header: DataHeader derived header stack -Payload: binary blob -TimeframeIndex: IndexElement [IndexElement [..]] -IndexElement: DataHeader Payload -Position in timeframe: int (4 bytes) -DataHeader: only the DataHeader part -Payload: binary blob - -.SH DISCLAIMER - -Notice that this file format is a work in progress and cannot be used for -anything but debugging purposes. diff --git a/doc/o2.1.in b/doc/o2.1.in deleted file mode 100644 index 57d74acf1640d..0000000000000 --- a/doc/o2.1.in +++ /dev/null @@ -1,19 +0,0 @@ -.\" Manpage for O2. -.TH man 1 "19 May 2017" "1.0" "Alice O2 man page" - -.SH NAME - -O2 is Alice next generation software framework to be used for RUN3. 
- -.SH DEVICES - -o2-alicehlt-wrapper-device(1), o2-subframebuilder-device(1) - -.\.SH TOOLS - -.SH SEE ALSO -FairMQDevice(1) - -.SH BUGS - -No bugs whatsoever From b41a2a1e13cd537654c05d5dedb25b9c63e473da Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 14:44:22 +0200 Subject: [PATCH 0551/1914] Update GPU documentation build-standalone.md --- GPU/documentation/build-standalone.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/GPU/documentation/build-standalone.md b/GPU/documentation/build-standalone.md index 891d16b4dc2c4..bf84fd3edf0fb 100644 --- a/GPU/documentation/build-standalone.md +++ b/GPU/documentation/build-standalone.md @@ -55,9 +55,20 @@ An example line would .e.g. be ``` Some other noteworthy options are `--display` to run the GPU event display, `--qa` to run a QA task on MC data, `--runs` and `--runs2` to run multiple iterations of the benchmark, `--printSettings` to print all the settings that were used, `--memoryStat` to print memory statistics, `--sync` to run with settings for online reco, `--syncAsync` to run online reco first, and then offline reco on the produced TPC CTF data, `--setO2Settings` to use some defaults as they are in O2 not in the standalone version, `--PROCdoublePipeline` to enable the double-threaded pipeline for best performance (works only with multiple iterations, and not in async mode), and `--RTCenable` to enable the run time compilation improvements (check also `--RTCcacheOutput`). -An example for a benchmark in online mode would be: +With `--memSize` you can control the amount of GPU memory to use, and with `--inputMemory` and `--outputMemory` GPU-registered input/output buffers can be preallocated (as is the SHM memory when running in O2). 
+An example for a benchmark that runs with the same settings as in online data taking would be: ``` -./ca -e o2-pbpb-100 -g --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 +./ca -e o2-pbpb-100 -g --gpuType HIP --sync --setO2Settings --PROCdoublePipeline --RTCenable --runs 10 --memSize 15000000000 --inputMemory 6000000000 --outputMemory 10000000000 +``` + +For setting a GPU device, you can use the `--gpuDevice` option with the GPU index. +For ROCm with many GPUs, however, like on the EPNs with 8 GPUs, it is better to set the `ROCR_VISIBLE_DEVICES` env variable to the GPU you want to use. +MAKE SURE TO CHECK IF IT IS ALREADY SET BY SLURM WHEN YOU GET THE NODE!!! IN THAT CASE, USE ONLY THE GPUS ASSIGNED TO YOU BY SLURM! + +Finally, also NUMA pinning can play a role. On the EPN, you should use memory and GPUs and CPU cores from the same NUMA domain. +For a realistic benchmark using GPU 0 on the EPNs, please use: +``` +ROCR_VISIBLE_DEVICES=0 numactl --membind 0 --cpunodebind 0 ./ca -e o2-pbpb-100 --gpuType HIP --memSize 15000000000 --inputMemory 6000000000 --outputMemory 10000000000 --sync --runs 10 --RTCenable --setO2Settings --PROCdoublePipeline ``` # Generating a dataset @@ -84,3 +95,5 @@ To dump standalone data from CTF raw data in `myctf.root`, you can use the same ``` CTFINPUT=1 INPUT_FILE_LIST=myctf.root CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow="GPU_global.dump=1;" WORKFLOW_DETECTORS=TPC SHMSIZE=16000000000 $O2_ROOT/prodtests/full-system-test/dpl-workflow.sh ``` + +On the EPNs, you can find some reference data sets at `/home/drohr/standalone/events`.
From 010f8676d1c1e4849281bc18405192dea0c9bc58 Mon Sep 17 00:00:00 2001 From: Ernst Hellbar Date: Wed, 21 May 2025 09:47:17 +0200 Subject: [PATCH 0552/1914] dpl-workflow.sh: using MI100 serialization workaround by default again, with option to disable it with env var --- prodtests/full-system-test/dpl-workflow.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/prodtests/full-system-test/dpl-workflow.sh b/prodtests/full-system-test/dpl-workflow.sh index 4e6cbbebe7db7..dd575099857f4 100755 --- a/prodtests/full-system-test/dpl-workflow.sh +++ b/prodtests/full-system-test/dpl-workflow.sh @@ -270,6 +270,7 @@ if [[ $GPUTYPE == "HIP" ]]; then TIMESLICEOFFSET=$(($GPU_FIRST_ID + ($NUMAGPUIDS != 0 ? ($NGPUS * $NUMAID) : 0))) GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\"" fi + [[ $EPNSYNCMODE == 1 || ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;" #export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2 else GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;" From b6f15f87e212a896e8c56f3cf475d1ef2c677889 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 13:53:13 +0200 Subject: [PATCH 0553/1914] GPU RTC: Add keepTempFiles option --- GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu | 6 ++++-- GPU/GPUTracking/Definitions/GPUSettingsList.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index c8e5420a8bcf3..907bd08779ec2 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -420,8 +420,10 @@ void GPUReconstructionCUDA::genAndLoadRTC() mInternals->kernelModules.emplace_back(std::make_unique()); GPUChkErr(cuModuleLoad(mInternals->kernelModules.back().get(), (filename + "_" + std::to_string(i) + mRtcBinExtension).c_str())); } - 
remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); - remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); + if (!GetProcessingSettings().rtctech.keepTempFiles) { + remove((filename + "_" + std::to_string(i) + mRtcSrcExtension).c_str()); + remove((filename + "_" + std::to_string(i) + mRtcBinExtension).c_str()); + } } if (GetProcessingSettings().rtctech.runTest == 2) { return; diff --git a/GPU/GPUTracking/Definitions/GPUSettingsList.h b/GPU/GPUTracking/Definitions/GPUSettingsList.h index 238994ee53af5..8dabd17f95d23 100644 --- a/GPU/GPUTracking/Definitions/GPUSettingsList.h +++ b/GPU/GPUTracking/Definitions/GPUSettingsList.h @@ -233,6 +233,7 @@ AddOption(cacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the c AddOption(prependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string") AddOption(overrideArchitecture, std::string, "", "", 0, "Override arhcitecture part of RTC compilation command line") // Part of cmdLine, so checked against the cache AddOption(loadLaunchBoundsFromFile, std::string, "", "", 0, "Load a parameter object containing the launch bounds from a file") +AddOption(keepTempFiles, bool, false, "", 0, "Keep temporary source and object files") AddHelp("help", 'h') EndConfig() From e060099977dac22c9eea41bc27234e45ee590a27 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 10:16:48 +0200 Subject: [PATCH 0554/1914] GPU: Fix CUDA GetMemInfo must use correct device and simplify context creation / cleanup --- .../Base/cuda/GPUReconstructionCUDA.cu | 35 +++++++------------ 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu index 907bd08779ec2..0d5666b8ee790 100644 --- a/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu +++ b/GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu @@ -125,34 +125,25 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } 
std::vector devicesOK(count, false); std::vector devMemory(count, 0); - bool contextCreated = false; + std::vector contextCreated(count, false); for (int32_t i = 0; i < count; i++) { if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Examining device %d", i); } size_t free, total; -#ifndef __HIPCC__ // CUDA - if (GPUChkErrI(cudaInitDevice(i, 0, 0))) { -#else // HIP - if (GPUChkErrI(hipSetDevice(i))) { -#endif + if (GPUChkErrI(cudaSetDevice(i))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Couldn't create context for device %d. Skipping it.", i); } continue; } - contextCreated = true; + contextCreated[i] = true; if (GPUChkErrI(cudaMemGetInfo(&free, &total))) { if (GetProcessingSettings().debugLevel >= 4) { GPUWarning("Error obtaining CUDA memory info about device %d! Skipping it.", i); } - GPUChkErr(cudaDeviceReset()); continue; } - if (count > 1) { - GPUChkErr(cudaDeviceReset()); - contextCreated = false; - } if (GetProcessingSettings().debugLevel >= 4) { GPUInfo("Obtained current memory usage for device %d", i); } @@ -212,13 +203,20 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() bestDevice = GetProcessingSettings().deviceNum; } } - if (noDevice) { - if (contextCreated) { + for (int32_t i = 0; i < count; i++) { + if (contextCreated[i] && (noDevice || i != bestDevice)) { + GPUChkErrI(cudaSetDevice(i)); GPUChkErrI(cudaDeviceReset()); } + } + if (noDevice) { return (1); } mDeviceId = bestDevice; + if (GPUChkErrI(cudaSetDevice(mDeviceId))) { + GPUError("Could not set CUDA Device!"); + return (1); + } GPUChkErrI(cudaGetDeviceProperties(&deviceProp, mDeviceId)); @@ -262,15 +260,6 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime() } #endif -#ifndef __HIPCC__ // CUDA - if (contextCreated == 0 && GPUChkErrI(cudaInitDevice(mDeviceId, 0, 0))) { -#else // HIP - if (contextCreated == 0 && GPUChkErrI(hipSetDevice(mDeviceId))) { -#endif - GPUError("Could not set CUDA Device!"); - return (1); - } - #ifndef __HIPCC__ // CUDA if 
(GPUChkErrI(cudaDeviceSetLimit(cudaLimitStackSize, GPUCA_GPU_STACK_SIZE))) { GPUError("Error setting CUDA stack size"); From 52937edaf56f75f1e347141e4105a31a95c55fc9 Mon Sep 17 00:00:00 2001 From: David Rohr Date: Thu, 22 May 2025 13:43:20 +0200 Subject: [PATCH 0555/1914] GPU Display: Store pointer to GPUSettingsProcessing, so we do not need to copy debugLevel to GPUParam --- .../Base/GPUReconstructionTimeframe.cxx | 3 +-- .../Interface/GPUO2InterfaceDisplay.cxx | 2 +- GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx | 2 +- GPU/GPUTracking/display/GPUDisplay.cxx | 18 ++++++++++-------- GPU/GPUTracking/display/GPUDisplay.h | 8 +++++++- .../display/GPUDisplayInterface.cxx | 4 ++-- GPU/GPUTracking/display/GPUDisplayInterface.h | 14 ++++++++++++-- .../backend/GPUDisplayBackendOpenGL.cxx | 2 +- .../backend/GPUDisplayBackendVulkan.cxx | 8 ++++---- .../frontend/GPUDisplayFrontendWayland.cxx | 6 +++--- .../display/helpers/GPUDisplayLoader.cxx | 15 +++++++++++++-- .../display/render/GPUDisplayDraw.cxx | 2 +- GPU/GPUTracking/qa/GPUQA.cxx | 2 +- GPU/GPUTracking/qa/genEvents.cxx | 3 +-- 14 files changed, 58 insertions(+), 31 deletions(-) diff --git a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx index b25b93e957b15..fefcd0ac925fe 100644 --- a/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx +++ b/GPU/GPUTracking/Base/GPUReconstructionTimeframe.cxx @@ -25,14 +25,13 @@ #include "TPCFastTransform.h" #include "CorrectionMapsHelper.h" #include "GPUO2DataTypes.h" +#include "GPUSettings.h" #include #include #include #include -#include "utils/qconfig.h" - using namespace o2::gpu; namespace o2::gpu diff --git a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx index f84f29d826f1d..60d5eaf9ae162 100644 --- a/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx +++ b/GPU/GPUTracking/Interface/GPUO2InterfaceDisplay.cxx @@ -35,7 +35,7 @@ 
GPUO2InterfaceDisplay::GPUO2InterfaceDisplay(const GPUO2InterfaceConfiguration* mQA.reset(new GPUQA(nullptr, &config->configQA, mParam.get())); mQA->InitO2MCData(); } - mDisplay.reset(GPUDisplayInterface::getDisplay(mFrontend.get(), nullptr, mQA.get(), mParam.get(), &mConfig->configCalib, &mConfig->configDisplay)); + mDisplay.reset(GPUDisplayInterface::getDisplay(mFrontend.get(), nullptr, mQA.get(), mParam.get(), &mConfig->configCalib, &mConfig->configDisplay, &mConfig->configProcessing)); } GPUO2InterfaceDisplay::~GPUO2InterfaceDisplay() = default; diff --git a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx index aed42e4f98f0c..9fb12432e763a 100644 --- a/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx +++ b/GPU/GPUTracking/Merger/GPUTPCGMTrackParam.cxx @@ -42,7 +42,7 @@ #include "GPUGetConstexpr.h" #ifdef GPUCA_CADEBUG_ENABLED -#include "../utils/qconfig.h" +#include "GPUSettings.h" #include "AliHLTTPCClusterMCData.h" #endif diff --git a/GPU/GPUTracking/display/GPUDisplay.cxx b/GPU/GPUTracking/display/GPUDisplay.cxx index 5b0960919da15..136b1947f60ee 100644 --- a/GPU/GPUTracking/display/GPUDisplay.cxx +++ b/GPU/GPUTracking/display/GPUDisplay.cxx @@ -34,7 +34,7 @@ #include "GPUTPCTracker.h" #include "GPUTPCGMMergedTrack.h" #include "GPUO2DataTypes.h" -#include "utils/qconfig.h" +#include "GPUSettings.h" #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" @@ -44,17 +44,19 @@ constexpr hmm_mat4 MY_HMM_IDENTITY = {{{1, 0, 0, 0}, {0, 1, 0, 0}, {0, 0, 1, 0}, using namespace o2::gpu; -static const GPUSettingsDisplay& GPUDisplay_GetConfig(GPUChainTracking* chain) +const GPUSettingsDisplay& GPUDisplay::GetConfig(GPUChainTracking* chain) { static GPUSettingsDisplay defaultConfig; - if (chain && chain->mConfigDisplay) { - return *chain->mConfigDisplay; - } else { - return defaultConfig; - } + return (chain && chain->mConfigDisplay) ? 
*chain->mConfigDisplay : defaultConfig; +} + +const GPUSettingsProcessing& GPUDisplay::GetProcessingConfig(GPUChainTracking* chain) +{ + static GPUSettingsProcessing defaultConfig; + return chain ? chain->GetProcessingSettings() : defaultConfig; } -GPUDisplay::GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config) : GPUDisplayInterface(), mFrontend(frontend), mChain(chain), mConfig(config ? *config : GPUDisplay_GetConfig(chain)), mQA(qa) +GPUDisplay::GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config, const GPUSettingsProcessing* proc) : GPUDisplayInterface(), mFrontend(frontend), mChain(chain), mConfig(config ? *config : GetConfig(chain)), mProcessingSettings(proc ? *proc : GetProcessingConfig(chain)), mQA(qa) { mParam = param ? param : &mChain->GetParam(); mCalib = calib; diff --git a/GPU/GPUTracking/display/GPUDisplay.h b/GPU/GPUTracking/display/GPUDisplay.h index dbd90020698b2..06977c26e0b63 100644 --- a/GPU/GPUTracking/display/GPUDisplay.h +++ b/GPU/GPUTracking/display/GPUDisplay.h @@ -18,6 +18,7 @@ #include "frontend/GPUDisplayFrontend.h" #include "backend/GPUDisplayBackend.h" #include "GPUDisplayInterface.h" +#include "GPUSettings.h" #include "../utils/vecpod.h" #include "../utils/qsem.h" @@ -37,7 +38,7 @@ class GPUTRDGeometry; class GPUDisplay : public GPUDisplayInterface { public: - GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr); + GPUDisplay(GPUDisplayFrontend* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param = nullptr, const GPUCalibObjectsConst* calib = nullptr, const GPUSettingsDisplay* config = nullptr, const GPUSettingsProcessing* proc = nullptr); 
GPUDisplay(const GPUDisplay&) = delete; ~GPUDisplay() override = default; @@ -71,6 +72,7 @@ class GPUDisplay : public GPUDisplayInterface }; vecpod* vertexBuffer() { return mVertexBuffer; } const GPUParam* param() { return mParam; } + const GPUSettingsProcessing& GetProcessingSettings() const { return mProcessingSettings; } GPUDisplayFrontend* frontend() { return mFrontend; } bool drawTextInCompatMode() const { return mDrawTextInCompatMode; } int32_t& drawTextFontSize() { return mDrawTextFontSize; } @@ -140,6 +142,9 @@ class GPUDisplay : public GPUDisplayInterface bool mVerbose = false; }; + static const GPUSettingsDisplay& GetConfig(GPUChainTracking* chain); + static const GPUSettingsProcessing& GetProcessingConfig(GPUChainTracking* chain); + void DrawGLScene_internal(float animateTime = -1.f, bool renderToMixBuffer = false); void DrawGLScene_updateEventData(); void DrawGLScene_cameraAndAnimation(float animateTime, float& mixSlaveImage, hmm_mat4& nextViewMatrix); @@ -214,6 +219,7 @@ class GPUDisplay : public GPUDisplayInterface GPUSettingsDisplayLight mCfgL; GPUSettingsDisplayHeavy mCfgH; GPUSettingsDisplayRenderer mCfgR; + const GPUSettingsProcessing& mProcessingSettings; GPUQA* mQA; qSem mSemLockDisplay; diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.cxx b/GPU/GPUTracking/display/GPUDisplayInterface.cxx index 2f5cc9cbb5dd5..2eddef998fa8b 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.cxx +++ b/GPU/GPUTracking/display/GPUDisplayInterface.cxx @@ -65,9 +65,9 @@ static void* loadUnloadLib(bool load) return nullptr; } -GPUDisplayInterface* GPUDisplayInterface::getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* config) +GPUDisplayInterface* GPUDisplayInterface::getDisplay(GPUDisplayFrontendInterface* frontend, GPUChainTracking* chain, GPUQA* qa, const GPUParam* param, const GPUCalibObjectsConst* calib, const GPUSettingsDisplay* 
config, const GPUSettingsProcessing* proc) { - std::tuple args = {frontend, chain, qa, param, calib, config}; + std::tuple args = {frontend, chain, qa, param, calib, config, proc}; auto func = (GPUDisplayInterface * (*)(const char*, void*)) loadUnloadLib(true); return func ? func("display", &args) : nullptr; } diff --git a/GPU/GPUTracking/display/GPUDisplayInterface.h b/GPU/GPUTracking/display/GPUDisplayInterface.h index 3c6928c78e5a1..574a8cffc71f0 100644 --- a/GPU/GPUTracking/display/GPUDisplayInterface.h +++ b/GPU/GPUTracking/display/GPUDisplayInterface.h @@ -15,7 +15,7 @@ #ifndef GPUDISPLAYINTERFACE_H #define GPUDISPLAYINTERFACE_H -#include "GPUSettings.h" +#include namespace o2::gpu { @@ -23,6 +23,16 @@ namespace o2::gpu class GPUChainTracking; class GPUQA; struct GPUParam; +struct GPUTrackingInOutPointers; +template +struct ConstPtr; +template