Skip to content

Commit 02a3d6a

Browse files
committed
GPU: Add possibility to save/dump TPC digits for standalone benchmark
1 parent 2642049 commit 02a3d6a

11 files changed

Lines changed: 72 additions & 54 deletions

File tree

Detectors/TPC/reconstruction/src/GPUCATracking.cxx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,24 +82,24 @@ int GPUCATracking::runTracking(GPUO2InterfaceIOPtrs* data)
8282

8383
const ClusterNativeAccess* clusters;
8484
std::vector<gpucf::PackedDigit> gpuDigits[Sector::MAXSECTOR];
85+
GPUTrackingInOutDigits gpuDigitsMap;
8586
GPUTrackingInOutPointers ptrs;
8687
if (data->o2Digits) {
8788
ptrs.clustersNative = nullptr;
88-
ptrs.tpcRaw = 0;
8989
for (int i = 0; i < Sector::MAXSECTOR; i++) {
9090
const std::vector<o2::tpc::Digit>& d = (*(data->o2Digits))[i];
9191
gpuDigits[i].reserve(d.size());
92+
gpuDigitsMap.tpcDigits[i] = gpuDigits[i].data();
9293
for (int j = 0; j < d.size(); j++) {
9394
gpuDigits[i].emplace_back(gpucf::PackedDigit{d[j].getChargeFloat(), (gpucf::timestamp)d[j].getTimeStamp(), (gpucf::pad_t)d[j].getPad(), (gpucf::row_t)d[j].getRow()});
9495
}
95-
ptrs.tpcRaw += d.size();
96-
ptrs.tpcDigits[i] = gpuDigits[i].data();
97-
ptrs.nTPCDigits[i] = gpuDigits[i].size();
96+
gpuDigitsMap.nTPCDigits[i] = gpuDigits[i].size();
9897
}
98+
ptrs.tpcPackedDigits = &gpuDigitsMap;
9999
} else {
100100
clusters = data->clusters;
101101
ptrs.clustersNative = clusters;
102-
ptrs.tpcRaw = 0;
102+
ptrs.tpcPackedDigits = nullptr;
103103
}
104104
int retVal = mTrackingCAO2Interface->RunTracking(&ptrs);
105105
if (data->o2Digits) {

GPU/GPUTracking/Base/GPUDataTypes.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,9 @@ class GPUDataTypes
131131
typedef bitfield<RecoStep, unsigned int> RecoStepField;
132132
typedef bitfield<InOutType, unsigned int> InOutTypeField;
133133
#endif
134-
134+
#ifdef GPUCA_NOCOMPAT
135+
static constexpr unsigned int NSLICES = 36;
136+
#endif
135137
static DeviceType GetDeviceType(const char* type);
136138
};
137139

@@ -157,14 +159,18 @@ struct GPUCalibObjectsConst { // TODO: Any chance to do this as template?
157159
const o2::trd::TRDGeometryFlat* trdGeometry = nullptr;
158160
};
159161

162+
// Per-slice table of packed TPC digits: for every slice i, tpcDigits[i] points to the
// first digit and nTPCDigits[i] holds how many digits that array contains.
// The struct only stores raw pointers; it declares no destructor and frees nothing.
struct GPUTrackingInOutDigits {
163+
static constexpr unsigned int NSLICES = GPUDataTypes::NSLICES; // Array extent, taken from GPUDataTypes
164+
const gpucf::PackedDigit* tpcDigits[NSLICES] = {nullptr}; // Digit array per slice, nullptr by default
165+
unsigned long long int nTPCDigits[NSLICES] = {0}; // Number of digits per slice, 0 by default
166+
};
167+
160168
struct GPUTrackingInOutPointers {
161169
GPUTrackingInOutPointers() = default;
162170
GPUTrackingInOutPointers(const GPUTrackingInOutPointers&) = default;
163-
static constexpr unsigned int NSLICES = 36;
171+
static constexpr unsigned int NSLICES = GPUDataTypes::NSLICES;
164172

165-
size_t tpcRaw = 0;
166-
const gpucf::PackedDigit* tpcDigits[NSLICES] = {nullptr};
167-
size_t nTPCDigits[NSLICES] = {0};
173+
GPUTrackingInOutDigits* tpcPackedDigits = nullptr;
168174
const GPUTPCClusterData* clusterData[NSLICES] = {nullptr};
169175
unsigned int nClusterData[NSLICES] = {0};
170176
const AliHLTTPCRawCluster* rawClusters[NSLICES] = {nullptr};

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,11 @@ class GPUReconstruction
9595
RAW_CLUSTERS = 9,
9696
CLUSTERS_NATIVE = 10,
9797
TRD_TRACKLET_MC = 11,
98-
TPC_COMPRESSED_CL = 12 };
98+
TPC_COMPRESSED_CL = 12,
99+
TPC_DIGIT = 13 };
99100
static constexpr const char* const IOTYPENAMES[] = {"TPC Clusters", "TPC Slice Tracks", "TPC Slice Track Clusters", "TPC Cluster MC Labels", "TPC Track MC Informations", "TPC Tracks", "TPC Track Clusters", "TRD Tracks", "TRD Tracklets",
100-
"Raw Clusters", "ClusterNative", "TRD Tracklet MC Labels", "TPC Compressed Clusters"};
101+
"Raw Clusters", "ClusterNative", "TRD Tracklet MC Labels", "TPC Compressed Clusters", "TPC Digit"};
102+
// Number of array entries belonging to an I/O pointer type:
// NSLICES for the types listed in the switch, 1 for every other type.
static unsigned int getNIOTypeMultiplicity(InOutPointerType type)
{
  switch (type) {
    case CLUSTER_DATA:
    case SLICE_OUT_TRACK:
    case SLICE_OUT_CLUSTER:
    case RAW_CLUSTERS:
    case TPC_DIGIT:
      return NSLICES;
    default:
      return 1;
  }
}
101103

102104
// Functionality to create an instance of GPUReconstruction for the desired device
103105
static GPUReconstruction* CreateInstance(const GPUSettingsProcessing& cfg);
@@ -223,10 +225,10 @@ class GPUReconstruction
223225
size_t AllocateRegisteredPermanentMemory();
224226

225227
// Private helper functions for reading / writing / allocating IO buffer from/to file
226-
template <class T>
227-
void DumpData(FILE* fp, const T* const* entries, const unsigned int* num, InOutPointerType type);
228-
template <class T>
229-
size_t ReadData(FILE* fp, const T** entries, unsigned int* num, std::unique_ptr<T[]>* mem, InOutPointerType type);
228+
template <class T, class S>
229+
void DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type);
230+
template <class T, class S>
231+
size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type);
230232
template <class T>
231233
void AllocateIOMemoryHelper(unsigned int n, const T*& ptr, std::unique_ptr<T[]>& u);
232234

@@ -390,15 +392,10 @@ inline void GPUReconstruction::SetupGPUProcessor(T* proc, bool allocate)
390392
}
391393
}
392394

393-
template <class T>
394-
inline void GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const unsigned int* num, InOutPointerType type)
395+
template <class T, class S>
396+
inline void GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type)
395397
{
396-
int count;
397-
if (type == CLUSTER_DATA || type == SLICE_OUT_TRACK || type == SLICE_OUT_CLUSTER || type == RAW_CLUSTERS) {
398-
count = NSLICES;
399-
} else {
400-
count = 1;
401-
}
398+
int count = getNIOTypeMultiplicity(type);
402399
unsigned int numTotal = 0;
403400
for (int i = 0; i < count; i++) {
404401
numTotal += num[i];
@@ -415,8 +412,8 @@ inline void GPUReconstruction::DumpData(FILE* fp, const T* const* entries, const
415412
}
416413
}
417414

418-
template <class T>
419-
inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, unsigned int* num, std::unique_ptr<T[]>* mem, InOutPointerType type)
415+
template <class T, class S>
416+
inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type)
420417
{
421418
if (feof(fp)) {
422419
return 0;
@@ -429,12 +426,7 @@ inline size_t GPUReconstruction::ReadData(FILE* fp, const T** entries, unsigned
429426
return 0;
430427
}
431428

432-
int count;
433-
if (type == CLUSTER_DATA || type == SLICE_OUT_TRACK || type == SLICE_OUT_CLUSTER || type == RAW_CLUSTERS) {
434-
count = NSLICES;
435-
} else {
436-
count = 1;
437-
}
429+
int count = getNIOTypeMultiplicity(type);
438430
size_t numTotal = 0;
439431
for (int i = 0; i < count; i++) {
440432
r = fread(&num[i], sizeof(num[i]), 1, fp);

GPU/GPUTracking/Base/GPUSettings.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ void GPUSettingsEvent::SetDefaults()
5151
constBz = 0;
5252
homemadeEvents = 0;
5353
continuousMaxTimeBin = 0;
54+
needsClusterer = 0;
5455
}
5556

5657
void GPUSettingsProcessing::SetDefaults()

GPU/GPUTracking/Base/GPUSettings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ struct GPUSettingsEvent {
9090
int constBz; // for test-MC events with constant Bz
9191
int homemadeEvents; // Toy-MC events
9292
int continuousMaxTimeBin; // 0 for triggered events, -1 for default of 23ms
93+
int needsClusterer; // Set to true if the data requires the clusterizer
9394
};
9495

9596
// Settings defining the setup of the GPUReconstruction processing (basically selecting the device / class instance)

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,13 +102,13 @@ class GPUChain
102102
{
103103
mRec->AllocateIOMemoryHelper<T>(n, ptr, u);
104104
}
105-
template <class T>
106-
void DumpData(FILE* fp, const T* const* entries, const unsigned int* num, InOutPointerType type)
105+
// Convenience wrapper that forwards all arguments unchanged to mRec->DumpData.
// Only T is given explicitly in the call; S (the element type of the num array) is deduced from num.
template <class T, class S>
106+
void DumpData(FILE* fp, const T* const* entries, const S* num, InOutPointerType type)
107107
{
108108
mRec->DumpData<T>(fp, entries, num, type);
109109
}
110-
template <class T>
111-
size_t ReadData(FILE* fp, const T** entries, unsigned int* num, std::unique_ptr<T[]>* mem, InOutPointerType type)
110+
// Convenience wrapper that forwards all arguments unchanged to mRec->ReadData and returns its result.
// Only T is given explicitly in the call; S (the element type of the num array) is deduced from num.
template <class T, class S>
111+
size_t ReadData(FILE* fp, const T** entries, S* num, std::unique_ptr<T[]>* mem, InOutPointerType type)
112112
{
113113
return mRec->ReadData<T>(fp, entries, num, mem, type);
114114
}

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -281,23 +281,24 @@ void GPUChainTracking::PrepareEventFromNative()
281281
int GPUChainTracking::PrepareEvent()
282282
{
283283
mRec->MemoryScalers()->nTRDTracklets = mIOPtrs.nTRDTracklets;
284-
if (mIOPtrs.tpcRaw) {
285-
mRec->MemoryScalers()->nTPCdigits = mIOPtrs.tpcRaw;
286-
mRec->MemoryScalers()->nTPCHits = mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits);
284+
if (mIOPtrs.tpcPackedDigits) {
285+
mRec->MemoryScalers()->nTPCdigits = 0;
287286
size_t maxDigits = 0;
288287
for (unsigned int iSlice = 0; iSlice < NSLICES; iSlice++) {
289-
if (mIOPtrs.nTPCDigits[iSlice] > maxDigits) {
290-
maxDigits = mIOPtrs.nTPCDigits[iSlice];
288+
mRec->MemoryScalers()->nTPCdigits += mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice];
289+
if (mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice] > maxDigits) {
290+
maxDigits = mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice];
291291
}
292-
processors()->tpcTrackers[iSlice].Data().SetClusterData(nullptr, mRec->MemoryScalers()->NTPCClusters(mIOPtrs.nTPCDigits[iSlice]), 0); // TODO: fixme
293292
}
294-
// Distribute maximum digits, so that we can reuse the memory easily
295293
for (unsigned int iSlice = 0; iSlice < NSLICES; iSlice++) {
294+
processors()->tpcTrackers[iSlice].Data().SetClusterData(nullptr, mRec->MemoryScalers()->NTPCClusters(mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice]), 0); // TODO: fixme
295+
// Distribute maximum digits, so that we can reuse the memory easily
296296
processors()->tpcClusterer[iSlice].SetNMaxDigits(maxDigits);
297297
}
298+
mRec->MemoryScalers()->nTPCHits = mRec->MemoryScalers()->NTPCClusters(mRec->MemoryScalers()->nTPCdigits);
298299
processors()->tpcCompressor.mMaxClusters = mRec->MemoryScalers()->nTPCHits;
299300
processors()->tpcConverter.mNClustersTotal = mRec->MemoryScalers()->nTPCHits;
300-
GPUInfo("Event has %lld TPC Digits", (long long int)mIOPtrs.tpcRaw);
301+
GPUInfo("Event has %lld TPC Digits", (long long int)mRec->MemoryScalers()->nTPCdigits);
301302
} else if (mIOPtrs.clustersNative) {
302303
PrepareEventFromNative();
303304
} else {
@@ -402,6 +403,9 @@ void GPUChainTracking::DumpData(const char* filename)
402403
DumpData(fp, &mIOPtrs.clustersNative->clustersLinear, &mIOPtrs.clustersNative->nClustersTotal, InOutPointerType::CLUSTERS_NATIVE);
403404
fwrite(&mIOPtrs.clustersNative->nClusters[0][0], sizeof(mIOPtrs.clustersNative->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp);
404405
}
406+
if (mIOPtrs.tpcPackedDigits) {
407+
DumpData(fp, mIOPtrs.tpcPackedDigits->tpcDigits, mIOPtrs.tpcPackedDigits->nTPCDigits, InOutPointerType::TPC_DIGIT);
408+
}
405409
DumpData(fp, mIOPtrs.sliceOutTracks, mIOPtrs.nSliceOutTracks, InOutPointerType::SLICE_OUT_TRACK);
406410
DumpData(fp, mIOPtrs.sliceOutClusters, mIOPtrs.nSliceOutClusters, InOutPointerType::SLICE_OUT_CLUSTER);
407411
DumpData(fp, &mIOPtrs.mcLabelsTPC, &mIOPtrs.nMCLabelsTPC, InOutPointerType::MC_LABEL_TPC);
@@ -486,6 +490,10 @@ int GPUChainTracking::ReadData(const char* filename)
486490
r = fread(&mClusterNativeAccess->nClusters[0][0], sizeof(mClusterNativeAccess->nClusters[0][0]), NSLICES * GPUCA_ROW_COUNT, fp);
487491
mClusterNativeAccess->setOffsetPtrs();
488492
}
493+
mDigitMap.reset(new GPUTrackingInOutDigits);
494+
if (ReadData(fp, mDigitMap->tpcDigits, mDigitMap->nTPCDigits, mIOMem.tpcDigits, InOutPointerType::TPC_DIGIT)) {
495+
mIOPtrs.tpcPackedDigits = mDigitMap.get();
496+
}
489497
#endif
490498
ReadData(fp, mIOPtrs.sliceOutTracks, mIOPtrs.nSliceOutTracks, mIOMem.sliceOutTracks, InOutPointerType::SLICE_OUT_TRACK);
491499
ReadData(fp, mIOPtrs.sliceOutClusters, mIOPtrs.nSliceOutClusters, mIOMem.sliceOutClusters, InOutPointerType::SLICE_OUT_CLUSTER);
@@ -780,12 +788,12 @@ int GPUChainTracking::RunTPCClusterizer()
780788
SetupGPUProcessor(&clusterer, false);
781789
memset((void*)clusterer.mPmemory, 0, sizeof(*clusterer.mPmemory));
782790
if (doGPU) {
783-
clusterer.mPmemory->nDigits = mIOPtrs.nTPCDigits[iSlice];
791+
clusterer.mPmemory->nDigits = mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice];
784792
TransferMemoryResourceLinkToGPU(clusterer.mMemoryId, -1);
785-
mRec->GPUMemCpy(clustererShadow.mPdigits, mIOPtrs.tpcDigits[iSlice], sizeof(mIOPtrs.tpcDigits[iSlice][0]) * clusterer.mPmemory->nDigits, 0, true);
793+
mRec->GPUMemCpy(clustererShadow.mPdigits, mIOPtrs.tpcPackedDigits->tpcDigits[iSlice], sizeof(clustererShadow.mPdigits[0]) * clusterer.mPmemory->nDigits, 0, true);
786794
} else {
787-
clusterer.mPdigits = (gpucf::PackedDigit*)mIOPtrs.tpcDigits[iSlice]; // TODO: Needs fixing, double-allocated and invalid const cast
788-
clusterer.mPmemory->nDigits = mIOPtrs.nTPCDigits[iSlice];
795+
clusterer.mPdigits = (gpucf::PackedDigit*)mIOPtrs.tpcPackedDigits->tpcDigits[iSlice]; // TODO: Needs fixing, double-allocated and invalid const cast
796+
clusterer.mPmemory->nDigits = mIOPtrs.tpcPackedDigits->nTPCDigits[iSlice];
789797
}
790798
runKernel<GPUMemClean16>({BlockCount(), ThreadCount(), 0}, nullptr, krnlRunRangeNone, {}, clustererShadow.mPchargeMap, TPC_NUM_OF_PADS * TPC_MAX_TIME_PADDED * sizeof(*clustererShadow.mPchargeMap));
791799
runKernel<GPUMemClean16>({BlockCount(), ThreadCount(), 0}, nullptr, krnlRunRangeNone, {}, clustererShadow.mPpeakMap, TPC_NUM_OF_PADS * TPC_MAX_TIME_PADDED * sizeof(*clustererShadow.mPpeakMap));
@@ -1578,7 +1586,7 @@ int GPUChainTracking::RunChain()
15781586
return (1);
15791587
}
15801588

1581-
if (GetRecoSteps().isSet(RecoStep::TPCClusterFinding)) {
1589+
if (GetRecoSteps().isSet(RecoStep::TPCClusterFinding) && mIOPtrs.tpcPackedDigits) {
15821590
timerClusterer.Start();
15831591
RunTPCClusterizer();
15841592
timerClusterer.Stop();
@@ -1632,7 +1640,7 @@ int GPUChainTracking::RunChain()
16321640
if (GetDeviceProcessingSettings().runQA) {
16331641
printf("QA Time: %'d us\n", (int)(1000000 * timerQA.GetElapsedTime() / nCount));
16341642
}
1635-
if (mIOPtrs.tpcRaw) {
1643+
if (mIOPtrs.tpcPackedDigits) {
16361644
printf("TPC Clusterizer Time: %'d us\n", (int)(1000000 * timerClusterer.GetElapsedTime() / nCount));
16371645
}
16381646
if (mIOPtrs.clustersNative) {

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
8080
InOutMemory(InOutMemory&&);
8181
InOutMemory& operator=(InOutMemory&&);
8282

83+
std::unique_ptr<gpucf::PackedDigit[]> tpcDigits[NSLICES];
8384
std::unique_ptr<GPUTPCClusterData[]> clusterData[NSLICES];
8485
std::unique_ptr<AliHLTTPCRawCluster[]> rawClusters[NSLICES];
8586
std::unique_ptr<o2::tpc::ClusterNative[]> clustersNative;
@@ -188,6 +189,7 @@ class GPUChainTracking : public GPUChain, GPUReconstructionHelpers::helperDelega
188189

189190
// Ptr to reconstruction detector objects
190191
std::unique_ptr<o2::tpc::ClusterNativeAccess> mClusterNativeAccess; // Internal memory for clusterNativeAccess
192+
std::unique_ptr<o2::gpu::GPUTrackingInOutDigits> mDigitMap; // Internal memory for digit-map, if needed
191193
std::unique_ptr<TPCFastTransform> mTPCFastTransformU; // Global TPC fast transformation object
192194
std::unique_ptr<o2::base::MatLayerCylSet> mMatLUTU; // Material Lookup Table
193195
std::unique_ptr<o2::trd::TRDGeometryFlat> mTRDGeometryU; // TRD Geometry

GPU/GPUTracking/Interface/GPUO2Interface.cxx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ int GPUTPCO2Interface::Initialize(const GPUO2InterfaceConfiguration& config)
4545
mChain = mRec->AddChain<GPUChainTracking>(mConfig->configInterface.maxTPCHits, mConfig->configInterface.maxTRDTracklets);
4646
mChain->mConfigDisplay = &mConfig->configDisplay;
4747
mChain->mConfigQA = &mConfig->configQA;
48+
if (mConfig->configWorkflow.inputs.isSet(GPUDataTypes::InOutType::TPCRaw)) {
49+
mConfig->configEvent.needsClusterer = 1;
50+
}
4851
mRec->SetSettings(&mConfig->configEvent, &mConfig->configReconstruction, &mConfig->configDeviceProcessing, &mConfig->configWorkflow);
4952
mChain->SetTPCFastTransform(mConfig->configCalib.fastTransform);
5053
mChain->SetMatLUT(mConfig->configCalib.matLUT);
@@ -74,8 +77,7 @@ int GPUTPCO2Interface::RunTracking(GPUTrackingInOutPointers* data)
7477
if (mDumpEvents) {
7578
mChain->ClearIOPointers();
7679
mChain->mIOPtrs.clustersNative = data->clustersNative;
77-
memcpy(mChain->mIOPtrs.tpcDigits, data->tpcDigits, sizeof(data->tpcDigits));
78-
mChain->mIOPtrs.tpcRaw = data->tpcRaw;
80+
mChain->mIOPtrs.tpcPackedDigits = data->tpcPackedDigits;
7981

8082
char fname[1024];
8183
sprintf(fname, "event.%d.dump", nEvent);

GPU/GPUTracking/Standalone/standalone.cxx

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,8 +357,14 @@ int SetupReconstruction()
357357
if (configStandalone.configTF.bunchSim || configStandalone.configTF.nMerge) {
358358
steps.steps.setBits(GPUReconstruction::RecoStep::TRDTracking, false);
359359
}
360-
steps.steps.setBits(GPUReconstruction::RecoStep::TPCClusterFinding, false); // Disable cluster finding for now
361360
steps.inputs.set(GPUDataTypes::InOutType::TPCClusters, GPUDataTypes::InOutType::TRDTracklets);
361+
if (ev.needsClusterer) {
362+
steps.inputs.setBits(GPUDataTypes::InOutType::TPCRaw, true);
363+
steps.inputs.setBits(GPUDataTypes::InOutType::TPCClusters, false);
364+
} else {
365+
steps.steps.setBits(GPUReconstruction::RecoStep::TPCClusterFinding, false);
366+
}
367+
362368
steps.outputs.set(GPUDataTypes::InOutType::TPCSectorTracks);
363369
steps.outputs.setBits(GPUDataTypes::InOutType::TPCMergedTracks, steps.steps.isSet(GPUReconstruction::RecoStep::TPCMerging));
364370
steps.outputs.setBits(GPUDataTypes::InOutType::TPCCompressedClusters, steps.steps.isSet(GPUReconstruction::RecoStep::TPCCompression));

0 commit comments

Comments
 (0)