Skip to content

Commit e82ac5f

Browse files
committed
GPU: Use auto-detected GPU memory size for GPU allocation when possible by default in standalone test
1 parent 5e68b74 commit e82ac5f

7 files changed

Lines changed: 19 additions & 7 deletions

File tree

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,9 @@ int GPUReconstruction::Init()
141141
mDeviceMemorySize += memGpu;
142142
mHostMemorySize += memHost;
143143
}
144-
if (mDeviceProcessingSettings.forceMemoryPoolSize) {
144+
if (mDeviceProcessingSettings.forceMemoryPoolSize && mDeviceProcessingSettings.forceMemoryPoolSize <= 2 && CanQueryMaxMemory()) {
145+
mDeviceMemorySize = mDeviceProcessingSettings.forceMemoryPoolSize;
146+
} else if (mDeviceProcessingSettings.forceMemoryPoolSize > 2) {
145147
mDeviceMemorySize = mHostMemorySize = mDeviceProcessingSettings.forceMemoryPoolSize;
146148
}
147149

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ class GPUReconstruction
257257

258258
// Others
259259
virtual RecoStepField AvailableRecoSteps() { return RecoStep::AllRecoSteps; }
260+
// Backend capability flag: Init() only applies the special forceMemoryPoolSize values 1 and 2 when this returns true.
// The base implementation returns false.
virtual bool CanQueryMaxMemory() { return false; }
260261

261262
// Pointers to tracker classes
262263
GPUConstantMem* processors() { return mHostConstantMem.get(); }

GPU/GPUTracking/Base/GPUSettings.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ struct GPUSettingsDeviceProcessing {
140140
int nStreams; // Number of parallel GPU streams
141141
bool trackletConstructorInPipeline; // Run tracklet constructor in pipeline like the preceding tasks instead of as one big block
142142
bool trackletSelectorInPipeline; // Run tracklet selector in pipeline, also requires tracklet constructor in pipeline
143-
size_t forceMemoryPoolSize; // Override size of memory pool to be allocated on GPU / Host
143+
size_t forceMemoryPoolSize; // Override size of memory pool to be allocated on GPU / Host (set =1 to force allocating all device memory, if supported)
144144
int nTPCClustererLanes; // Number of TPC clusterers that can run in parallel
145145
bool deviceTimers; // Use device timers instead of host-based timers
146146
};

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ int GPUReconstructionCUDABackend::InitDevice_Runtime()
156156
const int reqVerMaj = 2;
157157
const int reqVerMin = 0;
158158
std::vector<bool> devicesOK(count, false);
159+
std::vector<size_t> devMemory(count, 0);
159160
bool contextCreated = false;
160161
for (int i = 0; i < count; i++) {
161162
if (mDeviceProcessingSettings.debugLevel >= 4) {
@@ -200,7 +201,7 @@ int GPUReconstructionCUDABackend::InitDevice_Runtime()
200201
} else if (cudaDeviceProp.major < reqVerMaj || (cudaDeviceProp.major == reqVerMaj && cudaDeviceProp.minor < reqVerMin)) {
201202
deviceOK = false;
202203
deviceFailure = "Too low device revision";
203-
} else if (free < mDeviceMemorySize) {
204+
} else if (free < std::max(mDeviceMemorySize, (size_t)512 * 1024 * 1024)) {
204205
deviceOK = false;
205206
deviceFailure = "Insufficient GPU memory";
206207
}
@@ -213,6 +214,7 @@ int GPUReconstructionCUDABackend::InitDevice_Runtime()
213214
continue;
214215
}
215216
devicesOK[i] = true;
217+
devMemory[i] = free;
216218
if (deviceSpeed > bestDeviceSpeed) {
217219
bestDevice = i;
218220
bestDeviceSpeed = deviceSpeed;
@@ -226,7 +228,7 @@ int GPUReconstructionCUDABackend::InitDevice_Runtime()
226228
bool noDevice = false;
227229
if (bestDevice == -1) {
228230
GPUWarning("No %sCUDA Device available, aborting CUDA Initialisation", count ? "appropriate " : "");
229-
GPUImportant("Requiring Revision %d.%d, Mem: %lld", reqVerMaj, reqVerMin, (long long int)mDeviceMemorySize);
231+
GPUImportant("Requiring Revision %d.%d, Mem: %lld", reqVerMaj, reqVerMin, (long long int)std::max(mDeviceMemorySize, (size_t)512 * 1024 * 1024));
230232
noDevice = true;
231233
} else if (mDeviceProcessingSettings.deviceNum > -1) {
232234
if (mDeviceProcessingSettings.deviceNum >= (signed)count) {
@@ -303,6 +305,12 @@ int GPUReconstructionCUDABackend::InitDevice_Runtime()
303305
return (1);
304306
}
305307

308+
if (mDeviceMemorySize == 2) {
309+
mDeviceMemorySize = devMemory[mDeviceId] * 2 / 3; // Leave 1/3 of GPU memory for event display
310+
} else if (mDeviceMemorySize == 1) {
311+
mDeviceMemorySize = devMemory[mDeviceId] - 512 * 1024 * 1024; // Take all GPU memory but 1/2 GB
312+
}
313+
306314
if (mDeviceMemorySize > cudaDeviceProp.totalGlobalMem || GPUFailedMsgI(cudaMalloc(&mDeviceMemoryBase, mDeviceMemorySize))) {
307315
GPUError("CUDA Memory Allocation Error");
308316
GPUFailedMsgI(cudaDeviceReset());

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase
5050
GPUReconstructionCUDAInternals* mContext = nullptr;
5151
};
5252

53-
virtual std::unique_ptr<GPUThreadContext> GetThreadContext() override;
53+
std::unique_ptr<GPUThreadContext> GetThreadContext() override;
54+
// The CUDA backend returns true; InitDevice_Runtime() replaces a device memory size of 1 or 2 with a value computed from the selected device's free memory.
// NOTE(review): presumably overrides the virtual declared in GPUReconstruction — consider marking it `override` like the neighbouring methods; confirm the inheritance chain.
bool CanQueryMaxMemory() { return true; }
5455
void SynchronizeGPU() override;
5556
int GPUDebug(const char* state = "UNKNOWN", int stream = -1) override;
5657
void SynchronizeStream(int stream) override;

GPU/GPUTracking/Standalone/qconfigoptions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ AddOptionSet(nways, int, 1, "1Way", 0, "Use 3-way track-fit")
136136
AddOption(nwaysouter, bool, false, "OuterParam", 0, "Create OuterParam")
137137
AddOption(dzdr, float, 2.5f, "DzDr", 0, "Use dZ/dR search window instead of vertex window")
138138
AddOption(cont, bool, false, "continuous", 0, "Process continuous timeframe data")
139-
AddOption(forceMemorySize, unsigned long long int, 0, "memSize", 0, "Force size of allocated GPU / page locked host memory", min(0ull))
139+
AddOption(forceMemorySize, unsigned long long int, 1, "memSize", 0, "Force size of allocated GPU / page locked host memory", min(0ull))
140140
AddOption(outputcontrolmem, unsigned long long int, 0, "outputMemory", 0, "Use predefined output buffer of this size", min(0ull), message("Using %lld bytes as output memory"))
141141
AddOption(affinity, int, -1, "cpuAffinity", 0, "Pin CPU affinity to this CPU core", min(-1), message("Setting affinity to restrict on CPU %d"))
142142
AddOption(fifo, bool, false, "fifoScheduler", 0, "Use FIFO realtime scheduler", message("Setting FIFO scheduler: %s"))

GPU/GPUTracking/Standalone/standalone.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ int SetupReconstruction()
293293
devProc.nThreads = configStandalone.OMPThreads;
294294
}
295295
devProc.deviceNum = configStandalone.cudaDevice;
296-
devProc.forceMemoryPoolSize = configStandalone.forceMemorySize;
296+
devProc.forceMemoryPoolSize = (configStandalone.forceMemorySize == 1 && configStandalone.eventDisplay) ? 2 : configStandalone.forceMemorySize;
297297
devProc.debugLevel = configStandalone.DebugLevel;
298298
devProc.deviceTimers = configStandalone.DeviceTiming;
299299
devProc.runQA = configStandalone.qa;

0 commit comments

Comments
 (0)