Skip to content

Commit 2ae9da8

Browse files
CLOUDSTACK-6357: Not able to select GPU card in case of GPU-passthrough.
1 parent 09f83e4 commit 2ae9da8

9 files changed

Lines changed: 47 additions & 64 deletions

File tree

api/src/com/cloud/gpu/GPU.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@ public enum Keys {
2323
pciDevice,
2424
vgpuType
2525
}
26-
public enum Type {
27-
GPU_Passthrough,
28-
VGPU
29-
}
3026

3127
public enum vGPUType {
3228
GRID_K100("GRID K100"),

engine/components-api/src/com/cloud/resource/ResourceManager.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,26 +151,29 @@ public interface ResourceManager extends ResourceService {
151151
/**
152152
* Check if host has GPU devices available
153153
* @param hostId the host to be checked
154+
* @param groupName the GPU card name
154155
* @param vgpuType the VGPU type
155156
* @return true when the host has the capacity with given VGPU type
156157
*/
157-
boolean isGPUDeviceAvailable(long hostId, String vgpuType);
158+
boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType);
158159

159160
/**
160161
* Get available GPU device
161162
* @param hostId the host to be checked
163+
* @param groupName the GPU card name
162164
* @param vgpuType the VGPU type
163165
* @return GPUDeviceTO
164166
*/
165-
GPUDeviceTO getGPUDevice(long hostId, String vgpuType);
167+
GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType);
166168

167169
/**
168170
* Return list of available GPU devices
169171
* @param hostId the host to be checked
172+
* @param groupName the GPU card name
170173
* @param vgpuType the VGPU type
171174
* @return List of HostGpuGroupsVO.
172175
*/
173-
List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType);
176+
List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType);
174177

175178
/**
176179
* Update GPU device details (post VM deployment)

server/src/com/cloud/agent/manager/allocator/impl/FirstFitAllocator.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,10 +278,12 @@ protected List<Host> allocateTo(DeploymentPlan plan, ServiceOffering offering, V
278278
}
279279

280280
// Check if GPU device is required by offering and host has the availability
281-
if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.vgpuType.toString())) != null
282-
&& !_resourceMgr.isGPUDeviceAvailable(host.getId(), offeringDetails.getValue())){
283-
s_logger.info("Host name: " + host.getName() + ", hostId: "+ host.getId() +" does not have required GPU devices available");
284-
continue;
281+
if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.vgpuType.toString())) != null) {
282+
ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(serviceOfferingId, GPU.Keys.pciDevice.toString());
283+
if(!_resourceMgr.isGPUDeviceAvailable(host.getId(), groupName.getValue(), offeringDetails.getValue())){
284+
s_logger.info("Host name: " + host.getName() + ", hostId: "+ host.getId() +" does not have required GPU devices available");
285+
continue;
286+
}
285287
}
286288

287289
int cpu_requested = offering.getCpu() * offering.getSpeed();

server/src/com/cloud/configuration/ConfigurationManagerImpl.java

Lines changed: 9 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2084,47 +2084,33 @@ protected ServiceOfferingVO createServiceOffering(long userId, boolean isSystem,
20842084
offering.setHypervisorSnapshotReserve(hypervisorSnapshotReserve);
20852085

20862086
List<ServiceOfferingDetailsVO> detailsVO = null;
2087-
if (details != null) {
2087+
if (details != null) {
20882088
// Check if the user has passed the gpu-type before passing the VGPU type
2089-
if (!details.containsKey(GPU.Keys.pciDevice.toString()) && details.containsKey(GPU.Keys.vgpuType.toString())) {
2090-
throw new InvalidParameterValueException("Please specify the gpu type");
2089+
if (!details.containsKey(GPU.Keys.pciDevice.toString()) || !details.containsKey(GPU.Keys.vgpuType.toString())) {
2090+
throw new InvalidParameterValueException("Please specify the pciDevice and vgpuType correctly.");
20912091
}
20922092
detailsVO = new ArrayList<ServiceOfferingDetailsVO>();
2093-
for (Entry<String, String> detailEntry : details.entrySet()) {
2093+
for (Entry<String, String> detailEntry : details.entrySet()) {
20942094
String value = null;
20952095
if (detailEntry.getKey().equals(GPU.Keys.pciDevice.toString())) {
2096-
for (GPU.Type type : GPU.Type.values()) {
2097-
if (detailEntry.getValue().equals(type.toString())) {
2098-
value = detailEntry.getValue();
2099-
}
2100-
}
2101-
if (value == null) {
2102-
throw new InvalidParameterValueException("Please specify valid gpu type");
2096+
if (detailEntry.getValue() == null) {
2097+
throw new InvalidParameterValueException("Please specify a GPU Card.");
21032098
}
21042099
}
21052100
if (detailEntry.getKey().equals(GPU.Keys.vgpuType.toString())) {
2106-
if (details.get(GPU.Keys.pciDevice.toString()).equals(GPU.Type.GPU_Passthrough.toString())) {
2107-
throw new InvalidParameterValueException("vgpuTypes are supported only with vGPU pciDevice");
2108-
}
21092101
if (detailEntry.getValue() == null) {
2110-
throw new InvalidParameterValueException("With vGPU as pciDevice, vGPUType value cannot be null");
2102+
throw new InvalidParameterValueException("vGPUType value cannot be null");
21112103
}
21122104
for (GPU.vGPUType entry : GPU.vGPUType.values()) {
21132105
if (detailEntry.getValue().equals(entry.getType())) {
21142106
value = entry.getType();
21152107
}
21162108
}
2117-
if (value == null || detailEntry.getValue().equals(GPU.vGPUType.passthrough.getType())) {
2109+
if (value == null) {
21182110
throw new InvalidParameterValueException("Please specify valid vGPU type");
21192111
}
21202112
}
2121-
detailsVO.add(new ServiceOfferingDetailsVO(offering.getId(), detailEntry.getKey(), detailEntry.getValue(), true));
2122-
}
2123-
// If pciDevice type is passed, put the default VGPU type as 'passthrough'
2124-
if (details.containsKey(GPU.Keys.pciDevice.toString())
2125-
&& !details.containsKey(GPU.Keys.vgpuType.toString())) {
2126-
detailsVO.add(new ServiceOfferingDetailsVO(offering.getId(),
2127-
GPU.Keys.vgpuType.toString(), GPU.vGPUType.passthrough.getType(), true));
2113+
detailsVO.add(new ServiceOfferingDetailsVO(offering.getId(), detailEntry.getKey(), detailEntry.getValue(), true));
21282114
}
21292115
}
21302116

server/src/com/cloud/deploy/DeploymentPlanningManagerImpl.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,9 +363,11 @@ public DeployDestination planDeployment(VirtualMachineProfile vmProfile, Deploym
363363
} else if (_capacityMgr.checkIfHostReachMaxGuestLimit(host)) {
364364
s_logger.debug("The last Host, hostId: " + host.getId() +
365365
" already has max Running VMs(count includes system VMs), skipping this and trying other available hosts");
366-
} else if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null
367-
&& !_resourceMgr.isGPUDeviceAvailable(host.getId(), offeringDetails.getValue())){
368-
s_logger.debug("The last host of this VM does not have required GPU devices available");
366+
} else if ((offeringDetails = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null) {
367+
ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString());
368+
if(!_resourceMgr.isGPUDeviceAvailable(host.getId(), groupName.getValue(), offeringDetails.getValue())){
369+
s_logger.debug("The last host of this VM does not have required GPU devices available");
370+
}
369371
} else {
370372
if (host.getStatus() == Status.Up && host.getResourceState() == ResourceState.Enabled) {
371373
boolean hostTagsMatch = true;

server/src/com/cloud/hypervisor/HypervisorGuruBase.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,8 @@ protected VirtualMachineTO toVirtualMachineTO(VirtualMachineProfile vmProfile) {
144144
// Set GPU details
145145
ServiceOfferingDetailsVO offeringDetail = null;
146146
if ((offeringDetail = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString())) != null) {
147-
to.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), offeringDetail.getValue()));
147+
ServiceOfferingDetailsVO groupName = _serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.pciDevice.toString());
148+
to.setGpuDevice(_resourceMgr.getGPUDevice(vm.getHostId(), groupName.getValue(), offeringDetail.getValue()));
148149
}
149150

150151
// Workaround to make sure the TO has the UUID we need for Niciri integration

server/src/com/cloud/resource/ResourceManagerImpl.java

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@
9797
import com.cloud.exception.InvalidParameterValueException;
9898
import com.cloud.exception.PermissionDeniedException;
9999
import com.cloud.exception.ResourceInUseException;
100-
import com.cloud.gpu.GPU.vGPUType;
101100
import com.cloud.gpu.HostGpuGroupsVO;
102101
import com.cloud.gpu.VGPUTypesVO;
103102
import com.cloud.gpu.dao.HostGpuGroupsDao;
@@ -1349,6 +1348,7 @@ public boolean configure(String name, Map<String, Object> params) throws Configu
13491348

13501349
_gpuAvailability = _hostGpuGroupsDao.createSearchBuilder();
13511350
_gpuAvailability.and("hostId", _gpuAvailability.entity().getHostId(), Op.EQ);
1351+
_gpuAvailability.and("groupName", _gpuAvailability.entity().getGroupName(), Op.EQ);
13521352
SearchBuilder<VGPUTypesVO> join1 = _vgpuTypesDao.createSearchBuilder();
13531353
join1.and("vgpuType", join1.entity().getVgpuType(), Op.EQ);
13541354
join1.and("remainingCapacity", join1.entity().getRemainingCapacity(), Op.GT);
@@ -2508,21 +2508,19 @@ public boolean isHostGpuEnabled(long hostId) {
25082508
}
25092509

25102510
@Override
2511-
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType) {
2512-
if (vgpuType == null) {
2513-
vgpuType = vGPUType.passthrough.getType();
2514-
}
2511+
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType) {
25152512
Filter searchFilter = new Filter(VGPUTypesVO.class, "remainingCapacity", false, null, null);
25162513
SearchCriteria<HostGpuGroupsVO> sc = _gpuAvailability.create();
25172514
sc.setParameters("hostId", hostId);
2515+
sc.setParameters("groupName", groupName);
25182516
sc.setJoinParameters("groupId", "vgpuType", vgpuType);
25192517
sc.setJoinParameters("groupId", "remainingCapacity", 0);
25202518
return _hostGpuGroupsDao.customSearch(sc, searchFilter);
25212519
}
25222520

25232521
@Override
2524-
public boolean isGPUDeviceAvailable(long hostId, String vgpuType) {
2525-
if(!listAvailableGPUDevice(hostId, vgpuType).isEmpty()) {
2522+
public boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType) {
2523+
if(!listAvailableGPUDevice(hostId, groupName, vgpuType).isEmpty()) {
25262524
return true;
25272525
} else {
25282526
if (s_logger.isDebugEnabled()) {
@@ -2533,8 +2531,8 @@ public boolean isGPUDeviceAvailable(long hostId, String vgpuType) {
25332531
}
25342532

25352533
@Override
2536-
public GPUDeviceTO getGPUDevice(long hostId, String vgpuType) {
2537-
HostGpuGroupsVO gpuDevice = listAvailableGPUDevice(hostId, vgpuType).get(0);
2534+
public GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType) {
2535+
HostGpuGroupsVO gpuDevice = listAvailableGPUDevice(hostId, groupName, vgpuType).get(0);
25382536
return new GPUDeviceTO(gpuDevice.getGroupName(), vgpuType, null);
25392537
}
25402538

server/test/com/cloud/resource/MockResourceManagerImpl.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -558,19 +558,19 @@ public boolean releaseHostReservation(Long hostId) {
558558
}
559559

560560
@Override
561-
public boolean isGPUDeviceAvailable(long hostId, String vgpuType) {
561+
public boolean isGPUDeviceAvailable(long hostId, String groupName, String vgpuType) {
562562
// TODO Auto-generated method stub
563563
return false;
564564
}
565565

566566
@Override
567-
public GPUDeviceTO getGPUDevice(long hostId, String vgpuType) {
567+
public GPUDeviceTO getGPUDevice(long hostId, String groupName, String vgpuType) {
568568
// TODO Auto-generated method stub
569569
return null;
570570
}
571571

572572
@Override
573-
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String vgpuType) {
573+
public List<HostGpuGroupsVO> listAvailableGPUDevice(long hostId, String groupName, String vgpuType) {
574574
// TODO Auto-generated method stub
575575
return null;
576576
}

ui/scripts/configuration.js

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -370,25 +370,16 @@
370370
description: ''
371371
});
372372
items.push({
373-
id: 'GPU_Passthrough',
374-
description: 'GPU-Passthrough'
373+
id: 'Group of NVIDIA Corporation GK107GL [GRID K1] GPUs',
374+
description: 'NVIDIA GRID K1'
375375
});
376376
items.push({
377-
id: 'VGPU',
378-
description: 'VGPU'
377+
id: 'Group of NVIDIA Corporation GK104GL [GRID K2] GPUs',
378+
description: 'NVIDIA GRID K2'
379379
});
380380
args.response.success({
381381
data: items
382382
});
383-
args.$select.change(function() {
384-
var $form = $(this).closest('form');
385-
var $fields = $form.find('.field');
386-
if (($(this).val() == "") || $(this).val() == "GPU-Passthrough") {
387-
$form.find('[rel=vgpuType]').hide();
388-
} else if ($(this).val() == "VGPU") {
389-
$form.find('[rel=vgpuType]').css('display', 'block');
390-
}
391-
});
392383
}
393384
},
394385

@@ -400,6 +391,10 @@
400391
id: '',
401392
description: ''
402393
});
394+
items.push({
395+
id: 'passthrough',
396+
description: 'passthrough'
397+
});
403398
items.push({
404399
id: 'GRID K100',
405400
description: 'GRID K100'
@@ -499,7 +494,7 @@
499494
array1.push("&serviceofferingdetails[1].value" + "=" + args.data.pciDevice);
500495
}
501496

502-
if (args.data.pciDevice == "VGPU") {
497+
if (args.data.vgpuType != "") {
503498
array1.push("&serviceofferingdetails[2].key" + "=" + "vgpuType");
504499
array1.push("&serviceofferingdetails[2].value" + "=" + args.data.vgpuType);
505500
}

0 commit comments

Comments
 (0)