Skip to content

Commit 35cd61c

Browse files
CLOUDSTACK-6649: CS is not giving the system-wide capacity for GPU resource.
1 parent a605ca0 commit 35cd61c

13 files changed

Lines changed: 195 additions & 24 deletions

File tree

api/src/com/cloud/agent/api/VgpuTypesInfo.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
public class VgpuTypesInfo {
1919

2020
private String modelName;
21+
private String groupName;
2122
private Long maxHeads;
2223
private Long videoRam;
2324
private Long maxResolutionX;
@@ -30,6 +31,10 @@ public String getModelName() {
3031
return modelName;
3132
}
3233

34+
/**
 * Returns the value of the groupName field.
 *
 * @return the group name held by this object; may be null when the object was built with a null groupName
 */
public String getGroupName() {
35+
return groupName;
36+
}
37+
3338
/**
 * Returns the value of the videoRam field.
 *
 * @return the stored video RAM value (units are not stated here)
 */
public Long getVideoRam() {
3439
return videoRam;
3540
}
@@ -66,8 +71,9 @@ public void setMaxVmCapacity(Long maxCapacity) {
6671
this.maxCapacity = maxCapacity;
6772
}
6873

69-
public VgpuTypesInfo(String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu,
74+
public VgpuTypesInfo(String groupName, String modelName, Long videoRam, Long maxHeads, Long maxResolutionX, Long maxResolutionY, Long maxVgpuPerGpu,
7075
Long remainingCapacity, Long maxCapacity) {
76+
this.groupName = groupName;
7177
this.modelName = modelName;
7278
this.videoRam = videoRam;
7379
this.maxHeads = maxHeads;

api/src/com/cloud/capacity/Capacity.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ public interface Capacity extends InternalIdentity, Identity {
3030
public static final short CAPACITY_TYPE_VLAN = 7;
3131
public static final short CAPACITY_TYPE_DIRECT_ATTACHED_PUBLIC_IP = 8;
3232
public static final short CAPACITY_TYPE_LOCAL_STORAGE = 9;
33+
public static final short CAPACITY_TYPE_GPU = 19;
3334

3435
public Long getHostOrPoolId();
3536

api/src/org/apache/cloudstack/api/response/HostResponse.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ public void setMemoryUsed(Long memoryUsed) {
328328
/**
 * Stores the given list in the gpuGroup field, replacing any previous value.
 *
 * @param gpuGroup list of GpuResponse objects to keep on this response
 */
public void setGpuGroups(List<GpuResponse> gpuGroup) {
329329
this.gpuGroup = gpuGroup;
330330
}
331+
331332
/**
 * Stores the given value in the diskSizeTotal field, replacing any previous value.
 *
 * @param diskSizeTotal value to keep on this response (units are not stated here)
 */
public void setDiskSizeTotal(Long diskSizeTotal) {
332333
this.diskSizeTotal = diskSizeTotal;
333334
}

engine/schema/src/com/cloud/gpu/dao/HostGpuGroupsDao.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
public interface HostGpuGroupsDao extends GenericDao<HostGpuGroupsVO, Long> {
2525

2626
/**
27-
* Find host device by hostId and PCI ID
27+
* Find host device by hostId and groupName
2828
* @param hostId the host
2929
* @param groupName GPU group
3030
* @return HostGpuGroupsVO

engine/schema/src/com/cloud/gpu/dao/VGPUTypesDao.java

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,34 @@
2525

2626
public interface VGPUTypesDao extends GenericDao<VGPUTypesVO, Long> {
2727

28-
/**
29-
* Find VGPU types by group Id
30-
* @param groupId of the GPU group
31-
* @return list of VGPUTypesVO
32-
*/
33-
List<VGPUTypesVO> listByGroupId(long groupId);
28+
/**
29+
* List zonewide/podwide/clusterwide GPU card capacities.
30+
* @param zoneId
31+
* @param podId
32+
* @param clusterId
33+
* @return Custom Query result
34+
*/
35+
List<VgpuTypesInfo> listGPUCapacities(Long zoneId, Long podId, Long clusterId);
3436

35-
/**
36-
* Find VGPU type by group Id and VGPU type
37-
* @param groupId of the GPU group
38-
* @param vgpuType name of VGPU type
39-
* @return VGPUTypesVO
40-
*/
41-
VGPUTypesVO findByGroupIdVGPUType(long groupId, String vgpuType);
37+
/**
38+
* Find VGPU types by group Id
39+
* @param groupId of the GPU group
40+
* @return list of VGPUTypesVO
41+
*/
42+
List<VGPUTypesVO> listByGroupId(long groupId);
4243

43-
/**
44-
* Save the list of enabled VGPU types
45-
* @param hostId the host
46-
* @param groupDetails with enabled VGPU types
47-
*/
48-
void persist(long hostId, HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails);
44+
/**
45+
* Find VGPU type by group Id and VGPU type
46+
* @param groupId of the GPU group
47+
* @param vgpuType name of VGPU type
48+
* @return VGPUTypesVO
49+
*/
50+
VGPUTypesVO findByGroupIdVGPUType(long groupId, String vgpuType);
51+
52+
/**
53+
* Save the list of enabled VGPU types
54+
* @param hostId the host
55+
* @param groupDetails with enabled VGPU types
56+
*/
57+
void persist(long hostId, HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails);
4958
}

engine/schema/src/com/cloud/gpu/dao/VGPUTypesDaoImpl.java

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@
1616
//under the License.
1717
package com.cloud.gpu.dao;
1818

19+
import java.sql.PreparedStatement;
20+
import java.sql.ResultSet;
21+
import java.sql.SQLException;
22+
import java.util.ArrayList;
1923
import java.util.HashMap;
2024
import java.util.Iterator;
2125
import java.util.List;
@@ -33,6 +37,8 @@
3337
import com.cloud.utils.db.GenericDaoBase;
3438
import com.cloud.utils.db.SearchBuilder;
3539
import com.cloud.utils.db.SearchCriteria;
40+
import com.cloud.utils.db.TransactionLegacy;
41+
import com.cloud.utils.exception.CloudRuntimeException;
3642

3743
@Component
3844
@Local(value = VGPUTypesDao.class)
@@ -41,11 +47,14 @@ public class VGPUTypesDaoImpl extends GenericDaoBase<VGPUTypesVO, Long> implemen
4147

4248
private final SearchBuilder<VGPUTypesVO> _searchByGroupId;
4349
private final SearchBuilder<VGPUTypesVO> _searchByGroupIdVGPUType;
44-
// private final SearchBuilder<VGPUTypesVO> _searchByHostId;
45-
// private final SearchBuilder<VGPUTypesVO> _searchForStaleEntries;
4650

4751
@Inject protected HostGpuGroupsDao _hostGpuGroupsDao;
4852

53+
private static final String LIST_ZONE_POD_CLUSTER_WIDE_GPU_CAPACITIES =
54+
"SELECT host_gpu_groups.group_name, vgpu_type, max_vgpu_per_pgpu, SUM(remaining_capacity) AS remaining_capacity, SUM(max_capacity) AS total_capacity FROM" +
55+
" `cloud`.`vgpu_types` INNER JOIN `cloud`.`host_gpu_groups` ON vgpu_types.gpu_group_id = host_gpu_groups.id INNER JOIN `cloud`.`host`" +
56+
" ON host_gpu_groups.host_id = host.id WHERE host.type = 'Routing' AND host.data_center_id = ?";
57+
4958
public VGPUTypesDaoImpl() {
5059

5160
_searchByGroupId = createSearchBuilder();
@@ -58,6 +67,47 @@ public VGPUTypesDaoImpl() {
5867
_searchByGroupIdVGPUType.done();
5968
}
6069

70+
@Override
71+
public List<VgpuTypesInfo> listGPUCapacities(Long dcId, Long podId, Long clusterId) {
72+
StringBuilder finalQuery = new StringBuilder();
73+
TransactionLegacy txn = TransactionLegacy.currentTxn();
74+
PreparedStatement pstmt = null;
75+
List<Long> resourceIdList = new ArrayList<Long>();
76+
ArrayList<VgpuTypesInfo> result = new ArrayList<VgpuTypesInfo>();
77+
78+
resourceIdList.add(dcId);
79+
finalQuery.append(LIST_ZONE_POD_CLUSTER_WIDE_GPU_CAPACITIES);
80+
81+
if (podId != null) {
82+
finalQuery.append(" AND host.pod_id = ?");
83+
resourceIdList.add(podId);
84+
}
85+
86+
if (clusterId != null) {
87+
finalQuery.append(" AND host.cluster_id = ?");
88+
resourceIdList.add(clusterId);
89+
}
90+
finalQuery.append(" GROUP BY host_gpu_groups.group_name, vgpu_type");
91+
92+
try {
93+
pstmt = txn.prepareAutoCloseStatement(finalQuery.toString());
94+
for (int i = 0; i < resourceIdList.size(); i++) {
95+
pstmt.setLong(1 + i, resourceIdList.get(i));
96+
}
97+
ResultSet rs = pstmt.executeQuery();
98+
while (rs.next()) {
99+
100+
VgpuTypesInfo gpuCapacity = new VgpuTypesInfo(rs.getString(1), rs.getString(2), null, null, null, null, rs.getLong(3), rs.getLong(4), rs.getLong(5));
101+
result.add(gpuCapacity);
102+
}
103+
return result;
104+
} catch (SQLException e) {
105+
throw new CloudRuntimeException("DB Exception on: " + finalQuery, e);
106+
} catch (Throwable e) {
107+
throw new CloudRuntimeException("Caught: " + finalQuery, e);
108+
}
109+
}
110+
61111
@Override
62112
public List<VGPUTypesVO> listByGroupId(long groupId) {
63113
SearchCriteria<VGPUTypesVO> sc = _searchByGroupId.create();

engine/schema/src/com/cloud/vm/dao/VMInstanceDao.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package com.cloud.vm.dao;
1818

1919
import java.util.Date;
20+
import java.util.HashMap;
2021
import java.util.List;
2122
import java.util.Map;
2223

@@ -133,4 +134,6 @@ public interface VMInstanceDao extends GenericDao<VMInstanceVO, Long>, StateDao<
133134
void resetVmPowerStateTracking(long instanceId);
134135

135136
void resetHostPowerStateTracking(long hostId);
137+
138+
HashMap<String, Long> countVgpuVMs(Long dcId, Long podId, Long clusterId);
136139
}

engine/schema/src/com/cloud/vm/dao/VMInstanceDaoImpl.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,13 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
115115

116116
private static final String ORDER_HOSTS_NUMBER_OF_VMS_FOR_ACCOUNT_PART2 = " GROUP BY host.id ORDER BY 2 ASC ";
117117

118+
private static final String COUNT_VMS_BASED_ON_VGPU_TYPES1 =
119+
"SELECT pci, type, SUM(vmcount) FROM (SELECT MAX(IF(offering.name = 'pciDevice',value,'')) AS pci, MAX(IF(offering.name = 'vgpuType', value,'')) " +
120+
"AS type, COUNT(DISTINCT vm.id) AS vmcount FROM service_offering_details offering INNER JOIN vm_instance vm ON offering.service_offering_id = vm.service_offering_id " +
121+
"INNER JOIN `cloud`.`host` ON vm.host_id = host.id WHERE vm.state = 'Running' AND host.data_center_id = ? ";
122+
private static final String COUNT_VMS_BASED_ON_VGPU_TYPES2 =
123+
"GROUP BY offering.service_offering_id) results GROUP BY pci, type";
124+
118125
@Inject
119126
protected HostDao _hostDao;
120127

@@ -640,6 +647,45 @@ public List<Long> listHostIdsByVmCount(long dcId, Long podId, Long clusterId, lo
640647
}
641648
}
642649

650+
@Override
651+
public HashMap<String, Long> countVgpuVMs(Long dcId, Long podId, Long clusterId) {
652+
StringBuilder finalQuery = new StringBuilder();
653+
TransactionLegacy txn = TransactionLegacy.currentTxn();
654+
PreparedStatement pstmt = null;
655+
List<Long> resourceIdList = new ArrayList<Long>();
656+
HashMap<String, Long> result = new HashMap<String, Long>();
657+
658+
resourceIdList.add(dcId);
659+
finalQuery.append(COUNT_VMS_BASED_ON_VGPU_TYPES1);
660+
661+
if (podId != null) {
662+
finalQuery.append(" AND host.pod_id = ?");
663+
resourceIdList.add(podId);
664+
}
665+
666+
if (clusterId != null) {
667+
finalQuery.append(" AND host.cluster_id = ?");
668+
resourceIdList.add(clusterId);
669+
}
670+
finalQuery.append(COUNT_VMS_BASED_ON_VGPU_TYPES2);
671+
672+
try {
673+
pstmt = txn.prepareAutoCloseStatement(finalQuery.toString());
674+
for (int i = 0; i < resourceIdList.size(); i++) {
675+
pstmt.setLong(1 + i, resourceIdList.get(i));
676+
}
677+
ResultSet rs = pstmt.executeQuery();
678+
while (rs.next()) {
679+
result.put(rs.getString(1).concat(rs.getString(2)), rs.getLong(3));
680+
}
681+
return result;
682+
} catch (SQLException e) {
683+
throw new CloudRuntimeException("DB Exception on: " + finalQuery, e);
684+
} catch (Throwable e) {
685+
throw new CloudRuntimeException("Caught: " + finalQuery, e);
686+
}
687+
}
688+
643689
@Override
644690
public Long countRunningByAccount(long accountId) {
645691
SearchCriteria<Long> sc = CountRunningByAccount.create();

plugins/hypervisors/xen/src/com/cloud/hypervisor/xen/resource/XenServer620SP1Resource.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ protected HashMap<String, HashMap<String, VgpuTypesInfo>> getGPUGroupDetails(Con
121121
entry.setMaxVmCapacity(maxCapacity);
122122
gpuCapacity.put(record.modelName, entry);
123123
} else {
124-
VgpuTypesInfo vgpuTypeRecord = new VgpuTypesInfo(record.modelName, record.framebufferSize, record.maxHeads,
124+
VgpuTypesInfo vgpuTypeRecord = new VgpuTypesInfo(null, record.modelName, record.framebufferSize, record.maxHeads,
125125
record.maxResolutionX, record.maxResolutionY, maxCapacity, remainingCapacity, maxCapacity);
126126
gpuCapacity.put(record.modelName, vgpuTypeRecord);
127127
}

server/src/com/cloud/api/ApiDBUtils.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.util.ArrayList;
2020
import java.util.EnumSet;
21+
import java.util.HashMap;
2122
import java.util.List;
2223
import java.util.ListIterator;
2324
import java.util.Map;
@@ -64,6 +65,7 @@
6465
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
6566
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
6667

68+
import com.cloud.agent.api.VgpuTypesInfo;
6769
import com.cloud.api.query.dao.AccountJoinDao;
6870
import com.cloud.api.query.dao.AffinityGroupJoinDao;
6971
import com.cloud.api.query.dao.AsyncJobJoinDao;
@@ -1101,6 +1103,14 @@ public static List<HostGpuGroupsVO> getGpuGroups(long hostId) {
11011103
return s_hostGpuGroupsDao.listByHostId(hostId);
11021104
}
11031105

1106+
/**
 * Delegates to s_vgpuTypesDao.listGPUCapacities, passing the three ids through unchanged.
 *
 * @param zoneId forwarded as the first argument
 * @param podId forwarded as the second argument
 * @param clusterId forwarded as the third argument
 * @return whatever the DAO call returns
 */
public static List<VgpuTypesInfo> getGpuCapacites(Long zoneId, Long podId, Long clusterId) {
1107+
return s_vgpuTypesDao.listGPUCapacities(zoneId, podId, clusterId);
1108+
}
1109+
1110+
/**
 * Delegates to s_vmDao.countVgpuVMs, passing the three ids through unchanged.
 *
 * @param zoneId forwarded as the first argument
 * @param podId forwarded as the second argument
 * @param clusterId forwarded as the third argument
 * @return whatever the DAO call returns
 */
public static HashMap<String, Long> getVgpuVmsCount(Long zoneId, Long podId, Long clusterId) {
1111+
return s_vmDao.countVgpuVMs(zoneId, podId, clusterId);
1112+
}
1113+
11041114
/**
 * Delegates to s_vgpuTypesDao.listByGroupId for the given group id.
 *
 * @param groupId forwarded unchanged to the DAO
 * @return whatever the DAO call returns
 */
public static List<VGPUTypesVO> getVgpus(long groupId) {
11051115
return s_vgpuTypesDao.listByGroupId(groupId);
11061116
}

0 commit comments

Comments
 (0)