Skip to content

Commit d910b4f

Browse files
CLOUDSTACK-6650: Reorder Cluster list in deployment planner to protect
GPU enabled hosts from non-GPU VM deployment. Cluster reordering is based on the number of unique host tags in a cluster: the cluster with the largest number of unique host tags is put at the end of the list. Hosts with GPU capability are tagged with the implicit tags defined by the global config param 'implicit.host.tags' at the time of host discovery. Also added the FirstFitPlannerTest unit test file.
1 parent 4a1018e commit d910b4f

11 files changed

Lines changed: 514 additions & 4 deletions

File tree

core/src/com/cloud/agent/api/StartupRoutingCommand.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919

2020
package com.cloud.agent.api;
2121

22+
import java.util.ArrayList;
2223
import java.util.HashMap;
24+
import java.util.List;
2325
import java.util.Map;
2426

2527
import com.cloud.host.Host;
@@ -39,6 +41,7 @@ public class StartupRoutingCommand extends StartupCommand {
3941
String pool;
4042
HypervisorType hypervisorType;
4143
Map<String, String> hostDetails; //stuff like host os, cpu capabilities
44+
List<String> hostTags = new ArrayList<String>();
4245
String hypervisorVersion;
4346
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = new HashMap<String, HashMap<String, VgpuTypesInfo>>();
4447

@@ -162,6 +165,14 @@ public void setHypervisorVersion(String hypervisorVersion) {
162165
this.hypervisorVersion = hypervisorVersion;
163166
}
164167

168+
public List<String> getHostTags() {
169+
return hostTags;
170+
}
171+
172+
public void setHostTags(String hostTag) {
173+
this.hostTags.add(hostTag);
174+
}
175+
165176
public HashMap<String, HashMap<String, VgpuTypesInfo>> getGpuGroupDetails() {
166177
return groupDetails;
167178
}

engine/schema/src/com/cloud/host/dao/HostTagsDao.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,6 @@ public interface HostTagsDao extends GenericDao<HostTagVO, Long> {
2727

2828
List<String> gethostTags(long hostId);
2929

30+
List<String> getDistinctImplicitHostTags(List<Long> hostIds, String[] implicitHostTags);
31+
3032
}

engine/schema/src/com/cloud/host/dao/HostTagsDaoImpl.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,28 @@
2525

2626
import com.cloud.host.HostTagVO;
2727
import com.cloud.utils.db.GenericDaoBase;
28+
import com.cloud.utils.db.GenericSearchBuilder;
2829
import com.cloud.utils.db.SearchBuilder;
2930
import com.cloud.utils.db.SearchCriteria;
3031
import com.cloud.utils.db.TransactionLegacy;
32+
import com.cloud.utils.db.SearchCriteria.Func;
3133

3234
@Component
3335
@Local(value = HostTagsDao.class)
3436
public class HostTagsDaoImpl extends GenericDaoBase<HostTagVO, Long> implements HostTagsDao {
3537
protected final SearchBuilder<HostTagVO> HostSearch;
38+
protected final GenericSearchBuilder<HostTagVO, String> DistinctImplictTagsSearch;
3639

3740
/**
 * Prepares the two search templates used by this DAO.
 */
public HostTagsDaoImpl() {
    // Template matching tag rows by a single host id.
    this.HostSearch = createSearchBuilder();
    this.HostSearch.and("hostId", this.HostSearch.entity().getHostId(), SearchCriteria.Op.EQ);
    this.HostSearch.done();

    // Template selecting DISTINCT tag values, filtered by a set of host ids
    // and a set of candidate tag values.
    this.DistinctImplictTagsSearch = createSearchBuilder(String.class);
    this.DistinctImplictTagsSearch.select(null, Func.DISTINCT, this.DistinctImplictTagsSearch.entity().getTag());
    this.DistinctImplictTagsSearch.and("hostIds", this.DistinctImplictTagsSearch.entity().getHostId(), SearchCriteria.Op.IN);
    this.DistinctImplictTagsSearch.and("implicitTags", this.DistinctImplictTagsSearch.entity().getTag(), SearchCriteria.Op.IN);
    this.DistinctImplictTagsSearch.done();
}
4251

4352
@Override
@@ -54,6 +63,14 @@ public List<String> gethostTags(long hostId) {
5463
return hostTags;
5564
}
5665

66+
@Override
67+
public List<String> getDistinctImplicitHostTags(List<Long> hostIds, String[] implicitHostTags) {
68+
SearchCriteria<String> sc = DistinctImplictTagsSearch.create();
69+
sc.setParameters("hostIds", hostIds.toArray(new Object[hostIds.size()]));
70+
sc.setParameters("implicitTags", (Object[])implicitHostTags);
71+
return customSearch(sc, null);
72+
}
73+
5774
@Override
5875
public void persist(long hostId, List<String> hostTags) {
5976
TransactionLegacy txn = TransactionLegacy.currentTxn();

plugins/deployment-planners/implicit-dedication/test/org/apache/cloudstack/implicitplanner/ImplicitPlannerTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,10 @@
6969
import com.cloud.deploy.DeploymentPlanner.ExcludeList;
7070
import com.cloud.deploy.ImplicitDedicationPlanner;
7171
import com.cloud.exception.InsufficientServerCapacityException;
72+
import com.cloud.gpu.dao.HostGpuGroupsDao;
7273
import com.cloud.host.HostVO;
7374
import com.cloud.host.dao.HostDao;
75+
import com.cloud.host.dao.HostTagsDao;
7476
import com.cloud.resource.ResourceManager;
7577
import com.cloud.service.ServiceOfferingVO;
7678
import com.cloud.service.dao.ServiceOfferingDao;
@@ -467,6 +469,16 @@ public HostDao hostDao() {
467469
return Mockito.mock(HostDao.class);
468470
}
469471

472+
@Bean
473+
public HostTagsDao hostTagsDao() {
474+
return Mockito.mock(HostTagsDao.class);
475+
}
476+
477+
@Bean
478+
public HostGpuGroupsDao hostGpuGroupsDao() {
479+
return Mockito.mock(HostGpuGroupsDao.class);
480+
}
481+
470482
@Bean
471483
public DataCenterDao dcDao() {
472484
return Mockito.mock(DataCenterDao.class);

plugins/hypervisors/xenserver/src/com/cloud/hypervisor/xenserver/resource/XenServer620SP1Resource.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ protected void fillHostInfo(Connection conn, StartupRoutingCommand cmd) {
8484
try {
8585
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = getGPUGroupDetails(conn);
8686
cmd.setGpuGroupDetails(groupDetails);
87+
if (groupDetails != null && !groupDetails.isEmpty()) {
88+
cmd.setHostTags("GPU");
89+
}
8790
} catch (Exception e) {
8891
if (s_logger.isDebugEnabled()) {
8992
s_logger.debug("Error while getting GPU device info from host " + cmd.getName(), e);

server/src/com/cloud/configuration/Config.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,14 @@ public enum Config {
13601360
"false",
13611361
"Deploys a VM per zone to manage secondary storage if true, otherwise secondary storage is mounted on management server",
13621362
null),
1363+
// Tags applied implicitly to hosts based on their capabilities; the default value is "GPU".
ImplicitHostTags(
    "Hidden",
    ManagementServer.class,
    String.class,
    "implicit.host.tags",
    "GPU",
    "Tag hosts at the time of host discovery based on the host properties/capabilities",
    null),
13631371
CreatePoolsInPod(
13641372
"Hidden",
13651373
ManagementServer.class,

server/src/com/cloud/deploy/FirstFitPlanner.java

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
package com.cloud.deploy;
1818

1919
import java.util.ArrayList;
20+
import java.util.Collections;
21+
import java.util.Comparator;
2022
import java.util.HashMap;
2123
import java.util.List;
2224
import java.util.Map;
@@ -44,9 +46,14 @@
4446
import com.cloud.dc.dao.DataCenterDao;
4547
import com.cloud.dc.dao.HostPodDao;
4648
import com.cloud.exception.InsufficientServerCapacityException;
49+
import com.cloud.gpu.GPU;
50+
import com.cloud.gpu.dao.HostGpuGroupsDao;
51+
import com.cloud.host.Host;
4752
import com.cloud.host.dao.HostDao;
53+
import com.cloud.host.dao.HostTagsDao;
4854
import com.cloud.hypervisor.Hypervisor.HypervisorType;
4955
import com.cloud.offering.ServiceOffering;
56+
import com.cloud.service.dao.ServiceOfferingDetailsDao;
5057
import com.cloud.storage.StorageManager;
5158
import com.cloud.storage.dao.DiskOfferingDao;
5259
import com.cloud.storage.dao.GuestOSCategoryDao;
@@ -102,9 +109,16 @@ public class FirstFitPlanner extends PlannerBase implements DeploymentClusterPla
102109
DataStoreManager dataStoreMgr;
103110
@Inject
104111
protected ClusterDetailsDao _clusterDetailsDao;
112+
@Inject
113+
protected ServiceOfferingDetailsDao _serviceOfferingDetailsDao;
114+
@Inject
115+
protected HostGpuGroupsDao _hostGpuGroupsDao;
116+
@Inject
117+
protected HostTagsDao _hostTagsDao;
105118

106119
protected String _allocationAlgorithm = "random";
107120
protected String _globalDeploymentPlanner = "FirstFitPlanner";
121+
protected String[] _implicitHostTags;
108122

109123
@Override
110124
public List<Long> orderClusters(VirtualMachineProfile vmProfile, DeploymentPlan plan, ExcludeList avoid) throws InsufficientServerCapacityException {
@@ -131,7 +145,6 @@ public List<Long> orderClusters(VirtualMachineProfile vmProfile, DeploymentPlan
131145
clusterList.add(clusterIdSpecified);
132146
removeClustersCrossingThreshold(clusterList, avoid, vmProfile, plan);
133147
}
134-
return clusterList;
135148
} else {
136149
s_logger.debug("The specified cluster cannot be found, returning.");
137150
avoid.addCluster(plan.getClusterId());
@@ -152,7 +165,6 @@ public List<Long> orderClusters(VirtualMachineProfile vmProfile, DeploymentPlan
152165
avoid.addPod(plan.getPodId());
153166
}
154167
}
155-
return clusterList;
156168
} else {
157169
s_logger.debug("The specified Pod cannot be found, returning.");
158170
avoid.addPod(plan.getPodId());
@@ -164,13 +176,39 @@ public List<Long> orderClusters(VirtualMachineProfile vmProfile, DeploymentPlan
164176
boolean applyAllocationAtPods = Boolean.parseBoolean(_configDao.getValue(Config.ApplyAllocationAlgorithmToPods.key()));
165177
if (applyAllocationAtPods) {
166178
//start scan at all pods under this zone.
167-
return scanPodsForDestination(vmProfile, plan, avoid);
179+
clusterList = scanPodsForDestination(vmProfile, plan, avoid);
168180
} else {
169181
//start scan at clusters under this zone.
170-
return scanClustersForDestinationInZoneOrPod(plan.getDataCenterId(), true, vmProfile, plan, avoid);
182+
clusterList = scanClustersForDestinationInZoneOrPod(plan.getDataCenterId(), true, vmProfile, plan, avoid);
183+
}
184+
}
185+
186+
if (clusterList != null && !clusterList.isEmpty()) {
187+
ServiceOffering offering = vmProfile.getServiceOffering();
188+
// In case of non-GPU VMs, protect GPU enabled Hosts and prefer VM deployment on non-GPU Hosts.
189+
if ((_serviceOfferingDetailsDao.findDetail(offering.getId(), GPU.Keys.vgpuType.toString()) == null) && !(_hostGpuGroupsDao.listHostIds().isEmpty())) {
190+
int requiredCpu = offering.getCpu() * offering.getSpeed();
191+
long requiredRam = offering.getRamSize() * 1024L * 1024L;
192+
reorderClustersBasedOnImplicitTags(clusterList, requiredCpu, requiredRam);
171193
}
172194
}
195+
return clusterList;
196+
}
173197

198+
private void reorderClustersBasedOnImplicitTags(List<Long> clusterList, int requiredCpu, long requiredRam) {
199+
final HashMap<Long, Long> UniqueTagsInClusterMap = new HashMap<Long, Long>();
200+
for (Long clusterId : clusterList) {
201+
List<Long> hostList = _capacityDao.listHostsWithEnoughCapacity(requiredCpu, requiredRam, clusterId, Host.Type.Routing.toString());
202+
UniqueTagsInClusterMap.put(clusterId, new Long(_hostTagsDao.getDistinctImplicitHostTags(hostList, _implicitHostTags).size()));
203+
}
204+
Collections.sort(clusterList, new Comparator<Long>() {
205+
@Override
206+
public int compare(Long o1, Long o2) {
207+
Long t1 = UniqueTagsInClusterMap.get(o1);
208+
Long t2 = UniqueTagsInClusterMap.get(o2);
209+
return t1.compareTo(t2);
210+
}
211+
});
174212
}
175213

176214
private List<Long> scanPodsForDestination(VirtualMachineProfile vmProfile, DeploymentPlan plan, ExcludeList avoid) {
@@ -504,6 +542,10 @@ public boolean configure(String name, Map<String, Object> params) throws Configu
504542
super.configure(name, params);
505543
_allocationAlgorithm = _configDao.getValue(Config.VmAllocationAlgorithm.key());
506544
_globalDeploymentPlanner = _configDao.getValue(Config.VmDeploymentPlanner.key());
545+
String configValue;
546+
if ((configValue = _configDao.getValue(Config.ImplicitHostTags.key())) != null) {
547+
_implicitHostTags = configValue.trim().split("\\s*,\\s*");
548+
}
507549
return true;
508550
}
509551

server/src/com/cloud/resource/ResourceManagerImpl.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,6 +1632,22 @@ protected HostVO createHostVO(StartupCommand[] cmds, ServerResource resource, Ma
16321632
}
16331633
}
16341634

1635+
if (startup instanceof StartupRoutingCommand) {
1636+
StartupRoutingCommand ssCmd = ((StartupRoutingCommand)startup);
1637+
List<String> implicitHostTags = ssCmd.getHostTags();
1638+
if (!implicitHostTags.isEmpty()) {
1639+
if (hostTags == null) {
1640+
hostTags = _hostTagsDao.gethostTags(host.getId());
1641+
}
1642+
if (hostTags != null) {
1643+
implicitHostTags.removeAll(hostTags);
1644+
hostTags.addAll(implicitHostTags);
1645+
} else {
1646+
hostTags = implicitHostTags;
1647+
}
1648+
}
1649+
}
1650+
16351651
host.setDataCenterId(dc.getId());
16361652
host.setPodId(podId);
16371653
host.setClusterId(clusterId);

server/test/com/cloud/vm/DeploymentPlanningManagerImplTest.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@
7676
import com.cloud.deploy.dao.PlannerHostReservationDao;
7777
import com.cloud.exception.AffinityConflictException;
7878
import com.cloud.exception.InsufficientServerCapacityException;
79+
import com.cloud.gpu.dao.HostGpuGroupsDao;
7980
import com.cloud.host.dao.HostDao;
81+
import com.cloud.host.dao.HostTagsDao;
8082
import com.cloud.hypervisor.Hypervisor.HypervisorType;
8183
import com.cloud.resource.ResourceManager;
8284
import com.cloud.service.ServiceOfferingVO;
@@ -239,6 +241,11 @@ public VirtualMachineProfileImpl virtualMachineProfileImpl() {
239241
return Mockito.mock(VirtualMachineProfileImpl.class);
240242
}
241243

244+
@Bean
245+
public HostTagsDao hostTagsDao() {
246+
return Mockito.mock(HostTagsDao.class);
247+
}
248+
242249
@Bean
243250
public ClusterDetailsDao clusterDetailsDao() {
244251
return Mockito.mock(ClusterDetailsDao.class);
@@ -389,6 +396,11 @@ public AffinityGroupService affinityGroupService() {
389396
return Mockito.mock(AffinityGroupService.class);
390397
}
391398

399+
@Bean
400+
public HostGpuGroupsDao hostGpuGroupsDap() {
401+
return Mockito.mock(HostGpuGroupsDao.class);
402+
}
403+
392404
public static class Library implements TypeFilter {
393405

394406
@Override

0 commit comments

Comments
 (0)