Skip to content

Commit b101dc7

Browse files
Edison Xu authored and Alena Prokharchyk committed
KVM agent connect:
* send StartupAnswer right after StartupCommand is received
* if the post processor goes wrong, send a ReadyCommand with an error message to the agent, and the agent will then exit
1 parent c10eeb6 commit b101dc7

3 files changed

Lines changed: 86 additions & 86 deletions

File tree

agent/src/com/cloud/agent/Agent.java

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import java.util.Map;
2828
import java.util.Timer;
2929
import java.util.TimerTask;
30-
3130
import java.util.concurrent.ExecutorService;
3231
import java.util.concurrent.LinkedBlockingQueue;
3332
import java.util.concurrent.SynchronousQueue;
@@ -48,6 +47,7 @@
4847
import com.cloud.agent.api.MaintainCommand;
4948
import com.cloud.agent.api.ModifySshKeysCommand;
5049
import com.cloud.agent.api.PingCommand;
50+
import com.cloud.agent.api.ReadyCommand;
5151
import com.cloud.agent.api.ShutdownCommand;
5252
import com.cloud.agent.api.StartupAnswer;
5353
import com.cloud.agent.api.StartupCommand;
@@ -491,6 +491,10 @@ protected void processRequest(final Request request, final Link link) {
491491
cancelTasks();
492492
_reconnectAllowed = false;
493493
answer = new Answer(cmd, true, null);
494+
} else if (cmd instanceof ReadyCommand && ((ReadyCommand)cmd).getDetails() != null) {
495+
s_logger.debug("Not ready to connect to mgt server: " + ((ReadyCommand)cmd).getDetails());
496+
System.exit(1);
497+
return;
494498
} else if (cmd instanceof MaintainCommand) {
495499
s_logger.debug("Received maintainCommand" );
496500
cancelTasks();
@@ -513,6 +517,9 @@ protected void processRequest(final Request request, final Link link) {
513517
}
514518

515519
} else {
520+
if (cmd instanceof ReadyCommand) {
521+
processReadyCommand((ReadyCommand)cmd);
522+
}
516523
_inProgress.incrementAndGet();
517524
try {
518525
answer = _resource.executeRequest(cmd);
@@ -576,6 +583,19 @@ public void processResponse(final Response response, final Link link) {
576583
setLastPingResponseTime();
577584
}
578585
}
586+
587+
588+
public void processReadyCommand(Command cmd) {
589+
590+
final ReadyCommand ready = (ReadyCommand) cmd;
591+
592+
s_logger.info("Proccess agent ready command, agent id = " + ready.getHostId());
593+
if (ready.getHostId() != null) {
594+
setId(ready.getHostId());
595+
}
596+
s_logger.info("Ready command is processed: agent id = " + getId());
597+
598+
}
579599

580600
public void processOtherTask(Task task) {
581601
final Object obj = task.get();
@@ -601,6 +621,7 @@ public void processOtherTask(Task task) {
601621
} catch (final ClosedChannelException e) {
602622
s_logger.warn("Unable to send request: " + request.toString());
603623
}
624+
604625
} else if (obj instanceof Request) {
605626
final Request req = (Request) obj;
606627
final Command command = req.getCommand();

api/src/com/cloud/agent/api/ReadyCommand.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,18 @@ public ReadyCommand() {
2323
}
2424

2525
private Long dcId;
26+
private Long hostId;
2627

2728
public ReadyCommand(Long dcId) {
2829
super();
2930
this.dcId = dcId;
3031
}
3132

33+
public ReadyCommand(Long dcId, Long hostId) {
34+
this(dcId);
35+
this.hostId = hostId;
36+
}
37+
3238
public void setDetails(String details) {
3339
_details = details;
3440
}
@@ -46,4 +52,7 @@ public boolean executeInSequence() {
4652
return true;
4753
}
4854

55+
public Long getHostId() {
56+
return hostId;
57+
}
4958
}

server/src/com/cloud/agent/manager/AgentManagerImpl.java

Lines changed: 55 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, final St
634634
}
635635

636636
Long dcId = host.getDataCenterId();
637-
ReadyCommand ready = new ReadyCommand(dcId);
637+
ReadyCommand ready = new ReadyCommand(dcId, host.getId());
638638
Answer answer = easySend(hostId, ready);
639639
if (answer == null || !answer.getResult()) {
640640
// this is tricky part for secondary storage
@@ -1096,91 +1096,37 @@ protected AgentAttache createAttacheForConnect(HostVO host, Link link) throws Co
10961096
return attache;
10971097
}
10981098

1099-
//TODO: handle mycloud specific
11001099
private AgentAttache handleConnectedAgent(final Link link, final StartupCommand[] startup, Request request) {
11011100
AgentAttache attache = null;
1102-
StartupAnswer[] answers = new StartupAnswer[startup.length];
1103-
try {
1104-
HostVO host = _resourceMgr.createHostVOForConnectedAgent(startup);
1101+
ReadyCommand ready = null;
1102+
try {
1103+
HostVO host = _resourceMgr.createHostVOForConnectedAgent(startup);
11051104
if (host != null) {
1105+
ready = new ReadyCommand(host.getDataCenterId(), host.getId());
11061106
attache = createAttacheForConnect(host, link);
1107+
attache = notifyMonitorsOfConnection(attache, startup, false);
11071108
}
1108-
Command cmd;
1109-
for (int i = 0; i < startup.length; i++) {
1110-
cmd = startup[i];
1111-
if ((cmd instanceof StartupRoutingCommand) || (cmd instanceof StartupProxyCommand) || (cmd instanceof StartupSecondaryStorageCommand) || (cmd instanceof StartupStorageCommand)) {
1112-
answers[i] = new StartupAnswer(startup[i], attache.getId(), getPingInterval());
1113-
break;
1114-
}
1115-
}
1116-
}catch (ConnectionException e) {
1117-
Command cmd;
1118-
for (int i = 0; i < startup.length; i++) {
1119-
cmd = startup[i];
1120-
if ((cmd instanceof StartupRoutingCommand) || (cmd instanceof StartupProxyCommand) || (cmd instanceof StartupSecondaryStorageCommand) || (cmd instanceof StartupStorageCommand)) {
1121-
answers[i] = new StartupAnswer(startup[i], e.toString());
1122-
break;
1123-
}
1124-
}
1125-
} catch (IllegalArgumentException e) {
1126-
Command cmd;
1127-
for (int i = 0; i < startup.length; i++) {
1128-
cmd = startup[i];
1129-
if ((cmd instanceof StartupRoutingCommand) || (cmd instanceof StartupProxyCommand) || (cmd instanceof StartupSecondaryStorageCommand) || (cmd instanceof StartupStorageCommand)) {
1130-
answers[i] = new StartupAnswer(startup[i], e.toString());
1131-
break;
1132-
}
1133-
}
1134-
} catch (CloudRuntimeException e) {
1135-
Command cmd;
1136-
for (int i = 0; i < startup.length; i++) {
1137-
cmd = startup[i];
1138-
if ((cmd instanceof StartupRoutingCommand) || (cmd instanceof StartupProxyCommand) || (cmd instanceof StartupSecondaryStorageCommand) || (cmd instanceof StartupStorageCommand)) {
1139-
answers[i] = new StartupAnswer(startup[i], e.toString());
1140-
break;
1141-
}
1142-
}
1143-
}
1144-
1145-
Response response = null;
1146-
if (attache != null) {
1147-
response = new Response(request, answers[0], _nodeId, attache.getId());
1148-
} else {
1149-
response = new Response(request, answers[0], _nodeId, -1);
1150-
}
1151-
1152-
try {
1153-
link.send(response.toBytes());
1154-
} catch (ClosedChannelException e) {
1155-
s_logger.debug("Failed to send startupanswer: " + e.toString());
1156-
return null;
1157-
}
1158-
if (attache == null) {
1159-
return null;
1109+
} catch (Exception e) {
1110+
s_logger.debug("Failed to handle host connection: " + e.toString());
1111+
ready = new ReadyCommand(null);
1112+
ready.setDetails(e.toString());
1113+
} finally {
1114+
if (ready == null) {
1115+
ready = new ReadyCommand(null);
1116+
}
11601117
}
11611118

11621119
try {
1163-
attache = notifyMonitorsOfConnection(attache, startup, false);
1164-
return attache;
1165-
} catch (ConnectionException e) {
1166-
ReadyCommand ready = new ReadyCommand(null);
1167-
ready.setDetails(e.toString());
1168-
try {
1169-
easySend(attache.getId(), ready);
1170-
} catch (Exception e1) {
1171-
s_logger.debug("Failed to send readycommand, due to " + e.toString());
1172-
}
1173-
return null;
1174-
} catch (CloudRuntimeException e) {
1175-
ReadyCommand ready = new ReadyCommand(null);
1176-
ready.setDetails(e.toString());
1177-
try {
1120+
if (attache == null) {
1121+
final Request readyRequest = new Request(-1, -1, ready, false);
1122+
link.send(readyRequest.getBytes());
1123+
} else {
11781124
easySend(attache.getId(), ready);
1179-
} catch (Exception e1) {
1180-
s_logger.debug("Failed to send readycommand, due to " + e.toString());
11811125
}
1182-
return null;
1126+
} catch (Exception e) {
1127+
s_logger.debug("Failed to send ready command:" + e.toString());
11831128
}
1129+
return attache;
11841130
}
11851131

11861132
protected class SimulateStartTask implements Runnable {
@@ -1233,6 +1179,7 @@ public void run() {
12331179
for (int i = 0; i < _cmds.length; i++) {
12341180
startups[i] = (StartupCommand) _cmds[i];
12351181
}
1182+
12361183
AgentAttache attache = handleConnectedAgent(_link, startups, _request);
12371184
if (attache == null) {
12381185
s_logger.warn("Unable to create attache for agent: " + _request);
@@ -1241,6 +1188,23 @@ public void run() {
12411188
}
12421189

12431190
protected void connectAgent(Link link, final Command[] cmds, final Request request) {
1191+
//send startupanswer to agent in the very beginning, so agent can move on without waiting for the answer for an undetermined time, if we put this logic into another thread pool.
1192+
StartupAnswer[] answers = new StartupAnswer[cmds.length];
1193+
Command cmd;
1194+
for (int i = 0; i < cmds.length; i++) {
1195+
cmd = cmds[i];
1196+
if ((cmd instanceof StartupRoutingCommand) || (cmd instanceof StartupProxyCommand) || (cmd instanceof StartupSecondaryStorageCommand) || (cmd instanceof StartupStorageCommand)) {
1197+
answers[i] = new StartupAnswer((StartupCommand)cmds[i], 0, getPingInterval());
1198+
break;
1199+
}
1200+
}
1201+
Response response = null;
1202+
response = new Response(request, answers[0], _nodeId, -1);
1203+
try {
1204+
link.send(response.toBytes());
1205+
} catch (ClosedChannelException e) {
1206+
s_logger.debug("Failed to send startupanswer: " + e.toString());
1207+
}
12441208
_connectExecutor.execute(new HandleAgentConnectTask(link, cmds, request));
12451209
}
12461210

@@ -1327,17 +1291,23 @@ protected void processRequest(final Link link, final Request request) {
13271291
if (cmd instanceof PingRoutingCommand) {
13281292
boolean gatewayAccessible = ((PingRoutingCommand) cmd).isGatewayAccessible();
13291293
HostVO host = _hostDao.findById(Long.valueOf(cmdHostId));
1330-
if (!gatewayAccessible) {
1331-
// alert that host lost connection to
1332-
// gateway (cannot ping the default route)
1333-
DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
1334-
HostPodVO podVO = _podDao.findById(host.getPodId());
1335-
String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();
1336-
1337-
_alertMgr.sendAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId(), "Host lost connection to gateway, " + hostDesc, "Host [" + hostDesc
1338-
+ "] lost connection to gateway (default route) and is possibly having network connection issues.");
1294+
1295+
if (host != null) {
1296+
if (!gatewayAccessible) {
1297+
// alert that host lost connection to
1298+
// gateway (cannot ping the default route)
1299+
DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
1300+
HostPodVO podVO = _podDao.findById(host.getPodId());
1301+
String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();
1302+
1303+
_alertMgr.sendAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId(), "Host lost connection to gateway, " + hostDesc, "Host [" + hostDesc
1304+
+ "] lost connection to gateway (default route) and is possibly having network connection issues.");
1305+
} else {
1306+
_alertMgr.clearAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId());
1307+
}
13391308
} else {
1340-
_alertMgr.clearAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId());
1309+
s_logger.debug("Not processing " + PingRoutingCommand.class.getSimpleName() +
1310+
" for agent id=" + cmdHostId + "; can't find the host in the DB");
13411311
}
13421312
}
13431313
answer = new PingAnswer((PingCommand) cmd);

0 commit comments

Comments
 (0)