Skip to content

Commit 8da2462

Browse files
authored
CLOUDSTACK-10333: Secure Live VM Migration for KVM (#2505)
This extends the securing of KVM hosts to also secure libvirt on the KVM host, for TLS-enabled live VM migration. To simplify the implementation, securing a host implies that both the host and libvirtd processes are secured with certificates issued by the management server's CA plugin. Based on whether the keystore and certificate files are available at /etc/cloudstack/agent, the KVM agent determines whether to use TLS- or TCP-based URIs for live VM migration. It is also enforced that a secured host will allow live VM migration to/from other secured hosts, and an unsecured host will allow live VM migration to/from other unsecured hosts only. Post upgrade, the KVM agent will on startup expose its security state (the secured detail is sent as true or false) to the management server, which saves it in host_details for the host. This host detail can be accessed via the listHosts response, and in the UI unsecured KVM hosts will show up with the host state of ‘unsecured’. Further, a button has been added that allows admins to provision/renew certificates for KVM hosts and can be used to secure any unsecured KVM host. The `cloudstack-setup-agent` was modified to accept a new flag `-s` which will reconfigure libvirtd with the following settings: listen_tcp=0 listen_tls=1 tcp_port="16509" tls_port="16514" auth_tcp="none" auth_tls="none" key_file = "/etc/pki/libvirt/private/serverkey.pem" cert_file = "/etc/pki/libvirt/servercert.pem" ca_file = "/etc/pki/CA/cacert.pem" For a connected KVM host agent, when the certificates are renewed/provisioned, a background task is scheduled that waits until all of the agent tasks finish, after which the libvirt process is restarted and finally the agent is restarted via AgentShell. There are no API or DB changes. Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
1 parent 9288c64 commit 8da2462

File tree

27 files changed

+775
-109
lines changed

27 files changed

+775
-109
lines changed

agent/bindir/cloud-setup-agent.in

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ from cloudutils.configFileOps import configFileOps
2626
from cloudutils.globalEnv import globalEnv
2727
from cloudutils.networkConfig import networkConfig
2828
from cloudutils.syscfg import sysConfigFactory
29+
from cloudutils.serviceConfig import configureLibvirtConfig
2930

3031
from optparse import OptionParser
3132

@@ -100,6 +101,7 @@ if __name__ == '__main__':
100101
parser.add_option("-c", "--cluster", dest="cluster", help="cluster id")
101102
parser.add_option("-t", "--hypervisor", default="kvm", dest="hypervisor", help="hypervisor type")
102103
parser.add_option("-g", "--guid", dest="guid", help="guid")
104+
parser.add_option("-s", action="store_true", default=False, dest="secure", help="Secure and enable TLS for libvirtd")
103105
parser.add_option("--pubNic", dest="pubNic", help="Public traffic interface")
104106
parser.add_option("--prvNic", dest="prvNic", help="Private traffic interface")
105107
parser.add_option("--guestNic", dest="guestNic", help="Guest traffic interface")
@@ -110,6 +112,12 @@ if __name__ == '__main__':
110112
glbEnv.bridgeType = bridgeType
111113

112114
(options, args) = parser.parse_args()
115+
116+
if not options.auto and options.secure:
117+
configureLibvirtConfig(True)
118+
print "Libvirtd with TLS configured"
119+
sys.exit(0)
120+
113121
if options.auto is None:
114122
userInputs = getUserInputs()
115123
glbEnv.mgtSvr = userInputs[0]
@@ -138,7 +146,9 @@ if __name__ == '__main__':
138146
glbEnv.nics.append(options.prvNic)
139147
glbEnv.nics.append(options.pubNic)
140148
glbEnv.nics.append(options.guestNic)
141-
149+
150+
glbEnv.secure = options.secure
151+
142152
print "Starting to configure your system:"
143153
syscfg = sysConfigFactory.getSysConfigFactory(glbEnv)
144154
try:

agent/src/com/cloud/agent/Agent.java

Lines changed: 137 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.apache.cloudstack.agent.directdownload.SetupDirectDownloadCertificate;
4343
import org.apache.cloudstack.agent.lb.SetupMSListAnswer;
4444
import org.apache.cloudstack.agent.lb.SetupMSListCommand;
45+
import org.apache.cloudstack.ca.PostCertificateRenewalCommand;
4546
import org.apache.cloudstack.ca.SetupCertificateAnswer;
4647
import org.apache.cloudstack.ca.SetupCertificateCommand;
4748
import org.apache.cloudstack.ca.SetupKeyStoreCommand;
@@ -68,6 +69,7 @@
6869
import com.cloud.agent.transport.Request;
6970
import com.cloud.agent.transport.Response;
7071
import com.cloud.exception.AgentControlChannelException;
72+
import com.cloud.host.Host;
7173
import com.cloud.resource.ServerResource;
7274
import com.cloud.utils.PropertiesUtil;
7375
import com.cloud.utils.StringUtils;
@@ -127,6 +129,7 @@ public int value() {
127129
Long _id;
128130

129131
Timer _timer = new Timer("Agent Timer");
132+
Timer certTimer;
130133
Timer hostLBTimer;
131134

132135
List<WatchTask> _watchList = new ArrayList<WatchTask>();
@@ -140,9 +143,11 @@ public int value() {
140143
long _startupWait = _startupWaitDefault;
141144
boolean _reconnectAllowed = true;
142145
//For time sensitive task, e.g. PingTask
143-
private final ThreadPoolExecutor _ugentTaskPool;
146+
ThreadPoolExecutor _ugentTaskPool;
144147
ExecutorService _executor;
145148

149+
Thread _shutdownThread = new ShutdownThread(this);
150+
146151
private String _keystoreSetupPath;
147152
private String _keystoreCertImportPath;
148153

@@ -153,7 +158,7 @@ public Agent(final IAgentShell shell) {
153158

154159
_connection = new NioClient("Agent", _shell.getNextHost(), _shell.getPort(), _shell.getWorkers(), this);
155160

156-
Runtime.getRuntime().addShutdownHook(new ShutdownThread(this));
161+
Runtime.getRuntime().addShutdownHook(_shutdownThread);
157162

158163
_ugentTaskPool =
159164
new ThreadPoolExecutor(shell.getPingRetries(), 2 * shell.getPingRetries(), 10, TimeUnit.MINUTES, new SynchronousQueue<Runnable>(), new NamedThreadFactory(
@@ -192,7 +197,7 @@ public Agent(final IAgentShell shell, final int localAgentId, final ServerResour
192197
// ((NioClient)_connection).setBindAddress(_shell.getPrivateIp());
193198

194199
s_logger.debug("Adding shutdown hook");
195-
Runtime.getRuntime().addShutdownHook(new ShutdownThread(this));
200+
Runtime.getRuntime().addShutdownHook(_shutdownThread);
196201

197202
_ugentTaskPool =
198203
new ThreadPoolExecutor(shell.getPingRetries(), 2 * shell.getPingRetries(), 10, TimeUnit.MINUTES, new SynchronousQueue<Runnable>(), new NamedThreadFactory(
@@ -239,20 +244,39 @@ public String getResourceName() {
239244
return _resource.getClass().getSimpleName();
240245
}
241246

247+
/**
248+
* In case of a software based agent restart, this method
249+
* can help to perform explicit garbage collection of any old
250+
* agent instances and its inner objects.
251+
*/
252+
private void scavengeOldAgentObjects() {
253+
// Submitted to _executor, so the sleep below runs on an executor thread
// rather than on the calling thread.
_executor.submit(new Runnable() {
254+
@Override
255+
public void run() {
256+
try {
257+
// Wait two seconds before asking for a collection.
Thread.sleep(2000L);
258+
} catch (final InterruptedException ignored) {
// Interruption is deliberately ignored: the finally block below still
// issues the GC request, and the interrupt flag is not restored.
259+
} finally {
260+
// System.gc() is a request to the JVM, not a guarantee that a
// collection (or reclamation of any particular object) happens.
System.gc();
261+
}
262+
}
263+
});
264+
}
265+
242266
public void start() {
243267
if (!_resource.start()) {
244268
s_logger.error("Unable to start the resource: " + _resource.getName());
245269
throw new CloudRuntimeException("Unable to start the resource: " + _resource.getName());
246270
}
247271

248-
_keystoreSetupPath = Script.findScript("scripts/util/", KeyStoreUtils.keyStoreSetupScript);
272+
_keystoreSetupPath = Script.findScript("scripts/util/", KeyStoreUtils.KS_SETUP_SCRIPT);
249273
if (_keystoreSetupPath == null) {
250-
throw new CloudRuntimeException(String.format("Unable to find the '%s' script", KeyStoreUtils.keyStoreSetupScript));
274+
throw new CloudRuntimeException(String.format("Unable to find the '%s' script", KeyStoreUtils.KS_SETUP_SCRIPT));
251275
}
252276

253-
_keystoreCertImportPath = Script.findScript("scripts/util/", KeyStoreUtils.keyStoreImportScript);
277+
_keystoreCertImportPath = Script.findScript("scripts/util/", KeyStoreUtils.KS_IMPORT_SCRIPT);
254278
if (_keystoreCertImportPath == null) {
255-
throw new CloudRuntimeException(String.format("Unable to find the '%s' script", KeyStoreUtils.keyStoreImportScript));
279+
throw new CloudRuntimeException(String.format("Unable to find the '%s' script", KeyStoreUtils.KS_IMPORT_SCRIPT));
256280
}
257281

258282
try {
@@ -274,6 +298,7 @@ public void start() {
274298
}
275299
}
276300
_shell.updateConnectedHost();
301+
scavengeOldAgentObjects();
277302
}
278303

279304
public void stop(final String reason, final String detail) {
@@ -298,14 +323,42 @@ public void stop(final String reason, final String detail) {
298323
}
299324
_connection.stop();
300325
_connection = null;
326+
_link = null;
301327
}
302328

303329
if (_resource != null) {
304330
_resource.stop();
305331
_resource = null;
306332
}
307333

308-
_ugentTaskPool.shutdownNow();
334+
if (_startup != null) {
335+
_startup = null;
336+
}
337+
338+
if (_ugentTaskPool != null) {
339+
_ugentTaskPool.shutdownNow();
340+
_ugentTaskPool = null;
341+
}
342+
343+
if (_executor != null) {
344+
_executor.shutdown();
345+
_executor = null;
346+
}
347+
348+
if (_timer != null) {
349+
_timer.cancel();
350+
_timer = null;
351+
}
352+
353+
if (hostLBTimer != null) {
354+
hostLBTimer.cancel();
355+
hostLBTimer = null;
356+
}
357+
358+
if (certTimer != null) {
359+
certTimer.cancel();
360+
certTimer = null;
361+
}
309362
}
310363

311364
public Long getId() {
@@ -318,6 +371,15 @@ public void setId(final Long id) {
318371
_shell.setPersistentProperty(getResourceName(), "id", Long.toString(id));
319372
}
320373

374+
// Schedules a one-shot PostCertificateRenewalTask on a fresh "Certificate Renewal Timer",
// replacing any timer left over from a previous call. Synchronized on this Agent so that
// concurrent callers cannot interleave the cancel/replace steps on certTimer.
private synchronized void scheduleServicesRestartTask() {
375+
if (certTimer != null) {
376+
// Drop the previously scheduled timer: cancel() discards its pending tasks and
// purge() removes cancelled tasks from its queue.
certTimer.cancel();
377+
certTimer.purge();
378+
}
379+
certTimer = new Timer("Certificate Renewal Timer");
380+
// Single execution after a 5000 ms delay (no repeat period is given).
certTimer.schedule(new PostCertificateRenewalTask(this), 5000L);
381+
}
382+
321383
private synchronized void scheduleHostLBCheckerTask(final long checkInterval) {
322384
if (hostLBTimer != null) {
323385
hostLBTimer.cancel();
@@ -578,6 +640,9 @@ protected void processRequest(final Request request, final Link link) {
578640
answer = setupAgentKeystore((SetupKeyStoreCommand) cmd);
579641
} else if (cmd instanceof SetupCertificateCommand && ((SetupCertificateCommand) cmd).isHandleByAgent()) {
580642
answer = setupAgentCertificate((SetupCertificateCommand) cmd);
643+
if (Host.Type.Routing.equals(_resource.getType())) {
644+
scheduleServicesRestartTask();
645+
}
581646
} else if (cmd instanceof SetupDirectDownloadCertificate) {
582647
answer = setupDirectDownloadCertificate((SetupDirectDownloadCertificate) cmd);
583648
} else if (cmd instanceof SetupMSListCommand) {
@@ -641,7 +706,7 @@ private Answer setupDirectDownloadCertificate(SetupDirectDownloadCertificate cmd
641706
return new Answer(cmd, false, "Failed to find agent.properties file");
642707
}
643708

644-
final String keyStoreFile = agentFile.getParent() + "/" + KeyStoreUtils.defaultKeystoreFile;
709+
final String keyStoreFile = agentFile.getParent() + "/" + KeyStoreUtils.KS_FILENAME;
645710

646711
String cerFile = agentFile.getParent() + "/" + certificateName + ".cer";
647712
Script.runSimpleBashScript(String.format("echo '%s' > %s", certificate, cerFile));
@@ -666,13 +731,13 @@ public Answer setupAgentKeystore(final SetupKeyStoreCommand cmd) {
666731
if (agentFile == null) {
667732
return new Answer(cmd, false, "Failed to find agent.properties file");
668733
}
669-
final String keyStoreFile = agentFile.getParent() + "/" + KeyStoreUtils.defaultKeystoreFile;
670-
final String csrFile = agentFile.getParent() + "/" + KeyStoreUtils.defaultCsrFile;
734+
final String keyStoreFile = agentFile.getParent() + "/" + KeyStoreUtils.KS_FILENAME;
735+
final String csrFile = agentFile.getParent() + "/" + KeyStoreUtils.CSR_FILENAME;
671736

672-
String storedPassword = _shell.getPersistentProperty(null, KeyStoreUtils.passphrasePropertyName);
737+
String storedPassword = _shell.getPersistentProperty(null, KeyStoreUtils.KS_PASSPHRASE_PROPERTY);
673738
if (Strings.isNullOrEmpty(storedPassword)) {
674739
storedPassword = keyStorePassword;
675-
_shell.setPersistentProperty(null, KeyStoreUtils.passphrasePropertyName, storedPassword);
740+
_shell.setPersistentProperty(null, KeyStoreUtils.KS_PASSPHRASE_PROPERTY, storedPassword);
676741
}
677742

678743
Script script = new Script(true, _keystoreSetupPath, 60000, s_logger);
@@ -706,10 +771,10 @@ private Answer setupAgentCertificate(final SetupCertificateCommand cmd) {
706771
if (agentFile == null) {
707772
return new Answer(cmd, false, "Failed to find agent.properties file");
708773
}
709-
final String keyStoreFile = agentFile.getParent() + "/" + KeyStoreUtils.defaultKeystoreFile;
710-
final String certFile = agentFile.getParent() + "/" + KeyStoreUtils.defaultCertFile;
711-
final String privateKeyFile = agentFile.getParent() + "/" + KeyStoreUtils.defaultPrivateKeyFile;
712-
final String caCertFile = agentFile.getParent() + "/" + KeyStoreUtils.defaultCaCertFile;
774+
final String keyStoreFile = agentFile.getParent() + "/" + KeyStoreUtils.KS_FILENAME;
775+
final String certFile = agentFile.getParent() + "/" + KeyStoreUtils.CERT_FILENAME;
776+
final String privateKeyFile = agentFile.getParent() + "/" + KeyStoreUtils.PKEY_FILENAME;
777+
final String caCertFile = agentFile.getParent() + "/" + KeyStoreUtils.CACERT_FILENAME;
713778

714779
try {
715780
FileUtils.writeStringToFile(new File(certFile), certificate, Charset.defaultCharset());
@@ -722,7 +787,7 @@ private Answer setupAgentCertificate(final SetupCertificateCommand cmd) {
722787
Script script = new Script(true, _keystoreCertImportPath, 60000, s_logger);
723788
script.add(agentFile.getAbsolutePath());
724789
script.add(keyStoreFile);
725-
script.add(KeyStoreUtils.agentMode);
790+
script.add(KeyStoreUtils.AGENT_MODE);
726791
script.add(certFile);
727792
script.add("");
728793
script.add(caCertFile);
@@ -1072,6 +1137,60 @@ public void doTask(final Task task) throws TaskExecutionException {
10721137
}
10731138
}
10741139

1140+
/**
1141+
* Task stops the current agent and launches a new agent
1142+
* when there are no outstanding jobs in the agent's task queue
1143+
*/
1144+
public class PostCertificateRenewalTask extends ManagedContextTimerTask {
1145+
1146+
// The agent instance to tear down; set to null once it has been stopped.
private Agent agent;
1147+
1148+
public PostCertificateRenewalTask(final Agent agent) {
1149+
this.agent = agent;
1150+
}
1151+
1152+
// Polls every five seconds until the enclosing agent's _inProgress counter reads zero,
// then performs the stop/relaunch sequence once and returns.
@Override
1153+
protected void runInContext() {
1154+
while (true) {
1155+
try {
1156+
// Proceed only when the in-progress counter is zero; otherwise fall through
// to the sleep below and check again.
if (_inProgress.get() == 0) {
1157+
s_logger.debug("Running post certificate renewal task to restart services.");
1158+
1159+
// Let the resource perform any post certificate renewal cleanups
1160+
_resource.executeRequest(new PostCertificateRenewalCommand());
1161+
1162+
// Capture the shell and build a fresh resource of the same concrete class
// (via its no-argument constructor) before stopping the agent, because
// the references on the agent are cleared below.
IAgentShell shell = agent._shell;
1163+
ServerResource resource = agent._resource.getClass().newInstance();
1164+
1165+
// Stop current agent
1166+
agent.cancelTasks();
1167+
agent._reconnectAllowed = false;
1168+
// Unregister this agent's shutdown hook so the hook is not left
// registered with the runtime after this instance is stopped.
Runtime.getRuntime().removeShutdownHook(agent._shutdownThread);
1169+
agent.stop(ShutdownCommand.Requested, "Restarting due to new X509 certificates");
1170+
1171+
// Nullify references for GC
1172+
agent._shell = null;
1173+
agent._watchList = null;
1174+
agent._shutdownThread = null;
1175+
agent._controlListeners = null;
1176+
agent = null;
1177+
1178+
// Start a new agent instance
1179+
shell.launchNewAgent(resource);
1180+
return;
1181+
}
1182+
// NOTE(review): the guard checks the TRACE level but the message is logged at
// DEBUG, so it is emitted only when trace logging is enabled -- confirm which
// level was intended.
if (s_logger.isTraceEnabled()) {
1183+
s_logger.debug("Other tasks are in progress, will retry post certificate renewal command after few seconds");
1184+
}
1185+
Thread.sleep(5000);
1186+
} catch (final Exception e) {
// Any exception (including InterruptedException from the sleep above) ends the
// task without a retry.
// NOTE(review): if the failure happens after agent.stop() but before
// shell.launchNewAgent(), this task does not start a replacement agent --
// confirm whether something else recovers from that state.
1187+
s_logger.warn("Failed to execute post certificate renewal command:", e);
1188+
break;
1189+
}
1190+
}
1191+
}
1192+
}
1193+
10751194
public class PreferredHostCheckerTask extends ManagedContextTimerTask {
10761195

10771196
@Override

agent/src/com/cloud/agent/AgentShell.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ private void launchAgentFromClassInfo(String resourceClassNames) throws Configur
419419
final Constructor<?> constructor = impl.getDeclaredConstructor();
420420
constructor.setAccessible(true);
421421
ServerResource resource = (ServerResource)constructor.newInstance();
422-
launchAgent(getNextAgentId(), resource);
422+
launchNewAgent(resource);
423423
} catch (final ClassNotFoundException e) {
424424
throw new ConfigurationException("Resource class not found: " + name + " due to: " + e.toString());
425425
} catch (final SecurityException e) {
@@ -447,9 +447,10 @@ private void launchAgentFromTypeInfo() throws ConfigurationException {
447447
s_logger.trace("Launching agent based on type=" + typeInfo);
448448
}
449449

450-
private void launchAgent(int localAgentId, ServerResource resource) throws ConfigurationException {
450+
public void launchNewAgent(ServerResource resource) throws ConfigurationException {
451451
// we don't track agent after it is launched for now
452-
Agent agent = new Agent(this, localAgentId, resource);
452+
_agents.clear();
453+
Agent agent = new Agent(this, getNextAgentId(), resource);
453454
_agents.add(agent);
454455
agent.start();
455456
}

agent/src/com/cloud/agent/IAgentShell.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
import java.util.Map;
2020
import java.util.Properties;
2121

22+
import javax.naming.ConfigurationException;
23+
24+
import com.cloud.resource.ServerResource;
2225
import com.cloud.utils.backoff.BackoffAlgorithm;
2326

2427
public interface IAgentShell {
@@ -66,4 +69,6 @@ public interface IAgentShell {
6669
void updateConnectedHost();
6770

6871
String getConnectedHost();
72+
73+
void launchNewAgent(ServerResource resource) throws ConfigurationException;
6974
}

0 commit comments

Comments
 (0)