Skip to content

Commit ca6f4fb

Browse files
committed
Skip BMC detection when using out-of-band management
When Ironic uses out-of-band management interfaces like Redfish, iDRAC, iLO, or iRMC, the BMC address is already known and configured in Ironic. This change allows the agent to skip BMC address detection via ipmitool when instructed by Ironic through the lookup response. This reduces deployment time by avoiding unnecessary ipmitool calls during hardware inventory collection. The agent now checks for the 'agent_skip_bmc_detect' flag in the config section of the lookup response and skips BMC detection accordingly. This flag is stored in the cached node data for use during hardware inventory collection. Depends-On: I6a432db3eb238894e0ed2676243ce69ec300a9eb Assisted-By: Claude Sonnet 4.5 Change-Id: Id5470136defb981d1855e3c57cd16c03a6eb916e Signed-off-by: Riccardo Pittau <elfosardo@gmail.com>
1 parent 3067c82 commit ca6f4fb

File tree

4 files changed

+191
-8
lines changed

4 files changed

+191
-8
lines changed

ironic_python_agent/agent.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,15 @@ def process_lookup_data(self, content):
503503
self.node = content['node']
504504
LOG.info('Lookup succeeded, node UUID is %s',
505505
self.node['uuid'])
506+
507+
# Store skip_bmc_detect flag in the node before caching
508+
# This allows hardware managers to check if BMC detection is needed
509+
skip_bmc = content.get('config', {}).get('agent_skip_bmc_detect',
510+
False)
511+
if skip_bmc:
512+
LOG.info('Ironic has indicated BMC detection should be skipped')
513+
self.node['skip_bmc_detect'] = True
514+
506515
hardware.cache_node(self.node)
507516
self.heartbeat_timeout = content['config']['heartbeat_timeout']
508517

ironic_python_agent/hardware.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1151,18 +1151,52 @@ def list_hardware_info(self):
11511151
hardware_info['cpu'] = self.get_cpus()
11521152
hardware_info['disks'] = self.list_block_devices()
11531153
hardware_info['memory'] = self.get_memory()
1154-
hardware_info['bmc_address'] = self.get_bmc_address()
1155-
hardware_info['bmc_v6address'] = self.get_bmc_v6address()
1154+
1155+
# Check if Ironic has indicated BMC detection should be skipped
1156+
# This is set after lookup when using out-of-band
1157+
# management like Redfish
1158+
cached_node = get_cached_node()
1159+
skip_bmc = (cached_node and cached_node.get('skip_bmc_detect',
1160+
False))
1161+
1162+
if skip_bmc:
1163+
LOG.info('Skipping BMC detection as requested by Ironic')
1164+
hardware_info['bmc_address'] = None
1165+
hardware_info['bmc_v6address'] = None
1166+
else:
1167+
# Cache BMC information to avoid repeated expensive ipmitool calls
1168+
if not hasattr(self, '_bmc_cache'):
1169+
LOG.debug('Detecting BMC information (first time)')
1170+
self._bmc_cache = {
1171+
'bmc_address': self.get_bmc_address(),
1172+
'bmc_v6address': self.get_bmc_v6address(),
1173+
'bmc_mac': None # Populated below
1174+
}
1175+
else:
1176+
LOG.debug('Using cached BMC information')
1177+
1178+
hardware_info['bmc_address'] = self._bmc_cache['bmc_address']
1179+
hardware_info['bmc_v6address'] = self._bmc_cache['bmc_v6address']
1180+
11561181
hardware_info['system_vendor'] = self.get_system_vendor_info()
11571182
hardware_info['boot'] = self.get_boot_info()
11581183
hardware_info['hostname'] = netutils.get_hostname()
11591184

1160-
try:
1161-
hardware_info['bmc_mac'] = self.get_bmc_mac()
1162-
except errors.IncompatibleHardwareMethodError:
1163-
# if the hardware manager does not support obtaining the BMC MAC,
1164-
# we simply don't expose it.
1165-
pass
1185+
if not skip_bmc:
1186+
# Try to get BMC MAC, which may not be cached yet
1187+
if self._bmc_cache['bmc_mac'] is None:
1188+
try:
1189+
self._bmc_cache['bmc_mac'] = self.get_bmc_mac()
1190+
LOG.debug('Cached BMC MAC address')
1191+
except errors.IncompatibleHardwareMethodError:
1192+
# if the hardware manager does not support obtaining
1193+
# the BMC MAC, we simply don't expose it.
1194+
# Mark as unavailable to avoid retrying
1195+
self._bmc_cache['bmc_mac'] = 'unavailable'
1196+
1197+
# Only add to hardware_info if we successfully got it
1198+
if self._bmc_cache['bmc_mac'] not in (None, 'unavailable'):
1199+
hardware_info['bmc_mac'] = self._bmc_cache['bmc_mac']
11661200

11671201
LOG.info('Inventory collected in %.2f second(s)', time.time() - start)
11681202
return hardware_info

ironic_python_agent/tests/unit/test_hardware.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,133 @@ def test_list_hardware_info(self, mocked_get_hostname, mocked_lshw):
12391239
self.assertEqual('mock_hostname', hardware_info['hostname'])
12401240
mocked_lshw.assert_called_once_with(self.hardware)
12411241

1242+
@mock.patch.object(hardware, 'get_cached_node', autospec=True)
1243+
@mock.patch.object(hardware.GenericHardwareManager,
1244+
'_get_system_lshw_dict', autospec=True,
1245+
return_value={'id': 'host'})
1246+
@mock.patch.object(netutils, 'get_hostname', autospec=True)
1247+
def test_list_hardware_info_with_bmc_caching(
1248+
self, mocked_get_hostname, mocked_lshw, mocked_get_node):
1249+
"""Test that BMC information is cached after first detection."""
1250+
mocked_get_node.return_value = None # No skip_bmc_detect flag
1251+
1252+
# Mock all the hardware info methods
1253+
self.hardware.list_network_interfaces = mock.Mock(return_value=[])
1254+
self.hardware.get_cpus = mock.Mock()
1255+
self.hardware.get_memory = mock.Mock()
1256+
self.hardware.list_block_devices = mock.Mock(return_value=[])
1257+
self.hardware.get_boot_info = mock.Mock()
1258+
self.hardware.get_system_vendor_info = mock.Mock()
1259+
mocked_get_hostname.return_value = 'mock_hostname'
1260+
1261+
# Mock BMC methods
1262+
self.hardware.get_bmc_address = mock.Mock(return_value='192.168.1.1')
1263+
self.hardware.get_bmc_v6address = mock.Mock(return_value='fe80::1')
1264+
self.hardware.get_bmc_mac = mock.Mock(return_value='aa:bb:cc:dd:ee:ff')
1265+
1266+
# First call - should call BMC detection methods
1267+
hardware_info1 = self.hardware.list_hardware_info()
1268+
self.assertEqual('192.168.1.1', hardware_info1['bmc_address'])
1269+
self.assertEqual('fe80::1', hardware_info1['bmc_v6address'])
1270+
self.assertEqual('aa:bb:cc:dd:ee:ff', hardware_info1['bmc_mac'])
1271+
1272+
# Verify BMC methods were called
1273+
self.hardware.get_bmc_address.assert_called_once()
1274+
self.hardware.get_bmc_v6address.assert_called_once()
1275+
self.hardware.get_bmc_mac.assert_called_once()
1276+
1277+
# Second call - should use cached values
1278+
hardware_info2 = self.hardware.list_hardware_info()
1279+
self.assertEqual('192.168.1.1', hardware_info2['bmc_address'])
1280+
self.assertEqual('fe80::1', hardware_info2['bmc_v6address'])
1281+
self.assertEqual('aa:bb:cc:dd:ee:ff', hardware_info2['bmc_mac'])
1282+
1283+
# BMC methods should NOT be called again (still only once)
1284+
self.hardware.get_bmc_address.assert_called_once()
1285+
self.hardware.get_bmc_v6address.assert_called_once()
1286+
self.hardware.get_bmc_mac.assert_called_once()
1287+
1288+
@mock.patch.object(hardware, 'get_cached_node', autospec=True)
1289+
@mock.patch.object(hardware.GenericHardwareManager,
1290+
'_get_system_lshw_dict', autospec=True,
1291+
return_value={'id': 'host'})
1292+
@mock.patch.object(netutils, 'get_hostname', autospec=True)
1293+
def test_list_hardware_info_skip_bmc_detect(
1294+
self, mocked_get_hostname, mocked_lshw, mocked_get_node):
1295+
"""Test that BMC detection is skipped when flag is set."""
1296+
mocked_get_node.return_value = {'skip_bmc_detect': True}
1297+
1298+
# Mock all the hardware info methods
1299+
self.hardware.list_network_interfaces = mock.Mock(return_value=[])
1300+
self.hardware.get_cpus = mock.Mock()
1301+
self.hardware.get_memory = mock.Mock()
1302+
self.hardware.list_block_devices = mock.Mock(return_value=[])
1303+
self.hardware.get_boot_info = mock.Mock()
1304+
self.hardware.get_system_vendor_info = mock.Mock()
1305+
mocked_get_hostname.return_value = 'mock_hostname'
1306+
1307+
# Mock BMC methods - these should NOT be called
1308+
self.hardware.get_bmc_address = mock.Mock()
1309+
self.hardware.get_bmc_v6address = mock.Mock()
1310+
self.hardware.get_bmc_mac = mock.Mock()
1311+
1312+
# Call list_hardware_info
1313+
hardware_info = self.hardware.list_hardware_info()
1314+
1315+
# BMC info should be None
1316+
self.assertIsNone(hardware_info['bmc_address'])
1317+
self.assertIsNone(hardware_info['bmc_v6address'])
1318+
self.assertNotIn('bmc_mac', hardware_info)
1319+
1320+
# BMC detection methods should NOT have been called
1321+
self.hardware.get_bmc_address.assert_not_called()
1322+
self.hardware.get_bmc_v6address.assert_not_called()
1323+
self.hardware.get_bmc_mac.assert_not_called()
1324+
1325+
@mock.patch.object(hardware, 'get_cached_node', autospec=True)
1326+
@mock.patch.object(hardware.GenericHardwareManager,
1327+
'_get_system_lshw_dict', autospec=True,
1328+
return_value={'id': 'host'})
1329+
@mock.patch.object(netutils, 'get_hostname', autospec=True)
1330+
def test_list_hardware_info_bmc_mac_unavailable(
1331+
self, mocked_get_hostname, mocked_lshw, mocked_get_node):
1332+
"""Test BMC MAC marked unavailable when not supported."""
1333+
mocked_get_node.return_value = None
1334+
1335+
# Mock all the hardware info methods
1336+
self.hardware.list_network_interfaces = mock.Mock(return_value=[])
1337+
self.hardware.get_cpus = mock.Mock()
1338+
self.hardware.get_memory = mock.Mock()
1339+
self.hardware.list_block_devices = mock.Mock(return_value=[])
1340+
self.hardware.get_boot_info = mock.Mock()
1341+
self.hardware.get_system_vendor_info = mock.Mock()
1342+
mocked_get_hostname.return_value = 'mock_hostname'
1343+
1344+
# Mock BMC methods
1345+
self.hardware.get_bmc_address = mock.Mock(return_value='192.168.1.1')
1346+
self.hardware.get_bmc_v6address = mock.Mock(return_value='fe80::1')
1347+
# BMC MAC raises IncompatibleHardwareMethodError
1348+
self.hardware.get_bmc_mac = mock.Mock(
1349+
side_effect=errors.IncompatibleHardwareMethodError)
1350+
1351+
# First call
1352+
hardware_info1 = self.hardware.list_hardware_info()
1353+
self.assertEqual('192.168.1.1', hardware_info1['bmc_address'])
1354+
self.assertEqual('fe80::1', hardware_info1['bmc_v6address'])
1355+
self.assertNotIn('bmc_mac', hardware_info1) # Not in output
1356+
1357+
# Verify get_bmc_mac was called once
1358+
self.hardware.get_bmc_mac.assert_called_once()
1359+
1360+
# Second call - get_bmc_mac should NOT be called again
1361+
hardware_info2 = self.hardware.list_hardware_info()
1362+
self.assertEqual('192.168.1.1', hardware_info2['bmc_address'])
1363+
self.assertEqual('fe80::1', hardware_info2['bmc_v6address'])
1364+
self.assertNotIn('bmc_mac', hardware_info2)
1365+
1366+
# Still only one call (cached as 'unavailable')
1367+
self.hardware.get_bmc_mac.assert_called_once()
1368+
12421369
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
12431370
def test_list_block_devices(self, list_mock):
12441371
device = hardware.BlockDevice('/dev/hdaa', 'small', 65535, False)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
features:
3+
- |
4+
Adds support for automatically skipping BMC detection via ipmitool
5+
when Ironic indicates the node uses out-of-band management interfaces
6+
(e.g., Redfish, iDRAC Redfish, iLO, iRMC). This reduces deployment
7+
time and avoids unnecessary ipmitool calls when the BMC information
8+
is already known to Ironic.
9+
- |
10+
BMC information (address, v6address, and MAC) is now cached after the
11+
first detection to avoid repeated expensive ipmitool calls during
12+
subsequent inventory collections. This improves performance during
13+
heartbeats and long-running operations like cleaning or rescue mode.

0 commit comments

Comments
 (0)