Skip to content

Commit 0968304

Browse files
committed
Add support for inspection using ironic-inspector
Adds a new module, ironic_python_agent.inspector, and a new entry point for extensions, which allows vendor-specific inspection. Inspection is run on service start-up just before the lookup. Because of this early start, and because we don't even know the MAC addresses of nodes during inspection (to say nothing of IP addresses), exception handling is a bit different from other agent features: we try hard not to error out until we have sent at least something to inspector. Change-Id: I00932463d41819fd0a050782e2c88eddf6fc08c6
1 parent cdb4b52 commit 0968304

11 files changed

Lines changed: 735 additions & 15 deletions

File tree

doc/source/index.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,19 @@ After the agent heartbeats, the conductor performs any actions needed against
6363
the node, including querying status of an already run command. For example,
6464
initiating in-band cleaning tasks or deploying an image to the node.
6565

66+
Inspection
67+
~~~~~~~~~~
68+
IPA can conduct hardware inspection on start up and post data to the `Ironic
69+
Inspector`_. Edit your default PXE/iPXE configuration or kernel command-line
70+
options baked into the image, and set ``ipa-inspection-callback-url`` to the
71+
full endpoint of Ironic Inspector, for example::
72+
73+
ipa-inspection-callback-url=http://IP:5050/v1/continue
74+
75+
Make sure your DHCP environment is set to boot IPA by default.
76+
77+
.. _Ironic Inspector: https://github.com/openstack/ironic-inspector
78+
6679
Image Builders
6780
--------------
6881
Unlike most other python software, you must build an IPA ramdisk image before

ironic_python_agent/agent.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from ironic_python_agent import errors
2929
from ironic_python_agent.extensions import base
3030
from ironic_python_agent import hardware
31+
from ironic_python_agent import inspector
3132
from ironic_python_agent import ironic_api_client
3233

3334

@@ -280,11 +281,16 @@ def run(self):
280281
hardware.load_managers()
281282

282283
if not self.standalone:
284+
# Inspection should be started before call to lookup, otherwise
285+
# lookup will fail due to unknown MAC.
286+
uuid = inspector.inspect()
287+
283288
content = self.api_client.lookup_node(
284289
hardware_info=hardware.dispatch_to_managers(
285290
'list_hardware_info'),
286291
timeout=self.lookup_timeout,
287-
starting_interval=self.lookup_interval)
292+
starting_interval=self.lookup_interval,
293+
node_uuid=uuid)
288294

289295
self.node = content['node']
290296
self.heartbeat_timeout = content['heartbeat_timeout']

ironic_python_agent/cmd/agent.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from oslo_log import log
1919

2020
from ironic_python_agent import agent
21+
from ironic_python_agent import inspector
2122
from ironic_python_agent import utils
2223

2324
CONF = cfg.CONF
@@ -99,6 +100,19 @@
99100
default=APARAMS.get('ipa-standalone', False),
100101
help='Note: for debugging only. Start the Agent but suppress '
101102
'any calls to Ironic API.'),
103+
104+
cfg.StrOpt('inspection_callback_url',
105+
default=APARAMS.get('ipa-inspection-callback-url'),
106+
help='Endpoint of ironic-inspector. If set, hardware inventory '
107+
'will be collected and sent to ironic-inspector '
108+
'on start up.'),
109+
110+
cfg.StrOpt('inspection_collectors',
111+
default=APARAMS.get('ipa-inspection-collectors',
112+
inspector.DEFAULT_COLLECTOR),
113+
help='Comma-separated list of plugins providing additional '
114+
'hardware data for inspection, empty value gives '
115+
'a minimum required set of plugins.'),
102116
]
103117

104118
CONF.register_cli_opts(cli_opts)

ironic_python_agent/errors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,3 +321,9 @@ class DeviceNotFound(NotFound):
321321

322322
def __init__(self, details):
323323
super(DeviceNotFound, self).__init__(details)
324+
325+
326+
# Inspection failures are never returned to an API user, which is why this
# exception derives directly from Exception and not from RESTError.
class InspectionError(Exception):
    """Failure during inspection.

    Raised by the inspection code when collecting data or talking to
    ironic-inspector goes wrong.
    """

ironic_python_agent/hardware.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -391,20 +391,7 @@ def get_os_install_device(self):
391391
root_device_hints = utils.parse_root_device_hints()
392392

393393
if not root_device_hints:
394-
# If no hints are passed find the first device larger than
395-
# 4GiB, assume it is the OS disk
396-
min_size_required = 4 * units.Gi
397-
# TODO(russellhaering): This isn't a valid assumption in
398-
# all cases, is there a more reasonable default behavior?
399-
block_devices.sort(key=lambda device: device.size)
400-
if block_devices[-1].size < min_size_required:
401-
raise errors.DeviceNotFound("No suitable device was found "
402-
"for deployment - root device hints were not provided "
403-
"and all found block devices are smaller than %iB."
404-
% min_size_required)
405-
for device in block_devices:
406-
if device.size >= min_size_required:
407-
return device.name
394+
return utils.guess_root_disk(block_devices).name
408395
else:
409396

410397
def match(hint, current_value, device):

ironic_python_agent/inspector.py

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Copyright 2015 Red Hat, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12+
# implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import logging
17+
18+
from oslo_concurrency import processutils
19+
from oslo_config import cfg
20+
from oslo_utils import excutils
21+
from oslo_utils import units
22+
import requests
23+
import stevedore
24+
25+
from ironic_python_agent import encoding
26+
from ironic_python_agent import errors
27+
from ironic_python_agent import hardware
28+
from ironic_python_agent import utils
29+
30+
31+
LOG = logging.getLogger(__name__)
32+
CONF = cfg.CONF
33+
DEFAULT_COLLECTOR = 'default'
34+
_COLLECTOR_NS = 'ironic_python_agent.inspector.collectors'
35+
36+
37+
def extension_manager(names):
    """Build a stevedore manager for the requested inspection collectors.

    :param names: list of collector entry point names to load from the
                  ``ironic_python_agent.inspector.collectors`` namespace;
                  the extensions are kept in the order given here.
    :returns: a ``stevedore.NamedExtensionManager`` instance.
    :raises: InspectionError if constructing the manager raises KeyError
             (presumably because a requested name is not registered --
             confirm against the stevedore version in use).
    """
    try:
        manager = stevedore.NamedExtensionManager(
            _COLLECTOR_NS, names=names, name_order=True)
    except KeyError as exc:
        raise errors.InspectionError('Failed to load collector %s' % exc)
    return manager
44+
45+
46+
def inspect():
    """Optionally run inspection on the current node.

    When ``inspection_callback_url`` is configured, every collector named in
    ``inspection_collectors`` is run and the accumulated data is posted to
    the inspector. IPMI credentials are updated afterwards if the inspector
    response asks for it.

    :return: the ``uuid`` value from the inspector response on success.
             None is returned when inspection is disabled or when the
             inspector answered with an error.
    :raises: InspectionError for failures accumulated while loading or
             running collectors (raised only after data has been posted).
    """
    if not CONF.inspection_callback_url:
        LOG.info('Inspection is disabled, skipping')
        return None

    collector_names = []
    for raw_name in CONF.inspection_collectors.split(','):
        stripped = raw_name.strip()
        if stripped:
            collector_names.append(stripped)
    LOG.info('inspection is enabled with collectors %s', collector_names)

    # NOTE(dtantsur): inspection process tries to delay raising any exceptions
    # until after we posted some data back to inspector. This is because
    # inspection is run automatically on (mostly) unknown nodes, so if it
    # fails, we don't have much information for debugging.
    failures = utils.AccumulatedFailures(exc_class=errors.InspectionError)
    data = {}

    try:
        collectors = [(ext.name, ext.plugin)
                      for ext in extension_manager(collector_names)]
    except Exception as exc:
        # Report the loading problem to inspector first, then let the
        # original exception propagate
        with excutils.save_and_reraise_exception():
            failures.add(exc)
            call_inspector(data, failures)

    for name, collector in collectors:
        try:
            collector(data, failures)
        except Exception as exc:
            # No reraise here, try to keep going
            failures.add('collector %s failed: %s', name, exc)

    resp = call_inspector(data, failures)

    # Now raise everything we were delaying
    failures.raise_if_needed()

    if resp is None:
        LOG.info('stopping inspection, as inspector returned an error')
        return None

    # Optionally update IPMI credentials
    setup_ipmi_credentials(resp)

    LOG.info('inspection finished successfully')
    return resp.get('uuid')
99+
100+
101+
def call_inspector(data, failures):
    """Post data to inspector.

    The current error summary from ``failures`` is stored under the 'error'
    key of ``data`` (modifying the caller's dict) before it is JSON-encoded
    and posted to ``inspection_callback_url``.

    :param data: dict with the collected inspection data.
    :param failures: failure accumulator providing ``get_error()``.
    :returns: decoded JSON body of the response, or None if inspector
              answered with an HTTP status of 400 or above.
    """
    data['error'] = failures.get_error()

    LOG.info('posting collected data to %s', CONF.inspection_callback_url)
    LOG.debug('collected data: %s', data)

    payload = encoding.RESTJSONEncoder().encode(data)
    response = requests.post(CONF.inspection_callback_url, data=payload)

    if response.status_code < 400:
        return response.json()

    # An inspector-side error is only logged here; the caller decides what
    # to do with the None result
    LOG.error('inspector error %d: %s, proceeding with lookup',
              response.status_code, response.content.decode('utf-8'))
    return None
118+
119+
120+
def setup_ipmi_credentials(resp):
    """Setup IPMI credentials, if requested.

    Does nothing unless the response has a true 'ipmi_setup_credentials'
    value; otherwise runs a fixed sequence of ``ipmitool`` commands with the
    'ipmi_username' and 'ipmi_password' values from the response.

    :param resp: JSON response from inspector.
    :raises: InspectionError if any of the ipmitool commands fails.
    """
    if not resp.get('ipmi_setup_credentials'):
        LOG.info('setting IPMI credentials was not requested')
        return

    user = resp['ipmi_username']
    password = resp['ipmi_password']
    # Only the user name is logged, never the password
    LOG.debug('setting IPMI credentials: user %s', user)

    # NOTE(review): the literal '2' and '1' below are presumably the IPMI
    # user ID and channel number -- confirm against ipmitool documentation
    ipmitool_args = (
        ('user', 'set', 'name', '2', user),
        ('user', 'set', 'password', '2', password),
        ('user', 'enable', '2'),
        ('channel', 'setaccess', '1', '2',
         'link=on', 'ipmi=on', 'callin=on', 'privilege=4'),
    )

    for args in ipmitool_args:
        try:
            utils.execute('ipmitool', *args)
        except processutils.ProcessExecutionError:
            # Stop at the first failing command
            LOG.exception('failed to update IPMI credentials')
            raise errors.InspectionError('failed to update IPMI credentials')

    LOG.info('successfully set IPMI credentials: user %s', user)
148+
149+
150+
def discover_network_properties(inventory, data, failures):
    """Discover network and BMC related properties.

    Populates 'boot_interface', 'ipmi_address' and 'interfaces' keys.

    :param inventory: hardware inventory; 'interfaces' is required,
                      'bmc_address' is optional.
    :param data: mutable dict of inspection data, updated in place.
    :param failures: failure accumulator; a failure is added when no usable
                     network interface ends up in ``data``.
    """
    # Neither a boot interface nor a BMC address is mandatory, so missing
    # values are stored as-is and not counted as failures
    boot_interface = utils.get_agent_params().get('BOOTIF')
    data['boot_interface'] = boot_interface
    LOG.info('boot devices was %s', boot_interface)

    bmc_address = inventory.get('bmc_address')
    data['ipmi_address'] = bmc_address
    LOG.info('BMC IP address: %s', bmc_address)

    # Keep any interfaces an earlier collector may already have stored
    known_interfaces = data.setdefault('interfaces', {})
    for nic in inventory['interfaces']:
        is_loopback = nic.name == 'lo' or nic.ipv4_address == '127.0.0.1'
        if is_loopback:
            LOG.debug('ignoring local network interface %s', nic.name)
            continue

        LOG.debug('found network interface %s', nic.name)

        if not nic.mac_address:
            LOG.debug('no link information for interface %s', nic.name)
            continue

        # A missing IP address is only logged; the interface is still kept
        if not nic.ipv4_address:
            LOG.debug('no IP address for interface %s', nic.name)

        known_interfaces[nic.name] = {'mac': nic.mac_address,
                                      'ip': nic.ipv4_address}

    if not known_interfaces:
        failures.add('no network interfaces found')
    else:
        LOG.info('network interfaces: %s', known_interfaces)
184+
185+
186+
def discover_scheduling_properties(inventory, data):
    """Discover the CPU, memory and disk properties used for scheduling.

    Populates 'cpus', 'cpu_arch' and 'memory_mb' keys from the inventory,
    plus 'local_gb' when a root disk can be guessed. The numeric values are
    then converted to integers where possible; a missing or malformed value
    is only logged, not treated as an error.

    :param inventory: hardware inventory providing 'cpu', 'memory' and
                      'disks' entries.
    :param data: mutable dict of inspection data, updated in place.
    """
    data['cpus'] = inventory['cpu'].count
    data['cpu_arch'] = inventory['cpu'].architecture
    data['memory_mb'] = inventory['memory'].physical_mb

    # Replicate the same logic as in deploy. This logic will be moved to
    # inspector itself, but we need it for backward compatibility.
    try:
        disk = utils.guess_root_disk(inventory['disks'])
    except errors.DeviceNotFound:
        LOG.warning('no suitable root device detected')
    else:
        # Floor division keeps the result an exact integer on Python 3
        # (true division would go through a float);
        # -1 is required to give Ironic some spacing for partitioning
        data['local_gb'] = disk.size // units.Gi - 1

    for key in ('cpus', 'local_gb', 'memory_mb'):
        try:
            data[key] = int(data[key])
        except (KeyError, ValueError, TypeError):
            # 'local_gb' is legitimately absent when no root disk was found
            LOG.warning('value for %s is missing or malformed: %s',
                        key, data.get(key))
        else:
            LOG.info('value for %s is %s', key, data[key])
209+
210+
211+
def collect_default(data, failures):
    """Default inspection collector.

    Fetches the hardware inventory through the hardware managers, derives
    the network and scheduling properties from it and stores the full
    inventory under the 'inventory' key.

    :param data: mutable dict of inspection data, updated in place.
    :param failures: failure accumulator handed to the network discovery.
    """
    inventory = hardware.dispatch_to_managers('list_hardware_info')

    # These 2 calls are required for backward compatibility and should be
    # dropped after inspector is ready (probably in Mitaka cycle).
    discover_network_properties(inventory, data, failures)
    discover_scheduling_properties(inventory, data)

    # In the future we will only need the current version of inventory,
    # everything else will be done by inspector itself and its plugins
    data['inventory'] = inventory

ironic_python_agent/tests/unit/test_agent.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import time
1717

1818
import mock
19+
from oslo_config import cfg
1920
from oslotest import base as test_base
2021
import pkg_resources
2122
from stevedore import extension
@@ -26,9 +27,12 @@
2627
from ironic_python_agent import errors
2728
from ironic_python_agent.extensions import base
2829
from ironic_python_agent import hardware
30+
from ironic_python_agent import inspector
2931

3032
EXPECTED_ERROR = RuntimeError('command execution failed')
3133

34+
CONF = cfg.CONF
35+
3236

3337
def foo_execute(*args, **kwargs):
3438
if kwargs['fail']:
@@ -164,6 +168,7 @@ def test_get_status(self):
164168
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
165169
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')
166170
def test_run(self, mocked_list_hardware, wsgi_server_cls):
171+
CONF.set_override('inspection_callback_url', '')
167172
wsgi_server = wsgi_server_cls.return_value
168173
wsgi_server.start.side_effect = KeyboardInterrupt()
169174

@@ -187,6 +192,43 @@ def test_run(self, mocked_list_hardware, wsgi_server_cls):
187192

188193
self.agent.heartbeater.start.assert_called_once_with()
189194

195+
@mock.patch.object(inspector, 'inspect', autospec=True)
@mock.patch('wsgiref.simple_server.make_server', autospec=True)
@mock.patch.object(hardware.HardwareManager, 'list_hardware_info')
def test_run_with_inspection(self, mocked_list_hardware, wsgi_server_cls,
                             mocked_inspector):
    """run() calls inspect() once and passes its UUID on to the lookup."""
    CONF.set_override('inspection_callback_url', 'http://foo/bar')
    mocked_inspector.return_value = 'uuid'

    wsgi_server = wsgi_server_cls.return_value
    wsgi_server.start.side_effect = KeyboardInterrupt()

    self.agent.heartbeater = mock.Mock()
    lookup_node = mock.Mock()
    lookup_node.return_value = {
        'node': {
            'uuid': 'deadbeef-dabb-ad00-b105-f00d00bab10c'
        },
        'heartbeat_timeout': 300,
    }
    self.agent.api_client.lookup_node = lookup_node

    self.agent.run()

    # The WSGI server must be created on the expected listen address
    wsgi_server_cls.assert_called_once_with(
        '192.0.2.1',
        9999,
        self.agent.api,
        server_class=simple_server.WSGIServer)
    wsgi_server.serve_forever.assert_called_once_with()

    # Inspection ran exactly once and its result reached the lookup call
    mocked_inspector.assert_called_once_with()
    self.assertEqual(1, lookup_node.call_count)
    self.assertEqual('uuid', lookup_node.call_args[1]['node_uuid'])

    self.agent.heartbeater.start.assert_called_once_with()
231+
190232
@mock.patch('os.read')
191233
@mock.patch('select.poll')
192234
@mock.patch('time.sleep', return_value=None)

0 commit comments

Comments
 (0)