Skip to content

Commit 5f4fa7f

Browse files
author
Josh Gachnang
committed
Add cleaning/zapping support to IPA
This will add support for in-band cleaning operations to IPA and replace the unused decom API. Adds API support for get_clean_steps, which returns a list of supported clean steps for the node, and execute_clean_step, which executes one of the steps returned by get_clean_steps. Adds versioning and naming for hardware managers, so if a new hardware manager version is deployed in the middle of cleaning/zapping, the cleaning/zapping will be restarted to avoid incompatibilities. blueprint implement-cleaning-states blueprint inband-raid-configuration blueprint implement-zaping-states Depends-On: Ia2500ed5afb72058b4c5e8f41307169381cbce48 Change-Id: I750b80b9bf98b3ddc5643bb4c14a67d2052239af
1 parent be44ac8 commit 5f4fa7f

File tree

8 files changed

+351
-60
lines changed

8 files changed

+351
-60
lines changed

ironic_python_agent/errors.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,38 @@ def __init__(self, details=None):
284284
super(IncompatibleHardwareMethodError, self).__init__(details)
285285

286286

287+
class CleanVersionMismatch(RESTError):
    """Error raised when the agent and node clean versions differ.

    A difference between the version handed out by get_clean_steps and
    the version the agent reports now indicates the agent was updated in
    between (deliberately, or because a new agent was deployed and the
    node rebooted). There is no way to know whether the upgraded IPA is
    compatible with the cleaning run in progress (step priorities may
    have changed, in IPA or in other Ironic interfaces), so cleaning
    should be restarted from the beginning.
    """
    message = 'Clean version mismatch, reload agent with correct version'

    def __init__(self, agent_version, node_version):
        """Build the error from the two versions that disagree.

        :param agent_version: clean version currently reported by the agent
        :param node_version: clean version the caller supplied
        """
        # Set before delegating to the parent initializer, as the status
        # code is specific to this error (409).
        self.status_code = 409
        template = 'Agent clean version: {0}, node clean version: {1}'
        mismatch_details = template.format(agent_version, node_version)
        super(CleanVersionMismatch, self).__init__(mismatch_details)
305+
306+
307+
class CleaningError(RESTError):
    """Error raised when a clean step does not complete successfully."""
    message = 'Clean step failed.'

    def __init__(self, details=None):
        """Initialize the error.

        :param details: optional description of the failure; when it is
            None the class-level ``message`` is used instead.
        """
        if details is None:
            details = self.message
        super(CleaningError, self).__init__(details)
317+
318+
287319
class ISCSIError(RESTError):
288320
"""Error raised when an image cannot be written to a device."""
289321

ironic_python_agent/extensions/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class AgentCommandStatus(object):
3030
RUNNING = u'RUNNING'
3131
SUCCEEDED = u'SUCCEEDED'
3232
FAILED = u'FAILED'
33+
CLEAN_VERSION_MISMATCH = u'CLEAN_VERSION_MISMATCH'
3334

3435

3536
class BaseCommandResult(encoding.Serializable):
@@ -153,6 +154,11 @@ def run(self):
153154
with self.command_state_lock:
154155
self.command_result = result
155156
self.command_status = AgentCommandStatus.SUCCEEDED
157+
except errors.CleanVersionMismatch as e:
158+
with self.command_state_lock:
159+
self.command_error = e
160+
self.command_status = AgentCommandStatus.CLEAN_VERSION_MISMATCH
161+
self.command_result = None
156162
except Exception as e:
157163
if not isinstance(e, errors.RESTError):
158164
e = errors.CommandExecutionError(str(e))
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2015 Rackspace, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from ironic_python_agent import errors
16+
from ironic_python_agent.extensions import base
17+
from ironic_python_agent import hardware
18+
19+
20+
class CleanExtension(base.BaseAgentExtension):
    """Agent extension exposing the in-band cleaning commands."""

    @base.sync_command('get_clean_steps')
    def get_clean_steps(self, node, ports):
        """Get the clean steps supported for the node and ports.

        :param node: A dict representation of a node
        :param ports: A dict representation of ports attached to node

        :returns: A dict with two keys: ``clean_steps``, the result of
            dispatching ``get_clean_steps`` to all hardware managers,
            and ``hardware_manager_version``, the current clean version
            as computed by _get_current_clean_version().
        """
        # dispatch_to_all_managers' result is passed through untouched;
        # per the original note it should be a dict, not a list.
        steps = hardware.dispatch_to_all_managers('get_clean_steps',
                                                  node, ports)

        return {
            'clean_steps': steps,
            'hardware_manager_version': _get_current_clean_version()
        }

    @base.async_command('execute_clean_step')
    def execute_clean_step(self, step, node, ports, clean_version=None,
                           **kwargs):
        """Execute a clean step.

        :param step: A clean step with 'step', 'priority' and 'interface'
            keys
        :param node: A dict representation of a node
        :param ports: A dict representation of ports attached to node
        :param clean_version: The clean version as returned by
            _get_current_clean_version() at the beginning of
            cleaning/zapping
        :returns: a dict with ``clean_result`` set to whatever the step
            returns and ``clean_step`` set to the step that was executed.
        :raises: CleanVersionMismatch if ``clean_version`` is given and
            differs from the agent's current clean version.
        :raises: ValueError if ``step`` has no 'step' key.
        :raises: CleaningError if the hardware manager fails while running
            the step.
        """
        # Ensure the agent is still the same version, or raise an exception
        _check_clean_version(clean_version)

        if 'step' not in step:
            # Use a str.format placeholder so the malformed step actually
            # appears in the message.
            raise ValueError(
                'Malformed clean_step, no "step" key: {0}'.format(step))
        try:
            result = hardware.dispatch_to_managers(step['step'], node, ports)
        except Exception as e:
            # Any failure inside the step is reported as a CleaningError
            # carrying the step name and the underlying error text.
            raise errors.CleaningError(
                'Error performing clean_step %(step)s: %(err)s' %
                {'step': step['step'], 'err': e})
        # Return the step that was executed so we can dispatch
        # to the appropriate Ironic interface
        return {
            'clean_result': result,
            'clean_step': step
        }
70+
71+
72+
def _check_clean_version(clean_version=None):
    """Verify the caller's clean version still matches the agent's.

    :param clean_version: the clean version the caller previously
        received, or None when no version is known yet.
    :raises: CleanVersionMismatch if a version is supplied and it differs
        from the agent's current clean version.
    """
    # No version supplied: treated as the first run, nothing to compare.
    if clean_version is not None:
        current_version = _get_current_clean_version()
        if clean_version != current_version:
            raise errors.CleanVersionMismatch(
                agent_version=current_version,
                node_version=clean_version)
81+
82+
83+
def _get_current_clean_version():
    """Collect the name and version reported by every hardware manager.

    :returns: a dict mapping each manager's reported ``name`` to its
        reported ``version``, built from the ``get_version`` results of
        all hardware managers.
    """
    reported = hardware.dispatch_to_all_managers('get_version')
    versions_by_name = {}
    for manager_info in reported.values():
        versions_by_name[manager_info.get('name')] = (
            manager_info.get('version'))
    return versions_by_name

ironic_python_agent/extensions/decom.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

ironic_python_agent/hardware.py

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,13 +137,16 @@ def erase_block_device(self, block_device):
137137
"""
138138
raise errors.IncompatibleHardwareMethodError
139139

140-
def erase_devices(self):
140+
def erase_devices(self, node, ports):
141141
"""Erase any device that holds user data.
142142
143143
By default this will attempt to erase block devices. This method can be
144144
overridden in an implementation-specific hardware manager in order to
145145
erase additional hardware, although backwards-compatible upstream
146146
submissions are encouraged.
147+
148+
:param node: Ironic node object
149+
:param ports: list of Ironic port objects
147150
"""
148151
block_devices = self.list_block_devices()
149152
for block_device in block_devices:
@@ -157,8 +160,73 @@ def list_hardware_info(self):
157160
hardware_info['memory'] = self.get_memory()
158161
return hardware_info
159162

163+
def get_clean_steps(self, node, ports):
    """Get a list of clean steps with priority.

    Returns a list of dicts of the following form:
    {'step': the HardwareManager function to call.
     'priority': the order steps will be run in. Ironic will sort all the
                 clean steps from all the drivers, with the largest
                 priority step being run first. If priority is set to 0,
                 the step will not be run during cleaning, but may be run
                 during zapping.
     'interface': the Ironic interface the step is associated with
                  ('deploy' for the default step below).
     'reboot_requested': Whether the agent should request Ironic reboots
                         the node via the power driver after the
                         operation completes.
    }

    Note: multiple hardware managers may return the same step name. The
    priority of the step will be the largest priority of steps with
    the same name. The steps will be called using
    `hardware.dispatch_to_managers` and handled by the best suited
    hardware manager. If you need a step to be executed by only your
    hardware manager, ensure it has a unique step name.

    `node` and `ports` can be used by other hardware managers to further
    determine if a clean step is supported for the node. This default
    implementation ignores them.

    :param node: Ironic node object
    :param ports: list of Ironic port objects
    :return: a default list of clean steps, as a list of dictionaries
    """
    return [
        {
            'step': 'erase_devices',
            'priority': 10,
            'interface': 'deploy',
            'reboot_requested': False
        }
    ]
199+
200+
def get_version(self):
    """Report the name and version identifying this hardware manager.

    To avoid errors and keep agent upgrades painless, cleaning checks
    the version of all hardware managers during get_clean_steps at the
    start of cleaning and again before each step is executed in the
    agent.

    The agent cannot see steps performed out of band before or after its
    own, so it can never tell whether a new step is safe to run; the
    default is therefore to restart the whole process.

    :returns: a dictionary with two keys: `name` and `version`. `name`
        is the HARDWARE_MANAGER_NAME attribute when present, otherwise
        the class name. `version` is the HARDWARE_MANAGER_VERSION
        attribute when present, otherwise '1.0'.
    """
    manager_name = getattr(self, 'HARDWARE_MANAGER_NAME',
                           type(self).__name__)
    manager_version = getattr(self, 'HARDWARE_MANAGER_VERSION', '1.0')
    return {'name': manager_name, 'version': manager_version}
224+
160225

161226
class GenericHardwareManager(HardwareManager):
227+
HARDWARE_MANAGER_NAME = 'generic_hardware_manager'
228+
HARDWARE_MANAGER_VERSION = '1.0'
229+
162230
def __init__(self):
163231
self.sys_path = '/sys'
164232

0 commit comments

Comments
 (0)