Skip to content

Commit 20c5894

Browse files
committed
Burn-in: Add disk step
Add a clean step for disk burn-in via fio. Get basic run parameters from the node's driver_info. Story: #2007523 Task: #42384 Change-Id: I5f5e336bd629846b3d779fd0fc7a2060b385b035
1 parent 6fc5a14 commit 20c5894

File tree

6 files changed

+122
-0
lines changed

6 files changed

+122
-0
lines changed

doc/source/admin/hardware_managers.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ Clean steps
7777
``deploy.burnin_cpu``
7878
Stress-test the CPUs of a node via stress-ng for a configurable
7979
amount of time. Disabled by default.
80+
``deploy.burnin_disk``
81+
Stress-test the disks of a node via fio. Disabled by default.
8082
``deploy.burnin_memory``
8183
Stress-test the memory of a node via stress-ng for a configurable
8284
amount of time. Disabled by default.

ironic_python_agent/burnin.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from oslo_log import log
1616

1717
from ironic_python_agent import errors
18+
from ironic_python_agent import hardware
1819

1920
LOG = log.getLogger(__name__)
2021

@@ -78,3 +79,39 @@ def stress_ng_vm(node):
7879
{'err': e})
7980
LOG.error(error_msg)
8081
raise errors.CommandExecutionError(error_msg)
82+
83+
84+
def fio_disk(node):
    """Burn-in the disks with fio.

    Run an fio mixed read/write job (``--rw readwrite``) with crc32c
    verification for a configurable number of iterations or a given
    amount of time. A ``--name <device name>`` pair is appended to the
    command for every device returned by
    ``hardware.list_all_block_devices()``.

    The run parameters are read from the node's ``driver_info``:

    * ``agent_burnin_fio_disk_loops``: value passed to ``--loops``
      (default: 4)
    * ``agent_burnin_fio_disk_runtime``: value passed to ``--runtime``
      (default: 0)

    :param node: Ironic node object
    :raises: CommandExecutionError if the execution of fio fails.
    """
    info = node.get('driver_info', {})
    # 4 iterations, same as badblock's default
    loops = info.get('agent_burnin_fio_disk_loops', 4)
    runtime = info.get('agent_burnin_fio_disk_runtime', 0)

    args = ['fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
            '--ioengine', 'libaio', '--iodepth', '32', '--verify',
            'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
            '--loops', loops, '--runtime', runtime, '--time_based']

    for device in hardware.list_all_block_devices():
        args.extend(['--name', device.name])

    # args mixes ints and strings, hence the str() mapping for logging.
    LOG.debug('Burn-in fio disk command: %s', ' '.join(map(str, args)))

    try:
        out, _ = utils.execute(*args)
        # fio reports on stdout
        LOG.info(out)
    except (processutils.ProcessExecutionError, OSError) as e:
        # Interpolate the error into the message. Building a
        # (format, mapping) tuple here would log and raise the raw tuple
        # instead of a readable string.
        error_msg = "fio (disk) failed with error %(err)s" % {'err': e}
        LOG.error(error_msg)
        raise errors.CommandExecutionError(error_msg) from e

ironic_python_agent/hardware.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,6 +1402,14 @@ def burnin_cpu(self, node, ports):
14021402
"""
14031403
burnin.stress_ng_cpu(node)
14041404

1405+
def burnin_disk(self, node, ports):
    """Run the disk burn-in step for a node.

    Hands the node over to ``burnin.fio_disk``.

    :param node: Ironic node object
    :param ports: list of Ironic port objects (not used by this step)
    """
    burnin.fio_disk(node)
1412+
14051413
def burnin_memory(self, node, ports):
14061414
"""Burn-in the memory
14071415
@@ -1890,6 +1898,13 @@ def get_clean_steps(self, node, ports):
18901898
'reboot_requested': False,
18911899
'abortable': True
18921900
},
1901+
{
1902+
'step': 'burnin_disk',
1903+
'priority': 0,
1904+
'interface': 'deploy',
1905+
'reboot_requested': False,
1906+
'abortable': True
1907+
},
18931908
{
18941909
'step': 'burnin_memory',
18951910
'priority': 0,

ironic_python_agent/tests/unit/test_burnin.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from ironic_python_agent import burnin
1919
from ironic_python_agent import errors
20+
from ironic_python_agent import hardware
2021
from ironic_python_agent.tests.unit import base
2122

2223

@@ -63,6 +64,7 @@ def test_stress_ng_vm_default(self, mock_execute):
6364
burnin.stress_ng_vm(node)
6465

6566
mock_execute.assert_called_once_with(
67+
6668
'stress-ng', '--vm', 0, '--vm-bytes', '98%',
6769
'--timeout', 86400, '--metrics-brief')
6870

@@ -89,3 +91,56 @@ def test_stress_ng_vm_no_stress_ng(self, mock_execute):
8991

9092
self.assertRaises(errors.CommandExecutionError,
9193
burnin.stress_ng_vm, node)
94+
95+
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
96+
def test_fio_disk_default(self, mock_list, mock_execute):
    """Empty driver_info yields 4 loops and a runtime of 0."""
    mock_list.return_value = [
        hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
        hardware.BlockDevice('/dev/hdaa', 'small', 65535, False),
    ]
    mock_execute.return_value = ['out', 'err']

    burnin.fio_disk({'driver_info': {}})

    # Fixed fio options first, then one --name pair per mocked device.
    expected_cmd = (
        'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
        '--ioengine', 'libaio', '--iodepth', '32', '--verify',
        'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
        '--loops', 4, '--runtime', 0, '--time_based',
        '--name', '/dev/sdj',
        '--name', '/dev/hdaa',
    )
    mock_execute.assert_called_once_with(*expected_cmd)
114+
115+
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
116+
def test_fio_disk_no_default(self, mock_list, mock_execute):
    """Loops and runtime from driver_info are passed through to fio."""
    driver_info = {
        'agent_burnin_fio_disk_runtime': 600,
        'agent_burnin_fio_disk_loops': 5,
    }
    mock_list.return_value = [
        hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True),
        hardware.BlockDevice('/dev/hdaa', 'small', 65535, False),
    ]
    mock_execute.return_value = ['out', 'err']

    burnin.fio_disk({'driver_info': driver_info})

    # Fixed fio options first, then one --name pair per mocked device.
    expected_cmd = (
        'fio', '--rw', 'readwrite', '--bs', '4k', '--direct', 1,
        '--ioengine', 'libaio', '--iodepth', '32', '--verify',
        'crc32c', '--verify_dump', 1, '--continue_on_error', 'verify',
        '--loops', 5, '--runtime', 600, '--time_based',
        '--name', '/dev/sdj',
        '--name', '/dev/hdaa',
    )
    mock_execute.assert_called_once_with(*expected_cmd)
135+
136+
@mock.patch.object(hardware, 'list_all_block_devices', autospec=True)
137+
def test_fio_disk_no_fio(self, mock_list, mock_execute):
    """A failing fio execution raises CommandExecutionError."""
    node = {'driver_info': {}}
    # The first execute() call succeeds, the second one raises.
    mock_execute.side_effect = [
        ['out', 'err'],
        processutils.ProcessExecutionError(),
    ]

    burnin.fio_disk(node)

    with self.assertRaises(errors.CommandExecutionError):
        burnin.fio_disk(node)

ironic_python_agent/tests/unit/test_hardware.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,13 @@ def test_get_clean_steps(self):
157157
'reboot_requested': False,
158158
'abortable': True
159159
},
160+
{
161+
'step': 'burnin_disk',
162+
'priority': 0,
163+
'interface': 'deploy',
164+
'reboot_requested': False,
165+
'abortable': True
166+
},
160167
{
161168
'step': 'burnin_memory',
162169
'priority': 0,
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
features:
3+
- |
4+
Adds a burn-in cleaning step 'burnin_disk' to stress test disks for a
5+
configurable number of iterations or a configurable amount of time with
6+
fio. To use this step, fio needs to be installed on the RAM disk.

0 commit comments

Comments
 (0)