Skip to content

Commit db76c89

Browse files
Jenkins and openstack-gerrit
authored and committed
Merge "NUMA-topology collector"
2 parents 2deef86 + cc9e05d commit db76c89

File tree

5 files changed

+616
-0
lines changed

5 files changed

+616
-0
lines changed

ironic_python_agent/errors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,12 @@ def __init__(self, error_msg):
293293
super(ISCSIError, self).__init__(details)
294294

295295

296+
class IncompatibleNumaFormatError(RESTError):
    """Signal that NUMA node data was found in an unexpected format."""

    # Default human-readable text associated with this error type.
    message = "Error in NUMA node data format"
300+
301+
296302
class ISCSICommandError(ISCSIError):
297303
"""Error executing TGT command."""
298304

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
# Copyright 2017 Red Hat, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12+
# implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import os
17+
18+
from oslo_log import log
19+
import pint
20+
21+
from ironic_python_agent import errors
22+
23+
LOG = log.getLogger(__name__)
24+
25+
# Registry used to turn "<number> <unit>" memory strings into a number.
# NOTE(review): filename=None presumably creates a registry without pint's
# default unit definitions, leaving only the units declared below -- confirm
# against the pint documentation.
UNIT_CONVERTER = pint.UnitRegistry(filename=None)

# Definitions are applied in order: 'kB' and 'KB' are declared first, then
# 'MB' and 'GB' are expressed as multiples (1024 and 1048576) of 'KB'.
for _unit_definition in (
        'kB = []',
        'KB = []',
        'MB = 1024 KB',
        'GB = 1048576 KB',
):
    UNIT_CONVERTER.define(_unit_definition)
del _unit_definition
30+
31+
32+
def get_numa_node_id(numa_node_dir):
    """Provides the NUMA node id from NUMA node directory

    The id is taken from the last path component, which is expected to look
    like ``node<id>``: its first four characters are dropped and the rest is
    parsed as an integer. Trailing path separators are ignored, so
    ``.../node3/`` yields the same id as ``.../node3``.

    :param numa_node_dir: NUMA node directory
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node dir

    :return: NUMA node id
    """
    # os.path.basename() returns '' when the path ends with a separator,
    # so normalise the path before extracting the directory name.
    node_dir_name = os.path.basename(os.path.normpath(numa_node_dir))
    try:
        return int(node_dir_name[4:])
    except (IOError, ValueError, IndexError) as exc:
        msg = ('Failed to get NUMA node id for %(node)s: '
               '%(error)s' % {'node': numa_node_dir, 'error': exc})
        raise errors.IncompatibleNumaFormatError(msg)
47+
48+
49+
def get_nodes_memory_info(numa_node_dirs):
    """Gather the total memory of every given NUMA node.

    The result has the following shape:

    "ram": [{"numa_node": <numa_node_id>, "size_kb": <memory_in_kb>}, ...]

    For each directory the ``meminfo`` file is scanned for the first line
    containing ``MemTotal``; the text after the colon is converted with
    UNIT_CONVERTER and truncated to an integer.

    :param numa_node_dirs: A list of NUMA node directories
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node

    :return: A list of memory information with NUMA node id
    """
    memory_entries = []
    for node_dir in numa_node_dirs:
        node_id = get_numa_node_id(node_dir)
        meminfo_path = os.path.join(node_dir, 'meminfo')
        try:
            with open(meminfo_path) as meminfo:
                # Stop reading as soon as the MemTotal line is found.
                total_line = next(
                    (entry for entry in meminfo if 'MemTotal' in entry),
                    None)
                if total_line is None:
                    raise errors.IncompatibleNumaFormatError(
                        'Memory information is not available for '
                        '%(node)s' % {'node': node_dir})
        except IOError as exc:
            raise errors.IncompatibleNumaFormatError(
                'Failed to get memory information '
                'for %(node)s: %(error)s' %
                {'node': node_dir, 'error': exc})

        try:
            # Sample line format: 'Node 0 MemTotal: 1560000 kB'; keep the
            # "<size> <unit>" part that follows the colon.
            size_text = total_line.split(":")[1].strip()
            size_kb = int(UNIT_CONVERTER(size_text).to_base_units())
        except (ValueError, IndexError, pint.errors.UndefinedUnitError) as exc:
            raise errors.IncompatibleNumaFormatError(
                'Failed to get memory information for %(node)s: '
                '%(error)s' % {'node': node_dir, 'error': exc})

        LOG.debug('Found memory available %d KB in NUMA node %d',
                  size_kb, node_id)
        memory_entries.append({'numa_node': node_id, 'size_kb': size_kb})
    return memory_entries
94+
95+
96+
def get_nodes_cores_info(numa_node_dirs):
    """Collect the NUMA nodes cpu's and thread's information.

    "cpus": [
              {
                "cpu": <cpu_id>, "numa_node": <numa_node_id>,
                "thread_siblings": [<list of sibling threads>]
              },
              ...,
            ]

    NUMA nodes path: /sys/devices/system/node/node<node_id>

    Thread dirs path: /sys/devices/system/node/node<node_id>/cpu<thread_id>

    CPU id file path: /sys/devices/system/node/node<node_id>/cpu<thread_id>/
                      topology/core_id

    Only sub-directories whose name starts with ``cpu`` are treated as
    thread directories; every other entry of a NUMA node directory is
    ignored.

    :param numa_node_dirs: A list of NUMA node directories
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node

    :return: A list of cpu information with NUMA node id and thread siblings
    """
    dict_cpus = {}
    for numa_node_dir in numa_node_dirs:
        numa_node_id = get_numa_node_id(numa_node_dir)
        try:
            thread_dirs = os.listdir(numa_node_dir)
        except OSError as exc:
            msg = ('Failed to get list of threads for %(node)s: '
                   '%(error)s' % {'node': numa_node_dir, 'error': exc})
            raise errors.IncompatibleNumaFormatError(msg)

        for thread_dir in thread_dirs:
            if (not os.path.isdir(os.path.join(numa_node_dir, thread_dir))
                    or not thread_dir.startswith("cpu")):
                continue

            # The thread id is the numeric suffix of the "cpu<N>" dir name.
            try:
                thread_id = int(thread_dir[3:])
            except (ValueError, IndexError) as exc:
                msg = ('Failed to get cores information for '
                       '%(node)s: %(error)s' %
                       {'node': numa_node_dir, 'error': exc})
                raise errors.IncompatibleNumaFormatError(msg)

            try:
                with open(os.path.join(numa_node_dir, thread_dir, 'topology',
                                       'core_id')) as core_id_file:
                    cpu_id = int(core_id_file.read().strip())
            except (IOError, ValueError) as exc:
                # A space separates "thread" from the thread directory name
                # so the message does not read e.g. "threadcpu0".
                msg = ('Failed to gather cpu_id for thread '
                       '%(thread)s NUMA node %(node)s: %(error)s' %
                       {'thread': thread_dir, 'node': numa_node_dir,
                        'error': exc})
                raise errors.IncompatibleNumaFormatError(msg)

            # CPU and NUMA node together forms a unique value, as cpu_id is
            # specific to a NUMA node
            # NUMA node id and cpu id tuple is used for unique key
            dict_key = numa_node_id, cpu_id
            if dict_key in dict_cpus:
                siblings = dict_cpus[dict_key]['thread_siblings']
                if thread_id not in siblings:
                    siblings.append(thread_id)
            else:
                cpu_item = {}
                cpu_item['thread_siblings'] = [thread_id]
                cpu_item['cpu'] = cpu_id
                cpu_item['numa_node'] = numa_node_id
                dict_cpus[dict_key] = cpu_item
            LOG.debug('Found a thread sibling %d for CPU %d in NUMA node %d',
                      thread_id, cpu_id, numa_node_id)
    return list(dict_cpus.values())
165+
166+
167+
def get_nodes_nics_info(nic_device_path):
    """Gather the NUMA node id of the network interfaces.

    The result has the following shape:

    "nics": [
              {"name": "<network interface name>", "numa_node": <numa_node_id>},
              ...,
            ]

    Entries of ``nic_device_path`` that have no ``device`` sub-directory are
    skipped; for the others the node id is read from ``device/numa_node``.

    :param nic_device_path: nic device directory path
    :raises: IncompatibleNumaFormatError: when unexpected format data
             in NUMA node

    :return: A list of nics information with NUMA node id
    """
    if not os.path.isdir(nic_device_path):
        raise errors.IncompatibleNumaFormatError(
            'Failed to get list of NIC\'s, NIC device path '
            'does not exist: %(nic_device_path)s' %
            {'nic_device_path': nic_device_path})

    nic_entries = []
    for nic_name in os.listdir(nic_device_path):
        device_dir = os.path.join(nic_device_path, nic_name, 'device')
        if not os.path.isdir(device_dir):
            continue
        try:
            with open(os.path.join(device_dir, 'numa_node')) as node_file:
                node_id = int(node_file.read().strip())
        except (IOError, ValueError) as exc:
            raise errors.IncompatibleNumaFormatError(
                'Failed to gather NIC\'s for NUMA node %(node)s: '
                '%(error)s' % {'node': nic_name, 'error': exc})
        LOG.debug('Found a NIC %s in NUMA node %d', nic_name, node_id)
        nic_entries.append({'name': nic_name, 'numa_node': node_id})
    return nic_entries
205+
206+
207+
def collect_numa_topology_info(data, failures):
    """Store the NUMA topology of the machine under data['numa_topology'].

    The stored value has the following shape:

    {
      "numa_topology": {
        "ram": [{"numa_node": <numa_node_id>, "size_kb": <memory_in_kb>}, ...],
        "cpus": [
          {
            "cpu": <cpu_id>, "numa_node": <numa_node_id>,
            "thread_siblings": [<list of sibling threads>]
          },
          ...,
        ],
        "nics": [
          {"name": "<network interface name>", "numa_node": <numa_node_id>},
          ...,
        ]
      }
    }

    The data is gathered from /sys/devices/system/node/node<X> and
    /sys/class/net/ directories. When the NUMA node path is missing, or any
    of the three lookups raises IncompatibleNumaFormatError, a warning is
    logged and ``data`` is left untouched.

    :param data: mutable data that we'll send to inspector
    :param failures: AccumulatedFailures object (not used by this function)

    :return: None
    """
    numa_node_path = '/sys/devices/system/node/'
    nic_device_path = '/sys/class/net/'

    if not os.path.isdir(numa_node_path):
        LOG.warning('Failed to get list of NUMA nodes, NUMA node path '
                    'does not exist: %s', numa_node_path)
        return

    # Keep only the "node*" sub-directories of the NUMA node path.
    numa_node_dirs = [
        os.path.join(numa_node_path, entry)
        for entry in os.listdir(numa_node_path)
        if os.path.isdir(os.path.join(numa_node_path, entry))
        and entry.startswith("node")
    ]

    try:
        # Values are evaluated in order: ram, then cpus, then nics.
        numa_info = {
            'ram': get_nodes_memory_info(numa_node_dirs),
            'cpus': get_nodes_cores_info(numa_node_dirs),
            'nics': get_nodes_nics_info(nic_device_path),
        }
    except errors.IncompatibleNumaFormatError as exc:
        LOG.warning('Failed to get some NUMA information (%s)', exc)
        return

    data['numa_topology'] = numa_info

0 commit comments

Comments
 (0)