forked from thesofproject/sof
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathperf_cnt.h
More file actions
188 lines (168 loc) · 6.41 KB
/
perf_cnt.h
File metadata and controls
188 lines (168 loc) · 6.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright(c) 2019 Intel Corporation. All rights reserved.
*
* Author: Marcin Maka <marcin.maka@linux.intel.com>
*/
/**
* \file xtos/include/sof/lib/perf_cnt.h
* \brief Simple performance counters
* \author Marcin Maka <marcin.maka@linux.intel.com>
*/
#ifndef __SOF_LIB_PERF_CNT_H__
#define __SOF_LIB_PERF_CNT_H__
#include <rtos/timer.h>
/**
 * \brief State of a single performance counter.
 *
 * The two timestamps are written by perf_cnt_init(); the delta and peak
 * fields are updated by perf_cnt_stamp(); the sum and sample count are
 * maintained by perf_cnt_average() / task_perf_cnt_avg().
 */
struct perf_cnt_data {
	/** Platform timer reading stored by perf_cnt_init(). */
	uint32_t plat_ts;
	/** Cpu timer reading stored by perf_cnt_init(). */
	uint32_t cpu_ts;
	/** Most recent platform timer delta computed by perf_cnt_stamp(). */
	uint32_t plat_delta_last;
	/** Largest platform timer delta seen since the last reset. */
	uint32_t plat_delta_peak;
	/** Most recent cpu timer delta computed by perf_cnt_stamp(). */
	uint32_t cpu_delta_last;
	/** Largest cpu timer delta seen since the last reset. */
	uint32_t cpu_delta_peak;
	/**
	 * Running sum of cpu deltas in the current averaging window; holds
	 * the window average while the averaging trace callback runs.
	 */
	uint32_t cpu_delta_sum;
	/** Number of samples accumulated in the current averaging window. */
	uint32_t sample_cnt;
};
#if CONFIG_PERFORMANCE_COUNTERS
/** \brief Traces the last and peak platform/cpu deltas stored in pcd.
 *
 * \param ctx Trace context forwarded to tr_info().
 * \param pcd Performance counters data.
 */
#define perf_cnt_trace(ctx, pcd) \
	tr_info(ctx, \
		"perf plat last %u peak %u cpu last %u, peak %u", \
		(uint32_t)((pcd)->plat_delta_last), \
		(uint32_t)((pcd)->plat_delta_peak), \
		(uint32_t)((pcd)->cpu_delta_last), \
		(uint32_t)((pcd)->cpu_delta_peak))
/** \brief Resets every field of the performance counters data to zero.
 *
 * \param pcd Performance counters data to clear.
 */
#define perf_cnt_clear(pcd) \
	memset((pcd), 0, sizeof(struct perf_cnt_data))
/* NOTE: Zephyr's arch_timing_counter_get() might not be implemented
 * for a particular platform. In that case fall back to
 * sof_cycle_get_64() (Zephyr's k_cycle_get_64()), so that the "platform"
 * and "cpu" timestamps are both read from the same counter.
 *
 * Outside of Zephyr the cpu timestamp is read from the cpu timer.
 */
#if defined(__ZEPHYR__) && defined(CONFIG_TIMING_FUNCTIONS)
#define perf_cnt_get_cpu_ts arch_timing_counter_get
#elif defined(__ZEPHYR__)
#define perf_cnt_get_cpu_ts sof_cycle_get_64
#else
#define perf_cnt_get_cpu_ts() timer_get_system(cpu_timer_get())
#endif /* __ZEPHYR__ / CONFIG_TIMING_FUNCTIONS */
/** \brief Stores the current platform and cpu timer readings as start stamps.
 *
 * Both readings are narrowed to the 32-bit timestamp fields of pcd.
 *
 * \param pcd Performance counters data.
 */
#define perf_cnt_init(pcd) do { \
	(pcd)->plat_ts = (uint32_t)sof_cycle_get_64(); \
	(pcd)->cpu_ts = (uint32_t)perf_cnt_get_cpu_ts(); \
} while (0)
/* Trace macros that can be passed as the trace_m argument of perf_cnt_stamp()
 * to trace PCD values when the last arch timer reading exceeds the previous
 * peak value.
 *
 * The arg passed to perf_cnt_stamp() is forwarded to the trace_m() macro
 * as its second argument.
 */
/** \brief No trace when detecting peak value.
 *
 * Expands to nothing, so both arguments are ignored.
 *
 * \param pcd Performance counters data (unused).
 * \param arg Trace argument (unused).
 */
#define perf_trace_null(pcd, arg)
/** \brief Simple trace that prints all last/peak values.
 *
 * \param pcd Performance counters data.
 * \param arg Address of the tr_ctx to trace with.
 */
#define perf_trace_simple(pcd, arg) \
	perf_cnt_trace(arg, pcd)
/* Perf measurement window size expressed as a power of two: averages are
 * computed over (1 << PERF_CNT_CHECK_WINDOW_SIZE) samples.
 */
#define PERF_CNT_CHECK_WINDOW_SIZE 10
/** \brief Traces the cpu average and peak values for a task.
 *
 * Prints cpu_delta_sum as the average, so it is meant to be used as the
 * trace_m callback of task_perf_cnt_avg(), which stores the window average
 * in that field before invoking the callback.
 *
 * \param pcd Performance counters data.
 * \param task_p Trace context forwarded to tr_info().
 * \param class Object whose address and uid member are printed.
 */
#define task_perf_avg_info(pcd, task_p, class) \
	tr_info(task_p, \
		"perf_cycle task %p, %pU cpu avg %u peak %u", \
		class, \
		(class)->uid, \
		(uint32_t)((pcd)->cpu_delta_sum), \
		(uint32_t)((pcd)->cpu_delta_peak))
/** \brief Accumulates cpu delta samples for a task and reports the average.
 *
 * Adds cpu_delta_last to the running sum. Once
 * (1 << PERF_CNT_CHECK_WINDOW_SIZE) samples have been collected, the sum is
 * replaced by the window average, trace_m is invoked, and the sum, sample
 * count and both peak values are reset for the next window.
 *
 * \param pcd Performance counters data.
 * \param trace_m Trace function or macro called as trace_m(pcd, arg, class).
 * \param arg Argument passed to trace_m as arg.
 * \param class Argument passed to trace_m as class.
 */
#define task_perf_cnt_avg(pcd, trace_m, arg, class) do { \
	(pcd)->cpu_delta_sum = (pcd)->cpu_delta_sum + (pcd)->cpu_delta_last; \
	(pcd)->sample_cnt++; \
	if ((pcd)->sample_cnt == (1 << PERF_CNT_CHECK_WINDOW_SIZE)) { \
		/* cpu_delta_sum carries the average while trace_m runs */ \
		(pcd)->cpu_delta_sum = \
			(pcd)->cpu_delta_sum >> PERF_CNT_CHECK_WINDOW_SIZE; \
		trace_m(pcd, arg, class); \
		(pcd)->sample_cnt = 0; \
		(pcd)->cpu_delta_sum = 0; \
		(pcd)->cpu_delta_peak = 0; \
		(pcd)->plat_delta_peak = 0; \
	} \
} while (0)
/** \brief Accumulates cpu timer delta samples calculated by perf_cnt_stamp().
 *
 * Adds cpu_delta_last to the running sum. Once
 * (1 << PERF_CNT_CHECK_WINDOW_SIZE) samples have been collected, the sum is
 * replaced by the window average, trace_m is run, and the sum, sample count
 * and both peak values are reset for the next window.
 *
 * \param pcd Performance counters data.
 * \param trace_m Trace function trace_m(pcd, arg) or trace macro if a
 *		  more precise line number is desired in the logs.
 * \param arg Argument passed to trace_m as arg.
 */
#define perf_cnt_average(pcd, trace_m, arg) do { \
	(pcd)->cpu_delta_sum = (pcd)->cpu_delta_sum + (pcd)->cpu_delta_last; \
	(pcd)->sample_cnt++; \
	if ((pcd)->sample_cnt == (1 << PERF_CNT_CHECK_WINDOW_SIZE)) { \
		/* cpu_delta_sum carries the average while trace_m runs */ \
		(pcd)->cpu_delta_sum = \
			(pcd)->cpu_delta_sum >> PERF_CNT_CHECK_WINDOW_SIZE; \
		trace_m(pcd, arg); \
		(pcd)->sample_cnt = 0; \
		(pcd)->cpu_delta_sum = 0; \
		(pcd)->cpu_delta_peak = 0; \
		(pcd)->plat_delta_peak = 0; \
	} \
} while (0)
/** \brief Reads the timers and computes delta to the previous readings.
 *
 * The deltas are computed with unsigned 32-bit modular subtraction, so a
 * single counter wrap between the stored stamp and the new reading still
 * yields the exact number of elapsed ticks, and identical readings yield 0.
 * The stored stamps (plat_ts, cpu_ts) are not modified.
 *
 * If current arch delta exceeds the previous peak value, trace_m is run.
 * \param pcd Performance counters data.
 * \param trace_m Trace function trace_m(pcd, arg) or trace macro if a
 *		  more precise line number is desired in the logs.
 * \param arg Argument passed to trace_m as arg.
 */
#define perf_cnt_stamp(pcd, trace_m, arg) do { \
	uint32_t perf_plat_now = (uint32_t)sof_cycle_get_64(); \
	uint32_t perf_cpu_now = (uint32_t)perf_cnt_get_cpu_ts(); \
	/* modulo-2^32 difference is wrap-safe without special casing */ \
	(pcd)->plat_delta_last = \
		(uint32_t)(perf_plat_now - (pcd)->plat_ts); \
	(pcd)->cpu_delta_last = \
		(uint32_t)(perf_cpu_now - (pcd)->cpu_ts); \
	if ((pcd)->plat_delta_last > (pcd)->plat_delta_peak) \
		(pcd)->plat_delta_peak = (pcd)->plat_delta_last; \
	if ((pcd)->cpu_delta_last > (pcd)->cpu_delta_peak) { \
		(pcd)->cpu_delta_peak = (pcd)->cpu_delta_last; \
		trace_m(pcd, arg); \
	} \
} while (0)
/**
* For simple performance measurement and optimization in development stage,
 * a tic-toc API is provided. Performance data is traced at each toc call,
 * to allow fast estimation of clock usage deviation. Example:
*
* \code{.c}
* void foo(struct comp_dev *dev) {
* static struct perf_cnt_data pcd;
*
* perf_tic(&pcd);
* bar();
* perf_toc(&pcd, dev);
* }
* \endcode
*/
/** \brief Starts a tic-toc measurement.
 *
 * Stores the current timer readings in pcd via perf_cnt_init().
 *
 * \param pcd Performance counters data.
 */
#define perf_tic(pcd) perf_cnt_init(pcd)
/** \brief Ends a tic-toc measurement and traces the result.
 *
 * Computes the deltas relative to the stamps stored by perf_tic() (no trace
 * on peak detection), then traces the last and peak values unconditionally.
 *
 * \param pcd Performance counters data.
 * \param comp Component used to get corresponding trace context.
 */
#define perf_toc(pcd, comp) do { \
	perf_cnt_stamp(pcd, perf_trace_null, NULL); \
	perf_cnt_trace(trace_comp_get_tr_ctx(comp), pcd); \
} while (0)
#else
/* Performance counters disabled: every statement-like entry point of this
 * header expands to nothing, so call sites (including the tic-toc helpers)
 * build without their own CONFIG_PERFORMANCE_COUNTERS guards. Trace macros
 * that are only ever passed as a trace_m argument need no stub because the
 * no-op macros below never expand them.
 */
#define perf_cnt_clear(pcd)
#define perf_cnt_init(pcd)
#define perf_cnt_stamp(pcd, trace_m, arg)
#define perf_cnt_average(pcd, trace_m, arg)
#define task_perf_cnt_avg(pcd, trace_m, arg, class)
#define perf_cnt_trace(ctx, pcd)
#define perf_tic(pcd)
#define perf_toc(pcd, comp)
#endif
#endif /* __SOF_LIB_PERF_CNT_H__ */