1// SPDX-License-Identifier: GPL-2.0
2/*
3 * OS Noise Tracer: computes the OS Noise suffered by a running thread.
4 * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread.
5 *
6 * Based on "hwlat_detector" tracer by:
7 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
8 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
9 * With feedback from Clark Williams <williams@redhat.com>
10 *
11 * And also based on the rtsl tracer presented on:
12 * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux
13 * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems
14 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum fur Informatik, 2020.
15 *
16 * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com>
17 */
18
19#include <linux/kthread.h>
20#include <linux/tracefs.h>
21#include <linux/uaccess.h>
22#include <linux/cpumask.h>
23#include <linux/delay.h>
24#include <linux/sched/clock.h>
25#include <uapi/linux/sched/types.h>
26#include <linux/sched.h>
27#include <linux/string.h>
28#include "trace.h"
29
30#ifdef CONFIG_X86_LOCAL_APIC
31#include <asm/trace/irq_vectors.h>
32#undef TRACE_INCLUDE_PATH
33#undef TRACE_INCLUDE_FILE
34#endif /* CONFIG_X86_LOCAL_APIC */
35
36#include <trace/events/irq.h>
37#include <trace/events/sched.h>
38
39#define CREATE_TRACE_POINTS
40#include <trace/events/osnoise.h>
41
42/*
43 * Default values.
44 */
45#define BANNER "osnoise: "
46#define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */
47#define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */
48
49#define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */
50#define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */
51
/*
 * osnoise/options entries: bit positions inside the osnoise_options
 * bitmask, toggled via the tracefs osnoise/options interface.
 */
enum osnoise_options_index {
	OSN_DEFAULTS = 0,	/* pseudo-option: restore all options to defaults */
	OSN_WORKLOAD,		/* dispatch the osnoise measurement workload */
	OSN_PANIC_ON_STOP,	/* panic the system when stop tracing is hit */
	OSN_PREEMPT_DISABLE,	/* run the workload with preemption disabled */
	OSN_IRQ_DISABLE,	/* run the workload with IRQs disabled */
	OSN_MAX
};
63
/* Human-readable names for the options above, indexed by osnoise_options_index. */
static const char * const osnoise_options_str[OSN_MAX] = {
	"DEFAULTS",
	"OSNOISE_WORKLOAD",
	"PANIC_ON_STOP",
	"OSNOISE_PREEMPT_DISABLE",
	"OSNOISE_IRQ_DISABLE" };

/* 0x2 is bit OSN_WORKLOAD: only the workload option is enabled by default. */
#define OSN_DEFAULT_OPTIONS 0x2
static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS;
73
/*
 * trace_array of the enabled osnoise/timerlat instances.
 *
 * Each entry links one trace_array (trace instance) that currently runs
 * the osnoise or timerlat tracer. The list is RCU-protected for readers;
 * writers are serialized by trace_types_lock (see register/unregister).
 */
struct osnoise_instance {
	struct list_head list;		/* node in osnoise_instances */
	struct trace_array *tr;		/* the instance's trace array */
};

/* Head of the RCU list of registered instances. */
static struct list_head osnoise_instances;
83
84static bool osnoise_has_registered_instances(void)
85{
86 return !!list_first_or_null_rcu(&osnoise_instances,
87 struct osnoise_instance,
88 list);
89}
90
91/*
92 * osnoise_instance_registered - check if a tr is already registered
93 */
94static int osnoise_instance_registered(struct trace_array *tr)
95{
96 struct osnoise_instance *inst;
97 int found = 0;
98
99 rcu_read_lock();
100 list_for_each_entry_rcu(inst, &osnoise_instances, list) {
101 if (inst->tr == tr)
102 found = 1;
103 }
104 rcu_read_unlock();
105
106 return found;
107}
108
109/*
110 * osnoise_register_instance - register a new trace instance
111 *
112 * Register a trace_array *tr in the list of instances running
113 * osnoise/timerlat tracers.
114 */
115static int osnoise_register_instance(struct trace_array *tr)
116{
117 struct osnoise_instance *inst;
118
119 /*
120 * register/unregister serialization is provided by trace's
121 * trace_types_lock.
122 */
123 lockdep_assert_held(&trace_types_lock);
124
125 inst = kmalloc(sizeof(*inst), GFP_KERNEL);
126 if (!inst)
127 return -ENOMEM;
128
129 INIT_LIST_HEAD_RCU(list: &inst->list);
130 inst->tr = tr;
131 list_add_tail_rcu(new: &inst->list, head: &osnoise_instances);
132
133 return 0;
134}
135
136/*
137 * osnoise_unregister_instance - unregister a registered trace instance
138 *
139 * Remove the trace_array *tr from the list of instances running
140 * osnoise/timerlat tracers.
141 */
142static void osnoise_unregister_instance(struct trace_array *tr)
143{
144 struct osnoise_instance *inst;
145 int found = 0;
146
147 /*
148 * register/unregister serialization is provided by trace's
149 * trace_types_lock.
150 */
151 list_for_each_entry_rcu(inst, &osnoise_instances, list,
152 lockdep_is_held(&trace_types_lock)) {
153 if (inst->tr == tr) {
154 list_del_rcu(entry: &inst->list);
155 found = 1;
156 break;
157 }
158 }
159
160 if (!found)
161 return;
162
163 kvfree_rcu_mightsleep(inst);
164}
165
/*
 * NMI runtime info.
 */
struct osn_nmi {
	u64	count;		/* number of NMIs seen during the sample */
	u64	delta_start;	/* start time of the current NMI window */
};

/*
 * IRQ runtime info.
 */
struct osn_irq {
	u64	count;		/* number of IRQs seen during the sample */
	u64	arrival_time;	/* arrival time of the current IRQ (report only) */
	u64	delta_start;	/* start time, pushed forward to discount NMIs */
};
182
/* Context identifiers used by the timerlat tracer samples. */
#define IRQ_CONTEXT	0
#define THREAD_CONTEXT	1
#define THREAD_URET	2
/*
 * softirq runtime info.
 */
struct osn_softirq {
	u64	count;		/* number of softirqs seen during the sample */
	u64	arrival_time;	/* arrival time of the current softirq (report only) */
	u64	delta_start;	/* start time, pushed forward to discount IRQ/NMI noise */
};

/*
 * thread runtime info.
 */
struct osn_thread {
	u64	count;		/* number of noisy threads seen during the sample */
	u64	arrival_time;	/* arrival time of the current thread (report only) */
	u64	delta_start;	/* start time, pushed forward to discount interrupts */
};
203
/*
 * Runtime information: this structure saves the runtime information used by
 * one sampling thread.
 */
struct osnoise_variables {
	struct task_struct	*kthread;	/* sampling kthread of this CPU */
	bool			sampling;	/* true while a sample is being taken */
	pid_t			pid;		/* pid of the sampling thread */
	struct osn_nmi		nmi;		/* NMI noise accounting */
	struct osn_irq		irq;		/* IRQ noise accounting */
	struct osn_softirq	softirq;	/* softirq noise accounting */
	struct osn_thread	thread;		/* thread noise accounting */
	local_t			int_counter;	/* interrupt counter, see get_int_safe_duration() */
};
218
/*
 * Per-cpu runtime information.
 */
static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);

/*
 * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
 */
static inline struct osnoise_variables *this_cpu_osn_var(void)
{
	return this_cpu_ptr(&per_cpu_osnoise_var);
}
231
/*
 * Protect the interface.
 *
 * NOTE(review): declared without DEFINE_MUTEX(); it must be initialized
 * (mutex_init()) before first use elsewhere in this file — confirm.
 */
static struct mutex interface_lock;
236
237#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Runtime information for the timer mode.
 */
struct timerlat_variables {
	struct task_struct	*kthread;	/* timerlat kthread of this CPU */
	struct hrtimer		timer;		/* hrtimer that fires the measurement IRQ */
	u64			rel_period;	/* relative period of the timer */
	u64			abs_period;	/* next absolute expiration time */
	bool			tracing_thread;	/* IRQ handler armed the thread measurement */
	u64			count;		/* activation sequence number */
	bool			uthread_migrate;	/* user-space thread migrated */
};

static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);

/*
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
 */
static inline struct timerlat_variables *this_cpu_tmr_var(void)
{
	return this_cpu_ptr(&per_cpu_timerlat_var);
}
260
261/*
262 * tlat_var_reset - Reset the values of the given timerlat_variables
263 */
264static inline void tlat_var_reset(void)
265{
266 struct timerlat_variables *tlat_var;
267 int cpu;
268
269 /* Synchronize with the timerlat interfaces */
270 mutex_lock(&interface_lock);
271 /*
272 * So far, all the values are initialized as 0, so
273 * zeroing the structure is perfect.
274 */
275 for_each_online_cpu(cpu) {
276 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
277 if (tlat_var->kthread)
278 hrtimer_cancel(timer: &tlat_var->timer);
279 memset(tlat_var, 0, sizeof(*tlat_var));
280 }
281 mutex_unlock(lock: &interface_lock);
282}
283#else /* CONFIG_TIMERLAT_TRACER */
284#define tlat_var_reset() do {} while (0)
285#endif /* CONFIG_TIMERLAT_TRACER */
286
287/*
288 * osn_var_reset - Reset the values of the given osnoise_variables
289 */
290static inline void osn_var_reset(void)
291{
292 struct osnoise_variables *osn_var;
293 int cpu;
294
295 /*
296 * So far, all the values are initialized as 0, so
297 * zeroing the structure is perfect.
298 */
299 for_each_online_cpu(cpu) {
300 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
301 memset(osn_var, 0, sizeof(*osn_var));
302 }
303}
304
/*
 * osn_var_reset_all - Reset the value of all per-cpu osnoise_variables
 *
 * Resets both the osnoise and (when built in) the timerlat per-cpu state.
 */
static inline void osn_var_reset_all(void)
{
	osn_var_reset();
	tlat_var_reset();
}
313
/*
 * Tells NMIs to call back to the osnoise tracer to record timestamps.
 */
bool trace_osnoise_callback_enabled;

/*
 * Tracer data.
 */
static struct osnoise_data {
	u64	sample_period;		/* total sampling period */
	u64	sample_runtime;		/* active sampling portion of period */
	u64	stop_tracing;		/* stop trace in the internal operation (loop/irq) */
	u64	stop_tracing_total;	/* stop trace in the final operation (report/thread) */
#ifdef CONFIG_TIMERLAT_TRACER
	u64	timerlat_period;	/* timerlat period */
	u64	print_stack;		/* print IRQ stack if total > */
	int	timerlat_tracer;	/* timerlat tracer */
#endif
	bool	tainted;		/* informs users and developers about a problem */
} osnoise_data = {
	.sample_period			= DEFAULT_SAMPLE_PERIOD,
	.sample_runtime			= DEFAULT_SAMPLE_RUNTIME,
	.stop_tracing			= 0,
	.stop_tracing_total		= 0,
#ifdef CONFIG_TIMERLAT_TRACER
	.print_stack			= 0,
	.timerlat_period		= DEFAULT_TIMERLAT_PERIOD,
	.timerlat_tracer		= 0,
#endif
};
344
345#ifdef CONFIG_TIMERLAT_TRACER
/* timerlat_enabled - true when the timerlat mode is active. */
static inline bool timerlat_enabled(void)
{
	return osnoise_data.timerlat_tracer;
}
350
351static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
352{
353 struct timerlat_variables *tlat_var = this_cpu_tmr_var();
354 /*
355 * If the timerlat is enabled, but the irq handler did
356 * not run yet enabling timerlat_tracer, do not trace.
357 */
358 if (!tlat_var->tracing_thread) {
359 osn_var->softirq.arrival_time = 0;
360 osn_var->softirq.delta_start = 0;
361 return 0;
362 }
363 return 1;
364}
365
366static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
367{
368 struct timerlat_variables *tlat_var = this_cpu_tmr_var();
369 /*
370 * If the timerlat is enabled, but the irq handler did
371 * not run yet enabling timerlat_tracer, do not trace.
372 */
373 if (!tlat_var->tracing_thread) {
374 osn_var->thread.delta_start = 0;
375 osn_var->thread.arrival_time = 0;
376 return 0;
377 }
378 return 1;
379}
380#else /* CONFIG_TIMERLAT_TRACER */
/* Stubs used when the timerlat tracer is not built in. */
static inline bool timerlat_enabled(void)
{
	return false;
}

/* Always trace: there is no timerlat state to check. */
static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
/* Always trace: there is no timerlat state to check. */
static inline int timerlat_thread_exit(struct osnoise_variables *osn_var)
{
	return 1;
}
394#endif
395
396#ifdef CONFIG_PREEMPT_RT
/*
 * Print the osnoise header info.
 *
 * PREEMPT_RT variant: includes the need-resched-lazy and
 * preempt-lazy-depth latency-flag columns.
 */
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");

	seq_puts(s, "#                              |||||| /          ");
	seq_puts(s, "                                     MAX\n");

	seq_puts(s, "#                              ||||| /                         ");
	seq_puts(s, "                    SINGLE      Interference counters:\n");

	seq_puts(s, "#                              |||||||               RUNTIME      ");
	seq_puts(s, "   NOISE  %% OF CPU  NOISE    +-----------------------------+\n");

	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    IN US    ");
	seq_puts(s, "   IN US  AVAILABLE  IN US     HW    NMI    IRQ   SIRQ THREAD\n");

	seq_puts(s, "#              | |         |   |||||||      |           |      ");
	seq_puts(s, "       |    |            |      |      |      |      |      |\n");
}
428#else /* CONFIG_PREEMPT_RT */
429static void print_osnoise_headers(struct seq_file *s)
430{
431 if (osnoise_data.tainted)
432 seq_puts(m: s, s: "# osnoise is tainted!\n");
433
434 seq_puts(m: s, s: "# _-----=> irqs-off\n");
435 seq_puts(m: s, s: "# / _----=> need-resched\n");
436 seq_puts(m: s, s: "# | / _---=> hardirq/softirq\n");
437 seq_puts(m: s, s: "# || / _--=> preempt-depth\n");
438 seq_puts(m: s, s: "# ||| / _-=> migrate-disable ");
439 seq_puts(m: s, s: " MAX\n");
440 seq_puts(m: s, s: "# |||| / delay ");
441 seq_puts(m: s, s: " SINGLE Interference counters:\n");
442
443 seq_puts(m: s, s: "# ||||| RUNTIME ");
444 seq_puts(m: s, s: " NOISE %% OF CPU NOISE +-----------------------------+\n");
445
446 seq_puts(m: s, s: "# TASK-PID CPU# ||||| TIMESTAMP IN US ");
447 seq_puts(m: s, s: " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");
448
449 seq_puts(m: s, s: "# | | | ||||| | | ");
450 seq_puts(m: s, s: " | | | | | | | |\n");
451}
452#endif /* CONFIG_PREEMPT_RT */
453
/*
 * osnoise_taint - report an osnoise error.
 *
 * Prints msg into the trace buffer of every registered instance and
 * marks osnoise_data.tainted, so users know the results are suspect.
 */
#define osnoise_taint(msg) ({							\
	struct osnoise_instance *inst;						\
	struct trace_buffer *buffer;						\
										\
	rcu_read_lock();							\
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {		\
		buffer = inst->tr->array_buffer.buffer;				\
		trace_array_printk_buf(buffer, _THIS_IP_, msg);			\
	}									\
	rcu_read_unlock();							\
	osnoise_data.tainted = true;						\
})
469
470/*
471 * Record an osnoise_sample into the tracer buffer.
472 */
473static void
474__record_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer)
475{
476 struct ring_buffer_event *event;
477 struct osnoise_entry *entry;
478
479 event = trace_buffer_lock_reserve(buffer, type: TRACE_OSNOISE, len: sizeof(*entry),
480 trace_ctx: tracing_gen_ctx());
481 if (!event)
482 return;
483 entry = ring_buffer_event_data(event);
484 entry->runtime = sample->runtime;
485 entry->noise = sample->noise;
486 entry->max_sample = sample->max_sample;
487 entry->hw_count = sample->hw_count;
488 entry->nmi_count = sample->nmi_count;
489 entry->irq_count = sample->irq_count;
490 entry->softirq_count = sample->softirq_count;
491 entry->thread_count = sample->thread_count;
492
493 trace_buffer_unlock_commit_nostack(buffer, event);
494}
495
496/*
497 * Record an osnoise_sample on all osnoise instances and fire trace event.
498 */
499static void record_osnoise_sample(struct osnoise_sample *sample)
500{
501 struct osnoise_instance *inst;
502 struct trace_buffer *buffer;
503
504 trace_osnoise_sample(s: sample);
505
506 rcu_read_lock();
507 list_for_each_entry_rcu(inst, &osnoise_instances, list) {
508 buffer = inst->tr->array_buffer.buffer;
509 __record_osnoise_sample(sample, buffer);
510 }
511 rcu_read_unlock();
512}
513
514#ifdef CONFIG_TIMERLAT_TRACER
/*
 * Print the timerlat header info.
 *
 * PREEMPT_RT variant: includes the need-resched-lazy and
 * preempt-lazy-depth latency-flag columns.
 */
#ifdef CONFIG_PREEMPT_RT
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-------=> irqs-off\n");
	seq_puts(s, "#                               / _------=> need-resched\n");
	seq_puts(s, "#                              | / _-----=> need-resched-lazy\n");
	seq_puts(s, "#                              || / _----=> hardirq/softirq\n");
	seq_puts(s, "#                              ||| / _---=> preempt-depth\n");
	seq_puts(s, "#                              |||| / _--=> preempt-lazy-depth\n");
	seq_puts(s, "#                              ||||| / _-=> migrate-disable\n");
	seq_puts(s, "#                              |||||| /\n");
	seq_puts(s, "#                              |||||||             ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# |||||||   TIMESTAMP    ID     ");
	seq_puts(s, "       CONTEXT                LATENCY\n");
	seq_puts(s, "#              | |         |   |||||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}
535#else /* CONFIG_PREEMPT_RT */
/*
 * Print the timerlat header info.
 *
 * Non-PREEMPT_RT variant: no lazy latency-flag columns.
 */
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "#                                _-----=> irqs-off\n");
	seq_puts(s, "#                               / _----=> need-resched\n");
	seq_puts(s, "#                              | / _---=> hardirq/softirq\n");
	seq_puts(s, "#                              || / _--=> preempt-depth\n");
	seq_puts(s, "#                              ||| / _-=> migrate-disable\n");
	seq_puts(s, "#                              |||| /     delay\n");
	seq_puts(s, "#                              |||||            ACTIVATION\n");
	seq_puts(s, "#           TASK-PID      CPU# |||||   TIMESTAMP   ID      ");
	seq_puts(s, "      CONTEXT                 LATENCY\n");
	seq_puts(s, "#              | |         |   |||||      |         |      ");
	seq_puts(s, "            |                       |\n");
}
550#endif /* CONFIG_PREEMPT_RT */
551
552static void
553__record_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer)
554{
555 struct ring_buffer_event *event;
556 struct timerlat_entry *entry;
557
558 event = trace_buffer_lock_reserve(buffer, type: TRACE_TIMERLAT, len: sizeof(*entry),
559 trace_ctx: tracing_gen_ctx());
560 if (!event)
561 return;
562 entry = ring_buffer_event_data(event);
563 entry->seqnum = sample->seqnum;
564 entry->context = sample->context;
565 entry->timer_latency = sample->timer_latency;
566
567 trace_buffer_unlock_commit_nostack(buffer, event);
568}
569
570/*
571 * Record an timerlat_sample into the tracer buffer.
572 */
573static void record_timerlat_sample(struct timerlat_sample *sample)
574{
575 struct osnoise_instance *inst;
576 struct trace_buffer *buffer;
577
578 trace_timerlat_sample(s: sample);
579
580 rcu_read_lock();
581 list_for_each_entry_rcu(inst, &osnoise_instances, list) {
582 buffer = inst->tr->array_buffer.buffer;
583 __record_timerlat_sample(sample, buffer);
584 }
585 rcu_read_unlock();
586}
587
588#ifdef CONFIG_STACKTRACE
589
/* Maximum number of stack-trace entries saved per CPU. */
#define MAX_CALLS	256

/*
 * Stack trace will take place only at IRQ level, so, no need
 * to control nesting here.
 */
struct trace_stack {
	int		stack_size;		/* bytes used in calls[] */
	int		nr_entries;		/* number of saved entries */
	unsigned long	calls[MAX_CALLS];	/* saved return addresses */
};

static DEFINE_PER_CPU(struct trace_stack, trace_stack);
603
604/*
605 * timerlat_save_stack - save a stack trace without printing
606 *
607 * Save the current stack trace without printing. The
608 * stack will be printed later, after the end of the measurement.
609 */
610static void timerlat_save_stack(int skip)
611{
612 unsigned int size, nr_entries;
613 struct trace_stack *fstack;
614
615 fstack = this_cpu_ptr(&trace_stack);
616
617 size = ARRAY_SIZE(fstack->calls);
618
619 nr_entries = stack_trace_save(store: fstack->calls, size, skipnr: skip);
620
621 fstack->stack_size = nr_entries * sizeof(unsigned long);
622 fstack->nr_entries = nr_entries;
623
624 return;
625
626}
627
628static void
629__timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size)
630{
631 struct ring_buffer_event *event;
632 struct stack_entry *entry;
633
634 event = trace_buffer_lock_reserve(buffer, type: TRACE_STACK, len: sizeof(*entry) + size,
635 trace_ctx: tracing_gen_ctx());
636 if (!event)
637 return;
638
639 entry = ring_buffer_event_data(event);
640
641 entry->size = fstack->nr_entries;
642 memcpy(&entry->caller, fstack->calls, size);
643
644 trace_buffer_unlock_commit_nostack(buffer, event);
645}
646
/*
 * timerlat_dump_stack - dump a stack trace previously saved
 *
 * Dumps the per-cpu saved stack into every registered instance, but only
 * when the measured latency exceeds the print_stack threshold (if set).
 */
static void timerlat_dump_stack(u64 latency)
{
	struct osnoise_instance *inst;
	struct trace_buffer *buffer;
	struct trace_stack *fstack;
	unsigned int size;

	/*
	 * trace only if latency > print_stack config, if enabled.
	 */
	if (!osnoise_data.print_stack || osnoise_data.print_stack > latency)
		return;

	/* Keep this CPU's saved stack stable while it is being copied. */
	preempt_disable_notrace();
	fstack = this_cpu_ptr(&trace_stack);
	size = fstack->stack_size;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		buffer = inst->tr->array_buffer.buffer;
		__timerlat_dump_stack(buffer, fstack, size);

	}
	rcu_read_unlock();
	preempt_enable_notrace();
}
676#else /* CONFIG_STACKTRACE */
/*
 * No-op stubs when CONFIG_STACKTRACE is off. Note: the parameter list must
 * be a bare identifier — "(u64 latency)" is not a valid macro parameter.
 */
#define timerlat_dump_stack(latency) do {} while (0)
#define timerlat_save_stack(skip) do {} while (0)
679#endif /* CONFIG_STACKTRACE */
680#endif /* CONFIG_TIMERLAT_TRACER */
681
/*
 * Macros to encapsulate the time capturing infrastructure.
 */
#define time_get()	trace_clock_local()	/* ns timestamp, per-cpu clock */
#define time_to_us(x)	div_u64(x, 1000)	/* ns -> us (64-bit safe divide) */
#define time_sub(a, b)	((a) - (b))		/* plain delta of two timestamps */
688
689/*
690 * cond_move_irq_delta_start - Forward the delta_start of a running IRQ
691 *
692 * If an IRQ is preempted by an NMI, its delta_start is pushed forward
693 * to discount the NMI interference.
694 *
695 * See get_int_safe_duration().
696 */
697static inline void
698cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration)
699{
700 if (osn_var->irq.delta_start)
701 osn_var->irq.delta_start += duration;
702}
703
704#ifndef CONFIG_PREEMPT_RT
705/*
706 * cond_move_softirq_delta_start - Forward the delta_start of a running softirq.
707 *
708 * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed
709 * forward to discount the interference.
710 *
711 * See get_int_safe_duration().
712 */
713static inline void
714cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration)
715{
716 if (osn_var->softirq.delta_start)
717 osn_var->softirq.delta_start += duration;
718}
719#else /* CONFIG_PREEMPT_RT */
720#define cond_move_softirq_delta_start(osn_var, duration) do {} while (0)
721#endif
722
723/*
724 * cond_move_thread_delta_start - Forward the delta_start of a running thread
725 *
726 * If a noisy thread is preempted by an softirq, IRQ or NMI, its delta_start
727 * is pushed forward to discount the interference.
728 *
729 * See get_int_safe_duration().
730 */
731static inline void
732cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration)
733{
734 if (osn_var->thread.delta_start)
735 osn_var->thread.delta_start += duration;
736}
737
/*
 * get_int_safe_duration - Get the duration of a window
 *
 * The irq, softirq and thread variables need to have its duration without
 * the interference from higher priority interrupts. Instead of keeping a
 * variable to discount the interrupt interference from these variables, the
 * starting time of these variables are pushed forward with the interrupt's
 * duration. In this way, a single variable is used to:
 *
 *   - Know if a given window is being measured.
 *   - Account its duration.
 *   - Discount the interference.
 *
 * To avoid getting inconsistent values, e.g.,:
 *
 *	now = time_get()
 *		--->	interrupt!
 *			delta_start -= int duration;
 *		<---
 *	duration = now - delta_start;
 *
 *	result: negative duration if the variable duration before the
 *	interrupt was smaller than the interrupt execution.
 *
 * A counter of interrupts is used. If the counter increased, try
 * to capture an interference safe duration.
 *
 * Returns the duration and zeroes *delta_start, closing the window.
 */
static inline s64
get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start)
{
	u64 int_counter, now;
	s64 duration;

	/* retry until no interrupt fired between the two counter reads */
	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		now = time_get();
		duration = (now - *delta_start);

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	/*
	 * This is an evidence of race conditions that cause
	 * a value to be "discounted" too much.
	 */
	if (duration < 0)
		osnoise_taint("Negative duration!\n");

	*delta_start = 0;

	return duration;
}
794
/*
 * set_int_safe_time - Save the current time on *time, aware of interference
 *
 * Get the time, taking into consideration a possible interference from
 * higher priority interrupts.
 *
 * See get_int_safe_duration() for an explanation.
 *
 * Returns the interrupt counter observed while the time was taken.
 */
static u64
set_int_safe_time(struct osnoise_variables *osn_var, u64 *time)
{
	u64 int_counter;

	/* retry until no interrupt fired between the two counter reads */
	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*time = time_get();

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
822
823#ifdef CONFIG_TIMERLAT_TRACER
/*
 * copy_int_safe_time - Copy *src into *dst aware of interference
 *
 * Retries the copy until no interrupt fired while it took place, so the
 * destination never holds a value torn by an intervening interrupt.
 * Returns the interrupt counter observed during the copy.
 */
static u64
copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src)
{
	u64 int_counter;

	do {
		int_counter = local_read(&osn_var->int_counter);
		/* synchronize with interrupts */
		barrier();

		*dst = *src;

		/* synchronize with interrupts */
		barrier();
	} while (int_counter != local_read(&osn_var->int_counter));

	return int_counter;
}
845#endif /* CONFIG_TIMERLAT_TRACER */
846
847/*
848 * trace_osnoise_callback - NMI entry/exit callback
849 *
850 * This function is called at the entry and exit NMI code. The bool enter
851 * distinguishes between either case. This function is used to note a NMI
852 * occurrence, compute the noise caused by the NMI, and to remove the noise
853 * it is potentially causing on other interference variables.
854 */
855void trace_osnoise_callback(bool enter)
856{
857 struct osnoise_variables *osn_var = this_cpu_osn_var();
858 u64 duration;
859
860 if (!osn_var->sampling)
861 return;
862
863 /*
864 * Currently trace_clock_local() calls sched_clock() and the
865 * generic version is not NMI safe.
866 */
867 if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
868 if (enter) {
869 osn_var->nmi.delta_start = time_get();
870 local_inc(l: &osn_var->int_counter);
871 } else {
872 duration = time_get() - osn_var->nmi.delta_start;
873
874 trace_nmi_noise(start: osn_var->nmi.delta_start, duration);
875
876 cond_move_irq_delta_start(osn_var, duration);
877 cond_move_softirq_delta_start(osn_var, duration);
878 cond_move_thread_delta_start(osn_var, duration);
879 }
880 }
881
882 if (enter)
883 osn_var->nmi.count++;
884}
885
886/*
887 * osnoise_trace_irq_entry - Note the starting of an IRQ
888 *
889 * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs,
890 * it is safe to use a single variable (ons_var->irq) to save the statistics.
891 * The arrival_time is used to report... the arrival time. The delta_start
892 * is used to compute the duration at the IRQ exit handler. See
893 * cond_move_irq_delta_start().
894 */
895void osnoise_trace_irq_entry(int id)
896{
897 struct osnoise_variables *osn_var = this_cpu_osn_var();
898
899 if (!osn_var->sampling)
900 return;
901 /*
902 * This value will be used in the report, but not to compute
903 * the execution time, so it is safe to get it unsafe.
904 */
905 osn_var->irq.arrival_time = time_get();
906 set_int_safe_time(osn_var, time: &osn_var->irq.delta_start);
907 osn_var->irq.count++;
908
909 local_inc(l: &osn_var->int_counter);
910}
911
912/*
913 * osnoise_irq_exit - Note the end of an IRQ, sava data and trace
914 *
915 * Computes the duration of the IRQ noise, and trace it. Also discounts the
916 * interference from other sources of noise could be currently being accounted.
917 */
918void osnoise_trace_irq_exit(int id, const char *desc)
919{
920 struct osnoise_variables *osn_var = this_cpu_osn_var();
921 s64 duration;
922
923 if (!osn_var->sampling)
924 return;
925
926 duration = get_int_safe_duration(osn_var, delta_start: &osn_var->irq.delta_start);
927 trace_irq_noise(vector: id, desc, start: osn_var->irq.arrival_time, duration);
928 osn_var->irq.arrival_time = 0;
929 cond_move_softirq_delta_start(osn_var, duration);
930 cond_move_thread_delta_start(osn_var, duration);
931}
932
/*
 * trace_irqentry_callback - Callback to the irq:irq_entry traceevent
 *
 * Used to note the starting of an IRQ occurrence.
 */
static void trace_irqentry_callback(void *data, int irq,
				    struct irqaction *action)
{
	osnoise_trace_irq_entry(irq);
}
943
944/*
945 * trace_irqexit_callback - Callback to the irq:irq_exit traceevent
946 *
947 * Used to note the end of an IRQ occurece.
948 */
949static void trace_irqexit_callback(void *data, int irq,
950 struct irqaction *action, int ret)
951{
952 osnoise_trace_irq_exit(id: irq, desc: action->name);
953}
954
/*
 * arch specific register function.
 *
 * Weak default: architectures with extra IRQ tracepoints (e.g. APIC
 * vectors) override this. Returns 0 on success.
 */
int __weak osnoise_arch_register(void)
{
	return 0;
}
962
963/*
964 * arch specific unregister function.
965 */
966void __weak osnoise_arch_unregister(void)
967{
968 return;
969}
970
971/*
972 * hook_irq_events - Hook IRQ handling events
973 *
974 * This function hooks the IRQ related callbacks to the respective trace
975 * events.
976 */
977static int hook_irq_events(void)
978{
979 int ret;
980
981 ret = register_trace_irq_handler_entry(probe: trace_irqentry_callback, NULL);
982 if (ret)
983 goto out_err;
984
985 ret = register_trace_irq_handler_exit(probe: trace_irqexit_callback, NULL);
986 if (ret)
987 goto out_unregister_entry;
988
989 ret = osnoise_arch_register();
990 if (ret)
991 goto out_irq_exit;
992
993 return 0;
994
995out_irq_exit:
996 unregister_trace_irq_handler_exit(probe: trace_irqexit_callback, NULL);
997out_unregister_entry:
998 unregister_trace_irq_handler_entry(probe: trace_irqentry_callback, NULL);
999out_err:
1000 return -EINVAL;
1001}
1002
1003/*
1004 * unhook_irq_events - Unhook IRQ handling events
1005 *
1006 * This function unhooks the IRQ related callbacks to the respective trace
1007 * events.
1008 */
1009static void unhook_irq_events(void)
1010{
1011 osnoise_arch_unregister();
1012 unregister_trace_irq_handler_exit(probe: trace_irqexit_callback, NULL);
1013 unregister_trace_irq_handler_entry(probe: trace_irqentry_callback, NULL);
1014}
1015
1016#ifndef CONFIG_PREEMPT_RT
1017/*
1018 * trace_softirq_entry_callback - Note the starting of a softirq
1019 *
1020 * Save the starting time of a softirq. As softirqs are non-preemptive to
1021 * other softirqs, it is safe to use a single variable (ons_var->softirq)
1022 * to save the statistics. The arrival_time is used to report... the
1023 * arrival time. The delta_start is used to compute the duration at the
1024 * softirq exit handler. See cond_move_softirq_delta_start().
1025 */
1026static void trace_softirq_entry_callback(void *data, unsigned int vec_nr)
1027{
1028 struct osnoise_variables *osn_var = this_cpu_osn_var();
1029
1030 if (!osn_var->sampling)
1031 return;
1032 /*
1033 * This value will be used in the report, but not to compute
1034 * the execution time, so it is safe to get it unsafe.
1035 */
1036 osn_var->softirq.arrival_time = time_get();
1037 set_int_safe_time(osn_var, time: &osn_var->softirq.delta_start);
1038 osn_var->softirq.count++;
1039
1040 local_inc(l: &osn_var->int_counter);
1041}
1042
1043/*
1044 * trace_softirq_exit_callback - Note the end of an softirq
1045 *
1046 * Computes the duration of the softirq noise, and trace it. Also discounts the
1047 * interference from other sources of noise could be currently being accounted.
1048 */
1049static void trace_softirq_exit_callback(void *data, unsigned int vec_nr)
1050{
1051 struct osnoise_variables *osn_var = this_cpu_osn_var();
1052 s64 duration;
1053
1054 if (!osn_var->sampling)
1055 return;
1056
1057 if (unlikely(timerlat_enabled()))
1058 if (!timerlat_softirq_exit(osn_var))
1059 return;
1060
1061 duration = get_int_safe_duration(osn_var, delta_start: &osn_var->softirq.delta_start);
1062 trace_softirq_noise(vector: vec_nr, start: osn_var->softirq.arrival_time, duration);
1063 cond_move_thread_delta_start(osn_var, duration);
1064 osn_var->softirq.arrival_time = 0;
1065}
1066
1067/*
1068 * hook_softirq_events - Hook softirq handling events
1069 *
1070 * This function hooks the softirq related callbacks to the respective trace
1071 * events.
1072 */
1073static int hook_softirq_events(void)
1074{
1075 int ret;
1076
1077 ret = register_trace_softirq_entry(probe: trace_softirq_entry_callback, NULL);
1078 if (ret)
1079 goto out_err;
1080
1081 ret = register_trace_softirq_exit(probe: trace_softirq_exit_callback, NULL);
1082 if (ret)
1083 goto out_unreg_entry;
1084
1085 return 0;
1086
1087out_unreg_entry:
1088 unregister_trace_softirq_entry(probe: trace_softirq_entry_callback, NULL);
1089out_err:
1090 return -EINVAL;
1091}
1092
1093/*
1094 * unhook_softirq_events - Unhook softirq handling events
1095 *
1096 * This function hooks the softirq related callbacks to the respective trace
1097 * events.
1098 */
1099static void unhook_softirq_events(void)
1100{
1101 unregister_trace_softirq_entry(probe: trace_softirq_entry_callback, NULL);
1102 unregister_trace_softirq_exit(probe: trace_softirq_exit_callback, NULL);
1103}
1104#else /* CONFIG_PREEMPT_RT */
/*
 * softirq are threads on the PREEMPT_RT mode: their noise is accounted
 * by the thread hooks, so the softirq hooks are no-ops here.
 */
static int hook_softirq_events(void)
{
	return 0;
}
static void unhook_softirq_events(void)
{
}
1115#endif
1116
1117/*
1118 * thread_entry - Record the starting of a thread noise window
1119 *
1120 * It saves the context switch time for a noisy thread, and increments
1121 * the interference counters.
1122 */
1123static void
1124thread_entry(struct osnoise_variables *osn_var, struct task_struct *t)
1125{
1126 if (!osn_var->sampling)
1127 return;
1128 /*
1129 * The arrival time will be used in the report, but not to compute
1130 * the execution time, so it is safe to get it unsafe.
1131 */
1132 osn_var->thread.arrival_time = time_get();
1133
1134 set_int_safe_time(osn_var, time: &osn_var->thread.delta_start);
1135
1136 osn_var->thread.count++;
1137 local_inc(l: &osn_var->int_counter);
1138}
1139
1140/*
1141 * thread_exit - Report the end of a thread noise window
1142 *
1143 * It computes the total noise from a thread, tracing if needed.
1144 */
1145static void
1146thread_exit(struct osnoise_variables *osn_var, struct task_struct *t)
1147{
1148 s64 duration;
1149
1150 if (!osn_var->sampling)
1151 return;
1152
1153 if (unlikely(timerlat_enabled()))
1154 if (!timerlat_thread_exit(osn_var))
1155 return;
1156
1157 duration = get_int_safe_duration(osn_var, delta_start: &osn_var->thread.delta_start);
1158
1159 trace_thread_noise(t, start: osn_var->thread.arrival_time, duration);
1160
1161 osn_var->thread.arrival_time = 0;
1162}
1163
#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise_stop_exception - Stop tracing and the tracer.
 *
 * Prints the reason @msg to every registered instance's buffer, optionally
 * panics if the PANIC_ON_STOP option is set, and turns tracing off.
 */
static __always_inline void osnoise_stop_exception(char *msg, int cpu)
{
	struct osnoise_instance *inst;
	struct trace_array *tr;

	rcu_read_lock();
	list_for_each_entry_rcu(inst, &osnoise_instances, list) {
		tr = inst->tr;
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d due to exception: %s\n",
				       smp_processor_id(),
				       msg);

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit on cpu %d due to exception: %s\n",
			      smp_processor_id(),
			      msg);

		tracer_tracing_off(tr);
	}
	rcu_read_unlock();
}
1190
1191/*
1192 * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler
1193 *
1194 * his function is hooked to the sched:sched_migrate_task trace event, and monitors
1195 * timerlat user-space thread migration.
1196 */
1197static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu)
1198{
1199 struct osnoise_variables *osn_var;
1200 long cpu = task_cpu(p);
1201
1202 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
1203 if (osn_var->pid == p->pid && dest_cpu != cpu) {
1204 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
1205 osnoise_taint("timerlat user-thread migrated\n");
1206 osnoise_stop_exception(msg: "timerlat user-thread migrated", cpu);
1207 }
1208}
1209
/* Tracks whether the sched_migrate_task probe is currently registered. */
static bool monitor_enabled;

/*
 * register_migration_monitor - Hook the migration monitor, if needed
 *
 * Returns 0 on success (or when the monitor is not required), or the
 * tracepoint registration error otherwise.
 */
static int register_migration_monitor(void)
{
	int ret = 0;

	/*
	 * Timerlat thread migration check is only required when running timerlat in user-space.
	 * Thus, enable callback only if timerlat is set with no workload.
	 */
	if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) {
		if (WARN_ON_ONCE(monitor_enabled))
			return 0;

		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
		if (!ret)
			monitor_enabled = true;
	}

	return ret;
}
1231
/*
 * unregister_migration_monitor - Unhook the migration monitor, if registered.
 */
static void unregister_migration_monitor(void)
{
	if (!monitor_enabled)
		return;

	unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
	monitor_enabled = false;
}
#else
/* Without the timerlat tracer there is no user-space thread to monitor. */
static int register_migration_monitor(void)
{
	return 0;
}
static void unregister_migration_monitor(void) {}
#endif
1247/*
1248 * trace_sched_switch - sched:sched_switch trace event handler
1249 *
1250 * This function is hooked to the sched:sched_switch trace event, and it is
1251 * used to record the beginning and to report the end of a thread noise window.
1252 */
1253static void
1254trace_sched_switch_callback(void *data, bool preempt,
1255 struct task_struct *p,
1256 struct task_struct *n,
1257 unsigned int prev_state)
1258{
1259 struct osnoise_variables *osn_var = this_cpu_osn_var();
1260 int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
1261
1262 if ((p->pid != osn_var->pid) || !workload)
1263 thread_exit(osn_var, t: p);
1264
1265 if ((n->pid != osn_var->pid) || !workload)
1266 thread_entry(osn_var, t: n);
1267}
1268
1269/*
1270 * hook_thread_events - Hook the instrumentation for thread noise
1271 *
1272 * Hook the osnoise tracer callbacks to handle the noise from other
1273 * threads on the necessary kernel events.
1274 */
1275static int hook_thread_events(void)
1276{
1277 int ret;
1278
1279 ret = register_trace_sched_switch(probe: trace_sched_switch_callback, NULL);
1280 if (ret)
1281 return -EINVAL;
1282
1283 ret = register_migration_monitor();
1284 if (ret)
1285 goto out_unreg;
1286
1287 return 0;
1288
1289out_unreg:
1290 unregister_trace_sched_switch(probe: trace_sched_switch_callback, NULL);
1291 return -EINVAL;
1292}
1293
1294/*
1295 * unhook_thread_events - unhook the instrumentation for thread noise
1296 *
1297 * Unook the osnoise tracer callbacks to handle the noise from other
1298 * threads on the necessary kernel events.
1299 */
1300static void unhook_thread_events(void)
1301{
1302 unregister_trace_sched_switch(probe: trace_sched_switch_callback, NULL);
1303 unregister_migration_monitor();
1304}
1305
1306/*
1307 * save_osn_sample_stats - Save the osnoise_sample statistics
1308 *
1309 * Save the osnoise_sample statistics before the sampling phase. These
1310 * values will be used later to compute the diff betwneen the statistics
1311 * before and after the osnoise sampling.
1312 */
1313static void
1314save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1315{
1316 s->nmi_count = osn_var->nmi.count;
1317 s->irq_count = osn_var->irq.count;
1318 s->softirq_count = osn_var->softirq.count;
1319 s->thread_count = osn_var->thread.count;
1320}
1321
1322/*
1323 * diff_osn_sample_stats - Compute the osnoise_sample statistics
1324 *
1325 * After a sample period, compute the difference on the osnoise_sample
1326 * statistics. The struct osnoise_sample *s contains the statistics saved via
1327 * save_osn_sample_stats() before the osnoise sampling.
1328 */
1329static void
1330diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s)
1331{
1332 s->nmi_count = osn_var->nmi.count - s->nmi_count;
1333 s->irq_count = osn_var->irq.count - s->irq_count;
1334 s->softirq_count = osn_var->softirq.count - s->softirq_count;
1335 s->thread_count = osn_var->thread.count - s->thread_count;
1336}
1337
1338/*
1339 * osnoise_stop_tracing - Stop tracing and the tracer.
1340 */
1341static __always_inline void osnoise_stop_tracing(void)
1342{
1343 struct osnoise_instance *inst;
1344 struct trace_array *tr;
1345
1346 rcu_read_lock();
1347 list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1348 tr = inst->tr;
1349 trace_array_printk_buf(buffer: tr->array_buffer.buffer, _THIS_IP_,
1350 fmt: "stop tracing hit on cpu %d\n", smp_processor_id());
1351
1352 if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
1353 panic(fmt: "tracer hit stop condition on CPU %d\n", smp_processor_id());
1354
1355 tracer_tracing_off(tr);
1356 }
1357 rcu_read_unlock();
1358}
1359
1360/*
1361 * osnoise_has_tracing_on - Check if there is at least one instance on
1362 */
1363static __always_inline int osnoise_has_tracing_on(void)
1364{
1365 struct osnoise_instance *inst;
1366 int trace_is_on = 0;
1367
1368 rcu_read_lock();
1369 list_for_each_entry_rcu(inst, &osnoise_instances, list)
1370 trace_is_on += tracer_tracing_is_on(tr: inst->tr);
1371 rcu_read_unlock();
1372
1373 return trace_is_on;
1374}
1375
1376/*
1377 * notify_new_max_latency - Notify a new max latency via fsnotify interface.
1378 */
1379static void notify_new_max_latency(u64 latency)
1380{
1381 struct osnoise_instance *inst;
1382 struct trace_array *tr;
1383
1384 rcu_read_lock();
1385 list_for_each_entry_rcu(inst, &osnoise_instances, list) {
1386 tr = inst->tr;
1387 if (tracer_tracing_is_on(tr) && tr->max_latency < latency) {
1388 tr->max_latency = latency;
1389 latency_fsnotify(tr);
1390 }
1391 }
1392 rcu_read_unlock();
1393}
1394
1395/*
1396 * run_osnoise - Sample the time and look for osnoise
1397 *
1398 * Used to capture the time, looking for potential osnoise latency repeatedly.
1399 * Different from hwlat_detector, it is called with preemption and interrupts
1400 * enabled. This allows irqs, softirqs and threads to run, interfering on the
1401 * osnoise sampling thread, as they would do with a regular thread.
1402 */
1403static int run_osnoise(void)
1404{
1405 bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
1406 struct osnoise_variables *osn_var = this_cpu_osn_var();
1407 u64 start, sample, last_sample;
1408 u64 last_int_count, int_count;
1409 s64 noise = 0, max_noise = 0;
1410 s64 total, last_total = 0;
1411 struct osnoise_sample s;
1412 bool disable_preemption;
1413 unsigned int threshold;
1414 u64 runtime, stop_in;
1415 u64 sum_noise = 0;
1416 int hw_count = 0;
1417 int ret = -1;
1418
1419 /*
1420 * Disabling preemption is only required if IRQs are enabled,
1421 * and the options is set on.
1422 */
1423 disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
1424
1425 /*
1426 * Considers the current thread as the workload.
1427 */
1428 osn_var->pid = current->pid;
1429
1430 /*
1431 * Save the current stats for the diff
1432 */
1433 save_osn_sample_stats(osn_var, s: &s);
1434
1435 /*
1436 * if threshold is 0, use the default value of 1 us.
1437 */
1438 threshold = tracing_thresh ? : 1000;
1439
1440 /*
1441 * Apply PREEMPT and IRQ disabled options.
1442 */
1443 if (disable_irq)
1444 local_irq_disable();
1445
1446 if (disable_preemption)
1447 preempt_disable();
1448
1449 /*
1450 * Make sure NMIs see sampling first
1451 */
1452 osn_var->sampling = true;
1453 barrier();
1454
1455 /*
1456 * Transform the *_us config to nanoseconds to avoid the
1457 * division on the main loop.
1458 */
1459 runtime = osnoise_data.sample_runtime * NSEC_PER_USEC;
1460 stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC;
1461
1462 /*
1463 * Start timestamp
1464 */
1465 start = time_get();
1466
1467 /*
1468 * "previous" loop.
1469 */
1470 last_int_count = set_int_safe_time(osn_var, time: &last_sample);
1471
1472 do {
1473 /*
1474 * Get sample!
1475 */
1476 int_count = set_int_safe_time(osn_var, time: &sample);
1477
1478 noise = time_sub(sample, last_sample);
1479
1480 /*
1481 * This shouldn't happen.
1482 */
1483 if (noise < 0) {
1484 osnoise_taint("negative noise!");
1485 goto out;
1486 }
1487
1488 /*
1489 * Sample runtime.
1490 */
1491 total = time_sub(sample, start);
1492
1493 /*
1494 * Check for possible overflows.
1495 */
1496 if (total < last_total) {
1497 osnoise_taint("total overflow!");
1498 break;
1499 }
1500
1501 last_total = total;
1502
1503 if (noise >= threshold) {
1504 int interference = int_count - last_int_count;
1505
1506 if (noise > max_noise)
1507 max_noise = noise;
1508
1509 if (!interference)
1510 hw_count++;
1511
1512 sum_noise += noise;
1513
1514 trace_sample_threshold(start: last_sample, duration: noise, interference);
1515
1516 if (osnoise_data.stop_tracing)
1517 if (noise > stop_in)
1518 osnoise_stop_tracing();
1519 }
1520
1521 /*
1522 * In some cases, notably when running on a nohz_full CPU with
1523 * a stopped tick PREEMPT_RCU or PREEMPT_LAZY have no way to
1524 * account for QSs. This will eventually cause unwarranted
1525 * noise as RCU forces preemption as the means of ending the
1526 * current grace period. We avoid this by calling
1527 * rcu_momentary_eqs(), which performs a zero duration EQS
1528 * allowing RCU to end the current grace period. This call
1529 * shouldn't be wrapped inside an RCU critical section.
1530 *
1531 * Normally QSs for other cases are handled through cond_resched().
1532 * For simplicity, however, we call rcu_momentary_eqs() for all
1533 * configurations here.
1534 */
1535 if (!disable_irq)
1536 local_irq_disable();
1537
1538 rcu_momentary_eqs();
1539
1540 if (!disable_irq)
1541 local_irq_enable();
1542
1543 /*
1544 * For the non-preemptive kernel config: let threads runs, if
1545 * they so wish, unless set not do to so.
1546 */
1547 if (!disable_irq && !disable_preemption)
1548 cond_resched();
1549
1550 last_sample = sample;
1551 last_int_count = int_count;
1552
1553 } while (total < runtime && !kthread_should_stop());
1554
1555 /*
1556 * Finish the above in the view for interrupts.
1557 */
1558 barrier();
1559
1560 osn_var->sampling = false;
1561
1562 /*
1563 * Make sure sampling data is no longer updated.
1564 */
1565 barrier();
1566
1567 /*
1568 * Return to the preemptive state.
1569 */
1570 if (disable_preemption)
1571 preempt_enable();
1572
1573 if (disable_irq)
1574 local_irq_enable();
1575
1576 /*
1577 * Save noise info.
1578 */
1579 s.noise = time_to_us(sum_noise);
1580 s.runtime = time_to_us(total);
1581 s.max_sample = time_to_us(max_noise);
1582 s.hw_count = hw_count;
1583
1584 /* Save interference stats info */
1585 diff_osn_sample_stats(osn_var, s: &s);
1586
1587 record_osnoise_sample(sample: &s);
1588
1589 notify_new_max_latency(latency: max_noise);
1590
1591 if (osnoise_data.stop_tracing_total)
1592 if (s.noise > osnoise_data.stop_tracing_total)
1593 osnoise_stop_tracing();
1594
1595 return 0;
1596out:
1597 return ret;
1598}
1599
1600static struct cpumask osnoise_cpumask;
1601static struct cpumask save_cpumask;
1602static struct cpumask kthread_cpumask;
1603
1604/*
1605 * osnoise_sleep - sleep until the next period
1606 */
1607static void osnoise_sleep(bool skip_period)
1608{
1609 u64 interval;
1610 ktime_t wake_time;
1611
1612 mutex_lock(&interface_lock);
1613 if (skip_period)
1614 interval = osnoise_data.sample_period;
1615 else
1616 interval = osnoise_data.sample_period - osnoise_data.sample_runtime;
1617 mutex_unlock(lock: &interface_lock);
1618
1619 /*
1620 * differently from hwlat_detector, the osnoise tracer can run
1621 * without a pause because preemption is on.
1622 */
1623 if (!interval) {
1624 /* Let synchronize_rcu_tasks() make progress */
1625 cond_resched_tasks_rcu_qs();
1626 return;
1627 }
1628
1629 wake_time = ktime_add_us(kt: ktime_get(), usec: interval);
1630 __set_current_state(TASK_INTERRUPTIBLE);
1631
1632 while (schedule_hrtimeout(expires: &wake_time, mode: HRTIMER_MODE_ABS)) {
1633 if (kthread_should_stop())
1634 break;
1635 }
1636}
1637
1638/*
1639 * osnoise_migration_pending - checks if the task needs to migrate
1640 *
1641 * osnoise/timerlat threads are per-cpu. If there is a pending request to
1642 * migrate the thread away from the current CPU, something bad has happened.
1643 * Play the good citizen and leave.
1644 *
1645 * Returns 0 if it is safe to continue, 1 otherwise.
1646 */
1647static inline int osnoise_migration_pending(void)
1648{
1649 if (!current->migration_pending)
1650 return 0;
1651
1652 /*
1653 * If migration is pending, there is a task waiting for the
1654 * tracer to enable migration. The tracer does not allow migration,
1655 * thus: taint and leave to unblock the blocked thread.
1656 */
1657 osnoise_taint("migration requested to osnoise threads, leaving.");
1658
1659 /*
1660 * Unset this thread from the threads managed by the interface.
1661 * The tracers are responsible for cleaning their env before
1662 * exiting.
1663 */
1664 mutex_lock(&interface_lock);
1665 this_cpu_osn_var()->kthread = NULL;
1666 cpumask_clear_cpu(smp_processor_id(), dstp: &kthread_cpumask);
1667 mutex_unlock(lock: &interface_lock);
1668
1669 return 1;
1670}
1671
1672/*
1673 * osnoise_main - The osnoise detection kernel thread
1674 *
1675 * Calls run_osnoise() function to measure the osnoise for the configured runtime,
1676 * every period.
1677 */
1678static int osnoise_main(void *data)
1679{
1680 unsigned long flags;
1681
1682 /*
1683 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1684 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1685 *
1686 * To work around this limitation, disable migration and remove the
1687 * flag.
1688 */
1689 migrate_disable();
1690 raw_spin_lock_irqsave(&current->pi_lock, flags);
1691 current->flags &= ~(PF_NO_SETAFFINITY);
1692 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1693
1694 while (!kthread_should_stop()) {
1695 if (osnoise_migration_pending())
1696 break;
1697
1698 /* skip a period if tracing is off on all instances */
1699 if (!osnoise_has_tracing_on()) {
1700 osnoise_sleep(skip_period: true);
1701 continue;
1702 }
1703
1704 run_osnoise();
1705 osnoise_sleep(skip_period: false);
1706 }
1707
1708 migrate_enable();
1709 return 0;
1710}
1711
#ifdef CONFIG_TIMERLAT_TRACER
/*
 * timerlat_irq - hrtimer handler for timerlat.
 *
 * Records the IRQ-context latency sample, arranges the osnoise delta_start
 * bookkeeping so that the wakeup delay of the timerlat thread is accounted
 * correctly, and wakes the timerlat thread.
 */
static enum hrtimer_restart timerlat_irq(struct hrtimer *timer)
{
	struct osnoise_variables *osn_var = this_cpu_osn_var();
	struct timerlat_variables *tlat;
	struct timerlat_sample s;
	u64 now;
	u64 diff;

	/*
	 * I am not sure if the timer was armed for this CPU. So, get
	 * the timerlat struct from the timer itself, not from this
	 * CPU.
	 */
	tlat = container_of(timer, struct timerlat_variables, timer);

	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));

	/*
	 * Enable the osnoise: events for thread an softirq.
	 */
	tlat->tracing_thread = true;

	osn_var->thread.arrival_time = time_get();

	/*
	 * A hardirq is running: the timer IRQ. It is for sure preempting
	 * a thread, and potentially preempting a softirq.
	 *
	 * At this point, it is not interesting to know the duration of the
	 * preempted thread (and maybe softirq), but how much time they will
	 * delay the beginning of the execution of the timer thread.
	 *
	 * To get the correct (net) delay added by the softirq, its delta_start
	 * is set as the IRQ one. In this way, at the return of the IRQ, the delta
	 * start of the softirq will be zeroed, accounting then only the time
	 * after that.
	 *
	 * The thread follows the same principle. However, if a softirq is
	 * running, the thread needs to receive the softirq delta_start. The
	 * reason being is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
	 *
	 * The PREEMPT_RT is a special case, though. As softirqs run as threads
	 * on RT, moving the thread is enough.
	 */
	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
	}

	/*
	 * Compute the latency of the current activation w.r.t. the
	 * expected expiration time.
	 */
	diff = now - tlat->abs_period;

	tlat->count++;
	s.seqnum = tlat->count;
	s.timer_latency = diff;
	s.context = IRQ_CONTEXT;

	record_timerlat_sample(&s);

	if (osnoise_data.stop_tracing) {
		if (time_to_us(diff) >= osnoise_data.stop_tracing) {

			/*
			 * At this point, if stop_tracing is set and <= print_stack,
			 * print_stack is set and would be printed in the thread handler.
			 *
			 * Thus, print the stack trace as it is helpful to define the
			 * root cause of an IRQ latency.
			 */
			if (osnoise_data.stop_tracing <= osnoise_data.print_stack) {
				timerlat_save_stack(0);
				timerlat_dump_stack(time_to_us(diff));
			}

			osnoise_stop_tracing();
			notify_new_max_latency(diff);

			wake_up_process(tlat->kthread);

			return HRTIMER_NORESTART;
		}
	}

	wake_up_process(tlat->kthread);

	if (osnoise_data.print_stack)
		timerlat_save_stack(0);

	return HRTIMER_NORESTART;
}
1815
1816/*
1817 * wait_next_period - Wait for the next period for timerlat
1818 */
1819static int wait_next_period(struct timerlat_variables *tlat)
1820{
1821 ktime_t next_abs_period, now;
1822 u64 rel_period = osnoise_data.timerlat_period * 1000;
1823
1824 now = hrtimer_cb_get_time(timer: &tlat->timer);
1825 next_abs_period = ns_to_ktime(ns: tlat->abs_period + rel_period);
1826
1827 /*
1828 * Save the next abs_period.
1829 */
1830 tlat->abs_period = (u64) ktime_to_ns(kt: next_abs_period);
1831
1832 /*
1833 * If the new abs_period is in the past, skip the activation.
1834 */
1835 while (ktime_compare(cmp1: now, cmp2: next_abs_period) > 0) {
1836 next_abs_period = ns_to_ktime(ns: tlat->abs_period + rel_period);
1837 tlat->abs_period = (u64) ktime_to_ns(kt: next_abs_period);
1838 }
1839
1840 set_current_state(TASK_INTERRUPTIBLE);
1841
1842 hrtimer_start(timer: &tlat->timer, tim: next_abs_period, mode: HRTIMER_MODE_ABS_PINNED_HARD);
1843 schedule();
1844 return 1;
1845}
1846
1847/*
1848 * timerlat_main- Timerlat main
1849 */
1850static int timerlat_main(void *data)
1851{
1852 struct osnoise_variables *osn_var = this_cpu_osn_var();
1853 struct timerlat_variables *tlat = this_cpu_tmr_var();
1854 struct timerlat_sample s;
1855 struct sched_param sp;
1856 unsigned long flags;
1857 u64 now, diff;
1858
1859 /*
1860 * Make the thread RT, that is how cyclictest is usually used.
1861 */
1862 sp.sched_priority = DEFAULT_TIMERLAT_PRIO;
1863 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1864
1865 /*
1866 * This thread was created pinned to the CPU using PF_NO_SETAFFINITY.
1867 * The problem is that cgroup does not allow PF_NO_SETAFFINITY thread.
1868 *
1869 * To work around this limitation, disable migration and remove the
1870 * flag.
1871 */
1872 migrate_disable();
1873 raw_spin_lock_irqsave(&current->pi_lock, flags);
1874 current->flags &= ~(PF_NO_SETAFFINITY);
1875 raw_spin_unlock_irqrestore(&current->pi_lock, flags);
1876
1877 tlat->count = 0;
1878 tlat->tracing_thread = false;
1879
1880 hrtimer_setup(timer: &tlat->timer, function: timerlat_irq, CLOCK_MONOTONIC, mode: HRTIMER_MODE_ABS_PINNED_HARD);
1881 tlat->kthread = current;
1882 osn_var->pid = current->pid;
1883 /*
1884 * Annotate the arrival time.
1885 */
1886 tlat->abs_period = hrtimer_cb_get_time(timer: &tlat->timer);
1887
1888 wait_next_period(tlat);
1889
1890 osn_var->sampling = 1;
1891
1892 while (!kthread_should_stop()) {
1893
1894 now = ktime_to_ns(kt: hrtimer_cb_get_time(timer: &tlat->timer));
1895 diff = now - tlat->abs_period;
1896
1897 s.seqnum = tlat->count;
1898 s.timer_latency = diff;
1899 s.context = THREAD_CONTEXT;
1900
1901 record_timerlat_sample(sample: &s);
1902
1903 notify_new_max_latency(latency: diff);
1904
1905 timerlat_dump_stack(time_to_us(diff));
1906
1907 tlat->tracing_thread = false;
1908 if (osnoise_data.stop_tracing_total)
1909 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
1910 osnoise_stop_tracing();
1911
1912 if (osnoise_migration_pending())
1913 break;
1914
1915 wait_next_period(tlat);
1916 }
1917
1918 hrtimer_cancel(timer: &tlat->timer);
1919 migrate_enable();
1920 return 0;
1921}
#else /* CONFIG_TIMERLAT_TRACER */
/* Stub: timerlat is compiled out, the workload thread does nothing. */
static int timerlat_main(void *data)
{
	return 0;
}
#endif /* CONFIG_TIMERLAT_TRACER */
1928
1929/*
1930 * stop_kthread - stop a workload thread
1931 */
1932static void stop_kthread(unsigned int cpu)
1933{
1934 struct task_struct *kthread;
1935
1936 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
1937 if (kthread) {
1938 if (cpumask_test_and_clear_cpu(cpu, cpumask: &kthread_cpumask) &&
1939 !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
1940 kthread_stop(k: kthread);
1941 } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
1942 /*
1943 * This is a user thread waiting on the timerlat_fd. We need
1944 * to close all users, and the best way to guarantee this is
1945 * by killing the thread. NOTE: this is a purpose specific file.
1946 */
1947 kill_pid(pid: kthread->thread_pid, SIGKILL, priv: 1);
1948 put_task_struct(t: kthread);
1949 }
1950 } else {
1951 /* if no workload, just return */
1952 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1953 /*
1954 * This is set in the osnoise tracer case.
1955 */
1956 per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
1957 barrier();
1958 }
1959 }
1960}
1961
1962/*
1963 * stop_per_cpu_kthread - Stop per-cpu threads
1964 *
1965 * Stop the osnoise sampling htread. Use this on unload and at system
1966 * shutdown.
1967 */
1968static void stop_per_cpu_kthreads(void)
1969{
1970 int cpu;
1971
1972 cpus_read_lock();
1973
1974 for_each_online_cpu(cpu)
1975 stop_kthread(cpu);
1976
1977 cpus_read_unlock();
1978}
1979
1980/*
1981 * start_kthread - Start a workload thread
1982 */
1983static int start_kthread(unsigned int cpu)
1984{
1985 struct task_struct *kthread;
1986 void *main = osnoise_main;
1987 char comm[24];
1988
1989 /* Do not start a new thread if it is already running */
1990 if (per_cpu(per_cpu_osnoise_var, cpu).kthread)
1991 return 0;
1992
1993 if (timerlat_enabled()) {
1994 snprintf(buf: comm, size: 24, fmt: "timerlat/%d", cpu);
1995 main = timerlat_main;
1996 } else {
1997 /* if no workload, just return */
1998 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
1999 per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
2000 barrier();
2001 return 0;
2002 }
2003 snprintf(buf: comm, size: 24, fmt: "osnoise/%d", cpu);
2004 }
2005
2006 kthread = kthread_run_on_cpu(threadfn: main, NULL, cpu, namefmt: comm);
2007
2008 if (IS_ERR(ptr: kthread)) {
2009 pr_err(BANNER "could not start sampling thread\n");
2010 return -ENOMEM;
2011 }
2012
2013 per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
2014 cpumask_set_cpu(cpu, dstp: &kthread_cpumask);
2015
2016 return 0;
2017}
2018
2019/*
2020 * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads
2021 *
2022 * This starts the kernel thread that will look for osnoise on many
2023 * cpus.
2024 */
2025static int start_per_cpu_kthreads(void)
2026{
2027 struct cpumask *current_mask = &save_cpumask;
2028 int retval = 0;
2029 int cpu;
2030
2031 if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
2032 if (timerlat_enabled())
2033 return 0;
2034 }
2035
2036 cpus_read_lock();
2037 /*
2038 * Run only on online CPUs in which osnoise is allowed to run.
2039 */
2040 cpumask_and(dstp: current_mask, cpu_online_mask, src2p: &osnoise_cpumask);
2041
2042 for_each_possible_cpu(cpu) {
2043 if (cpumask_test_and_clear_cpu(cpu, cpumask: &kthread_cpumask)) {
2044 struct task_struct *kthread;
2045
2046 kthread = xchg_relaxed(&(per_cpu(per_cpu_osnoise_var, cpu).kthread), NULL);
2047 if (!WARN_ON(!kthread))
2048 kthread_stop(k: kthread);
2049 }
2050 }
2051
2052 for_each_cpu(cpu, current_mask) {
2053 retval = start_kthread(cpu);
2054 if (retval) {
2055 cpus_read_unlock();
2056 stop_per_cpu_kthreads();
2057 return retval;
2058 }
2059 }
2060
2061 cpus_read_unlock();
2062
2063 return retval;
2064}
2065
#ifdef CONFIG_HOTPLUG_CPU
/*
 * osnoise_hotplug_workfn - Start the workload thread on a newly-onlined CPU.
 *
 * Runs from a per-CPU work item so it executes on the target CPU. The
 * guard()s keep the lock ordering: trace_types_lock -> interface_lock ->
 * cpus_read_lock.
 */
static void osnoise_hotplug_workfn(struct work_struct *dummy)
{
	unsigned int cpu = smp_processor_id();

	guard(mutex)(&trace_types_lock);

	if (!osnoise_has_registered_instances())
		return;

	guard(mutex)(&interface_lock);
	guard(cpus_read_lock)();

	if (!cpu_online(cpu))
		return;

	/* Respect the user-configured osnoise/cpus mask. */
	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
		return;

	start_kthread(cpu);
}
2087
2088static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn);
2089
2090/*
2091 * osnoise_cpu_init - CPU hotplug online callback function
2092 */
2093static int osnoise_cpu_init(unsigned int cpu)
2094{
2095 schedule_work_on(cpu, work: &osnoise_hotplug_work);
2096 return 0;
2097}
2098
2099/*
2100 * osnoise_cpu_die - CPU hotplug offline callback function
2101 */
2102static int osnoise_cpu_die(unsigned int cpu)
2103{
2104 stop_kthread(cpu);
2105 return 0;
2106}
2107
/*
 * osnoise_init_hotplug_support - Register the CPU hotplug callbacks.
 *
 * Failure is not fatal: the tracer still works, it just will not follow
 * CPU online/offline events.
 */
static void osnoise_init_hotplug_support(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
				osnoise_cpu_init, osnoise_cpu_die);
	if (ret < 0)
		pr_warn(BANNER "Error to init cpu hotplug support\n");

	return;
}
#else /* CONFIG_HOTPLUG_CPU */
/* Stub: nothing to do without CPU hotplug support. */
static void osnoise_init_hotplug_support(void)
{
	return;
}
#endif /* CONFIG_HOTPLUG_CPU */
2125
2126/*
2127 * seq file functions for the osnoise/options file.
2128 */
2129static void *s_options_start(struct seq_file *s, loff_t *pos)
2130{
2131 int option = *pos;
2132
2133 mutex_lock(&interface_lock);
2134
2135 if (option >= OSN_MAX)
2136 return NULL;
2137
2138 return pos;
2139}
2140
2141static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
2142{
2143 int option = ++(*pos);
2144
2145 if (option >= OSN_MAX)
2146 return NULL;
2147
2148 return pos;
2149}
2150
/*
 * s_options_show - Print one option, prefixed with "NO_" when disabled.
 *
 * OSN_DEFAULTS is special-cased: it reflects whether the whole option set
 * equals the defaults.
 */
static int s_options_show(struct seq_file *s, void *v)
{
	loff_t *pos = v;
	int option = *pos;

	if (option == OSN_DEFAULTS) {
		if (osnoise_options == OSN_DEFAULT_OPTIONS)
			seq_printf(s, "%s", osnoise_options_str[option]);
		else
			seq_printf(s, "NO_%s", osnoise_options_str[option]);
		goto out;
	}

	if (test_bit(option, &osnoise_options))
		seq_printf(s, "%s", osnoise_options_str[option]);
	else
		seq_printf(s, "NO_%s", osnoise_options_str[option]);

out:
	/*
	 * NOTE(review): start/next never yield option == OSN_MAX, so this
	 * separator appears to always be printed — confirm intent.
	 */
	if (option != OSN_MAX)
		seq_puts(s, " ");

	return 0;
}
2175
/*
 * s_options_stop - End the sequence walk: terminate the line and release
 * the interface_lock taken in s_options_start().
 */
static void s_options_stop(struct seq_file *s, void *v)
{
	seq_puts(s, "\n");
	mutex_unlock(&interface_lock);
}

/* seq_file operations for the osnoise/options file. */
static const struct seq_operations osnoise_options_seq_ops = {
	.start		= s_options_start,
	.next		= s_options_next,
	.show		= s_options_show,
	.stop		= s_options_stop
};
2188
2189static int osnoise_options_open(struct inode *inode, struct file *file)
2190{
2191 return seq_open(file, &osnoise_options_seq_ops);
2192};
2193
2194/**
2195 * osnoise_options_write - Write function for "options" entry
2196 * @filp: The active open file structure
2197 * @ubuf: The user buffer that contains the value to write
2198 * @cnt: The maximum number of bytes to write to "file"
2199 * @ppos: The current position in @file
2200 *
2201 * Writing the option name sets the option, writing the "NO_"
2202 * prefix in front of the option name disables it.
2203 *
2204 * Writing "DEFAULTS" resets the option values to the default ones.
2205 */
2206static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
2207 size_t cnt, loff_t *ppos)
2208{
2209 int running, option, enable, retval;
2210 char buf[256], *option_str;
2211
2212 if (cnt >= 256)
2213 return -EINVAL;
2214
2215 if (copy_from_user(to: buf, from: ubuf, n: cnt))
2216 return -EFAULT;
2217
2218 buf[cnt] = 0;
2219
2220 if (strncmp(buf, "NO_", 3)) {
2221 option_str = strstrip(str: buf);
2222 enable = true;
2223 } else {
2224 option_str = strstrip(str: &buf[3]);
2225 enable = false;
2226 }
2227
2228 option = match_string(array: osnoise_options_str, n: OSN_MAX, string: option_str);
2229 if (option < 0)
2230 return -EINVAL;
2231
2232 /*
2233 * trace_types_lock is taken to avoid concurrency on start/stop.
2234 */
2235 mutex_lock(&trace_types_lock);
2236 running = osnoise_has_registered_instances();
2237 if (running)
2238 stop_per_cpu_kthreads();
2239
2240 mutex_lock(&interface_lock);
2241 /*
2242 * avoid CPU hotplug operations that might read options.
2243 */
2244 cpus_read_lock();
2245
2246 retval = cnt;
2247
2248 if (enable) {
2249 if (option == OSN_DEFAULTS)
2250 osnoise_options = OSN_DEFAULT_OPTIONS;
2251 else
2252 set_bit(nr: option, addr: &osnoise_options);
2253 } else {
2254 if (option == OSN_DEFAULTS)
2255 retval = -EINVAL;
2256 else
2257 clear_bit(nr: option, addr: &osnoise_options);
2258 }
2259
2260 cpus_read_unlock();
2261 mutex_unlock(lock: &interface_lock);
2262
2263 if (running)
2264 start_per_cpu_kthreads();
2265 mutex_unlock(lock: &trace_types_lock);
2266
2267 return retval;
2268}
2269
2270/*
2271 * osnoise_cpus_read - Read function for reading the "cpus" file
2272 * @filp: The active open file structure
2273 * @ubuf: The userspace provided buffer to read value into
2274 * @cnt: The maximum number of bytes to read
2275 * @ppos: The current "file" position
2276 *
2277 * Prints the "cpus" output into the user-provided buffer.
2278 */
2279static ssize_t
2280osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count,
2281 loff_t *ppos)
2282{
2283 char *mask_str __free(kfree) = NULL;
2284 int len;
2285
2286 guard(mutex)(T: &interface_lock);
2287
2288 len = snprintf(NULL, size: 0, fmt: "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
2289 mask_str = kmalloc(len, GFP_KERNEL);
2290 if (!mask_str)
2291 return -ENOMEM;
2292
2293 len = snprintf(buf: mask_str, size: len, fmt: "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2294 if (len >= count)
2295 return -EINVAL;
2296
2297 count = simple_read_from_buffer(to: ubuf, count, ppos, from: mask_str, available: len);
2298
2299 return count;
2300}
2301
2302/*
2303 * osnoise_cpus_write - Write function for "cpus" entry
2304 * @filp: The active open file structure
2305 * @ubuf: The user buffer that contains the value to write
2306 * @count: The maximum number of bytes to write to "file"
2307 * @ppos: The current position in @file
2308 *
2309 * This function provides a write implementation for the "cpus"
2310 * interface to the osnoise trace. By default, it lists all CPUs,
2311 * in this way, allowing osnoise threads to run on any online CPU
2312 * of the system. It serves to restrict the execution of osnoise to the
2313 * set of CPUs writing via this interface. Why not use "tracing_cpumask"?
2314 * Because the user might be interested in tracing what is running on
2315 * other CPUs. For instance, one might run osnoise in one HT CPU
2316 * while observing what is running on the sibling HT CPU.
2317 */
2318static ssize_t
2319osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count,
2320 loff_t *ppos)
2321{
2322 cpumask_var_t osnoise_cpumask_new;
2323 int running, err;
2324 char *buf __free(kfree) = NULL;
2325
2326 if (count < 1)
2327 return 0;
2328
2329 buf = memdup_user_nul(ubuf, count);
2330 if (IS_ERR(ptr: buf))
2331 return PTR_ERR(ptr: buf);
2332
2333 if (!zalloc_cpumask_var(mask: &osnoise_cpumask_new, GFP_KERNEL))
2334 return -ENOMEM;
2335
2336 err = cpulist_parse(buf, dstp: osnoise_cpumask_new);
2337 if (err)
2338 goto err_free;
2339
2340 /*
2341 * trace_types_lock is taken to avoid concurrency on start/stop.
2342 */
2343 mutex_lock(&trace_types_lock);
2344 running = osnoise_has_registered_instances();
2345 if (running)
2346 stop_per_cpu_kthreads();
2347
2348 mutex_lock(&interface_lock);
2349 /*
2350 * osnoise_cpumask is read by CPU hotplug operations.
2351 */
2352 cpus_read_lock();
2353
2354 cpumask_copy(dstp: &osnoise_cpumask, srcp: osnoise_cpumask_new);
2355
2356 cpus_read_unlock();
2357 mutex_unlock(lock: &interface_lock);
2358
2359 if (running)
2360 start_per_cpu_kthreads();
2361 mutex_unlock(lock: &trace_types_lock);
2362
2363 free_cpumask_var(mask: osnoise_cpumask_new);
2364 return count;
2365
2366err_free:
2367 free_cpumask_var(mask: osnoise_cpumask_new);
2368
2369 return err;
2370}
2371
2372#ifdef CONFIG_TIMERLAT_TRACER
2373static int timerlat_fd_open(struct inode *inode, struct file *file)
2374{
2375 struct osnoise_variables *osn_var;
2376 struct timerlat_variables *tlat;
2377 long cpu = (long) inode->i_cdev;
2378
2379 mutex_lock(&interface_lock);
2380
2381 /*
2382 * This file is accessible only if timerlat is enabled, and
2383 * NO_OSNOISE_WORKLOAD is set.
2384 */
2385 if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) {
2386 mutex_unlock(lock: &interface_lock);
2387 return -EINVAL;
2388 }
2389
2390 migrate_disable();
2391
2392 osn_var = this_cpu_osn_var();
2393
2394 /*
2395 * The osn_var->pid holds the single access to this file.
2396 */
2397 if (osn_var->pid) {
2398 mutex_unlock(lock: &interface_lock);
2399 migrate_enable();
2400 return -EBUSY;
2401 }
2402
2403 /*
2404 * timerlat tracer is a per-cpu tracer. Check if the user-space too
2405 * is pinned to a single CPU. The tracer laters monitor if the task
2406 * migrates and then disables tracer if it does. However, it is
2407 * worth doing this basic acceptance test to avoid obviusly wrong
2408 * setup.
2409 */
2410 if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) {
2411 mutex_unlock(lock: &interface_lock);
2412 migrate_enable();
2413 return -EPERM;
2414 }
2415
2416 /*
2417 * From now on, it is good to go.
2418 */
2419 file->private_data = inode->i_cdev;
2420
2421 get_task_struct(current);
2422
2423 osn_var->kthread = current;
2424 osn_var->pid = current->pid;
2425
2426 /*
2427 * Setup is done.
2428 */
2429 mutex_unlock(lock: &interface_lock);
2430
2431 tlat = this_cpu_tmr_var();
2432 tlat->count = 0;
2433
2434 hrtimer_setup(timer: &tlat->timer, function: timerlat_irq, CLOCK_MONOTONIC, mode: HRTIMER_MODE_ABS_PINNED_HARD);
2435
2436 migrate_enable();
2437 return 0;
2438};
2439
2440/*
2441 * timerlat_fd_read - Read function for "timerlat_fd" file
2442 * @file: The active open file structure
2443 * @ubuf: The userspace provided buffer to read value into
2444 * @cnt: The maximum number of bytes to read
2445 * @ppos: The current "file" position
2446 *
2447 * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error.
2448 */
2449static ssize_t
2450timerlat_fd_read(struct file *file, char __user *ubuf, size_t count,
2451 loff_t *ppos)
2452{
2453 long cpu = (long) file->private_data;
2454 struct osnoise_variables *osn_var;
2455 struct timerlat_variables *tlat;
2456 struct timerlat_sample s;
2457 s64 diff;
2458 u64 now;
2459
2460 migrate_disable();
2461
2462 tlat = this_cpu_tmr_var();
2463
2464 /*
2465 * While in user-space, the thread is migratable. There is nothing
2466 * we can do about it.
2467 * So, if the thread is running on another CPU, stop the machinery.
2468 */
2469 if (cpu == smp_processor_id()) {
2470 if (tlat->uthread_migrate) {
2471 migrate_enable();
2472 return -EINVAL;
2473 }
2474 } else {
2475 per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1;
2476 osnoise_taint("timerlat user thread migrate\n");
2477 osnoise_stop_tracing();
2478 migrate_enable();
2479 return -EINVAL;
2480 }
2481
2482 osn_var = this_cpu_osn_var();
2483
2484 /*
2485 * The timerlat in user-space runs in a different order:
2486 * the read() starts from the execution of the previous occurrence,
2487 * sleeping for the next occurrence.
2488 *
2489 * So, skip if we are entering on read() before the first wakeup
2490 * from timerlat IRQ:
2491 */
2492 if (likely(osn_var->sampling)) {
2493 now = ktime_to_ns(kt: hrtimer_cb_get_time(timer: &tlat->timer));
2494 diff = now - tlat->abs_period;
2495
2496 /*
2497 * it was not a timer firing, but some other signal?
2498 */
2499 if (diff < 0)
2500 goto out;
2501
2502 s.seqnum = tlat->count;
2503 s.timer_latency = diff;
2504 s.context = THREAD_URET;
2505
2506 record_timerlat_sample(sample: &s);
2507
2508 notify_new_max_latency(latency: diff);
2509
2510 tlat->tracing_thread = false;
2511 if (osnoise_data.stop_tracing_total)
2512 if (time_to_us(diff) >= osnoise_data.stop_tracing_total)
2513 osnoise_stop_tracing();
2514 } else {
2515 tlat->tracing_thread = false;
2516 tlat->kthread = current;
2517
2518 /* Annotate now to drift new period */
2519 tlat->abs_period = hrtimer_cb_get_time(timer: &tlat->timer);
2520
2521 osn_var->sampling = 1;
2522 }
2523
2524 /* wait for the next period */
2525 wait_next_period(tlat);
2526
2527 /* This is the wakeup from this cycle */
2528 now = ktime_to_ns(kt: hrtimer_cb_get_time(timer: &tlat->timer));
2529 diff = now - tlat->abs_period;
2530
2531 /*
2532 * it was not a timer firing, but some other signal?
2533 */
2534 if (diff < 0)
2535 goto out;
2536
2537 s.seqnum = tlat->count;
2538 s.timer_latency = diff;
2539 s.context = THREAD_CONTEXT;
2540
2541 record_timerlat_sample(sample: &s);
2542
2543 if (osnoise_data.stop_tracing_total) {
2544 if (time_to_us(diff) >= osnoise_data.stop_tracing_total) {
2545 timerlat_dump_stack(time_to_us(diff));
2546 notify_new_max_latency(latency: diff);
2547 osnoise_stop_tracing();
2548 }
2549 }
2550
2551out:
2552 migrate_enable();
2553 return 0;
2554}
2555
2556static int timerlat_fd_release(struct inode *inode, struct file *file)
2557{
2558 struct osnoise_variables *osn_var;
2559 struct timerlat_variables *tlat_var;
2560 long cpu = (long) file->private_data;
2561
2562 migrate_disable();
2563 mutex_lock(&interface_lock);
2564
2565 osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
2566 tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
2567
2568 if (tlat_var->kthread)
2569 hrtimer_cancel(timer: &tlat_var->timer);
2570 memset(tlat_var, 0, sizeof(*tlat_var));
2571
2572 osn_var->sampling = 0;
2573 osn_var->pid = 0;
2574
2575 /*
2576 * We are leaving, not being stopped... see stop_kthread();
2577 */
2578 if (osn_var->kthread) {
2579 put_task_struct(t: osn_var->kthread);
2580 osn_var->kthread = NULL;
2581 }
2582
2583 mutex_unlock(lock: &interface_lock);
2584 migrate_enable();
2585 return 0;
2586}
2587#endif
2588
2589/*
2590 * osnoise/runtime_us: cannot be greater than the period.
2591 */
2592static struct trace_min_max_param osnoise_runtime = {
2593 .lock = &interface_lock,
2594 .val = &osnoise_data.sample_runtime,
2595 .max = &osnoise_data.sample_period,
2596 .min = NULL,
2597};
2598
2599/*
2600 * osnoise/period_us: cannot be smaller than the runtime.
2601 */
2602static struct trace_min_max_param osnoise_period = {
2603 .lock = &interface_lock,
2604 .val = &osnoise_data.sample_period,
2605 .max = NULL,
2606 .min = &osnoise_data.sample_runtime,
2607};
2608
2609/*
2610 * osnoise/stop_tracing_us: no limit.
2611 */
2612static struct trace_min_max_param osnoise_stop_tracing_in = {
2613 .lock = &interface_lock,
2614 .val = &osnoise_data.stop_tracing,
2615 .max = NULL,
2616 .min = NULL,
2617};
2618
2619/*
2620 * osnoise/stop_tracing_total_us: no limit.
2621 */
2622static struct trace_min_max_param osnoise_stop_tracing_total = {
2623 .lock = &interface_lock,
2624 .val = &osnoise_data.stop_tracing_total,
2625 .max = NULL,
2626 .min = NULL,
2627};
2628
#ifdef CONFIG_TIMERLAT_TRACER
/*
 * osnoise/print_stack: print the stacktrace of the IRQ handler if the total
 * latency is higher than val.
 */
static struct trace_min_max_param osnoise_print_stack = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.print_stack,
	.max	= NULL,
	.min	= NULL,
};

/*
 * osnoise/timerlat_period: min 100 us, max 1 s
 */
static u64 timerlat_min_period = 100;
static u64 timerlat_max_period = 1000000;
static struct trace_min_max_param timerlat_period = {
	.lock	= &interface_lock,
	.val	= &osnoise_data.timerlat_period,
	.max	= &timerlat_max_period,
	.min	= &timerlat_min_period,
};

static const struct file_operations timerlat_fd_fops = {
	.open		= timerlat_fd_open,
	.read		= timerlat_fd_read,
	.release	= timerlat_fd_release,
	.llseek		= generic_file_llseek,
};
#endif
2660
2661static const struct file_operations cpus_fops = {
2662 .open = tracing_open_generic,
2663 .read = osnoise_cpus_read,
2664 .write = osnoise_cpus_write,
2665 .llseek = generic_file_llseek,
2666};
2667
2668static const struct file_operations osnoise_options_fops = {
2669 .open = osnoise_options_open,
2670 .read = seq_read,
2671 .llseek = seq_lseek,
2672 .release = seq_release,
2673 .write = osnoise_options_write
2674};
2675
2676#ifdef CONFIG_TIMERLAT_TRACER
2677#ifdef CONFIG_STACKTRACE
2678static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2679{
2680 struct dentry *tmp;
2681
2682 tmp = tracefs_create_file(name: "print_stack", TRACE_MODE_WRITE, parent: top_dir,
2683 data: &osnoise_print_stack, fops: &trace_min_max_fops);
2684 if (!tmp)
2685 return -ENOMEM;
2686
2687 return 0;
2688}
2689#else /* CONFIG_STACKTRACE */
2690static int init_timerlat_stack_tracefs(struct dentry *top_dir)
2691{
2692 return 0;
2693}
2694#endif /* CONFIG_STACKTRACE */
2695
2696static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir)
2697{
2698 struct dentry *timerlat_fd;
2699 struct dentry *per_cpu;
2700 struct dentry *cpu_dir;
2701 char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */
2702 long cpu;
2703
2704 /*
2705 * Why not using tracing instance per_cpu/ dir?
2706 *
2707 * Because osnoise/timerlat have a single workload, having
2708 * multiple files like these are waste of memory.
2709 */
2710 per_cpu = tracefs_create_dir(name: "per_cpu", parent: top_dir);
2711 if (!per_cpu)
2712 return -ENOMEM;
2713
2714 for_each_possible_cpu(cpu) {
2715 snprintf(buf: cpu_str, size: 30, fmt: "cpu%ld", cpu);
2716 cpu_dir = tracefs_create_dir(name: cpu_str, parent: per_cpu);
2717 if (!cpu_dir)
2718 goto out_clean;
2719
2720 timerlat_fd = trace_create_file(name: "timerlat_fd", TRACE_MODE_READ,
2721 parent: cpu_dir, NULL, fops: &timerlat_fd_fops);
2722 if (!timerlat_fd)
2723 goto out_clean;
2724
2725 /* Record the CPU */
2726 d_inode(dentry: timerlat_fd)->i_cdev = (void *)(cpu);
2727 }
2728
2729 return 0;
2730
2731out_clean:
2732 tracefs_remove(dentry: per_cpu);
2733 return -ENOMEM;
2734}
2735
2736/*
2737 * init_timerlat_tracefs - A function to initialize the timerlat interface files
2738 */
2739static int init_timerlat_tracefs(struct dentry *top_dir)
2740{
2741 struct dentry *tmp;
2742 int retval;
2743
2744 tmp = tracefs_create_file(name: "timerlat_period_us", TRACE_MODE_WRITE, parent: top_dir,
2745 data: &timerlat_period, fops: &trace_min_max_fops);
2746 if (!tmp)
2747 return -ENOMEM;
2748
2749 retval = osnoise_create_cpu_timerlat_fd(top_dir);
2750 if (retval)
2751 return retval;
2752
2753 return init_timerlat_stack_tracefs(top_dir);
2754}
2755#else /* CONFIG_TIMERLAT_TRACER */
2756static int init_timerlat_tracefs(struct dentry *top_dir)
2757{
2758 return 0;
2759}
2760#endif /* CONFIG_TIMERLAT_TRACER */
2761
2762/*
2763 * init_tracefs - A function to initialize the tracefs interface files
2764 *
2765 * This function creates entries in tracefs for "osnoise" and "timerlat".
2766 * It creates these directories in the tracing directory, and within that
2767 * directory the use can change and view the configs.
2768 */
2769static int init_tracefs(void)
2770{
2771 struct dentry *top_dir;
2772 struct dentry *tmp;
2773 int ret;
2774
2775 ret = tracing_init_dentry();
2776 if (ret)
2777 return -ENOMEM;
2778
2779 top_dir = tracefs_create_dir(name: "osnoise", NULL);
2780 if (!top_dir)
2781 return 0;
2782
2783 tmp = tracefs_create_file(name: "period_us", TRACE_MODE_WRITE, parent: top_dir,
2784 data: &osnoise_period, fops: &trace_min_max_fops);
2785 if (!tmp)
2786 goto err;
2787
2788 tmp = tracefs_create_file(name: "runtime_us", TRACE_MODE_WRITE, parent: top_dir,
2789 data: &osnoise_runtime, fops: &trace_min_max_fops);
2790 if (!tmp)
2791 goto err;
2792
2793 tmp = tracefs_create_file(name: "stop_tracing_us", TRACE_MODE_WRITE, parent: top_dir,
2794 data: &osnoise_stop_tracing_in, fops: &trace_min_max_fops);
2795 if (!tmp)
2796 goto err;
2797
2798 tmp = tracefs_create_file(name: "stop_tracing_total_us", TRACE_MODE_WRITE, parent: top_dir,
2799 data: &osnoise_stop_tracing_total, fops: &trace_min_max_fops);
2800 if (!tmp)
2801 goto err;
2802
2803 tmp = trace_create_file(name: "cpus", TRACE_MODE_WRITE, parent: top_dir, NULL, fops: &cpus_fops);
2804 if (!tmp)
2805 goto err;
2806
2807 tmp = trace_create_file(name: "options", TRACE_MODE_WRITE, parent: top_dir, NULL,
2808 fops: &osnoise_options_fops);
2809 if (!tmp)
2810 goto err;
2811
2812 ret = init_timerlat_tracefs(top_dir);
2813 if (ret)
2814 goto err;
2815
2816 return 0;
2817
2818err:
2819 tracefs_remove(dentry: top_dir);
2820 return -ENOMEM;
2821}
2822
2823static int osnoise_hook_events(void)
2824{
2825 int retval;
2826
2827 /*
2828 * Trace is already hooked, we are re-enabling from
2829 * a stop_tracing_*.
2830 */
2831 if (trace_osnoise_callback_enabled)
2832 return 0;
2833
2834 retval = hook_irq_events();
2835 if (retval)
2836 return -EINVAL;
2837
2838 retval = hook_softirq_events();
2839 if (retval)
2840 goto out_unhook_irq;
2841
2842 retval = hook_thread_events();
2843 /*
2844 * All fine!
2845 */
2846 if (!retval)
2847 return 0;
2848
2849 unhook_softirq_events();
2850out_unhook_irq:
2851 unhook_irq_events();
2852 return -EINVAL;
2853}
2854
/*
 * osnoise_unhook_events - Unhook all tracepoints, reverse of hook order.
 */
static void osnoise_unhook_events(void)
{
	unhook_thread_events();
	unhook_softirq_events();
	unhook_irq_events();
}
2861
2862/*
2863 * osnoise_workload_start - start the workload and hook to events
2864 */
2865static int osnoise_workload_start(void)
2866{
2867 int retval;
2868
2869 /*
2870 * Instances need to be registered after calling workload
2871 * start. Hence, if there is already an instance, the
2872 * workload was already registered. Otherwise, this
2873 * code is on the way to register the first instance,
2874 * and the workload will start.
2875 */
2876 if (osnoise_has_registered_instances())
2877 return 0;
2878
2879 osn_var_reset_all();
2880
2881 retval = osnoise_hook_events();
2882 if (retval)
2883 return retval;
2884
2885 /*
2886 * Make sure that ftrace_nmi_enter/exit() see reset values
2887 * before enabling trace_osnoise_callback_enabled.
2888 */
2889 barrier();
2890 trace_osnoise_callback_enabled = true;
2891
2892 retval = start_per_cpu_kthreads();
2893 if (retval) {
2894 trace_osnoise_callback_enabled = false;
2895 /*
2896 * Make sure that ftrace_nmi_enter/exit() see
2897 * trace_osnoise_callback_enabled as false before continuing.
2898 */
2899 barrier();
2900
2901 osnoise_unhook_events();
2902 return retval;
2903 }
2904
2905 return 0;
2906}
2907
2908/*
2909 * osnoise_workload_stop - stop the workload and unhook the events
2910 */
2911static void osnoise_workload_stop(void)
2912{
2913 /*
2914 * Instances need to be unregistered before calling
2915 * stop. Hence, if there is a registered instance, more
2916 * than one instance is running, and the workload will not
2917 * yet stop. Otherwise, this code is on the way to disable
2918 * the last instance, and the workload can stop.
2919 */
2920 if (osnoise_has_registered_instances())
2921 return;
2922
2923 /*
2924 * If callbacks were already disabled in a previous stop
2925 * call, there is no need to disable then again.
2926 *
2927 * For instance, this happens when tracing is stopped via:
2928 * echo 0 > tracing_on
2929 * echo nop > current_tracer.
2930 */
2931 if (!trace_osnoise_callback_enabled)
2932 return;
2933
2934 trace_osnoise_callback_enabled = false;
2935 /*
2936 * Make sure that ftrace_nmi_enter/exit() see
2937 * trace_osnoise_callback_enabled as false before continuing.
2938 */
2939 barrier();
2940
2941 stop_per_cpu_kthreads();
2942
2943 osnoise_unhook_events();
2944}
2945
2946static void osnoise_tracer_start(struct trace_array *tr)
2947{
2948 int retval;
2949
2950 /*
2951 * If the instance is already registered, there is no need to
2952 * register it again.
2953 */
2954 if (osnoise_instance_registered(tr))
2955 return;
2956
2957 retval = osnoise_workload_start();
2958 if (retval)
2959 pr_err(BANNER "Error starting osnoise tracer\n");
2960
2961 osnoise_register_instance(tr);
2962}
2963
/*
 * osnoise_tracer_stop - Unregister the instance; the workload stops only
 * when the last registered instance goes away.
 */
static void osnoise_tracer_stop(struct trace_array *tr)
{
	osnoise_unregister_instance(tr);
	osnoise_workload_stop();
}
2969
2970static int osnoise_tracer_init(struct trace_array *tr)
2971{
2972 /*
2973 * Only allow osnoise tracer if timerlat tracer is not running
2974 * already.
2975 */
2976 if (timerlat_enabled())
2977 return -EBUSY;
2978
2979 tr->max_latency = 0;
2980
2981 osnoise_tracer_start(tr);
2982 return 0;
2983}
2984
/* Tracer reset callback: identical to stop for osnoise. */
static void osnoise_tracer_reset(struct trace_array *tr)
{
	osnoise_tracer_stop(tr);
}
2989
2990static struct tracer osnoise_tracer __read_mostly = {
2991 .name = "osnoise",
2992 .init = osnoise_tracer_init,
2993 .reset = osnoise_tracer_reset,
2994 .start = osnoise_tracer_start,
2995 .stop = osnoise_tracer_stop,
2996 .print_header = print_osnoise_headers,
2997 .allow_instances = true,
2998};
2999
3000#ifdef CONFIG_TIMERLAT_TRACER
3001static void timerlat_tracer_start(struct trace_array *tr)
3002{
3003 int retval;
3004
3005 /*
3006 * If the instance is already registered, there is no need to
3007 * register it again.
3008 */
3009 if (osnoise_instance_registered(tr))
3010 return;
3011
3012 retval = osnoise_workload_start();
3013 if (retval)
3014 pr_err(BANNER "Error starting timerlat tracer\n");
3015
3016 osnoise_register_instance(tr);
3017
3018 return;
3019}
3020
3021static void timerlat_tracer_stop(struct trace_array *tr)
3022{
3023 int cpu;
3024
3025 osnoise_unregister_instance(tr);
3026
3027 /*
3028 * Instruct the threads to stop only if this is the last instance.
3029 */
3030 if (!osnoise_has_registered_instances()) {
3031 for_each_online_cpu(cpu)
3032 per_cpu(per_cpu_osnoise_var, cpu).sampling = 0;
3033 }
3034
3035 osnoise_workload_stop();
3036}
3037
3038static int timerlat_tracer_init(struct trace_array *tr)
3039{
3040 /*
3041 * Only allow timerlat tracer if osnoise tracer is not running already.
3042 */
3043 if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer)
3044 return -EBUSY;
3045
3046 /*
3047 * If this is the first instance, set timerlat_tracer to block
3048 * osnoise tracer start.
3049 */
3050 if (!osnoise_has_registered_instances())
3051 osnoise_data.timerlat_tracer = 1;
3052
3053 tr->max_latency = 0;
3054 timerlat_tracer_start(tr);
3055
3056 return 0;
3057}
3058
3059static void timerlat_tracer_reset(struct trace_array *tr)
3060{
3061 timerlat_tracer_stop(tr);
3062
3063 /*
3064 * If this is the last instance, reset timerlat_tracer allowing
3065 * osnoise to be started.
3066 */
3067 if (!osnoise_has_registered_instances())
3068 osnoise_data.timerlat_tracer = 0;
3069}
3070
3071static struct tracer timerlat_tracer __read_mostly = {
3072 .name = "timerlat",
3073 .init = timerlat_tracer_init,
3074 .reset = timerlat_tracer_reset,
3075 .start = timerlat_tracer_start,
3076 .stop = timerlat_tracer_stop,
3077 .print_header = print_timerlat_headers,
3078 .allow_instances = true,
3079};
3080
3081__init static int init_timerlat_tracer(void)
3082{
3083 return register_tracer(type: &timerlat_tracer);
3084}
3085#else /* CONFIG_TIMERLAT_TRACER */
3086__init static int init_timerlat_tracer(void)
3087{
3088 return 0;
3089}
3090#endif /* CONFIG_TIMERLAT_TRACER */
3091
3092__init static int init_osnoise_tracer(void)
3093{
3094 int ret;
3095
3096 mutex_init(&interface_lock);
3097
3098 cpumask_copy(dstp: &osnoise_cpumask, cpu_all_mask);
3099
3100 ret = register_tracer(type: &osnoise_tracer);
3101 if (ret) {
3102 pr_err(BANNER "Error registering osnoise!\n");
3103 return ret;
3104 }
3105
3106 ret = init_timerlat_tracer();
3107 if (ret) {
3108 pr_err(BANNER "Error registering timerlat!\n");
3109 return ret;
3110 }
3111
3112 osnoise_init_hotplug_support();
3113
3114 INIT_LIST_HEAD_RCU(list: &osnoise_instances);
3115
3116 init_tracefs();
3117
3118 return 0;
3119}
3120late_initcall(init_osnoise_tracer);
3121

/* source: linux/kernel/trace/trace_osnoise.c */