Skip to content

Commit 5168ae5

Browse files
Steven Rostedt (rostedt)
authored and committed
tracing: Remove ftrace_preempt_disable/enable
The ftrace_preempt_disable/enable functions were to address a recursive race caused by the function tracer. The function tracer traces all functions which makes it easily susceptible to recursion. One area was preempt_enable(). This would call the scheduler and the schedulre would call the function tracer and loop. (So was it thought). The ftrace_preempt_disable/enable was made to protect against recursion inside the scheduler by storing the NEED_RESCHED flag. If it was set before the ftrace_preempt_disable() it would not call schedule on ftrace_preempt_enable(), thinking that if it was set before then it would have already scheduled unless it was already in the scheduler. This worked fine except in the case of SMP, where another task would set the NEED_RESCHED flag for a task on another CPU, and then kick off an IPI to trigger it. This could cause the NEED_RESCHED to be saved at ftrace_preempt_disable() but the IPI to arrive in the the preempt disabled section. The ftrace_preempt_enable() would not call the scheduler because the flag was already set before entring the section. This bug would cause a missed preemption check and cause lower latencies. Investigating further, I found that the recusion caused by the function tracer was not due to schedule(), but due to preempt_schedule(). Now that preempt_schedule is completely annotated with notrace, the recusion no longer is an issue. Reported-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
1 parent d1f74e2 commit 5168ae5

9 files changed

Lines changed: 24 additions & 99 deletions

File tree

kernel/trace/ftrace.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
18831883
struct hlist_head *hhd;
18841884
struct hlist_node *n;
18851885
unsigned long key;
1886-
int resched;
18871886

18881887
key = hash_long(ip, FTRACE_HASH_BITS);
18891888

@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
18971896
* period. This syncs the hash iteration and freeing of items
18981897
* on the hash. rcu_read_lock is too dangerous here.
18991898
*/
1900-
resched = ftrace_preempt_disable();
1899+
preempt_disable_notrace();
19011900
hlist_for_each_entry_rcu(entry, n, hhd, node) {
19021901
if (entry->ip == ip)
19031902
entry->ops->func(ip, parent_ip, &entry->data);
19041903
}
1905-
ftrace_preempt_enable(resched);
1904+
preempt_enable_notrace();
19061905
}
19071906

19081907
static struct ftrace_ops trace_probe_ops __read_mostly =

kernel/trace/ring_buffer.c

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2234,8 +2234,6 @@ static void trace_recursive_unlock(void)
22342234

22352235
#endif
22362236

2237-
static DEFINE_PER_CPU(int, rb_need_resched);
2238-
22392237
/**
22402238
* ring_buffer_lock_reserve - reserve a part of the buffer
22412239
* @buffer: the ring buffer to reserve from
@@ -2256,13 +2254,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
22562254
{
22572255
struct ring_buffer_per_cpu *cpu_buffer;
22582256
struct ring_buffer_event *event;
2259-
int cpu, resched;
2257+
int cpu;
22602258

22612259
if (ring_buffer_flags != RB_BUFFERS_ON)
22622260
return NULL;
22632261

22642262
/* If we are tracing schedule, we don't want to recurse */
2265-
resched = ftrace_preempt_disable();
2263+
preempt_disable_notrace();
22662264

22672265
if (atomic_read(&buffer->record_disabled))
22682266
goto out_nocheck;
@@ -2287,21 +2285,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
22872285
if (!event)
22882286
goto out;
22892287

2290-
/*
2291-
* Need to store resched state on this cpu.
2292-
* Only the first needs to.
2293-
*/
2294-
2295-
if (preempt_count() == 1)
2296-
per_cpu(rb_need_resched, cpu) = resched;
2297-
22982288
return event;
22992289

23002290
out:
23012291
trace_recursive_unlock();
23022292

23032293
out_nocheck:
2304-
ftrace_preempt_enable(resched);
2294+
preempt_enable_notrace();
23052295
return NULL;
23062296
}
23072297
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2347,13 +2337,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
23472337

23482338
trace_recursive_unlock();
23492339

2350-
/*
2351-
* Only the last preempt count needs to restore preemption.
2352-
*/
2353-
if (preempt_count() == 1)
2354-
ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2355-
else
2356-
preempt_enable_no_resched_notrace();
2340+
preempt_enable_notrace();
23572341

23582342
return 0;
23592343
}
@@ -2461,13 +2445,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
24612445

24622446
trace_recursive_unlock();
24632447

2464-
/*
2465-
* Only the last preempt count needs to restore preemption.
2466-
*/
2467-
if (preempt_count() == 1)
2468-
ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
2469-
else
2470-
preempt_enable_no_resched_notrace();
2448+
preempt_enable_notrace();
24712449

24722450
}
24732451
EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2493,12 +2471,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
24932471
struct ring_buffer_event *event;
24942472
void *body;
24952473
int ret = -EBUSY;
2496-
int cpu, resched;
2474+
int cpu;
24972475

24982476
if (ring_buffer_flags != RB_BUFFERS_ON)
24992477
return -EBUSY;
25002478

2501-
resched = ftrace_preempt_disable();
2479+
preempt_disable_notrace();
25022480

25032481
if (atomic_read(&buffer->record_disabled))
25042482
goto out;
@@ -2528,7 +2506,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
25282506

25292507
ret = 0;
25302508
out:
2531-
ftrace_preempt_enable(resched);
2509+
preempt_enable_notrace();
25322510

25332511
return ret;
25342512
}

kernel/trace/trace.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1404,7 +1404,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
14041404
struct bprint_entry *entry;
14051405
unsigned long flags;
14061406
int disable;
1407-
int resched;
14081407
int cpu, len = 0, size, pc;
14091408

14101409
if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1414,7 +1413,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
14141413
pause_graph_tracing();
14151414

14161415
pc = preempt_count();
1417-
resched = ftrace_preempt_disable();
1416+
preempt_disable_notrace();
14181417
cpu = raw_smp_processor_id();
14191418
data = tr->data[cpu];
14201419

@@ -1452,7 +1451,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
14521451

14531452
out:
14541453
atomic_dec_return(&data->disabled);
1455-
ftrace_preempt_enable(resched);
1454+
preempt_enable_notrace();
14561455
unpause_graph_tracing();
14571456

14581457
return len;

kernel/trace/trace.h

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -628,54 +628,6 @@ enum trace_iterator_flags {
628628

629629
extern struct tracer nop_trace;
630630

631-
/**
632-
* ftrace_preempt_disable - disable preemption scheduler safe
633-
*
634-
* When tracing can happen inside the scheduler, there exists
635-
* cases that the tracing might happen before the need_resched
636-
* flag is checked. If this happens and the tracer calls
637-
* preempt_enable (after a disable), a schedule might take place
638-
* causing an infinite recursion.
639-
*
640-
* To prevent this, we read the need_resched flag before
641-
* disabling preemption. When we want to enable preemption we
642-
* check the flag, if it is set, then we call preempt_enable_no_resched.
643-
* Otherwise, we call preempt_enable.
644-
*
645-
* The rational for doing the above is that if need_resched is set
646-
* and we have yet to reschedule, we are either in an atomic location
647-
* (where we do not need to check for scheduling) or we are inside
648-
* the scheduler and do not want to resched.
649-
*/
650-
static inline int ftrace_preempt_disable(void)
651-
{
652-
int resched;
653-
654-
resched = need_resched();
655-
preempt_disable_notrace();
656-
657-
return resched;
658-
}
659-
660-
/**
661-
* ftrace_preempt_enable - enable preemption scheduler safe
662-
* @resched: the return value from ftrace_preempt_disable
663-
*
664-
* This is a scheduler safe way to enable preemption and not miss
665-
* any preemption checks. The disabled saved the state of preemption.
666-
* If resched is set, then we are either inside an atomic or
667-
* are inside the scheduler (we would have already scheduled
668-
* otherwise). In this case, we do not want to call normal
669-
* preempt_enable, but preempt_enable_no_resched instead.
670-
*/
671-
static inline void ftrace_preempt_enable(int resched)
672-
{
673-
if (resched)
674-
preempt_enable_no_resched_notrace();
675-
else
676-
preempt_enable_notrace();
677-
}
678-
679631
#ifdef CONFIG_BRANCH_TRACER
680632
extern int enable_branch_tracing(struct trace_array *tr);
681633
extern void disable_branch_tracing(void);

kernel/trace/trace_clock.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,15 @@
3232
u64 notrace trace_clock_local(void)
3333
{
3434
u64 clock;
35-
int resched;
3635

3736
/*
3837
* sched_clock() is an architecture implemented, fast, scalable,
3938
* lockless clock. It is not guaranteed to be coherent across
4039
* CPUs, nor across CPU idle events.
4140
*/
42-
resched = ftrace_preempt_disable();
41+
preempt_disable_notrace();
4342
clock = sched_clock();
44-
ftrace_preempt_enable(resched);
43+
preempt_enable_notrace();
4544

4645
return clock;
4746
}

kernel/trace/trace_events.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1524,12 +1524,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
15241524
struct ftrace_entry *entry;
15251525
unsigned long flags;
15261526
long disabled;
1527-
int resched;
15281527
int cpu;
15291528
int pc;
15301529

15311530
pc = preempt_count();
1532-
resched = ftrace_preempt_disable();
1531+
preempt_disable_notrace();
15331532
cpu = raw_smp_processor_id();
15341533
disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
15351534

@@ -1551,7 +1550,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
15511550

15521551
out:
15531552
atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1554-
ftrace_preempt_enable(resched);
1553+
preempt_enable_notrace();
15551554
}
15561555

15571556
static struct ftrace_ops trace_ops __initdata =

kernel/trace/trace_functions.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
5454
struct trace_array_cpu *data;
5555
unsigned long flags;
5656
long disabled;
57-
int cpu, resched;
57+
int cpu;
5858
int pc;
5959

6060
if (unlikely(!ftrace_function_enabled))
6161
return;
6262

6363
pc = preempt_count();
64-
resched = ftrace_preempt_disable();
64+
preempt_disable_notrace();
6565
local_save_flags(flags);
6666
cpu = raw_smp_processor_id();
6767
data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
7171
trace_function(tr, ip, parent_ip, flags, pc);
7272

7373
atomic_dec(&data->disabled);
74-
ftrace_preempt_enable(resched);
74+
preempt_enable_notrace();
7575
}
7676

7777
static void

kernel/trace/trace_sched_wakeup.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,14 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
4646
struct trace_array_cpu *data;
4747
unsigned long flags;
4848
long disabled;
49-
int resched;
5049
int cpu;
5150
int pc;
5251

5352
if (likely(!wakeup_task))
5453
return;
5554

5655
pc = preempt_count();
57-
resched = ftrace_preempt_disable();
56+
preempt_disable_notrace();
5857

5958
cpu = raw_smp_processor_id();
6059
if (cpu != wakeup_current_cpu)
@@ -74,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
7473
out:
7574
atomic_dec(&data->disabled);
7675
out_enable:
77-
ftrace_preempt_enable(resched);
76+
preempt_enable_notrace();
7877
}
7978

8079
static struct ftrace_ops trace_ops __read_mostly =

kernel/trace/trace_stack.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,12 @@ static inline void check_stack(void)
110110
static void
111111
stack_trace_call(unsigned long ip, unsigned long parent_ip)
112112
{
113-
int cpu, resched;
113+
int cpu;
114114

115115
if (unlikely(!ftrace_enabled || stack_trace_disabled))
116116
return;
117117

118-
resched = ftrace_preempt_disable();
118+
preempt_disable_notrace();
119119

120120
cpu = raw_smp_processor_id();
121121
/* no atomic needed, we only modify this variable by this cpu */
@@ -127,7 +127,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip)
127127
out:
128128
per_cpu(trace_active, cpu)--;
129129
/* prevent recursion in schedule */
130-
ftrace_preempt_enable(resched);
130+
preempt_enable_notrace();
131131
}
132132

133133
static struct ftrace_ops trace_ops __read_mostly =

0 commit comments

Comments
 (0)