sleep_timeout.c source code [linux/kernel/time/sleep_timeout.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* Kernel internal schedule timeout and sleeping functions
4	*/
5
6	#include <linux/delay.h>
7	#include <linux/jiffies.h>
8	#include <linux/timer.h>
9	#include <linux/sched/signal.h>
10	#include <linux/sched/debug.h>
11
12	#include "tick-internal.h"
13
14	/*
15	* Since schedule_timeout()'s timer is defined on the stack, it must store
16	* the target task on the stack as well.
17	*/
18	struct process_timer {
19	struct timer_list timer;
20	struct task_struct *task;
21	};
22
23	static void process_timeout(struct timer_list *t)
24	{
25	struct process_timer *timeout = timer_container_of(timeout, t, timer);
26
27	wake_up_process(tsk: timeout->task);
28	}
29
30	/**
31	* schedule_timeout - sleep until timeout
32	* @timeout: timeout value in jiffies
33	*
34	* Make the current task sleep until @timeout jiffies have elapsed.
35	* The function behavior depends on the current task state
36	* (see also set_current_state() description):
37	*
38	* %TASK_RUNNING - the scheduler is called, but the task does not sleep
39	* at all. That happens because sched_submit_work() does nothing for
40	* tasks in %TASK_RUNNING state.
41	*
42	* %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
43	* pass before the routine returns unless the current task is explicitly
44	* woken up, (e.g. by wake_up_process()).
45	*
46	* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
47	* delivered to the current task or the current task is explicitly woken
48	* up.
49	*
50	* The current task state is guaranteed to be %TASK_RUNNING when this
51	* routine returns.
52	*
53	* Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
54	* the CPU away without a bound on the timeout. In this case the return
55	* value will be %MAX_SCHEDULE_TIMEOUT.
56	*
57	* Returns: 0 when the timer has expired otherwise the remaining time in
58	* jiffies will be returned. In all cases the return value is guaranteed
59	* to be non-negative.
60	*/
61	signed long __sched schedule_timeout(signed long timeout)
62	{
63	struct process_timer timer;
64	unsigned long expire;
65
66	switch (timeout) {
67	case MAX_SCHEDULE_TIMEOUT:
68	/*
69	* These two special cases are useful to be comfortable
70	* in the caller. Nothing more. We could take
71	* MAX_SCHEDULE_TIMEOUT from one of the negative value
72	* but I' d like to return a valid offset (>=0) to allow
73	* the caller to do everything it want with the retval.
74	*/
75	schedule();
76	goto out;
77	default:
78	/*
79	* Another bit of PARANOID. Note that the retval will be
80	* 0 since no piece of kernel is supposed to do a check
81	* for a negative retval of schedule_timeout() (since it
82	* should never happens anyway). You just have the printk()
83	* that will tell you if something is gone wrong and where.
84	*/
85	if (timeout < `0`) {
86	pr_err("%s: wrong timeout value %lx\n", __func__, timeout);
87	dump_stack();
88	__set_current_state(TASK_RUNNING);
89	goto out;
90	}
91	}
92
93	expire = timeout + jiffies;
94
95	timer.task = current;
96	timer_setup_on_stack(&timer.timer, process_timeout, `0`);
97	timer.timer.expires = expire;
98	add_timer(timer: &timer.timer);
99	schedule();
100	timer_delete_sync(timer: &timer.timer);
101
102	/ Remove the timer from the object tracker /
103	timer_destroy_on_stack(timer: &timer.timer);
104
105	timeout = expire - jiffies;
106
107	out:
108	return timeout < `0` ? `0` : timeout;
109	}
110	EXPORT_SYMBOL(schedule_timeout);
111
112	/*
113	* __set_current_state() can be used in schedule_timeout_*() functions, because
114	* schedule_timeout() calls schedule() unconditionally.
115	*/
116
117	/**
118	* schedule_timeout_interruptible - sleep until timeout (interruptible)
119	* @timeout: timeout value in jiffies
120	*
121	* See schedule_timeout() for details.
122	*
123	* Task state is set to TASK_INTERRUPTIBLE before starting the timeout.
124	*/
125	signed long __sched schedule_timeout_interruptible(signed long timeout)
126	{
127	__set_current_state(TASK_INTERRUPTIBLE);
128	return schedule_timeout(timeout);
129	}
130	EXPORT_SYMBOL(schedule_timeout_interruptible);
131
132	/**
133	* schedule_timeout_killable - sleep until timeout (killable)
134	* @timeout: timeout value in jiffies
135	*
136	* See schedule_timeout() for details.
137	*
138	* Task state is set to TASK_KILLABLE before starting the timeout.
139	*/
140	signed long __sched schedule_timeout_killable(signed long timeout)
141	{
142	__set_current_state(TASK_KILLABLE);
143	return schedule_timeout(timeout);
144	}
145	EXPORT_SYMBOL(schedule_timeout_killable);
146
147	/**
148	* schedule_timeout_uninterruptible - sleep until timeout (uninterruptible)
149	* @timeout: timeout value in jiffies
150	*
151	* See schedule_timeout() for details.
152	*
153	* Task state is set to TASK_UNINTERRUPTIBLE before starting the timeout.
154	*/
155	signed long __sched schedule_timeout_uninterruptible(signed long timeout)
156	{
157	__set_current_state(TASK_UNINTERRUPTIBLE);
158	return schedule_timeout(timeout);
159	}
160	EXPORT_SYMBOL(schedule_timeout_uninterruptible);
161
162	/**
163	* schedule_timeout_idle - sleep until timeout (idle)
164	* @timeout: timeout value in jiffies
165	*
166	* See schedule_timeout() for details.
167	*
168	* Task state is set to TASK_IDLE before starting the timeout. It is similar to
169	* schedule_timeout_uninterruptible(), except this task will not contribute to
170	* load average.
171	*/
172	signed long __sched schedule_timeout_idle(signed long timeout)
173	{
174	__set_current_state(TASK_IDLE);
175	return schedule_timeout(timeout);
176	}
177	EXPORT_SYMBOL(schedule_timeout_idle);
178
179	/**
180	* schedule_hrtimeout_range_clock - sleep until timeout
181	* @expires: timeout value (ktime_t)
182	* @delta: slack in expires timeout (ktime_t)
183	* @mode: timer mode
184	* @clock_id: timer clock to be used
185	*
186	* Details are explained in schedule_hrtimeout_range() function description as
187	* this function is commonly used.
188	*/
189	int __sched schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
190	const enum hrtimer_mode mode, clockid_t clock_id)
191	{
192	struct hrtimer_sleeper t;
193
194	/*
195	* Optimize when a zero timeout value is given. It does not
196	* matter whether this is an absolute or a relative time.
197	*/
198	if (expires && *expires == `0`) {
199	__set_current_state(TASK_RUNNING);
200	return `0`;
201	}
202
203	/*
204	* A NULL parameter means "infinite"
205	*/
206	if (!expires) {
207	schedule();
208	return -EINTR;
209	}
210
211	hrtimer_setup_sleeper_on_stack(sl: &t, clock_id, mode);
212	hrtimer_set_expires_range_ns(timer: &t.timer, time: *expires, delta);
213	hrtimer_sleeper_start_expires(sl: &t, mode);
214
215	if (likely(t.task))
216	schedule();
217
218	hrtimer_cancel(timer: &t.timer);
219	destroy_hrtimer_on_stack(timer: &t.timer);
220
221	__set_current_state(TASK_RUNNING);
222
223	return !t.task ? `0` : -EINTR;
224	}
225	EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
226
227	/**
228	* schedule_hrtimeout_range - sleep until timeout
229	* @expires: timeout value (ktime_t)
230	* @delta: slack in expires timeout (ktime_t)
231	* @mode: timer mode
232	*
233	* Make the current task sleep until the given expiry time has
234	* elapsed. The routine will return immediately unless
235	* the current task state has been set (see set_current_state()).
236	*
237	* The @delta argument gives the kernel the freedom to schedule the
238	* actual wakeup to a time that is both power and performance friendly
239	* for regular (non RT/DL) tasks.
240	* The kernel give the normal best effort behavior for "@expires+@delta",
241	* but may decide to fire the timer earlier, but no earlier than @expires.
242	*
243	* You can set the task state as follows -
244	*
245	* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
246	* pass before the routine returns unless the current task is explicitly
247	* woken up, (e.g. by wake_up_process()).
248	*
249	* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
250	* delivered to the current task or the current task is explicitly woken
251	* up.
252	*
253	* The current task state is guaranteed to be TASK_RUNNING when this
254	* routine returns.
255	*
256	* Returns: 0 when the timer has expired. If the task was woken before the
257	* timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
258	* by an explicit wakeup, it returns -EINTR.
259	*/
260	int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
261	const enum hrtimer_mode mode)
262	{
263	return schedule_hrtimeout_range_clock(expires, delta, mode,
264	CLOCK_MONOTONIC);
265	}
266	EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
267
268	/**
269	* schedule_hrtimeout - sleep until timeout
270	* @expires: timeout value (ktime_t)
271	* @mode: timer mode
272	*
273	* See schedule_hrtimeout_range() for details. @delta argument of
274	* schedule_hrtimeout_range() is set to 0 and has therefore no impact.
275	*/
276	int __sched schedule_hrtimeout(ktime_t expires, const* enum hrtimer_mode mode)
277	{
278	return schedule_hrtimeout_range(expires, `0`, mode);
279	}
280	EXPORT_SYMBOL_GPL(schedule_hrtimeout);
281
/**
 * msleep - sleep safely even with waitqueue interruptions
 * @msecs:	Requested sleep duration in milliseconds
 *
 * msleep() uses jiffy based timeouts for the sleep duration. Because of the
 * design of the timer wheel, the maximum additional percentage delay (slack) is
 * 12.5%. This is only valid for timers which will end up in level 1 or a higher
 * level of the timer wheel. For explanation of those 12.5% please check the
 * detailed description about the basics of the timer wheel.
 *
 * The slack of timers which will end up in level 0 depends on sleep duration
 * (msecs) and HZ configuration and can be calculated in the following way (with
 * the timer wheel design restriction that the slack is not less than 12.5%):
 *
 *   ``slack = MSECS_PER_TICK / msecs``
 *
 * When the allowed slack of the callsite is known, the calculation could be
 * turned around to find the minimal allowed sleep duration to meet the
 * constraints. For example:
 *
 * * ``HZ=1000`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 1 / (1/4) = 4``:
 *   all sleep durations greater or equal 4ms will meet the constraints.
 * * ``HZ=1000`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 1 / (1/8) = 8``:
 *   all sleep durations greater or equal 8ms will meet the constraints.
 * * ``HZ=250`` with ``slack=25%``: ``MSECS_PER_TICK / slack = 4 / (1/4) = 16``:
 *   all sleep durations greater or equal 16ms will meet the constraints.
 * * ``HZ=250`` with ``slack=12.5%``: ``MSECS_PER_TICK / slack = 4 / (1/8) = 32``:
 *   all sleep durations greater or equal 32ms will meet the constraints.
 *
 * See also the signal aware variant msleep_interruptible().
 */
void msleep(unsigned int msecs)
{
	unsigned long timeout = msecs_to_jiffies(msecs);

	/* Go back to sleep for the remaining jiffies after an early return */
	while (timeout)
		timeout = schedule_timeout_uninterruptible(timeout);
}
EXPORT_SYMBOL(msleep);
321
322	/**
323	* msleep_interruptible - sleep waiting for signals
324	* @msecs: Requested sleep duration in milliseconds
325	*
326	* See msleep() for some basic information.
327	*
328	* The difference between msleep() and msleep_interruptible() is that the sleep
329	* could be interrupted by a signal delivery and then returns early.
330	*
331	* Returns: The remaining time of the sleep duration transformed to msecs (see
332	* schedule_timeout() for details).
333	*/
334	unsigned long msleep_interruptible(unsigned int msecs)
335	{
336	unsigned long timeout = msecs_to_jiffies(m: msecs);
337
338	while (timeout && !signal_pending(current))
339	timeout = schedule_timeout_interruptible(timeout);
340	return jiffies_to_msecs(j: timeout);
341	}
342	EXPORT_SYMBOL(msleep_interruptible);
343
344	/**
345	* usleep_range_state - Sleep for an approximate time in a given state
346	* @min: Minimum time in usecs to sleep
347	* @max: Maximum time in usecs to sleep
348	* @state: State of the current task that will be while sleeping
349	*
350	* usleep_range_state() sleeps at least for the minimum specified time but not
351	* longer than the maximum specified amount of time. The range might reduce
352	* power usage by allowing hrtimers to coalesce an already scheduled interrupt
353	* with this hrtimer. In the worst case, an interrupt is scheduled for the upper
354	* bound.
355	*
356	* The sleeping task is set to the specified state before starting the sleep.
357	*
358	* In non-atomic context where the exact wakeup time is flexible, use
359	* usleep_range() or its variants instead of udelay(). The sleep improves
360	* responsiveness by avoiding the CPU-hogging busy-wait of udelay().
361	*/
362	void __sched usleep_range_state(unsigned long min, unsigned long max, unsigned int state)
363	{
364	ktime_t exp = ktime_add_us(kt: ktime_get(), usec: min);
365	u64 delta = (u64)(max - min) * NSEC_PER_USEC;
366
367	if (WARN_ON_ONCE(max < min))
368	delta = `0`;
369
370	for (;;) {
371	__set_current_state(state);
372	/ Do not return before the requested sleep time has elapsed /
373	if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
374	break;
375	}
376	}
377	EXPORT_SYMBOL(usleep_range_state);
378

source code of linux/kernel/time/sleep_timeout.c