// SPDX-License-Identifier: GPL-2.0
/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * Note: Most of this code is borrowed heavily from the original softlockup
 * detector, so thanks to Ingo for the initial implementation.
 * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
 * to those contributors as well.
 */

#define pr_fmt(fmt) "watchdog: " fmt

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/irqdesc.h>
#include <linux/kernel_stat.h>
#include <linux/kvm_para.h>
#include <linux/math64.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/nmi.h>
#include <linux/stop_machine.h>
#include <linux/sysctl.h>
#include <linux/tick.h>
#include <linux/sys_info.h>

#include <linux/sched/clock.h>
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>

#include <asm/irq_regs.h>

static DEFINE_MUTEX(watchdog_mutex);

#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HARDLOCKUP_DETECTOR_SPARC64)
# define WATCHDOG_HARDLOCKUP_DEFAULT 1
#else
# define WATCHDOG_HARDLOCKUP_DEFAULT 0
#endif

#define NUM_SAMPLE_PERIODS 5

unsigned long __read_mostly watchdog_enabled;
int __read_mostly watchdog_user_enabled = 1;
static int __read_mostly watchdog_hardlockup_user_enabled = WATCHDOG_HARDLOCKUP_DEFAULT;
static int __read_mostly watchdog_softlockup_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly watchdog_thresh_next;
static int __read_mostly watchdog_hardlockup_available;

struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);

#ifdef CONFIG_HARDLOCKUP_DETECTOR

# ifdef CONFIG_SMP
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
# endif /* CONFIG_SMP */

/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
unsigned int __read_mostly hardlockup_panic =
			IS_ENABLED(CONFIG_BOOTPARAM_HARDLOCKUP_PANIC);

/*
 * Bitmask controlling what kinds of system info are printed when a
 * hard lockup is detected: tasks, memory, locks, etc.
 * Refer to include/linux/sys_info.h for the detailed bit definitions.
 */
unsigned long hardlockup_si_mask;

#ifdef CONFIG_SYSFS

static unsigned int hardlockup_count;

static ssize_t hardlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
				     char *page)
{
	return sysfs_emit(page, "%u\n", hardlockup_count);
}

static struct kobj_attribute hardlockup_count_attr = __ATTR_RO(hardlockup_count);

static __init int kernel_hardlockup_sysfs_init(void)
{
	sysfs_add_file_to_group(kernel_kobj, &hardlockup_count_attr.attr, NULL);
	return 0;
}

late_initcall(kernel_hardlockup_sysfs_init);

#endif // CONFIG_SYSFS

/*
 * We may not want to enable hard lockup detection by default in all cases,
 * for example when running the kernel as a guest on a hypervisor. In these
 * cases this function can be called to disable hard lockup detection. This
 * function should only be executed once by the boot processor before the
 * kernel command line parameters are parsed, because otherwise it is not
 * possible to override this in hardlockup_panic_setup().
 */
void __init hardlockup_detector_disable(void)
{
	watchdog_hardlockup_user_enabled = 0;
}

static int __init hardlockup_panic_setup(char *str)
{
next:
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	else if (!strncmp(str, "nopanic", 7))
		hardlockup_panic = 0;
	else if (!strncmp(str, "0", 1))
		watchdog_hardlockup_user_enabled = 0;
	else if (!strncmp(str, "1", 1))
		watchdog_hardlockup_user_enabled = 1;
	else if (!strncmp(str, "r", 1))
		hardlockup_config_perf_event(str + 1);
	while (*(str++)) {
		if (*str == ',') {
			str++;
			goto next;
		}
	}
	return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);

#endif /* CONFIG_HARDLOCKUP_DETECTOR */

#if defined(CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER)

static DEFINE_PER_CPU(atomic_t, hrtimer_interrupts);
static DEFINE_PER_CPU(int, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(bool, watchdog_hardlockup_warned);
static DEFINE_PER_CPU(bool, watchdog_hardlockup_touched);
static unsigned long hard_lockup_nmi_warn;

notrace void arch_touch_nmi_watchdog(void)
{
	/*
	 * Using __raw here because some code paths have
	 * preemption enabled. If preemption is enabled
	 * then interrupts should be enabled too, in which
	 * case we shouldn't have to worry about the watchdog
	 * going off.
	 */
	raw_cpu_write(watchdog_hardlockup_touched, true);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);

void watchdog_hardlockup_touch_cpu(unsigned int cpu)
{
	per_cpu(watchdog_hardlockup_touched, cpu) = true;
}

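/*
 * Example: with the default watchdog_thresh of 10, the per-CPU hrtimer
 * fires every 4 seconds (see set_sample_period()). If two consecutive
 * hardlockup checks observe the same hrtimer_interrupts value, the timer
 * interrupt never ran in between and the CPU is flagged as hard-locked.
 */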
static bool is_hardlockup(unsigned int cpu)
{
	int hrint = atomic_read(&per_cpu(hrtimer_interrupts, cpu));

	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
		return true;

	/*
	 * NOTE: we don't need any fancy atomic_t or READ_ONCE/WRITE_ONCE
	 * for hrtimer_interrupts_saved. hrtimer_interrupts_saved is
	 * written/read by a single CPU.
	 */
	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;

	return false;
}

static void watchdog_hardlockup_kick(void)
{
	int new_interrupts;

	new_interrupts = atomic_inc_return(this_cpu_ptr(&hrtimer_interrupts));
	watchdog_buddy_check_hardlockup(new_interrupts);
}

void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs)
{
	int hardlockup_all_cpu_backtrace;

	if (per_cpu(watchdog_hardlockup_touched, cpu)) {
		per_cpu(watchdog_hardlockup_touched, cpu) = false;
		return;
	}

	hardlockup_all_cpu_backtrace = (hardlockup_si_mask & SYS_INFO_ALL_BT) ?
					1 : sysctl_hardlockup_all_cpu_backtrace;
	/*
	 * Check for a hardlockup by making sure the CPU's timer
	 * interrupt is incrementing. The timer interrupt should have
	 * fired multiple times before we overflowed. If it hasn't,
	 * then this is a good indication the CPU is stuck.
	 */
	if (is_hardlockup(cpu)) {
		unsigned int this_cpu = smp_processor_id();
		unsigned long flags;

#ifdef CONFIG_SYSFS
		++hardlockup_count;
#endif
		/*
		 * A poorly behaving BPF scheduler can trigger a hard lockup by
		 * e.g. putting numerous affinitized tasks in a single queue and
		 * directing all CPUs at it. The following call can return true
		 * only once when sched_ext is enabled and will immediately
		 * abort the BPF scheduler and print out a warning message.
		 */
		if (scx_hardlockup(cpu))
			return;

		/* Only print hardlockups once. */
		if (per_cpu(watchdog_hardlockup_warned, cpu))
			return;

		/*
		 * Prevent multiple hard-lockup reports if one CPU is already
		 * engaged in dumping all CPU back traces.
		 */
		if (hardlockup_all_cpu_backtrace) {
			if (test_and_set_bit_lock(0, &hard_lockup_nmi_warn))
				return;
		}

		/*
		 * NOTE: we call printk_cpu_sync_get_irqsave() after printing
		 * the lockup message. While it would be nice to serialize
		 * that printout, we really want to make sure that if some
		 * other CPU somehow locked up while holding the lock associated
		 * with printk_cpu_sync_get_irqsave() that we can still at least
		 * get the message about the lockup out.
		 */
		pr_emerg("CPU%u: Watchdog detected hard LOCKUP on cpu %u\n", this_cpu, cpu);
		printk_cpu_sync_get_irqsave(flags);

		print_modules();
		print_irqtrace_events(current);
		if (cpu == this_cpu) {
			if (regs)
				show_regs(regs);
			else
				dump_stack();
			printk_cpu_sync_put_irqrestore(flags);
		} else {
			printk_cpu_sync_put_irqrestore(flags);
			trigger_single_cpu_backtrace(cpu);
		}

		if (hardlockup_all_cpu_backtrace) {
			trigger_allbutcpu_cpu_backtrace(cpu);
			if (!hardlockup_panic)
				clear_bit_unlock(0, &hard_lockup_nmi_warn);
		}

		sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT);
		if (hardlockup_panic)
			nmi_panic(regs, "Hard LOCKUP");

		per_cpu(watchdog_hardlockup_warned, cpu) = true;
	} else {
		per_cpu(watchdog_hardlockup_warned, cpu) = false;
	}
}

#else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */

static inline void watchdog_hardlockup_kick(void) { }

#endif /* !CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */

/*
 * These functions can be overridden based on the configured hardlockup detector.
 *
 * watchdog_hardlockup_enable/disable can be implemented to start and stop
 * when the softlockup watchdog starts and stops. The detector must select
 * the SOFTLOCKUP_DETECTOR Kconfig.
 */
void __weak watchdog_hardlockup_enable(unsigned int cpu) { }

void __weak watchdog_hardlockup_disable(unsigned int cpu) { }

/*
 * Watchdog-detector specific API.
 *
 * Return 0 when hardlockup watchdog is available, negative value otherwise.
 * Note that the negative value means that a delayed probe might
 * succeed later.
 */
int __weak __init watchdog_hardlockup_probe(void)
{
	return -ENODEV;
}

/**
 * watchdog_hardlockup_stop - Stop the watchdog for reconfiguration
 *
 * The reconfiguration steps are:
 * watchdog_hardlockup_stop();
 * update_variables();
 * watchdog_hardlockup_start();
 */
void __weak watchdog_hardlockup_stop(void) { }

/**
 * watchdog_hardlockup_start - Start the watchdog after reconfiguration
 *
 * Counterpart to watchdog_hardlockup_stop().
 *
 * The following variables have been updated in update_variables() and
 * contain the currently valid configuration:
 * - watchdog_enabled
 * - watchdog_thresh
 * - watchdog_cpumask
 */
void __weak watchdog_hardlockup_start(void) { }

/**
 * lockup_detector_update_enable - Update the sysctl enable bit
 *
 * Caller needs to make sure that the hard watchdogs are off, so this
 * can't race with watchdog_hardlockup_disable().
 */
static void lockup_detector_update_enable(void)
{
	watchdog_enabled = 0;
	if (!watchdog_user_enabled)
		return;
	if (watchdog_hardlockup_available && watchdog_hardlockup_user_enabled)
		watchdog_enabled |= WATCHDOG_HARDLOCKUP_ENABLED;
	if (watchdog_softlockup_user_enabled)
		watchdog_enabled |= WATCHDOG_SOFTOCKUP_ENABLED;
}

#ifdef CONFIG_SOFTLOCKUP_DETECTOR

/*
 * Delay the softlockup report when running known slow code.
 * It does _not_ affect the timestamp of the last successful reschedule.
 */
#define SOFTLOCKUP_DELAY_REPORT	ULONG_MAX

#ifdef CONFIG_SMP
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
#endif

/*
 * Bitmask controlling what kinds of system info are printed when a
 * soft lockup is detected: tasks, memory, locks, etc.
 * Refer to include/linux/sys_info.h for the detailed bit definitions.
 */
static unsigned long softlockup_si_mask;

static struct cpumask watchdog_allowed_mask __read_mostly;

/* Global variables, exported for sysctl */
unsigned int __read_mostly softlockup_panic =
			IS_ENABLED(CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC);

static bool softlockup_initialized __read_mostly;
static u64 __read_mostly sample_period;

#ifdef CONFIG_SYSFS

static unsigned int softlockup_count;

static ssize_t softlockup_count_show(struct kobject *kobj, struct kobj_attribute *attr,
				     char *page)
{
	return sysfs_emit(page, "%u\n", softlockup_count);
}

static struct kobj_attribute softlockup_count_attr = __ATTR_RO(softlockup_count);

static __init int kernel_softlockup_sysfs_init(void)
{
	sysfs_add_file_to_group(kernel_kobj, &softlockup_count_attr.attr, NULL);
	return 0;
}

late_initcall(kernel_softlockup_sysfs_init);

#endif // CONFIG_SYSFS

/* Timestamp taken after the last successful reschedule. */
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
/* Timestamp of the last softlockup report. */
static DEFINE_PER_CPU(unsigned long, watchdog_report_ts);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static unsigned long soft_lockup_nmi_warn;

static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);
	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
	watchdog_user_enabled = 0;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

static int __init nosoftlockup_setup(char *str)
{
	watchdog_softlockup_user_enabled = 0;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);

static int __init watchdog_thresh_setup(char *str)
{
	get_option(&str, &watchdog_thresh);
	return 1;
}
__setup("watchdog_thresh=", watchdog_thresh_setup);

#ifdef CONFIG_SOFTLOCKUP_DETECTOR_INTR_STORM
enum stats_per_group {
	STATS_SYSTEM,
	STATS_SOFTIRQ,
	STATS_HARDIRQ,
	STATS_IDLE,
	NUM_STATS_PER_GROUP,
};

static const enum cpu_usage_stat tracked_stats[NUM_STATS_PER_GROUP] = {
	CPUTIME_SYSTEM,
	CPUTIME_SOFTIRQ,
	CPUTIME_IRQ,
	CPUTIME_IDLE,
};

static DEFINE_PER_CPU(u16, cpustat_old[NUM_STATS_PER_GROUP]);
static DEFINE_PER_CPU(u8, cpustat_util[NUM_SAMPLE_PERIODS][NUM_STATS_PER_GROUP]);
static DEFINE_PER_CPU(u8, cpustat_tail);

/*
 * We don't need nanosecond resolution. A granularity of 16ms is
 * sufficient for our precision, allowing us to use u16 to store
 * cpustats, which will roll over roughly every ~1000 seconds.
 * 2^24 ~= 16 * 10^6
 */
static u16 get_16bit_precision(u64 data_ns)
{
	/*
	 * 2^24ns ~= 16.8ms
	 * Round to the nearest multiple of 16.8 milliseconds.
	 */
	return (data_ns + (1 << 23)) >> 24LL;
}
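/*
 * Worked example: with the default 4s sample period,
 * get_16bit_precision(4 * NSEC_PER_SEC) = (4e9 + 2^23) >> 24 = 238,
 * i.e. one sample period is about 238 ticks of ~16.8ms each, which
 * fits comfortably in a u16.
 */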

static void update_cpustat(void)
{
	int i;
	u8 util;
	u16 old_stat, new_stat;
	struct kernel_cpustat kcpustat;
	u64 *cpustat = kcpustat.cpustat;
	u8 tail = __this_cpu_read(cpustat_tail);
	u16 sample_period_16 = get_16bit_precision(sample_period);

	kcpustat_cpu_fetch(&kcpustat, smp_processor_id());

	for (i = 0; i < NUM_STATS_PER_GROUP; i++) {
		old_stat = __this_cpu_read(cpustat_old[i]);
		new_stat = get_16bit_precision(cpustat[tracked_stats[i]]);
		util = DIV_ROUND_UP(100 * (new_stat - old_stat), sample_period_16);
		/*
		 * Since we use 16-bit precision, the raw data undergoes
		 * integer division, which sometimes loses information, so
		 * the result might exceed 100%. To avoid confusion, we cap
		 * the displayed value at 100% when the calculation exceeds
		 * that threshold.
		 */
		if (util > 100)
			util = 100;
		__this_cpu_write(cpustat_util[tail][i], util);
		__this_cpu_write(cpustat_old[i], new_stat);
	}

	__this_cpu_write(cpustat_tail, (tail + 1) % NUM_SAMPLE_PERIODS);
}
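/*
 * Example of one update step: if a CPU spent 2s of the 4s sample period
 * in hardirq context, new_stat - old_stat is about 119 ticks and
 * DIV_ROUND_UP(100 * 119, 238) yields 50, i.e. 50% hardirq utilization
 * stored in the cpustat_util ring buffer.
 */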

static void print_cpustat(void)
{
	int i, group;
	u8 tail = __this_cpu_read(cpustat_tail);
	u64 sample_period_msecond = sample_period;

	do_div(sample_period_msecond, NSEC_PER_MSEC);

	/*
	 * Outputting the "watchdog" prefix on every line is redundant and not
	 * concise, and the original alarm information is sufficient for
	 * positioning in logs, hence printk() is used here instead of pr_crit().
	 */
	printk(KERN_CRIT "CPU#%d Utilization every %llums during lockup:\n",
	       smp_processor_id(), sample_period_msecond);

	for (i = 0; i < NUM_SAMPLE_PERIODS; i++) {
		group = (tail + i) % NUM_SAMPLE_PERIODS;
		printk(KERN_CRIT "\t#%d: %3u%% system,\t%3u%% softirq,\t"
		       "%3u%% hardirq,\t%3u%% idle\n", i + 1,
		       __this_cpu_read(cpustat_util[group][STATS_SYSTEM]),
		       __this_cpu_read(cpustat_util[group][STATS_SOFTIRQ]),
		       __this_cpu_read(cpustat_util[group][STATS_HARDIRQ]),
		       __this_cpu_read(cpustat_util[group][STATS_IDLE]));
	}
}
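/*
 * Illustrative output (values invented for the example), oldest sample
 * first:
 *
 *   CPU#3 Utilization every 4000ms during lockup:
 *	#1:   0% system,   0% softirq, 100% hardirq,   0% idle
 *	#2:   0% system,   0% softirq, 100% hardirq,   0% idle
 *	...
 */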

#define HARDIRQ_PERCENT_THRESH 50
#define NUM_HARDIRQ_REPORT 5
struct irq_counts {
	int irq;
	u32 counts;
};

static DEFINE_PER_CPU(bool, snapshot_taken);

/* Tabulate the most frequent interrupts. */
static void tabulate_irq_count(struct irq_counts *irq_counts, int irq, u32 counts, int rank)
{
	int i;
	struct irq_counts new_count = {irq, counts};

	for (i = 0; i < rank; i++) {
		if (counts > irq_counts[i].counts)
			swap(new_count, irq_counts[i]);
	}
}
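/*
 * Example: with irq_counts holding {10, 8, 5} (descending), inserting a
 * count of 9 compares against each slot and swaps downward, leaving
 * {10, 9, 8}; the comparison always uses the original count, so the
 * displaced entries simply shift toward the tail.
 */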

/*
 * If the hardirq time exceeds HARDIRQ_PERCENT_THRESH% of the sample_period,
 * then the cause of the softlockup might be an interrupt storm. In this
 * case, it would be useful to start interrupt counting.
 */
static bool need_counting_irqs(void)
{
	u8 util;
	int tail = __this_cpu_read(cpustat_tail);

	tail = (tail + NUM_SAMPLE_PERIODS - 1) % NUM_SAMPLE_PERIODS;
	util = __this_cpu_read(cpustat_util[tail][STATS_HARDIRQ]);
	return util > HARDIRQ_PERCENT_THRESH;
}

static void start_counting_irqs(void)
{
	if (!__this_cpu_read(snapshot_taken)) {
		kstat_snapshot_irqs();
		__this_cpu_write(snapshot_taken, true);
	}
}

static void stop_counting_irqs(void)
{
	__this_cpu_write(snapshot_taken, false);
}

static void print_irq_counts(void)
{
	unsigned int i, count;
	struct irq_counts irq_counts_sorted[NUM_HARDIRQ_REPORT] = {
		{-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}, {-1, 0}
	};

	if (__this_cpu_read(snapshot_taken)) {
		for_each_active_irq(i) {
			count = kstat_get_irq_since_snapshot(i);
			tabulate_irq_count(irq_counts_sorted, i, count, NUM_HARDIRQ_REPORT);
		}

		/*
		 * Outputting the "watchdog" prefix on every line is redundant and not
		 * concise, and the original alarm information is sufficient for
		 * positioning in logs, hence printk() is used here instead of pr_crit().
		 */
		printk(KERN_CRIT "CPU#%d Detect HardIRQ Time exceeds %d%%. Most frequent HardIRQs:\n",
		       smp_processor_id(), HARDIRQ_PERCENT_THRESH);

		for (i = 0; i < NUM_HARDIRQ_REPORT; i++) {
			if (irq_counts_sorted[i].irq == -1)
				break;

			printk(KERN_CRIT "\t#%u: %-10u\tirq#%d\n",
			       i + 1, irq_counts_sorted[i].counts,
			       irq_counts_sorted[i].irq);
		}

		/*
		 * If the hardirq time is less than HARDIRQ_PERCENT_THRESH% in the last
		 * sample_period, then we suspect the interrupt storm might be subsiding.
		 */
		if (!need_counting_irqs())
			stop_counting_irqs();
	}
}

static void report_cpu_status(void)
{
	print_cpustat();
	print_irq_counts();
}
#else
static inline void update_cpustat(void) { }
static inline void report_cpu_status(void) { }
static inline bool need_counting_irqs(void) { return false; }
static inline void start_counting_irqs(void) { }
static inline void stop_counting_irqs(void) { }
#endif

/*
 * Hard-lockup warnings should be triggered after just a few seconds. Soft-
 * lockups can have false positives under extreme conditions. So we generally
 * want a higher threshold for soft lockups than for hard lockups. So we couple
 * the thresholds with a factor: we make the soft threshold twice the amount of
 * time the hard threshold is.
 */
static int get_softlockup_thresh(void)
{
	return watchdog_thresh * 2;
}

/*
 * Returns seconds, approximately. We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(void)
{
	return running_clock() >> 30LL; /* 2^30 ~= 10^9 */
}

static void set_sample_period(void)
{
	/*
	 * convert watchdog_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer several chances (two
	 * or three with the current relation between the soft
	 * and hard thresholds) to increment before the
	 * hardlockup detector generates a warning
	 */
	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / NUM_SAMPLE_PERIODS);
	watchdog_update_hrtimer_threshold(sample_period);
}
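/*
 * Worked example with the defaults: watchdog_thresh = 10 gives a soft
 * threshold of 20 seconds, so sample_period = 20 * (1e9 / 5) ns = 4
 * seconds; the per-CPU hrtimer therefore fires five times per soft
 * threshold window.
 */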

static void update_report_ts(void)
{
	__this_cpu_write(watchdog_report_ts, get_timestamp());
}

/* Commands for resetting the watchdog */
static void update_touch_ts(void)
{
	__this_cpu_write(watchdog_touch_ts, get_timestamp());
	update_report_ts();
}

/**
 * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
 *
 * Call when the scheduler may have stalled for legitimate reasons
 * preventing the watchdog task from executing - e.g. the scheduler
 * entering idle state. This should only be used for scheduler events.
 * Use touch_softlockup_watchdog() for everything else.
 */
notrace void touch_softlockup_watchdog_sched(void)
{
	/*
	 * Preemption can be enabled. It doesn't matter which CPU's watchdog
	 * report period gets restarted here, so use the raw_ operation.
	 */
	raw_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT);
}

notrace void touch_softlockup_watchdog(void)
{
	touch_softlockup_watchdog_sched();
	wq_watchdog_touch(raw_smp_processor_id());
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

void touch_all_softlockup_watchdogs(void)
{
	int cpu;

	/*
	 * watchdog_mutex cannot be taken here, as this might be called
	 * from (soft)interrupt context, so the access to
	 * watchdog_allowed_mask might race with a concurrent update.
	 *
	 * The watchdog time stamp can race against a concurrent real
	 * update as well; the only side effect might be a cycle delay for
	 * the softlockup check.
	 */
	for_each_cpu(cpu, &watchdog_allowed_mask) {
		per_cpu(watchdog_report_ts, cpu) = SOFTLOCKUP_DELAY_REPORT;
		wq_watchdog_touch(cpu);
	}
}

void touch_softlockup_watchdog_sync(void)
{
	__this_cpu_write(softlockup_touch_sync, true);
	__this_cpu_write(watchdog_report_ts, SOFTLOCKUP_DELAY_REPORT);
}

static int is_softlockup(unsigned long touch_ts,
			 unsigned long period_ts,
			 unsigned long now)
{
	if ((watchdog_enabled & WATCHDOG_SOFTOCKUP_ENABLED) && watchdog_thresh) {
		/*
		 * If period_ts has not been updated during a sample_period, then
		 * in the subsequent few sample_periods, period_ts might also not
		 * be updated, which could indicate a potential softlockup. In
		 * this case, if we suspect the cause of the potential softlockup
		 * might be an interrupt storm, then we need to count the
		 * interrupts to find which interrupt is storming.
		 */
		if (time_after_eq(now, period_ts + get_softlockup_thresh() / NUM_SAMPLE_PERIODS) &&
		    need_counting_irqs())
			start_counting_irqs();

		/*
		 * A poorly behaving BPF scheduler can live-lock the system into
		 * soft lockups. Tell sched_ext to try ejecting the BPF
		 * scheduler when close to a soft lockup.
		 */
		if (time_after_eq(now, period_ts + get_softlockup_thresh() * 3 / 4))
			scx_softlockup(now - touch_ts);

		/* Warn about unreasonable delays. */
		if (time_after(now, period_ts + get_softlockup_thresh()))
			return now - touch_ts;
	}
	return 0;
}
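/*
 * Timeline with the default thresholds (soft threshold 20s, sample
 * period 4s), measured from the last report-window update: interrupt
 * counting may start 4s in (20 / NUM_SAMPLE_PERIODS), sched_ext is
 * notified at 15s (3/4 of the threshold), and the lockup is reported
 * once more than 20s have elapsed.
 */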

/* watchdog detector functions */
static DEFINE_PER_CPU(struct completion, softlockup_completion);
static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);

/*
 * The watchdog feed function - touches the timestamp.
 *
 * It only runs once per sample period (4 seconds by default) to
 * reset the softlockup timestamp. If this gets delayed for more than
 * 2*watchdog_thresh seconds then the debug-printout triggers in
 * watchdog_timer_fn().
 */
static int softlockup_fn(void *data)
{
	update_touch_ts();
	stop_counting_irqs();
	complete(this_cpu_ptr(&softlockup_completion));

	return 0;
}

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	unsigned long touch_ts, period_ts, now;
	struct pt_regs *regs = get_irq_regs();
	int duration;
	int softlockup_all_cpu_backtrace;
	unsigned long flags;

	if (!watchdog_enabled)
		return HRTIMER_NORESTART;

	/*
	 * Pass the buddy check if a panic is in progress.
	 */
	if (panic_in_progress())
		return HRTIMER_NORESTART;

	softlockup_all_cpu_backtrace = (softlockup_si_mask & SYS_INFO_ALL_BT) ?
					1 : sysctl_softlockup_all_cpu_backtrace;

	watchdog_hardlockup_kick();

	/* kick the softlockup detector */
	if (completion_done(this_cpu_ptr(&softlockup_completion))) {
		reinit_completion(this_cpu_ptr(&softlockup_completion));
		stop_one_cpu_nowait(smp_processor_id(),
				    softlockup_fn, NULL,
				    this_cpu_ptr(&softlockup_stop_work));
	}

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));

	/*
	 * Read the current timestamp first. It might become invalid anytime
	 * when a virtual machine is stopped by the host or when the watchdog
	 * is touched from NMI.
	 */
	now = get_timestamp();
	/*
	 * If a virtual machine is stopped by the host it can look to
	 * the watchdog like a soft lockup. This function touches the watchdog.
	 */
	kvm_check_and_clear_guest_paused();
	/*
	 * The stored timestamp is comparable with @now only when not touched.
	 * It might get touched anytime from NMI. Make sure that is_softlockup()
	 * uses the same (valid) value.
	 */
	period_ts = READ_ONCE(*this_cpu_ptr(&watchdog_report_ts));

	update_cpustat();

	/* Reset the interval when touched by known problematic code. */
	if (period_ts == SOFTLOCKUP_DELAY_REPORT) {
		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			__this_cpu_write(softlockup_touch_sync, false);
			sched_clock_tick();
		}

		update_report_ts();
		return HRTIMER_RESTART;
	}

	/* Check for a softlockup. */
	touch_ts = __this_cpu_read(watchdog_touch_ts);
	duration = is_softlockup(touch_ts, period_ts, now);
	if (unlikely(duration)) {
#ifdef CONFIG_SYSFS
		++softlockup_count;
#endif

		/*
		 * Prevent multiple soft-lockup reports if one CPU is already
		 * engaged in dumping all CPU back traces.
		 */
		if (softlockup_all_cpu_backtrace) {
			if (test_and_set_bit_lock(0, &soft_lockup_nmi_warn))
				return HRTIMER_RESTART;
		}

		/* Start period for the next softlockup warning. */
		update_report_ts();

		printk_cpu_sync_get_irqsave(flags);
		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			 smp_processor_id(), duration,
			 current->comm, task_pid_nr(current));
		report_cpu_status();
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();
		printk_cpu_sync_put_irqrestore(flags);

		if (softlockup_all_cpu_backtrace) {
			trigger_allbutcpu_cpu_backtrace(smp_processor_id());
			if (!softlockup_panic)
				clear_bit_unlock(0, &soft_lockup_nmi_warn);
		}

		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
		sys_info(softlockup_si_mask & ~SYS_INFO_ALL_BT);
		if (softlockup_panic)
			panic("softlockup: hung tasks");
	}

	return HRTIMER_RESTART;
}

static void watchdog_enable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
	struct completion *done = this_cpu_ptr(&softlockup_completion);

	WARN_ON_ONCE(cpu != smp_processor_id());

	init_completion(done);
	complete(done);

	/*
	 * Start the timer first to prevent the hardlockup watchdog triggering
	 * before the timer has a chance to fire.
	 */
	hrtimer_setup(hrtimer, watchdog_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
		      HRTIMER_MODE_REL_PINNED_HARD);

	/* Initialize timestamp */
	update_touch_ts();
	/* Enable the hardlockup detector */
	if (watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED)
		watchdog_hardlockup_enable(cpu);
}

static void watchdog_disable(unsigned int cpu)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

	WARN_ON_ONCE(cpu != smp_processor_id());

	/*
	 * Disable the hardlockup detector first. That prevents that a large
	 * delay between disabling the timer and disabling the hardlockup
	 * detector causes a false positive.
	 */
	watchdog_hardlockup_disable(cpu);
	hrtimer_cancel(hrtimer);
	wait_for_completion(this_cpu_ptr(&softlockup_completion));
}

static int softlockup_stop_fn(void *data)
{
	watchdog_disable(smp_processor_id());
	return 0;
}

static void softlockup_stop_all(void)
{
	int cpu;

	if (!softlockup_initialized)
		return;

	for_each_cpu(cpu, &watchdog_allowed_mask)
		smp_call_on_cpu(cpu, softlockup_stop_fn, NULL, false);

	cpumask_clear(&watchdog_allowed_mask);
}

static int softlockup_start_fn(void *data)
{
	watchdog_enable(smp_processor_id());
	return 0;
}

static void softlockup_start_all(void)
{
	int cpu;

	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
	for_each_cpu(cpu, &watchdog_allowed_mask)
		smp_call_on_cpu(cpu, softlockup_start_fn, NULL, false);
}

int lockup_detector_online_cpu(unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
		watchdog_enable(cpu);
	return 0;
}

int lockup_detector_offline_cpu(unsigned int cpu)
{
	if (cpumask_test_cpu(cpu, &watchdog_allowed_mask))
		watchdog_disable(cpu);
	return 0;
}

static void __lockup_detector_reconfigure(bool thresh_changed)
{
	cpus_read_lock();
	watchdog_hardlockup_stop();

	softlockup_stop_all();
	/*
	 * To prevent watchdog_timer_fn from using the old interval and
	 * the new watchdog_thresh at the same time, which could lead to
	 * false softlockup reports, it is necessary to update
	 * watchdog_thresh only after the softlockup detector has been
	 * stopped.
	 */
	if (thresh_changed)
		watchdog_thresh = READ_ONCE(watchdog_thresh_next);
	set_sample_period();
	lockup_detector_update_enable();
	if (watchdog_enabled && watchdog_thresh)
		softlockup_start_all();

	watchdog_hardlockup_start();
	cpus_read_unlock();
}

void lockup_detector_reconfigure(void)
{
	mutex_lock(&watchdog_mutex);
	__lockup_detector_reconfigure(false);
	mutex_unlock(&watchdog_mutex);
}

/*
 * Create the watchdog infrastructure and configure the detector(s).
 */
static __init void lockup_detector_setup(void)
{
	/*
	 * If sysctl is off and watchdog got disabled on the command line,
	 * nothing to do here.
	 */
	lockup_detector_update_enable();

	if (!IS_ENABLED(CONFIG_SYSCTL) &&
	    !(watchdog_enabled && watchdog_thresh))
		return;

	mutex_lock(&watchdog_mutex);
	__lockup_detector_reconfigure(false);
	softlockup_initialized = true;
	mutex_unlock(&watchdog_mutex);
}

#else /* CONFIG_SOFTLOCKUP_DETECTOR */
static void __lockup_detector_reconfigure(bool thresh_changed)
{
	cpus_read_lock();
	watchdog_hardlockup_stop();
	if (thresh_changed)
		watchdog_thresh = READ_ONCE(watchdog_thresh_next);
	lockup_detector_update_enable();
	watchdog_hardlockup_start();
	cpus_read_unlock();
}
void lockup_detector_reconfigure(void)
{
	__lockup_detector_reconfigure(false);
}
static inline void lockup_detector_setup(void)
{
	__lockup_detector_reconfigure(false);
}
#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */

/**
 * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
 *
 * Special interface for parisc. It prevents lockup detector warnings from
 * the default pm_power_off() function, which busy loops forever.
 */
void lockup_detector_soft_poweroff(void)
{
	watchdog_enabled = 0;
}

#ifdef CONFIG_SYSCTL

/* Propagate any changes to the watchdog infrastructure */
static void proc_watchdog_update(bool thresh_changed)
{
	/* Remove impossible cpus to keep sysctl output clean. */
	cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
	__lockup_detector_reconfigure(thresh_changed);
}

/*
 * common function for watchdog, nmi_watchdog and soft_watchdog parameter
 *
 * caller             | table->data points to            | 'which'
 * -------------------|----------------------------------|-------------------------------
 * proc_watchdog      | watchdog_user_enabled            | WATCHDOG_HARDLOCKUP_ENABLED |
 *                    |                                  | WATCHDOG_SOFTOCKUP_ENABLED
 * -------------------|----------------------------------|-------------------------------
 * proc_nmi_watchdog  | watchdog_hardlockup_user_enabled | WATCHDOG_HARDLOCKUP_ENABLED
 * -------------------|----------------------------------|-------------------------------
 * proc_soft_watchdog | watchdog_softlockup_user_enabled | WATCHDOG_SOFTOCKUP_ENABLED
 */
static int proc_watchdog_common(int which, const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old, *param = table->data;

	mutex_lock(&watchdog_mutex);

	old = *param;
	if (!write) {
		/*
		 * On read synchronize the userspace interface. This is a
		 * racy snapshot.
		 */
		*param = (watchdog_enabled & which) != 0;
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		*param = old;
	} else {
		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
		if (!err && old != READ_ONCE(*param))
			proc_watchdog_update(false);
	}
	mutex_unlock(&watchdog_mutex);
	return err;
}
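/*
 * Usage example (standard procfs interface):
 * "echo 0 > /proc/sys/kernel/watchdog" clears watchdog_user_enabled, and
 * the subsequent proc_watchdog_update() call disables both the hard and
 * the soft lockup detector.
 */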

/*
 * /proc/sys/kernel/watchdog
 */
static int proc_watchdog(const struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED |
				    WATCHDOG_SOFTOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

/*
 * /proc/sys/kernel/nmi_watchdog
 */
static int proc_nmi_watchdog(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!watchdog_hardlockup_available && write)
		return -ENOTSUPP;
	return proc_watchdog_common(WATCHDOG_HARDLOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_SOFTLOCKUP_DETECTOR
/*
 * /proc/sys/kernel/soft_watchdog
 */
static int proc_soft_watchdog(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_watchdog_common(WATCHDOG_SOFTOCKUP_ENABLED,
				    table, write, buffer, lenp, ppos);
}
#endif

/*
 * /proc/sys/kernel/watchdog_thresh
 */
static int proc_watchdog_thresh(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int err, old;

	mutex_lock(&watchdog_mutex);

	watchdog_thresh_next = READ_ONCE(watchdog_thresh);

	old = watchdog_thresh_next;
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (!err && write && old != READ_ONCE(watchdog_thresh_next))
		proc_watchdog_update(true);

	mutex_unlock(&watchdog_mutex);
	return err;
}
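/*
 * Example: "echo 20 > /proc/sys/kernel/watchdog_thresh" sets the hard
 * lockup threshold to 20 seconds; the soft lockup threshold becomes 40
 * seconds (2 * watchdog_thresh) and the sample period becomes 8 seconds
 * once the detectors are restarted with the new value.
 */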

/*
 * The cpumask is the mask of possible cpus that the watchdog can run
 * on, not the mask of cpus it is actually running on. This allows the
 * user to specify a mask that will include cpus that have not yet
 * been brought online, if desired.
 */
static int proc_watchdog_cpumask(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	int err;

	mutex_lock(&watchdog_mutex);

	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
	if (!err && write)
		proc_watchdog_update(false);

	mutex_unlock(&watchdog_mutex);
	return err;
}
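/*
 * Example: proc_do_large_bitmap() accepts CPU range lists, so
 * "echo 0-3,8 > /proc/sys/kernel/watchdog_cpumask" restricts the
 * watchdog to CPUs 0-3 and 8 on the next reconfiguration.
 */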

static const int sixty = 60;

static const struct ctl_table watchdog_sysctls[] = {
	{
		.procname	= "watchdog",
		.data		= &watchdog_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "watchdog_thresh",
		.data		= &watchdog_thresh_next,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_watchdog_thresh,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&sixty,
	},
	{
		.procname	= "watchdog_cpumask",
		.data		= &watchdog_cpumask_bits,
		.maxlen		= NR_CPUS,
		.mode		= 0644,
		.proc_handler	= proc_watchdog_cpumask,
	},
#ifdef CONFIG_SOFTLOCKUP_DETECTOR
	{
		.procname	= "soft_watchdog",
		.data		= &watchdog_softlockup_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_soft_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "softlockup_panic",
		.data		= &softlockup_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "softlockup_sys_info",
		.data		= &softlockup_si_mask,
		.maxlen		= sizeof(softlockup_si_mask),
		.mode		= 0644,
		.proc_handler	= sysctl_sys_info_handler,
	},
#ifdef CONFIG_SMP
	{
		.procname	= "softlockup_all_cpu_backtrace",
		.data		= &sysctl_softlockup_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR
	{
		.procname	= "hardlockup_panic",
		.data		= &hardlockup_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "hardlockup_sys_info",
		.data		= &hardlockup_si_mask,
		.maxlen		= sizeof(hardlockup_si_mask),
		.mode		= 0644,
		.proc_handler	= sysctl_sys_info_handler,
	},
#ifdef CONFIG_SMP
	{
		.procname	= "hardlockup_all_cpu_backtrace",
		.data		= &sysctl_hardlockup_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
#endif
	{
		.procname	= "nmi_watchdog",
		.data		= &watchdog_hardlockup_user_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_nmi_watchdog,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
};

static void __init watchdog_sysctl_init(void)
{
	register_sysctl_init("kernel", watchdog_sysctls);
}

#else
#define watchdog_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */

static void __init lockup_detector_delay_init(struct work_struct *work);
static bool allow_lockup_detector_init_retry __initdata;

static struct work_struct detector_work __initdata =
		__WORK_INITIALIZER(detector_work, lockup_detector_delay_init);

static void __init lockup_detector_delay_init(struct work_struct *work)
{
	int ret;

	ret = watchdog_hardlockup_probe();
	if (ret) {
		if (ret == -ENODEV)
			pr_info("NMI not fully supported\n");
		else
			pr_info("Delayed init of the lockup detector failed: %d\n", ret);
		pr_info("Hard watchdog permanently disabled\n");
		return;
	}

	allow_lockup_detector_init_retry = false;

	watchdog_hardlockup_available = true;
	lockup_detector_setup();
}

/*
 * lockup_detector_retry_init - retry initializing the lockup detector
 * if possible.
 *
 * Retry hardlockup detector init. It is useful when it requires some
 * functionality that has to be initialized later on a particular
 * platform.
 */
void __init lockup_detector_retry_init(void)
{
	/* Must be called before late init calls */
	if (!allow_lockup_detector_init_retry)
		return;

	schedule_work(&detector_work);
}

/*
 * Ensure that the optional delayed hardlockup init has been processed
 * before the init code and memory are freed.
 */
static int __init lockup_detector_check(void)
{
	/* Prevent any later retry. */
	allow_lockup_detector_init_retry = false;

	/* Make sure no work is pending. */
	flush_work(&detector_work);

	watchdog_sysctl_init();

	return 0;
}
late_initcall_sync(lockup_detector_check);

void __init lockup_detector_init(void)
{
	if (tick_nohz_full_enabled())
		pr_info("Disabling watchdog on nohz_full cores by default\n");

	cpumask_copy(&watchdog_cpumask,
		     housekeeping_cpumask(HK_TYPE_TIMER));

	if (!watchdog_hardlockup_probe())
		watchdog_hardlockup_available = true;
	else
		allow_lockup_detector_init_retry = true;

	lockup_detector_setup();
}