smp.c source code [linux/arch/powerpc/kernel/smp.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* SMP support for ppc.
4	*
5	* Written by Cort Dougan (cort@cs.nmt.edu) borrowing a great
6	* deal of code from the sparc and intel versions.
7	*
8	* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
9	*
10	* PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
11	* Mike Corrigan {engebret\|bergner\|mikec}@us.ibm.com
12	*/
13
14	#undef DEBUG
15
16	#include <linux/kernel.h>
17	#include <linux/export.h>
18	#include <linux/sched/mm.h>
19	#include <linux/sched/task_stack.h>
20	#include <linux/sched/topology.h>
21	#include <linux/smp.h>
22	#include <linux/interrupt.h>
23	#include <linux/delay.h>
24	#include <linux/init.h>
25	#include <linux/spinlock.h>
26	#include <linux/cache.h>
27	#include <linux/err.h>
28	#include <linux/device.h>
29	#include <linux/cpu.h>
30	#include <linux/notifier.h>
31	#include <linux/topology.h>
32	#include <linux/profile.h>
33	#include <linux/processor.h>
34	#include <linux/random.h>
35	#include <linux/stackprotector.h>
36	#include <linux/pgtable.h>
37	#include <linux/clockchips.h>
38	#include <linux/kexec.h>
39
40	#include <asm/ptrace.h>
41	#include <linux/atomic.h>
42	#include <asm/irq.h>
43	#include <asm/hw_irq.h>
44	#include <asm/kvm_ppc.h>
45	#include <asm/dbell.h>
46	#include <asm/page.h>
47	#include <asm/smp.h>
48	#include <asm/time.h>
49	#include <asm/machdep.h>
50	#include <asm/mmu_context.h>
51	#include <asm/cputhreads.h>
52	#include <asm/cputable.h>
53	#include <asm/mpic.h>
54	#include <asm/vdso_datapage.h>
55	#ifdef CONFIG_PPC64
56	#include <asm/paca.h>
57	#endif
58	#include <asm/vdso.h>
59	#include <asm/debug.h>
60	#include <asm/cpu_has_feature.h>
61	#include <asm/ftrace.h>
62	#include <asm/kup.h>
63	#include <asm/fadump.h>
64	#include <asm/systemcfg.h>
65
66	#include <trace/events/ipi.h>
67
68	#ifdef DEBUG
69	#include <asm/udbg.h>
70	#define DBG(fmt...) udbg_printf(fmt)
71	#else
72	#define DBG(fmt...)
73	#endif
74
#ifdef CONFIG_HOTPLUG_CPU
/* State of each CPU during hotplug phases */
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
79
80	struct task_struct *secondary_current;
81	bool has_big_cores __ro_after_init;
82	bool coregroup_enabled __ro_after_init;
83	bool thread_group_shares_l2 __ro_after_init;
84	bool thread_group_shares_l3 __ro_after_init;
85
86	DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
87	DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
88	DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
89	DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
90	static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
91
92	EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
93	EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
94	EXPORT_PER_CPU_SYMBOL(cpu_core_map);
95	EXPORT_SYMBOL_GPL(has_big_cores);
96
/* Capacity of thread_groups.thread_list, summed over all groups of one property. */
#define MAX_THREAD_LIST_SIZE	8
/* Values of thread_groups.property. */
#define THREAD_GROUP_SHARE_L1   1
#define THREAD_GROUP_SHARE_L2_L3 2

/* One parsed "ibm,thread-groups" sub-array (see parse_thread_groups()). */
struct thread_groups {
	unsigned int property;		/* what the grouped threads share */
	unsigned int nr_groups;		/* number of groups for @property */
	unsigned int threads_per_group;	/* threads in each group */
	/* hardware thread ids, group after group */
	unsigned int thread_list[MAX_THREAD_LIST_SIZE];
};

/* Maximum number of properties that groups of threads within a core can share */
#define MAX_THREAD_GROUP_PROPERTIES 2

/* All thread-group descriptions parsed for one CPU node. */
struct thread_groups_list {
	unsigned int nr_properties;	/* valid entries in property_tgs */
	struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES];
};
114
115	static struct thread_groups_list tgl[NR_CPUS] __initdata;
116	/*
117	* On big-cores system, thread_group_l1_cache_map for each CPU corresponds to
118	* the set its siblings that share the L1-cache.
119	*/
120	DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
121
122	/*
123	* On some big-cores system, thread_group_l2_cache_map for each CPU
124	* corresponds to the set its siblings within the core that share the
125	* L2-cache.
126	*/
127	DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
128
129	/*
130	* On P10, thread_group_l3_cache_map for each CPU is equal to the
131	* thread_group_l2_cache_map
132	*/
133	DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
134
135	/ SMP operations for this machine /
136	struct smp_ops_t *smp_ops;
137
138	/ Can't be static due to PowerMac hackery /
139	volatile unsigned int cpu_callin_map[NR_CPUS];
140
141	int smt_enabled_at_boot = `1`;
142
143	/*
144	* Returns 1 if the specified cpu should be brought up during boot.
145	* Used to inhibit booting threads if they've been disabled or
146	* limited on the command line
147	*/
148	int smp_generic_cpu_bootable(unsigned int nr)
149	{
150	/ Special case - we inhibit secondary thread startup*
151	* during boot if the user requests it.
152	*/
153	if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
154	if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != `0`)
155	return `0`;
156	if (smt_enabled_at_boot
157	&& cpu_thread_in_core(nr) >= smt_enabled_at_boot)
158	return `0`;
159	}
160
161	return `1`;
162	}
163
164
#ifdef CONFIG_PPC64
/*
 * Start secondary CPU @nr.
 *
 * Returns -EINVAL when @nr is outside [0, nr_cpu_ids), 0 otherwise.
 */
int smp_generic_kick_cpu(int nr)
{
	if (nr < 0 || nr >= nr_cpu_ids)
		return -EINVAL;

	/*
	 * The processor is currently spinning, waiting for the
	 * cpu_start field to become non-zero. After we set cpu_start,
	 * the processor will continue on to secondary_start
	 */
	if (!paca_ptrs[nr]->cpu_start) {
		paca_ptrs[nr]->cpu_start = 1;
		smp_mb();
		return 0;
	}

#ifdef CONFIG_HOTPLUG_CPU
	/*
	 * Ok it's not there, so it might be soft-unplugged, let's
	 * try to bring it back
	 */
	generic_set_cpu_up(nr);
	smp_wmb();
	smp_send_reschedule(nr);
#endif /* CONFIG_HOTPLUG_CPU */

	return 0;
}
#endif /* CONFIG_PPC64 */
195
196	static irqreturn_t call_function_action(int irq, void *data)
197	{
198	generic_smp_call_function_interrupt();
199	return IRQ_HANDLED;
200	}
201
202	static irqreturn_t reschedule_action(int irq, void *data)
203	{
204	scheduler_ipi();
205	return IRQ_HANDLED;
206	}
207
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/* IRQ handler for PPC_MSG_TICK_BROADCAST; @irq and @data are unused. */
static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
	timer_broadcast_interrupt();

	return IRQ_HANDLED;
}
#endif
215
#ifdef CONFIG_NMI_IPI
/*
 * IRQ handler for PPC_MSG_NMI_IPI: hands the current interrupt registers
 * to smp_handle_nmi_ipi(). @irq and @data are unused.
 */
static irqreturn_t nmi_ipi_action(int irq, void *data)
{
	struct pt_regs *regs = get_irq_regs();

	smp_handle_nmi_ipi(regs);

	return IRQ_HANDLED;
}
#endif
223
224	static irq_handler_t smp_ipi_action[] = {
225	[PPC_MSG_CALL_FUNCTION] = call_function_action,
226	[PPC_MSG_RESCHEDULE] = reschedule_action,
227	#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
228	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
229	#endif
230	#ifdef CONFIG_NMI_IPI
231	[PPC_MSG_NMI_IPI] = nmi_ipi_action,
232	#endif
233	};
234
235	/*
236	* The NMI IPI is a fallback and not truly non-maskable. It is simpler
237	* than going through the call function infrastructure, and strongly
238	* serialized, so it is more appropriate for debugging.
239	*/
240	const char *smp_ipi_name[] = {
241	[PPC_MSG_CALL_FUNCTION] = "ipi call function",
242	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
243	#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
244	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
245	#endif
246	#ifdef CONFIG_NMI_IPI
247	[PPC_MSG_NMI_IPI] = "nmi ipi",
248	#endif
249	};
250
251	/ optional function to request ipi, for controllers with >= 4 ipis /
252	int smp_request_message_ipi(int virq, int msg)
253	{
254	int err;
255
256	if (msg < `0` \|\| msg > PPC_MSG_NMI_IPI)
257	return -EINVAL;
258	#ifndef CONFIG_NMI_IPI
259	if (msg == PPC_MSG_NMI_IPI)
260	return `1`;
261	#endif
262
263	err = request_irq(irq: virq, handler: smp_ipi_action[msg],
264	IRQF_PERCPU \| IRQF_NO_THREAD \| IRQF_NO_SUSPEND,
265	name: smp_ipi_name[msg], NULL);
266	WARN(err < `0`, "unable to request_irq %d for %s (rc %d)\n",
267	virq, smp_ipi_name[msg], err);
268
269	return err;
270	}
271
272	#ifdef CONFIG_PPC_SMP_MUXED_IPI
273	struct cpu_messages {
274	long messages; / current messages /
275	};
276	static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
277
278	void smp_muxed_ipi_set_message(int cpu, int msg)
279	{
280	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
281	char message = (char* *)&info->messages;
282
283	/*
284	* Order previous accesses before accesses in the IPI handler.
285	*/
286	smp_mb();
287	WRITE_ONCE(message[msg], `1`);
288	}
289
290	void smp_muxed_ipi_message_pass(int cpu, int msg)
291	{
292	smp_muxed_ipi_set_message(cpu, msg);
293
294	/*
295	* cause_ipi functions are required to include a full barrier
296	* before doing whatever causes the IPI.
297	*/
298	smp_ops->cause_ipi(cpu);
299	}
300
/*
 * Value of the message word when byte number A (in memory order) holds 1,
 * i.e. the bit the demux loop tests for message A.
 */
#ifdef __BIG_ENDIAN__
#define IPI_MESSAGE(A)	(1UL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
#define IPI_MESSAGE(A)	(1UL << (8 * (A)))
#endif
306
307	irqreturn_t smp_ipi_demux(void)
308	{
309	mb(); / order any irq clear /
310
311	return smp_ipi_demux_relaxed();
312	}
313
314	/ sync-free variant. Callers should ensure synchronization /
315	irqreturn_t smp_ipi_demux_relaxed(void)
316	{
317	struct cpu_messages *info;
318	unsigned long all;
319
320	info = this_cpu_ptr(&ipi_message);
321	do {
322	all = xchg(&info->messages, `0`);
323	#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
324	/*
325	* Must check for PPC_MSG_RM_HOST_ACTION messages
326	* before PPC_MSG_CALL_FUNCTION messages because when
327	* a VM is destroyed, we call kick_all_cpus_sync()
328	* to ensure that any pending PPC_MSG_RM_HOST_ACTION
329	* messages have completed before we free any VCPUs.
330	*/
331	if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
332	kvmppc_xics_ipi_action();
333	#endif
334	if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
335	generic_smp_call_function_interrupt();
336	if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
337	scheduler_ipi();
338	#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
339	if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
340	timer_broadcast_interrupt();
341	#endif
342	#ifdef CONFIG_NMI_IPI
343	if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
344	nmi_ipi_action(`0`, NULL);
345	#endif
346	} while (READ_ONCE(info->messages));
347
348	return IRQ_HANDLED;
349	}
350	#endif /* CONFIG_PPC_SMP_MUXED_IPI */
351
352	static inline void do_message_pass(int cpu, int msg)
353	{
354	if (smp_ops->message_pass)
355	smp_ops->message_pass(cpu, msg);
356	#ifdef CONFIG_PPC_SMP_MUXED_IPI
357	else
358	smp_muxed_ipi_message_pass(cpu, msg);
359	#endif
360	}
361
362	void arch_smp_send_reschedule(int cpu)
363	{
364	if (likely(smp_ops))
365	do_message_pass(cpu, msg: PPC_MSG_RESCHEDULE);
366	}
367	EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
368
369	void arch_send_call_function_single_ipi(int cpu)
370	{
371	do_message_pass(cpu, msg: PPC_MSG_CALL_FUNCTION);
372	}
373
374	void arch_send_call_function_ipi_mask(const struct cpumask *mask)
375	{
376	unsigned int cpu;
377
378	for_each_cpu(cpu, mask)
379	do_message_pass(cpu, msg: PPC_MSG_CALL_FUNCTION);
380	}
381
382	#ifdef CONFIG_NMI_IPI
383
384	/*
385	* "NMI IPI" system.
386	*
387	* NMI IPIs may not be recoverable, so should not be used as ongoing part of
388	* a running system. They can be used for crash, debug, halt/reboot, etc.
389	*
390	* The IPI call waits with interrupts disabled until all targets enter the
391	* NMI handler, then returns. Subsequent IPIs can be issued before targets
392	* have returned from their handlers, so there is no guarantee about
393	* concurrency or re-entrancy.
394	*
395	* A new NMI can be issued before all targets exit the handler.
396	*
397	* The IPI call may time out without all targets entering the NMI handler.
398	* In that case, there is some logic to recover (and ignore subsequent
399	* NMI interrupts that may eventually be raised), but the platform interrupt
400	* handler may not be able to distinguish this from other exception causes,
401	* which may cause a crash.
402	*/
403
404	static atomic_t __nmi_ipi_lock = ATOMIC_INIT(`0`);
405	static struct cpumask nmi_ipi_pending_mask;
406	static bool nmi_ipi_busy = false;
407	static void (nmi_ipi_function)(struct* pt_regs *) = NULL;
408
409	noinstr static void nmi_ipi_lock_start(unsigned long *flags)
410	{
411	raw_local_irq_save(*flags);
412	hard_irq_disable();
413	while (raw_atomic_cmpxchg(&__nmi_ipi_lock, `0`, `1`) == `1`) {
414	raw_local_irq_restore(*flags);
415	spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == `0`);
416	raw_local_irq_save(*flags);
417	hard_irq_disable();
418	}
419	}
420
421	noinstr static void nmi_ipi_lock(void)
422	{
423	while (raw_atomic_cmpxchg(&__nmi_ipi_lock, `0`, `1`) == `1`)
424	spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == `0`);
425	}
426
427	noinstr static void nmi_ipi_unlock(void)
428	{
429	smp_mb();
430	WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != `1`);
431	raw_atomic_set(&__nmi_ipi_lock, `0`);
432	}
433
434	noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
435	{
436	nmi_ipi_unlock();
437	raw_local_irq_restore(*flags);
438	}
439
440	/*
441	* Platform NMI handler calls this to ack
442	*/
443	noinstr int smp_handle_nmi_ipi(struct pt_regs *regs)
444	{
445	void (fn)(struct* pt_regs *) = NULL;
446	unsigned long flags;
447	int me = raw_smp_processor_id();
448	int ret = `0`;
449
450	/*
451	* Unexpected NMIs are possible here because the interrupt may not
452	* be able to distinguish NMI IPIs from other types of NMIs, or
453	* because the caller may have timed out.
454	*/
455	nmi_ipi_lock_start(&flags);
456	if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
457	cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
458	fn = READ_ONCE(nmi_ipi_function);
459	WARN_ON_ONCE(!fn);
460	ret = `1`;
461	}
462	nmi_ipi_unlock_end(&flags);
463
464	if (fn)
465	fn(regs);
466
467	return ret;
468	}
469
470	static void do_smp_send_nmi_ipi(int cpu, bool safe)
471	{
472	if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
473	return;
474
475	if (cpu >= `0`) {
476	do_message_pass(cpu, PPC_MSG_NMI_IPI);
477	} else {
478	int c;
479
480	for_each_online_cpu(c) {
481	if (c == raw_smp_processor_id())
482	continue;
483	do_message_pass(c, PPC_MSG_NMI_IPI);
484	}
485	}
486	}
487
488	/*
489	* - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
490	* - fn is the target callback function.
491	* - delay_us > 0 is the delay before giving up waiting for targets to
492	* begin executing the handler, == 0 specifies indefinite delay.
493	*/
494	static int __smp_send_nmi_ipi(int cpu, void (fn)(struct* pt_regs *),
495	u64 delay_us, bool safe)
496	{
497	unsigned long flags;
498	int me = raw_smp_processor_id();
499	int ret = `1`;
500
501	BUG_ON(cpu == me);
502	BUG_ON(cpu < `0` && cpu != NMI_IPI_ALL_OTHERS);
503
504	if (unlikely(!smp_ops))
505	return `0`;
506
507	nmi_ipi_lock_start(&flags);
508	while (nmi_ipi_busy) {
509	nmi_ipi_unlock_end(&flags);
510	spin_until_cond(!nmi_ipi_busy);
511	nmi_ipi_lock_start(&flags);
512	}
513	nmi_ipi_busy = true;
514	nmi_ipi_function = fn;
515
516	WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
517
518	if (cpu < `0`) {
519	/ ALL_OTHERS /
520	cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
521	cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
522	} else {
523	cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
524	}
525
526	nmi_ipi_unlock();
527
528	/ Interrupts remain hard disabled /
529
530	do_smp_send_nmi_ipi(cpu, safe);
531
532	nmi_ipi_lock();
533	/ nmi_ipi_busy is set here, so unlock/lock is okay /
534	while (!cpumask_empty(&nmi_ipi_pending_mask)) {
535	nmi_ipi_unlock();
536	udelay(`1`);
537	nmi_ipi_lock();
538	if (delay_us) {
539	delay_us--;
540	if (!delay_us)
541	break;
542	}
543	}
544
545	if (!cpumask_empty(&nmi_ipi_pending_mask)) {
546	/ Timeout waiting for CPUs to call smp_handle_nmi_ipi /
547	ret = `0`;
548	cpumask_clear(&nmi_ipi_pending_mask);
549	}
550
551	nmi_ipi_function = NULL;
552	nmi_ipi_busy = false;
553
554	nmi_ipi_unlock_end(&flags);
555
556	return ret;
557	}
558
559	int smp_send_nmi_ipi(int cpu, void (fn)(struct* pt_regs *), u64 delay_us)
560	{
561	return __smp_send_nmi_ipi(cpu, fn, delay_us, false);
562	}
563
564	int smp_send_safe_nmi_ipi(int cpu, void (fn)(struct* pt_regs *), u64 delay_us)
565	{
566	return __smp_send_nmi_ipi(cpu, fn, delay_us, true);
567	}
568	#endif /* CONFIG_NMI_IPI */
569
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/* Send a tick-broadcast IPI to every CPU in @mask. */
void tick_broadcast(const struct cpumask *mask)
{
	unsigned int target;

	for_each_cpu(target, mask)
		do_message_pass(target, PPC_MSG_TICK_BROADCAST);
}
#endif
579
580	#ifdef CONFIG_DEBUGGER
/* NMI IPI callback: forwards @regs to debugger_ipi(), ignoring its result. */
static void debugger_ipi_callback(struct pt_regs *regs)
{
	(void)debugger_ipi(regs);
}
585
586	void smp_send_debugger_break(void)
587	{
588	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, `1000000`);
589	}
590	#endif
591
#ifdef CONFIG_CRASH_DUMP
/*
 * Run @crash_ipi_callback on all other online CPUs via NMI IPI (waiting
 * at most 1000000us). When a kdump is in progress and crash_wake_offline
 * is set, present-but-offline CPUs are sent an NMI IPI as well.
 */
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
	int cpu;

	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
	if (kdump_in_progress() && crash_wake_offline) {
		for_each_present_cpu(cpu) {
			if (cpu_online(cpu))
				continue;
			/*
			 * crash_ipi_callback will wait for
			 * all cpus, including offline CPUs.
			 * We don't care about nmi_ipi_function.
			 * Offline cpus will jump straight into
			 * crash_ipi_callback, we can skip the
			 * entire NMI dance and waiting for
			 * cpus to clear pending mask, etc.
			 */
			do_smp_send_nmi_ipi(cpu, false);
		}
	}
}
#endif
616
617	void crash_smp_send_stop(void)
618	{
619	static bool stopped = false;
620
621	/*
622	* In case of fadump, register data for all CPUs is captured by f/w
623	* on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
624	* this rtas call to avoid tricky post processing of those CPUs'
625	* backtraces.
626	*/
627	if (should_fadump_crash())
628	return;
629
630	if (stopped)
631	return;
632
633	stopped = true;
634
635	#ifdef CONFIG_CRASH_DUMP
636	if (kexec_crash_image) {
637	crash_kexec_prepare();
638	return;
639	}
640	#endif
641
642	smp_send_stop();
643	}
644
645	#ifdef CONFIG_NMI_IPI
646	static void nmi_stop_this_cpu(struct pt_regs *regs)
647	{
648	/*
649	* IRQs are already hard disabled by the smp_handle_nmi_ipi.
650	*/
651	set_cpu_online(smp_processor_id(), false);
652
653	spin_begin();
654	while (`1`)
655	spin_cpu_relax();
656	}
657
658	void smp_send_stop(void)
659	{
660	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, `1000000`);
661	}
662
663	#else /* CONFIG_NMI_IPI */
664
665	static void stop_this_cpu(void *dummy)
666	{
667	hard_irq_disable();
668
669	/*
670	* Offlining CPUs in stop_this_cpu can result in scheduler warnings,
671	* (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
672	* to know other CPUs are offline before it breaks locks to flush
673	* printk buffers, in case we panic()ed while holding the lock.
674	*/
675	set_cpu_online(smp_processor_id(), online: false);
676
677	spin_begin();
678	while (`1`)
679	spin_cpu_relax();
680	}
681
682	void smp_send_stop(void)
683	{
684	static bool stopped = false;
685
686	/*
687	* Prevent waiting on csd lock from a previous smp_send_stop.
688	* This is racy, but in general callers try to do the right
689	* thing and only fire off one smp_send_stop (e.g., see
690	* kernel/panic.c)
691	*/
692	if (stopped)
693	return;
694
695	stopped = true;
696
697	smp_call_function(func: stop_this_cpu, NULL, wait: `0`);
698	}
699	#endif /* CONFIG_NMI_IPI */
700
701	static struct task_struct *current_set[NR_CPUS];
702
703	static void smp_store_cpu_info(int id)
704	{
705	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
706	#ifdef CONFIG_PPC_E500
707	per_cpu(next_tlbcam_idx, id)
708	= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - `1`;
709	#endif
710	}
711
/*
 * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
 * rather than just passing around the cpumask we pass around a function that
 * returns that cpumask for the given CPU.
 *
 * Marks @i and @j as related: each is set in the other's mask.
 */
static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
{
	cpumask_set_cpu(i, get_cpumask(j));
	cpumask_set_cpu(j, get_cpumask(i));
}
722
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Inverse of set_cpus_related(): clears @i from @j's mask and @j from
 * @i's mask, the masks being obtained through @get_cpumask.
 */
static void set_cpus_unrelated(int i, int j,
		struct cpumask *(*get_cpumask)(int))
{
	cpumask_clear_cpu(i, get_cpumask(j));
	cpumask_clear_cpu(j, get_cpumask(i));
}
#endif
731
732	/*
733	* Extends set_cpus_related. Instead of setting one CPU at a time in
734	* dstmask, set srcmask at oneshot. dstmask should be super set of srcmask.
735	*/
736	static void or_cpumasks_related(int i, int j, struct cpumask (srcmask)(int),
737	struct cpumask (dstmask)(int))
738	{
739	struct cpumask *mask;
740	int k;
741
742	mask = srcmask(j);
743	for_each_cpu(k, srcmask(i))
744	cpumask_or(dstp: dstmask(k), src1p: dstmask(k), src2p: mask);
745
746	if (i == j)
747	return;
748
749	mask = srcmask(i);
750	for_each_cpu(k, srcmask(j))
751	cpumask_or(dstp: dstmask(k), src1p: dstmask(k), src2p: mask);
752	}
753
754	/*
755	* parse_thread_groups: Parses the "ibm,thread-groups" device tree
756	* property for the CPU device node @dn and stores
757	* the parsed output in the thread_groups_list
758	* structure @tglp.
759	*
760	* @dn: The device node of the CPU device.
761	* @tglp: Pointer to a thread group list structure into which the parsed
762	* output of "ibm,thread-groups" is stored.
763	*
764	* ibm,thread-groups[0..N-1] array defines which group of threads in
765	* the CPU-device node can be grouped together based on the property.
766	*
767	* This array can represent thread groupings for multiple properties.
768	*
769	* ibm,thread-groups[i + 0] tells us the property based on which the
770	* threads are being grouped together. If this value is 1, it implies
771	* that the threads in the same group share L1, translation cache. If
772	* the value is 2, it implies that the threads in the same group share
773	* the same L2 cache.
774	*
775	* ibm,thread-groups[i+1] tells us how many such thread groups exist for the
776	* property ibm,thread-groups[i]
777	*
778	* ibm,thread-groups[i+2] tells us the number of threads in each such
779	* group.
780	* Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
781	*
782	* ibm,thread-groups[i+3..i+k+2] (is the list of threads identified by
783	* "ibm,ppc-interrupt-server#s" arranged as per their membership in
784	* the grouping.
785	*
786	* Example:
787	* If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
788	* This can be decomposed up into two consecutive arrays:
789	* a) [1,2,4,8,10,12,14,9,11,13,15]
790	* b) [2,2,4,8,10,12,14,9,11,13,15]
791	*
792	* where in,
793	*
794	* a) provides information of Property "1" being shared by "2" groups,
795	* each with "4" threads each. The "ibm,ppc-interrupt-server#s" of
796	* the first group is {8,10,12,14} and the
797	* "ibm,ppc-interrupt-server#s" of the second group is
798	* {9,11,13,15}. Property "1" is indicative of the thread in the
799	* group sharing L1 cache, translation cache and Instruction Data
800	* flow.
801	*
802	* b) provides information of Property "2" being shared by "2" groups,
803	* each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
804	* the first group is {8,10,12,14} and the
805	* "ibm,ppc-interrupt-server#s" of the second group is
806	* {9,11,13,15}. Property "2" indicates that the threads in each
807	* group share the L2-cache.
808	*
809	* Returns 0 on success, -EINVAL if the property does not exist,
810	* -ENODATA if property does not have a value, and -EOVERFLOW if the
811	* property data isn't large enough.
812	*/
813	static int parse_thread_groups(struct device_node *dn,
814	struct thread_groups_list *tglp)
815	{
816	unsigned int property_idx = `0`;
817	u32 *thread_group_array;
818	size_t total_threads;
819	int ret = `0`, count;
820	u32 *thread_list;
821	int i = `0`;
822
823	count = of_property_count_u32_elems(np: dn, propname: "ibm,thread-groups");
824	thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
825	ret = of_property_read_u32_array(np: dn, propname: "ibm,thread-groups",
826	out_values: thread_group_array, sz: count);
827	if (ret)
828	goto out_free;
829
830	while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
831	int j;
832	struct thread_groups *tg = &tglp->property_tgs[property_idx++];
833
834	tg->property = thread_group_array[i];
835	tg->nr_groups = thread_group_array[i + `1`];
836	tg->threads_per_group = thread_group_array[i + `2`];
837	total_threads = tg->nr_groups * tg->threads_per_group;
838
839	thread_list = &thread_group_array[i + `3`];
840
841	for (j = `0`; j < total_threads; j++)
842	tg->thread_list[j] = thread_list[j];
843	i = i + `3` + total_threads;
844	}
845
846	tglp->nr_properties = property_idx;
847
848	out_free:
849	kfree(objp: thread_group_array);
850	return ret;
851	}
852
853	/*
854	* get_cpu_thread_group_start : Searches the thread group in tg->thread_list
855	* that @cpu belongs to.
856	*
857	* @cpu : The logical CPU whose thread group is being searched.
858	* @tg : The thread-group structure of the CPU node which @cpu belongs
859	* to.
860	*
861	* Returns the index to tg->thread_list that points to the start
862	* of the thread_group that @cpu belongs to.
863	*
864	* Returns -1 if cpu doesn't belong to any of the groups pointed to by
865	* tg->thread_list.
866	*/
867	static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
868	{
869	int hw_cpu_id = get_hard_smp_processor_id(cpu);
870	int i, j;
871
872	for (i = `0`; i < tg->nr_groups; i++) {
873	int group_start = i * tg->threads_per_group;
874
875	for (j = `0`; j < tg->threads_per_group; j++) {
876	int idx = group_start + j;
877
878	if (tg->thread_list[idx] == hw_cpu_id)
879	return group_start;
880	}
881	}
882
883	return -`1`;
884	}
885
886	static struct thread_groups __init get_thread_groups(int* cpu,
887	int group_property,
888	int *err)
889	{
890	struct device_node *dn = of_get_cpu_node(cpu, NULL);
891	struct thread_groups_list *cpu_tgl = &tgl[cpu];
892	struct thread_groups *tg = NULL;
893	int i;
894	*err = `0`;
895
896	if (!dn) {
897	*err = -ENODATA;
898	return NULL;
899	}
900
901	if (!cpu_tgl->nr_properties) {
902	*err = parse_thread_groups(dn, tglp: cpu_tgl);
903	if (*err)
904	goto out;
905	}
906
907	for (i = `0`; i < cpu_tgl->nr_properties; i++) {
908	if (cpu_tgl->property_tgs[i].property == group_property) {
909	tg = &cpu_tgl->property_tgs[i];
910	break;
911	}
912	}
913
914	if (!tg)
915	*err = -EINVAL;
916	out:
917	of_node_put(node: dn);
918	return tg;
919	}
920
921	static int __init update_mask_from_threadgroup(cpumask_var_t mask, struct* thread_groups *tg,
922	int cpu, int cpu_group_start)
923	{
924	int first_thread = cpu_first_thread_sibling(cpu);
925	int i;
926
927	zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
928
929	for (i = first_thread; i < first_thread + threads_per_core; i++) {
930	int i_group_start = get_cpu_thread_group_start(cpu: i, tg);
931
932	if (unlikely(i_group_start == -`1`)) {
933	WARN_ON_ONCE(`1`);
934	return -ENODATA;
935	}
936
937	if (i_group_start == cpu_group_start)
938	cpumask_set_cpu(cpu: i, dstp: *mask);
939	}
940
941	return `0`;
942	}
943
944	static int __init init_thread_group_cache_map(int cpu, int cache_property)
945
946	{
947	int cpu_group_start = -`1`, err = `0`;
948	struct thread_groups *tg = NULL;
949	cpumask_var_t *mask = NULL;
950
951	if (cache_property != THREAD_GROUP_SHARE_L1 &&
952	cache_property != THREAD_GROUP_SHARE_L2_L3)
953	return -EINVAL;
954
955	tg = get_thread_groups(cpu, group_property: cache_property, err: &err);
956
957	if (!tg)
958	return err;
959
960	cpu_group_start = get_cpu_thread_group_start(cpu, tg);
961
962	if (unlikely(cpu_group_start == -`1`)) {
963	WARN_ON_ONCE(`1`);
964	return -ENODATA;
965	}
966
967	if (cache_property == THREAD_GROUP_SHARE_L1) {
968	mask = &per_cpu(thread_group_l1_cache_map, cpu);
969	update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
970	}
971	else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
972	mask = &per_cpu(thread_group_l2_cache_map, cpu);
973	update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
974	mask = &per_cpu(thread_group_l3_cache_map, cpu);
975	update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
976	}
977
978
979	return `0`;
980	}
981
982	static bool shared_caches __ro_after_init;
983
#ifdef CONFIG_SCHED_SMT
/*
 * Scheduler-domain flags for the SMT level: SD_SHARE_CPUCAPACITY and
 * SD_SHARE_LLC always, plus SD_ASYM_PACKING on CPUs with the
 * CPU_FTR_ASYM_SMT feature (announced once in the log).
 */
static int powerpc_smt_flags(void)
{
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;

	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
		flags |= SD_ASYM_PACKING;
	}
	return flags;
}
#endif
997
998	/*
999	* On shared processor LPARs scheduled on a big core (which has two or more
1000	* independent thread groups per core), prefer lower numbered CPUs, so
1001	* that workload consolidates to lesser number of cores.
1002	*/
1003	static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
1004
1005	/*
1006	* P9 has a slightly odd architecture where pairs of cores share an L2 cache.
1007	* This topology makes it much cheaper to migrate tasks between adjacent cores
1008	* since the migrated task remains cache hot. We want to take advantage of this
1009	* at the scheduler level so an extra topology level is required.
1010	*/
1011	static int powerpc_shared_cache_flags(void)
1012	{
1013	if (static_branch_unlikely(&splpar_asym_pack))
1014	return SD_SHARE_LLC \| SD_ASYM_PACKING;
1015
1016	return SD_SHARE_LLC;
1017	}
1018
1019	static int powerpc_shared_proc_flags(void)
1020	{
1021	if (static_branch_unlikely(&splpar_asym_pack))
1022	return SD_ASYM_PACKING;
1023
1024	return `0`;
1025	}
1026
1027	/*
1028	* We can't just pass cpu_l2_cache_mask() directly because
1029	* returns a non-const pointer and the compiler barfs on that.
1030	*/
1031	static const struct cpumask tl_cache_mask(struct* sched_domain_topology_level tl, int* cpu)
1032	{
1033	return per_cpu(cpu_l2_cache_map, cpu);
1034	}
1035
#ifdef CONFIG_SCHED_SMT
/* Returns cpu_smallcore_mask() of @cpu as a const mask; @tl is unused. */
static const struct cpumask *tl_smallcore_smt_mask(struct sched_domain_topology_level *tl, int cpu)
{
	return cpu_smallcore_mask(cpu);
}
#endif
1042
1043	struct cpumask cpu_coregroup_mask(int* cpu)
1044	{
1045	return per_cpu(cpu_coregroup_map, cpu);
1046	}
1047
1048	static bool has_coregroup_support(void)
1049	{
1050	/ Coregroup identification not available on shared systems /
1051	if (is_shared_processor())
1052	return `0`;
1053
1054	return coregroup_enabled;
1055	}
1056
1057	static int __init init_big_cores(void)
1058	{
1059	int cpu;
1060
1061	for_each_possible_cpu(cpu) {
1062	int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1);
1063
1064	if (err)
1065	return err;
1066
1067	zalloc_cpumask_var_node(mask: &per_cpu(cpu_smallcore_map, cpu),
1068	GFP_KERNEL,
1069	cpu_to_node(cpu));
1070	}
1071
1072	has_big_cores = true;
1073
1074	for_each_possible_cpu(cpu) {
1075	int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
1076
1077	if (err)
1078	return err;
1079	}
1080
1081	thread_group_shares_l2 = true;
1082	thread_group_shares_l3 = true;
1083	pr_debug("L2/L3 cache only shared by the threads in the small core\n");
1084
1085	return `0`;
1086	}
1087
1088	/*
1089	* die_mask and die_id are only available on systems which support
1090	* multiple coregroups within a same package. On all other systems, die_mask
1091	* would be same as package mask and die_id would be set to -1.
1092	*/
1093	const struct cpumask cpu_die_mask(int* cpu)
1094	{
1095	if (has_coregroup_support())
1096	return per_cpu(cpu_coregroup_map, cpu);
1097	else
1098	return cpu_node_mask(cpu);
1099	}
1100	EXPORT_SYMBOL_GPL(cpu_die_mask);
1101
/*
 * Returns the coregroup id of @cpu when coregroups are supported,
 * otherwise -1.
 */
int cpu_die_id(int cpu)
{
	if (has_coregroup_support())
		return cpu_to_coregroup_id(cpu);
	else
		return -1;
}
EXPORT_SYMBOL_GPL(cpu_die_id);
1110
1111	void __init smp_prepare_cpus(unsigned int max_cpus)
1112	{
1113	unsigned int cpu, num_threads;
1114
1115	DBG("smp_prepare_cpus\n");
1116
1117	/*
1118	* setup_cpu may need to be called on the boot cpu. We haven't
1119	* spun any cpus up but lets be paranoid.
1120	*/
1121	BUG_ON(boot_cpuid != smp_processor_id());
1122
1123	/ Fixup boot cpu /
1124	smp_store_cpu_info(id: boot_cpuid);
1125	cpu_callin_map[boot_cpuid] = `1`;
1126
1127	for_each_possible_cpu(cpu) {
1128	zalloc_cpumask_var_node(mask: &per_cpu(cpu_sibling_map, cpu),
1129	GFP_KERNEL, cpu_to_node(cpu));
1130	zalloc_cpumask_var_node(mask: &per_cpu(cpu_l2_cache_map, cpu),
1131	GFP_KERNEL, cpu_to_node(cpu));
1132	zalloc_cpumask_var_node(mask: &per_cpu(cpu_core_map, cpu),
1133	GFP_KERNEL, cpu_to_node(cpu));
1134	if (has_coregroup_support())
1135	zalloc_cpumask_var_node(mask: &per_cpu(cpu_coregroup_map, cpu),
1136	GFP_KERNEL, cpu_to_node(cpu));
1137
1138	#ifdef CONFIG_NUMA
1139	/*
1140	* numa_node_id() works after this.
1141	*/
1142	if (cpu_present(cpu)) {
1143	set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
1144	set_cpu_numa_mem(cpu,
1145	local_memory_node(numa_cpu_lookup_table[cpu]));
1146	}
1147	#endif
1148	}
1149
1150	/ Init the cpumasks so the boot CPU is related to itself /
1151	cpumask_set_cpu(cpu: boot_cpuid, dstp: cpu_sibling_mask(boot_cpuid));
1152	cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
1153	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
1154
1155	if (has_coregroup_support())
1156	cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
1157
1158	init_big_cores();
1159	if (has_big_cores) {
1160	cpumask_set_cpu(boot_cpuid,
1161	cpu_smallcore_mask(boot_cpuid));
1162	}
1163
1164	if (cpu_to_chip_id(boot_cpuid) != -`1`) {
1165	int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
1166
1167	/*
1168	* All threads of a core will all belong to the same core,
1169	* chip_id_lookup_table will have one entry per core.
1170	* Assumption: if boot_cpuid doesn't have a chip-id, then no
1171	* other CPUs, will also not have chip-id.
1172	*/
1173	chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
1174	if (chip_id_lookup_table)
1175	memset(chip_id_lookup_table, -`1`, sizeof(int) * idx);
1176	}
1177
1178	if (smp_ops && smp_ops->probe)
1179	smp_ops->probe();
1180
1181	// Initalise the generic SMT topology support
1182	num_threads = `1`;
1183	if (smt_enabled_at_boot)
1184	num_threads = smt_enabled_at_boot;
1185	cpu_smt_set_num_threads(num_threads, threads_per_core);
1186	}
1187
1188	void __init smp_prepare_boot_cpu(void)
1189	{
1190	BUG_ON(smp_processor_id() != boot_cpuid);
1191	#ifdef CONFIG_PPC64
1192	paca_ptrs[boot_cpuid]->__current = current;
1193	#endif
1194	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
1195	current_set[boot_cpuid] = current;
1196	}
1197
1198	#ifdef CONFIG_HOTPLUG_CPU
1199
1200	int generic_cpu_disable(void)
1201	{
1202	unsigned int cpu = smp_processor_id();
1203
1204	if (cpu == boot_cpuid)
1205	return -EBUSY;
1206
1207	set_cpu_online(cpu, online: false);
1208	#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
1209	systemcfg->processorCount--;
1210	#endif
1211	/ Update affinity of all IRQs previously aimed at this CPU /
1212	irq_migrate_all_off_this_cpu();
1213
1214	/*
1215	* Depending on the details of the interrupt controller, it's possible
1216	* that one of the interrupts we just migrated away from this CPU is
1217	* actually already pending on this CPU. If we leave it in that state
1218	* the interrupt will never be EOI'ed, and will never fire again. So
1219	* temporarily enable interrupts here, to allow any pending interrupt to
1220	* be received (and EOI'ed), before we take this CPU offline.
1221	*/
1222	local_irq_enable();
1223	mdelay(`1`);
1224	local_irq_disable();
1225
1226	return `0`;
1227	}
1228
1229	void generic_cpu_die(unsigned int cpu)
1230	{
1231	int i;
1232
1233	for (i = `0`; i < `100`; i++) {
1234	smp_rmb();
1235	if (is_cpu_dead(cpu))
1236	return;
1237	msleep(msecs: `100`);
1238	}
1239	printk(KERN_ERR "CPU%d didn't die...\n", cpu);
1240	}
1241
1242	void generic_set_cpu_dead(unsigned int cpu)
1243	{
1244	per_cpu(cpu_state, cpu) = CPU_DEAD;
1245	}
1246
1247	/*
1248	* The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
1249	* the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
1250	* which makes the delay in generic_cpu_die() not happen.
1251	*/
1252	void generic_set_cpu_up(unsigned int cpu)
1253	{
1254	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
1255	}
1256
1257	int generic_check_cpu_restart(unsigned int cpu)
1258	{
1259	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
1260	}
1261
1262	int is_cpu_dead(unsigned int cpu)
1263	{
1264	return per_cpu(cpu_state, cpu) == CPU_DEAD;
1265	}
1266
1267	static bool secondaries_inhibited(void)
1268	{
1269	return kvm_hv_mode_active();
1270	}
1271
1272	#else /* HOTPLUG_CPU */
1273
1274	#define secondaries_inhibited() 0
1275
1276	#endif
1277
1278	static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
1279	{
1280	#ifdef CONFIG_PPC64
1281	paca_ptrs[cpu]->__current = idle;
1282	paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
1283	THREAD_SIZE - STACK_FRAME_MIN_SIZE;
1284	#endif
1285	task_thread_info(idle)->cpu = cpu;
1286	secondary_current = current_set[cpu] = idle;
1287	}
1288
1289	int __cpu_up(unsigned int cpu, struct task_struct *tidle)
1290	{
1291	const unsigned long boot_spin_ms = `5` * MSEC_PER_SEC;
1292	const bool booting = system_state < SYSTEM_RUNNING;
1293	const unsigned long hp_spin_ms = `1`;
1294	unsigned long deadline;
1295	int rc;
1296	const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms;
1297
1298	/*
1299	* Don't allow secondary threads to come online if inhibited
1300	*/
1301	if (threads_per_core > `1` && secondaries_inhibited() &&
1302	cpu_thread_in_subcore(cpu))
1303	return -EBUSY;
1304
1305	if (smp_ops == NULL \|\|
1306	(smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
1307	return -EINVAL;
1308
1309	cpu_idle_thread_init(cpu, idle: tidle);
1310
1311	/*
1312	* The platform might need to allocate resources prior to bringing
1313	* up the CPU
1314	*/
1315	if (smp_ops->prepare_cpu) {
1316	rc = smp_ops->prepare_cpu(cpu);
1317	if (rc)
1318	return rc;
1319	}
1320
1321	/ Make sure callin-map entry is 0 (can be leftover a CPU*
1322	* hotplug
1323	*/
1324	cpu_callin_map[cpu] = `0`;
1325
1326	/ The information for processor bringup must*
1327	* be written out to main store before we release
1328	* the processor.
1329	*/
1330	smp_mb();
1331
1332	/ wake up cpus /
1333	DBG("smp: kicking cpu %d\n", cpu);
1334	rc = smp_ops->kick_cpu(cpu);
1335	if (rc) {
1336	pr_err("smp: failed starting cpu %d (rc %d)\n", cpu, rc);
1337	return rc;
1338	}
1339
1340	/*
1341	* At boot time, simply spin on the callin word until the
1342	* deadline passes.
1343	*
1344	* At run time, spin for an optimistic amount of time to avoid
1345	* sleeping in the common case.
1346	*/
1347	deadline = jiffies + msecs_to_jiffies(m: spin_wait_ms);
1348	spin_until_cond(cpu_callin_map[cpu] \|\| time_is_before_jiffies(deadline));
1349
1350	if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) {
1351	const unsigned long sleep_interval_us = `10` * USEC_PER_MSEC;
1352	const unsigned long sleep_wait_ms = `100` * MSEC_PER_SEC;
1353
1354	deadline = jiffies + msecs_to_jiffies(m: sleep_wait_ms);
1355	while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline))
1356	fsleep(usecs: sleep_interval_us);
1357	}
1358
1359	if (!cpu_callin_map[cpu]) {
1360	printk(KERN_ERR "Processor %u is stuck.\n", cpu);
1361	return -ENOENT;
1362	}
1363
1364	DBG("Processor %u found.\n", cpu);
1365
1366	if (smp_ops->give_timebase)
1367	smp_ops->give_timebase();
1368
1369	/ Wait until cpu puts itself in the online & active maps /
1370	spin_until_cond(cpu_online(cpu));
1371
1372	return `0`;
1373	}
1374
1375	/ Return the value of the reg property corresponding to the given*
1376	* logical cpu.
1377	*/
1378	int cpu_to_core_id(int cpu)
1379	{
1380	struct device_node *np;
1381	int id = -`1`;
1382
1383	np = of_get_cpu_node(cpu, NULL);
1384	if (!np)
1385	goto out;
1386
1387	id = of_get_cpu_hwid(cpun: np, thread: `0`);
1388	out:
1389	of_node_put(node: np);
1390	return id;
1391	}
1392	EXPORT_SYMBOL_GPL(cpu_to_core_id);
1393
1394	/ Helper routines for cpu to core mapping /
1395	int cpu_core_index_of_thread(int cpu)
1396	{
1397	return cpu >> threads_shift;
1398	}
1399	EXPORT_SYMBOL_GPL(cpu_core_index_of_thread);
1400
1401	int cpu_first_thread_of_core(int core)
1402	{
1403	return core << threads_shift;
1404	}
1405	EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
1406
1407	/ Must be called when no change can occur to cpu_present_mask,*
1408	* i.e. during cpu online or offline.
1409	*/
1410	static struct device_node cpu_to_l2cache(int* cpu)
1411	{
1412	struct device_node *np;
1413	struct device_node *cache;
1414
1415	if (!cpu_present(cpu))
1416	return NULL;
1417
1418	np = of_get_cpu_node(cpu, NULL);
1419	if (np == NULL)
1420	return NULL;
1421
1422	cache = of_find_next_cache_node(np);
1423
1424	of_node_put(node: np);
1425
1426	return cache;
1427	}
1428
1429	static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
1430	{
1431	struct cpumask (submask_fn)(int) = cpu_sibling_mask;
1432	struct device_node l2_cache, np;
1433	int i;
1434
1435	if (has_big_cores)
1436	submask_fn = cpu_smallcore_mask;
1437
1438	/*
1439	* If the threads in a thread-group share L2 cache, then the
1440	* L2-mask can be obtained from thread_group_l2_cache_map.
1441	*/
1442	if (thread_group_shares_l2) {
1443	cpumask_set_cpu(cpu, dstp: cpu_l2_cache_mask(cpu));
1444
1445	for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
1446	if (cpu_online(cpu: i))
1447	set_cpus_related(i, j: cpu, get_cpumask: cpu_l2_cache_mask);
1448	}
1449
1450	/ Verify that L1-cache siblings are a subset of L2 cache-siblings /
1451	if (!cpumask_equal(src1p: submask_fn(cpu), src2p: cpu_l2_cache_mask(cpu)) &&
1452	!cpumask_subset(src1p: submask_fn(cpu), src2p: cpu_l2_cache_mask(cpu))) {
1453	pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
1454	cpu);
1455	}
1456
1457	return true;
1458	}
1459
1460	l2_cache = cpu_to_l2cache(cpu);
1461	if (!l2_cache \|\| !*mask) {
1462	/ Assume only core siblings share cache with this CPU /
1463	for_each_cpu(i, cpu_sibling_mask(cpu))
1464	set_cpus_related(cpu, i, cpu_l2_cache_mask);
1465
1466	return false;
1467	}
1468
1469	cpumask_and(dstp: *mask, cpu_online_mask, src2p: cpu_node_mask(cpu));
1470
1471	/ Update l2-cache mask with all the CPUs that are part of submask /
1472	or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
1473
1474	/ Skip all CPUs already part of current CPU l2-cache mask /
1475	cpumask_andnot(dstp: mask, src1p: mask, src2p: cpu_l2_cache_mask(cpu));
1476
1477	for_each_cpu(i, *mask) {
1478	/*
1479	* when updating the marks the current CPU has not been marked
1480	* online, but we need to update the cache masks
1481	*/
1482	np = cpu_to_l2cache(cpu: i);
1483
1484	/ Skip all CPUs already part of current CPU l2-cache /
1485	if (np == l2_cache) {
1486	or_cpumasks_related(i: cpu, j: i, srcmask: submask_fn, dstmask: cpu_l2_cache_mask);
1487	cpumask_andnot(dstp: mask, src1p: mask, src2p: submask_fn(i));
1488	} else {
1489	cpumask_andnot(dstp: mask, src1p: mask, src2p: cpu_l2_cache_mask(i));
1490	}
1491
1492	of_node_put(node: np);
1493	}
1494	of_node_put(node: l2_cache);
1495
1496	return true;
1497	}
1498
1499	#ifdef CONFIG_HOTPLUG_CPU
1500	static void remove_cpu_from_masks(int cpu)
1501	{
1502	struct cpumask (mask_fn)(int) = cpu_sibling_mask;
1503	int i;
1504
1505	unmap_cpu_from_node(cpu);
1506
1507	if (shared_caches)
1508	mask_fn = cpu_l2_cache_mask;
1509
1510	for_each_cpu(i, mask_fn(cpu)) {
1511	set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
1512	set_cpus_unrelated(cpu, i, cpu_sibling_mask);
1513	if (has_big_cores)
1514	set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
1515	}
1516
1517	for_each_cpu(i, cpu_core_mask(cpu))
1518	set_cpus_unrelated(i: cpu, j: i, get_cpumask: cpu_core_mask);
1519
1520	if (has_coregroup_support()) {
1521	for_each_cpu(i, cpu_coregroup_mask(cpu))
1522	set_cpus_unrelated(i: cpu, j: i, get_cpumask: cpu_coregroup_mask);
1523	}
1524	}
1525	#endif
1526
1527	static inline void add_cpu_to_smallcore_masks(int cpu)
1528	{
1529	int i;
1530
1531	if (!has_big_cores)
1532	return;
1533
1534	cpumask_set_cpu(cpu, dstp: cpu_smallcore_mask(cpu));
1535
1536	for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) {
1537	if (cpu_online(cpu: i))
1538	set_cpus_related(i, j: cpu, get_cpumask: cpu_smallcore_mask);
1539	}
1540	}
1541
1542	static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
1543	{
1544	struct cpumask (submask_fn)(int) = cpu_sibling_mask;
1545	int coregroup_id = cpu_to_coregroup_id(cpu);
1546	int i;
1547
1548	if (shared_caches)
1549	submask_fn = cpu_l2_cache_mask;
1550
1551	if (!*mask) {
1552	/ Assume only siblings are part of this CPU's coregroup /
1553	for_each_cpu(i, submask_fn(cpu))
1554	set_cpus_related(i: cpu, j: i, get_cpumask: cpu_coregroup_mask);
1555
1556	return;
1557	}
1558
1559	cpumask_and(dstp: *mask, cpu_online_mask, src2p: cpu_node_mask(cpu));
1560
1561	/ Update coregroup mask with all the CPUs that are part of submask /
1562	or_cpumasks_related(i: cpu, j: cpu, srcmask: submask_fn, dstmask: cpu_coregroup_mask);
1563
1564	/ Skip all CPUs already part of coregroup mask /
1565	cpumask_andnot(dstp: mask, src1p: mask, src2p: cpu_coregroup_mask(cpu));
1566
1567	for_each_cpu(i, *mask) {
1568	/ Skip all CPUs not part of this coregroup /
1569	if (coregroup_id == cpu_to_coregroup_id(i)) {
1570	or_cpumasks_related(i: cpu, j: i, srcmask: submask_fn, dstmask: cpu_coregroup_mask);
1571	cpumask_andnot(dstp: mask, src1p: mask, src2p: submask_fn(i));
1572	} else {
1573	cpumask_andnot(dstp: mask, src1p: mask, src2p: cpu_coregroup_mask(cpu: i));
1574	}
1575	}
1576	}
1577
1578	static void add_cpu_to_masks(int cpu)
1579	{
1580	struct cpumask (submask_fn)(int) = cpu_sibling_mask;
1581	int first_thread = cpu_first_thread_sibling(cpu);
1582	cpumask_var_t mask;
1583	int chip_id = -`1`;
1584	bool ret;
1585	int i;
1586
1587	/*
1588	* This CPU will not be in the online mask yet so we need to manually
1589	* add it to its own thread sibling mask.
1590	*/
1591	map_cpu_to_node(cpu, cpu_to_node(cpu));
1592	cpumask_set_cpu(cpu, dstp: cpu_sibling_mask(cpu));
1593	cpumask_set_cpu(cpu, dstp: cpu_core_mask(cpu));
1594
1595	for (i = first_thread; i < first_thread + threads_per_core; i++)
1596	if (cpu_online(i))
1597	set_cpus_related(i, cpu, cpu_sibling_mask);
1598
1599	add_cpu_to_smallcore_masks(cpu);
1600
1601	/ In CPU-hotplug path, hence use GFP_ATOMIC /
1602	ret = alloc_cpumask_var_node(mask: &mask, GFP_ATOMIC, cpu_to_node(cpu));
1603	update_mask_by_l2(cpu, mask: &mask);
1604
1605	if (has_coregroup_support())
1606	update_coregroup_mask(cpu, mask: &mask);
1607
1608	if (chip_id_lookup_table && ret)
1609	chip_id = cpu_to_chip_id(cpu);
1610
1611	if (shared_caches)
1612	submask_fn = cpu_l2_cache_mask;
1613
1614	/ Update core_mask with all the CPUs that are part of submask /
1615	or_cpumasks_related(i: cpu, j: cpu, srcmask: submask_fn, dstmask: cpu_core_mask);
1616
1617	/ Skip all CPUs already part of current CPU core mask /
1618	cpumask_andnot(dstp: mask, cpu_online_mask, src2p: cpu_core_mask(cpu));
1619
1620	/ If chip_id is -1; limit the cpu_core_mask to within PKG /
1621	if (chip_id == -`1`)
1622	cpumask_and(dstp: mask, src1p: mask, src2p: cpu_node_mask(cpu));
1623
1624	for_each_cpu(i, mask) {
1625	if (chip_id == cpu_to_chip_id(i)) {
1626	or_cpumasks_related(i: cpu, j: i, srcmask: submask_fn, dstmask: cpu_core_mask);
1627	cpumask_andnot(dstp: mask, src1p: mask, src2p: submask_fn(i));
1628	} else {
1629	cpumask_andnot(dstp: mask, src1p: mask, src2p: cpu_core_mask(i));
1630	}
1631	}
1632
1633	free_cpumask_var(mask);
1634	}
1635
1636	/ Activate a secondary processor. /
1637	__no_stack_protector
1638	void start_secondary(void *unused)
1639	{
1640	unsigned int cpu = raw_smp_processor_id();
1641
1642	/ PPC64 calls setup_kup() in early_setup_secondary() /
1643	if (IS_ENABLED(CONFIG_PPC32))
1644	setup_kup();
1645
1646	mmgrab_lazy_tlb(mm: &init_mm);
1647	current->active_mm = &init_mm;
1648	VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
1649	cpumask_set_cpu(cpu, dstp: mm_cpumask(mm: &init_mm));
1650	inc_mm_active_cpus(&init_mm);
1651
1652	smp_store_cpu_info(id: cpu);
1653	set_dec(tb_ticks_per_jiffy);
1654	rcutree_report_cpu_starting(cpu);
1655	cpu_callin_map[cpu] = `1`;
1656
1657	if (smp_ops->setup_cpu)
1658	smp_ops->setup_cpu(cpu);
1659	if (smp_ops->take_timebase)
1660	smp_ops->take_timebase();
1661
1662	secondary_cpu_time_init();
1663
1664	#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
1665	if (system_state == SYSTEM_RUNNING)
1666	systemcfg->processorCount++;
1667	#endif
1668
1669	#ifdef CONFIG_PPC64
1670	vdso_getcpu_init();
1671	#endif
1672	set_numa_node(numa_cpu_lookup_table[cpu]);
1673	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
1674
1675	/ Update topology CPU masks /
1676	add_cpu_to_masks(cpu);
1677
1678	/*
1679	* Check for any shared caches. Note that this must be done on a
1680	* per-core basis because one core in the pair might be disabled.
1681	*/
1682	if (!shared_caches) {
1683	struct cpumask (sibling_mask)(int) = cpu_sibling_mask;
1684	struct cpumask *mask = cpu_l2_cache_mask(cpu);
1685
1686	if (has_big_cores)
1687	sibling_mask = cpu_smallcore_mask;
1688
1689	if (cpumask_weight(srcp: mask) > cpumask_weight(srcp: sibling_mask(cpu)))
1690	shared_caches = true;
1691	}
1692
1693	smp_wmb();
1694	notify_cpu_starting(cpu);
1695	set_cpu_online(cpu, online: true);
1696
1697	boot_init_stack_canary();
1698
1699	local_irq_enable();
1700
1701	/ We can enable ftrace for secondary cpus now /
1702	this_cpu_enable_ftrace();
1703
1704	cpu_startup_entry(state: CPUHP_AP_ONLINE_IDLE);
1705
1706	BUG();
1707	}
1708
1709	static struct sched_domain_topology_level powerpc_topology[`6`];
1710
1711	static void __init build_sched_topology(void)
1712	{
1713	int i = `0`;
1714
1715	if (is_shared_processor() && has_big_cores)
1716	static_branch_enable(&splpar_asym_pack);
1717
1718	#ifdef CONFIG_SCHED_SMT
1719	if (has_big_cores) {
1720	pr_info("Big cores detected but using small core scheduling\n");
1721	powerpc_topology[i++] =
1722	SDTL_INIT(tl_smallcore_smt_mask, powerpc_smt_flags, SMT);
1723	} else {
1724	powerpc_topology[i++] = SDTL_INIT(tl_smt_mask, powerpc_smt_flags, SMT);
1725	}
1726	#endif
1727	if (shared_caches) {
1728	powerpc_topology[i++] =
1729	SDTL_INIT(tl_cache_mask, powerpc_shared_cache_flags, CACHE);
1730	}
1731
1732	if (has_coregroup_support()) {
1733	powerpc_topology[i++] =
1734	SDTL_INIT(tl_mc_mask, powerpc_shared_proc_flags, MC);
1735	}
1736
1737	powerpc_topology[i++] = SDTL_INIT(tl_pkg_mask, powerpc_shared_proc_flags, PKG);
1738
1739	/ There must be one trailing NULL entry left. /
1740	BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - `1`);
1741
1742	set_sched_topology(powerpc_topology);
1743	}
1744
1745	void __init smp_cpus_done(unsigned int max_cpus)
1746	{
1747	/*
1748	* We are running pinned to the boot CPU, see rest_init().
1749	*/
1750	if (smp_ops && smp_ops->setup_cpu)
1751	smp_ops->setup_cpu(boot_cpuid);
1752
1753	if (smp_ops && smp_ops->bringup_done)
1754	smp_ops->bringup_done();
1755
1756	dump_numa_cpu_topology();
1757	build_sched_topology();
1758	}
1759
1760	/*
1761	* For asym packing, by default lower numbered CPU has higher priority.
1762	* On shared processors, pack to lower numbered core. However avoid moving
1763	* between thread_groups within the same core.
1764	*/
1765	int arch_asym_cpu_priority(int cpu)
1766	{
1767	if (static_branch_unlikely(&splpar_asym_pack))
1768	return -cpu / threads_per_core;
1769
1770	return -cpu;
1771	}
1772
1773	#ifdef CONFIG_HOTPLUG_CPU
1774	int __cpu_disable(void)
1775	{
1776	int cpu = smp_processor_id();
1777	int err;
1778
1779	if (!smp_ops->cpu_disable)
1780	return -ENOSYS;
1781
1782	this_cpu_disable_ftrace();
1783
1784	err = smp_ops->cpu_disable();
1785	if (err)
1786	return err;
1787
1788	/ Update sibling maps /
1789	remove_cpu_from_masks(cpu);
1790
1791	return `0`;
1792	}
1793
1794	void __cpu_die(unsigned int cpu)
1795	{
1796	/*
1797	* This could perhaps be a generic call in idlea_task_dead(), but
1798	* that requires testing from all archs, so first put it here to
1799	*/
1800	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
1801	dec_mm_active_cpus(&init_mm);
1802	cpumask_clear_cpu(cpu, dstp: mm_cpumask(mm: &init_mm));
1803
1804	if (smp_ops->cpu_die)
1805	smp_ops->cpu_die(cpu);
1806	}
1807
1808	void __noreturn arch_cpu_idle_dead(void)
1809	{
1810	/*
1811	* Disable on the down path. This will be re-enabled by
1812	* start_secondary() via start_secondary_resume() below
1813	*/
1814	this_cpu_disable_ftrace();
1815
1816	if (smp_ops->cpu_offline_self)
1817	smp_ops->cpu_offline_self();
1818
1819	/ If we return, we re-enter start_secondary /
1820	start_secondary_resume();
1821	}
1822
1823	#endif
1824

source code of linux/arch/powerpc/kernel/smp.c