// SPDX-License-Identifier: GPL-2.0
/*
 * x86 CPU caches detection and configuration
 *
 * Previous changes
 * - Venkatesh Pallipadi: Cache identification through CPUID(0x4)
 * - Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure
 * - Andi Kleen / Andreas Herrmann: CPUID(0x4) emulation on AMD
 */

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/stop_machine.h>

#include <asm/amd/nb.h>
#include <asm/cacheinfo.h>
#include <asm/cpufeature.h>
#include <asm/cpuid/api.h>
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>

#include "cpu.h"

/* Shared last level cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);

/* Shared L2 cache maps */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);

static cpumask_var_t cpu_cacheinfo_mask;

/* Kernel controls MTRR and/or PAT MSRs. */
unsigned int memory_caching_control __ro_after_init;

enum _cache_type {
	CTYPE_NULL = 0,
	CTYPE_DATA = 1,
	CTYPE_INST = 2,
	CTYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type type :5;
		unsigned int level :3;
		unsigned int is_self_initializing :1;
		unsigned int is_fully_associative :1;
		unsigned int reserved :4;
		unsigned int num_threads_sharing :12;
		unsigned int num_cores_on_die :6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size :12;
		unsigned int physical_line_partition :10;
		unsigned int ways_of_associativity :10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets :32;
	} split;
	u32 full;
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned int id;
	unsigned long size;
};

/* Map CPUID(0x4) EAX.cache_type to <linux/cacheinfo.h> types */
static const enum cache_type cache_type_map[] = {
	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
	[CTYPE_DATA] = CACHE_TYPE_DATA,
	[CTYPE_INST] = CACHE_TYPE_INST,
	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
};

/*
 * Fallback AMD CPUID(0x4) emulation
 * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
 *
 * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should
 * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006).
 */

#define AMD_CPUID4_FULLY_ASSOCIATIVE 0xffff
#define AMD_L2_L3_INVALID_ASSOC 0x9

union l1_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :8;
		unsigned assoc :8;
		unsigned size_in_kb :8;
	};
	unsigned int val;
};

union l2_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :4;
		unsigned assoc :4;
		unsigned size_in_kb :16;
	};
	unsigned int val;
};

union l3_cache {
	struct {
		unsigned line_size :8;
		unsigned lines_per_tag :4;
		unsigned assoc :4;
		unsigned res :2;
		unsigned size_encoded :14;
	};
	unsigned int val;
};

/* L2/L3 associativity mapping */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = AMD_CPUID4_FULLY_ASSOCIATIVE
};

static const unsigned char levels[] = { 1, 1, 2, 3 };
static const unsigned char types[] = { 1, 2, 3, 3 };

static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax,
			      union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx)
{
	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d, *l1;
	union l2_cache l2;
	union l3_cache l3;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	l1 = &l1d;
	switch (index) {
	case 1:
		l1 = &l1i;
		fallthrough;
	case 0:
		if (!l1->val)
			return;

		assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc;
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		/* Use x86_cache_size as it might have K7 errata fixes */
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
		break;
	case 3:
		if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC)
			return;

		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
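		/* CPUID(0x80000006) EDX encodes the L3 size in 512 KB units. */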
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[index];
	eax->split.level = levels[index];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = topology_num_cores_per_package();

	if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE)
		eax->split.is_fully_associative = 1;

	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
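	/*
	 * CPUID(0x4)-style fields are encoded minus one, so derive the set
	 * count as size / (line size * ways) - 1; e.g. a 512 KB, 8-way cache
	 * with 64-byte lines has 1024 sets, encoded as 1023.
	 */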
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
				    (ebx->split.ways_of_associativity + 1) - 1;
}

static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax,
				 union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx)
{
	if (eax.split.type == CTYPE_NULL)
		return -EIO;

	id4->eax = eax;
	id4->ebx = ebx;
	id4->ecx = ecx;
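	/*
	 * Total size = sets * line size * physical line partitions * ways,
	 * with each field stored minus one; e.g. 8192 sets * 64-byte lines *
	 * 1 partition * 16 ways = 8 MB.
	 */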
	id4->size = (ecx.split.number_of_sets + 1) *
		    (ebx.split.coherency_line_size + 1) *
		    (ebx.split.physical_line_partition + 1) *
		    (ebx.split.ways_of_associativity + 1);

	return 0;
}

static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored);
	else
		legacy_amd_cpuid4(index, &eax, &ebx, &ecx);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	u32 ignored;

	cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored);

	return cpuid4_info_fill_done(id4, eax, ebx, ecx);
}

static int fill_cpuid4_info(int index, struct _cpuid4_info *id4)
{
	u8 cpu_vendor = boot_cpu_data.x86_vendor;

	return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ?
		amd_fill_cpuid4_info(index, id4) :
		intel_fill_cpuid4_info(index, id4);
}

static int find_num_cache_leaves(struct cpuinfo_x86 *c)
{
	unsigned int eax, ebx, ecx, edx, op;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	/* Do a CPUID(op) loop to calculate num_cache_leaves */
	op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4;
	do {
		++i;
		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CTYPE_NULL);
	return i;
}
/*
 * The maximum number of threads sharing a cache comes from CPUID(0x4)
 * EAX[25:14], with the cache index as input in ECX. Right-shifting the
 * APIC ID by that number's order gives the cache ID for this cache node.
 */
static unsigned int get_cache_id(u32 apicid, const struct _cpuid4_info *id4)
{
	unsigned long num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);

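	/*
	 * e.g. 16 threads sharing gives index_msb = 4, so APIC IDs 0-15 all
	 * map to cache ID 0, 16-31 to cache ID 1, and so on.
	 */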
	return apicid >> index_msb;
}

/*
 * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist.
 */

void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	if (c->x86 < 0x17) {
		/* Pre-Zen: LLC is at the node level */
		c->topo.llc_id = die_id;
	} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * Family 17h up to 1F models: LLC is at the core
		 * complex level. Core complex ID is ApicId[3].
		 */
		c->topo.llc_id = c->topo.apicid >> 3;
	} else {
		/*
		 * Newer families: LLC ID is calculated from the number
		 * of threads sharing the L3 cache.
		 */
		u32 llc_index = find_num_cache_leaves(c) - 1;
		struct _cpuid4_info id4 = {};

		if (!amd_fill_cpuid4_info(llc_index, &id4))
			c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
	}
}

void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
	if (!cpuid_amd_hygon_has_l3_cache())
		return;

	/*
	 * Hygons are similar to AMD Family 17h up to 1F models: LLC is
	 * at the core complex level. Core complex ID is ApicId[3].
	 */
	c->topo.llc_id = c->topo.apicid >> 3;
}

void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

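	/*
	 * Without TOPOEXT, infer the leaf count from CPUID(0x80000006):
	 * EDX[15:12] is the L3 associativity, so a non-zero value there
	 * means an L3 cache (and thus a fourth leaf) exists.
	 */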
	if (boot_cpu_has(X86_FEATURE_TOPOEXT))
		ci->num_leaves = find_num_cache_leaves(c);
	else if (c->extended_cpuid_level >= 0x80000006)
		ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
}

void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

	ci->num_leaves = find_num_cache_leaves(c);
}

static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3,
				 unsigned int l2, unsigned int l1i, unsigned int l1d)
{
	/*
	 * If llc_id is still unset, then cpuid_level < 4, which implies
	 * that the only possibility left is SMT. Since CPUID(0x2) doesn't
	 * specify any shared caches and SMT shares all caches, we can
	 * unconditionally set LLC ID to the package ID so that all
	 * threads share it.
	 */
	if (c->topo.llc_id == BAD_APICID)
		c->topo.llc_id = c->topo.pkg_id;

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d);

	if (!l2)
		cpu_detect_cache_sizes(c);
}

/*
 * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available.
 */
static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
{
	unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	const struct leaf_0x2_table *desc;
	union leaf_0x2_regs regs;
	u8 *ptr;

	if (c->cpuid_level < 2)
		return;

	cpuid_leaf_0x2(&regs);
	for_each_cpuid_0x2_desc(regs, ptr, desc) {
		switch (desc->c_type) {
		case CACHE_L1_INST: l1i += desc->c_size; break;
		case CACHE_L1_DATA: l1d += desc->c_size; break;
		case CACHE_L2: l2 += desc->c_size; break;
		case CACHE_L3: l3 += desc->c_size; break;
		}
	}

	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
}

static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
{
	unsigned int num_threads_sharing;
	int index_msb;

	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
	index_msb = get_count_order(num_threads_sharing);
	return c->topo.apicid & ~((1 << index_msb) - 1);
}

static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
	unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID;
	unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0;

	if (c->cpuid_level < 4)
		return false;

	/*
	 * There should be at least one leaf. A non-zero value means
	 * that the number of leaves has been previously initialized.
	 */
	if (!ci->num_leaves)
		ci->num_leaves = find_num_cache_leaves(c);

	if (!ci->num_leaves)
		return false;

	for (int i = 0; i < ci->num_leaves; i++) {
		struct _cpuid4_info id4 = {};
		int ret;

		ret = intel_fill_cpuid4_info(i, &id4);
		if (ret < 0)
			continue;

		switch (id4.eax.split.level) {
		case 1:
			if (id4.eax.split.type == CTYPE_DATA)
				l1d = id4.size / 1024;
			else if (id4.eax.split.type == CTYPE_INST)
				l1i = id4.size / 1024;
			break;
		case 2:
			l2 = id4.size / 1024;
			l2_id = calc_cache_topo_id(c, &id4);
			break;
		case 3:
			l3 = id4.size / 1024;
			l3_id = calc_cache_topo_id(c, &id4);
			break;
		default:
			break;
		}
	}

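	/* With no L3 present, the L2 is the last level cache. */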
	c->topo.l2c_id = l2_id;
	c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id;
	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
	return true;
}

void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Don't use CPUID(0x2) if CPUID(0x4) is supported. */
	if (intel_cacheinfo_0x4(c))
		return;

	intel_cacheinfo_0x2(c);
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, AMD/Hygon
 */
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
				    const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci;
	struct cacheinfo *ci;
	int i, sibling;

	/*
	 * For L3, always use the pre-calculated cpu_llc_shared_mask
	 * to derive shared_cpu_map.
	 */
	if (index == 3) {
		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			ci = this_cpu_ci->info_list + index;
			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
				if (!cpu_online(sibling))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
		unsigned int apicid, nshared, first, last;

		nshared = id4->eax.split.num_threads_sharing + 1;
		apicid = cpu_data(cpu).topo.apicid;
		first = apicid - (apicid % nshared);
		last = first + nshared - 1;

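		/*
		 * e.g. nshared = 4 and apicid = 6: the sharing group spans
		 * APIC IDs 4..7, so all CPUs in that range are siblings.
		 */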
		for_each_online_cpu(i) {
			this_cpu_ci = get_cpu_cacheinfo(i);
			if (!this_cpu_ci->info_list)
				continue;

			apicid = cpu_data(i).topo.apicid;
			if ((apicid < first) || (apicid > last))
				continue;

			ci = this_cpu_ci->info_list + index;

			for_each_online_cpu(sibling) {
				apicid = cpu_data(sibling).topo.apicid;
				if ((apicid < first) || (apicid > last))
					continue;
				cpumask_set_cpu(sibling, &ci->shared_cpu_map);
			}
		}
	} else
		return 0;

	return 1;
}

/*
 * <linux/cacheinfo.h> shared_cpu_map setup, Intel + fallback AMD/Hygon
 */
static void __cache_cpumap_setup(unsigned int cpu, int index,
				 const struct _cpuid4_info *id4)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	struct cacheinfo *ci, *sibling_ci;
	unsigned long num_threads_sharing;
	int index_msb, i;

	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
		if (__cache_amd_cpumap_setup(cpu, index, id4))
			return;
	}

	ci = this_cpu_ci->info_list + index;
	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;

	cpumask_set_cpu(cpu, &ci->shared_cpu_map);
	if (num_threads_sharing == 1)
		return;

	index_msb = get_count_order(num_threads_sharing);

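	/* CPUs whose APIC IDs match above bit index_msb share this cache. */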
	for_each_online_cpu(i)
		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);

			/* Skip if itself or no cacheinfo */
			if (i == cpu || !sib_cpu_ci->info_list)
				continue;

			sibling_ci = sib_cpu_ci->info_list + index;
			cpumask_set_cpu(i, &ci->shared_cpu_map);
			cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map);
		}
}

static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4,
			 struct amd_northbridge *nb)
{
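	/* CPUID(0x4)-style leaves report geometry fields minus one; undo the bias here. */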
	ci->id = id4->id;
	ci->attributes = CACHE_ID;
	ci->level = id4->eax.split.level;
	ci->type = cache_type_map[id4->eax.split.type];
	ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1;
	ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1;
	ci->size = id4->size;
	ci->number_of_sets = id4->ecx.split.number_of_sets + 1;
	ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1;
	ci->priv = nb;
}

int init_cache_level(unsigned int cpu)
{
	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

	/* There should be at least one leaf. */
	if (!ci->num_leaves)
		return -ENOENT;

	return 0;
}

int populate_cache_leaves(unsigned int cpu)
{
	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
	struct cacheinfo *ci = this_cpu_ci->info_list;
	u8 cpu_vendor = boot_cpu_data.x86_vendor;
	u32 apicid = cpu_data(cpu).topo.apicid;
	struct amd_northbridge *nb = NULL;
	struct _cpuid4_info id4 = {};
	int idx, ret;

	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
		ret = fill_cpuid4_info(idx, &id4);
		if (ret)
			return ret;

		id4.id = get_cache_id(apicid, &id4);

		if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
			nb = amd_init_l3_cache(idx);

		ci_info_init(ci++, &id4, nb);
		__cache_cpumap_setup(cpu, idx, &id4);
	}

	this_cpu_ci->cpu_map_populated = true;
	return 0;
}

/*
 * Disable and enable caches. Needed for changing MTRRs and the PAT MSR.
 *
 * Since we are disabling the cache, don't allow any interrupts;
 * they would run extremely slowly and would only increase the pain.
 *
 * The caller must ensure that local interrupts are disabled and
 * are re-enabled after cache_enable() has been called.
 */
static unsigned long saved_cr4;
static DEFINE_RAW_SPINLOCK(cache_disable_lock);

/*
 * Cache flushing is the most time-consuming step when programming the
 * MTRRs. On many Intel CPUs without known errata, it can be skipped
 * if the CPU declares cache self-snooping support.
 */
static void maybe_flush_caches(void)
{
	if (!static_cpu_has(X86_FEATURE_SELFSNOOP))
		wbinvd();
}

void cache_disable(void) __acquires(cache_disable_lock)
{
	unsigned long cr0;

	/*
	 * This is not ideal since the cache is only flushed/disabled
	 * for this CPU while the MTRRs are changed, but changing this
	 * requires more invasive changes to the way the kernel boots.
	 */
	raw_spin_lock(&cache_disable_lock);

	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
	cr0 = read_cr0() | X86_CR0_CD;
	write_cr0(cr0);

	maybe_flush_caches();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (cpu_feature_enabled(X86_FEATURE_PGE)) {
		saved_cr4 = __read_cr4();
		__write_cr4(saved_cr4 & ~X86_CR4_PGE);
	}

	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_disable();

	maybe_flush_caches();
}

void cache_enable(void) __releases(cache_disable_lock)
{
	/* Flush TLBs (no need to flush caches - they are disabled) */
	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
	flush_tlb_local();

	if (cpu_feature_enabled(X86_FEATURE_MTRR))
		mtrr_enable();

	/* Enable caches */
	write_cr0(read_cr0() & ~X86_CR0_CD);

	/* Restore value of CR4 */
	if (cpu_feature_enabled(X86_FEATURE_PGE))
		__write_cr4(saved_cr4);

	raw_spin_unlock(&cache_disable_lock);
}

static void cache_cpu_init(void)
{
	unsigned long flags;

	local_irq_save(flags);

	if (memory_caching_control & CACHE_MTRR) {
		cache_disable();
		mtrr_generic_set_state();
		cache_enable();
	}

	if (memory_caching_control & CACHE_PAT)
		pat_cpu_init();

	local_irq_restore(flags);
}

static bool cache_aps_delayed_init = true;

void set_cache_aps_delayed_init(bool val)
{
	cache_aps_delayed_init = val;
}

bool get_cache_aps_delayed_init(void)
{
	return cache_aps_delayed_init;
}

static int cache_rendezvous_handler(void *unused)
{
	if (get_cache_aps_delayed_init() || !cpu_online(smp_processor_id()))
		cache_cpu_init();

	return 0;
}

void __init cache_bp_init(void)
{
	mtrr_bp_init();
	pat_bp_init();

	if (memory_caching_control)
		cache_cpu_init();
}

void cache_bp_restore(void)
{
	if (memory_caching_control)
		cache_cpu_init();
}

static int cache_ap_online(unsigned int cpu)
{
	cpumask_set_cpu(cpu, cpu_cacheinfo_mask);

	if (!memory_caching_control || get_cache_aps_delayed_init())
		return 0;

	/*
	 * Ideally we should hold mtrr_mutex here to prevent MTRR entries
	 * from being changed, but this routine is called during CPU boot,
	 * where holding the lock would break things.
	 *
	 * This routine is called in two cases:
	 *
	 * 1. very early during software resume, when there are absolutely
	 *    no MTRR entry changes;
	 *
	 * 2. CPU hotadd time. We let mtrr_add/del_page hold the cpuhotplug
	 *    lock to prevent MTRR entry changes.
	 */
	stop_machine_from_inactive_cpu(cache_rendezvous_handler, NULL,
				       cpu_cacheinfo_mask);

	return 0;
}

static int cache_ap_offline(unsigned int cpu)
{
	cpumask_clear_cpu(cpu, cpu_cacheinfo_mask);
	return 0;
}

/*
 * Delayed cache initialization for all APs
 */
void cache_aps_init(void)
{
	if (!memory_caching_control || !get_cache_aps_delayed_init())
		return;

	stop_machine(cache_rendezvous_handler, NULL, cpu_online_mask);
	set_cache_aps_delayed_init(false);
}

static int __init cache_ap_register(void)
{
	zalloc_cpumask_var(&cpu_cacheinfo_mask, GFP_KERNEL);
	cpumask_set_cpu(smp_processor_id(), cpu_cacheinfo_mask);

	cpuhp_setup_state_nocalls(CPUHP_AP_CACHECTRL_STARTING,
				  "x86/cachectrl:starting",
				  cache_ap_online, cache_ap_offline);
	return 0;
}
early_initcall(cache_ap_register);
| 821 | |