rdtgroup.c source code [linux/fs/resctrl/rdtgroup.c]

// SPDX-License-Identifier: GPL-2.0-only
/*
 * User interface for Resource Allocation in Resource Director Technology(RDT)
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual.
 */
12
13	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15	#include <linux/cpu.h>
16	#include <linux/debugfs.h>
17	#include <linux/fs.h>
18	#include <linux/fs_parser.h>
19	#include <linux/sysfs.h>
20	#include <linux/kernfs.h>
21	#include <linux/resctrl.h>
22	#include <linux/seq_buf.h>
23	#include <linux/seq_file.h>
24	#include <linux/sched/task.h>
25	#include <linux/slab.h>
26	#include <linux/user_namespace.h>
27
28	#include <uapi/linux/magic.h>
29
30	#include "internal.h"
31
32	/ Mutex to protect rdtgroup access. /
33	DEFINE_MUTEX(rdtgroup_mutex);
34
35	static struct kernfs_root *rdt_root;
36
37	struct rdtgroup rdtgroup_default;
38
39	LIST_HEAD(rdt_all_groups);
40
41	/ list of entries for the schemata file /
42	LIST_HEAD(resctrl_schema_all);
43
44	/*
45	* List of struct mon_data containing private data of event files for use by
46	* rdtgroup_mondata_show(). Protected by rdtgroup_mutex.
47	*/
48	static LIST_HEAD(mon_data_kn_priv_list);
49
50	/ The filesystem can only be mounted once. /
51	bool resctrl_mounted;
52
53	/ Kernel fs node for "info" directory under root /
54	static struct kernfs_node *kn_info;
55
56	/ Kernel fs node for "mon_groups" directory under root /
57	static struct kernfs_node *kn_mongrp;
58
59	/ Kernel fs node for "mon_data" directory under root /
60	static struct kernfs_node *kn_mondata;
61
62	/*
63	* Used to store the max resource name width to display the schemata names in
64	* a tabular format.
65	*/
66	int max_name_width;
67
68	static struct seq_buf last_cmd_status;
69
70	static char last_cmd_status_buf[`512`];
71
72	static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
73
74	static void rdtgroup_destroy_root(void);
75
76	struct dentry *debugfs_resctrl;
77
78	/*
79	* Memory bandwidth monitoring event to use for the default CTRL_MON group
80	* and each new CTRL_MON group created by the user. Only relevant when
81	* the filesystem is mounted with the "mba_MBps" option so it does not
82	* matter that it remains uninitialized on systems that do not support
83	* the "mba_MBps" option.
84	*/
85	enum resctrl_event_id mba_mbps_default_event;
86
87	static bool resctrl_debug;
88
89	void rdt_last_cmd_clear(void)
90	{
91	lockdep_assert_held(&rdtgroup_mutex);
92	seq_buf_clear(s: &last_cmd_status);
93	}
94
95	void rdt_last_cmd_puts(const char *s)
96	{
97	lockdep_assert_held(&rdtgroup_mutex);
98	seq_buf_puts(s: &last_cmd_status, str: s);
99	}
100
101	void rdt_last_cmd_printf(const char *fmt, ...)
102	{
103	va_list ap;
104
105	va_start(ap, fmt);
106	lockdep_assert_held(&rdtgroup_mutex);
107	seq_buf_vprintf(s: &last_cmd_status, fmt, args: ap);
108	va_end(ap);
109	}
110
111	void rdt_staged_configs_clear(void)
112	{
113	struct rdt_ctrl_domain *dom;
114	struct rdt_resource *r;
115
116	lockdep_assert_held(&rdtgroup_mutex);
117
118	for_each_alloc_capable_rdt_resource(r) {
119	list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
120	memset(dom->staged_config, `0`, sizeof(dom->staged_config));
121	}
122	}
123
124	static bool resctrl_is_mbm_enabled(void)
125	{
126	return (resctrl_is_mon_event_enabled(eventid: QOS_L3_MBM_TOTAL_EVENT_ID) \|\|
127	resctrl_is_mon_event_enabled(eventid: QOS_L3_MBM_LOCAL_EVENT_ID));
128	}
129
/*
 * Trivial allocator for CLOSIDs. Use BITMAP APIs to manipulate a bitmap
 * of free CLOSIDs.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set current's closid to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */
static unsigned long *closid_free_map;

/* Number of bits in closid_free_map; set by closid_init(). */
static int closid_free_map_len;
148
149	int closids_supported(void)
150	{
151	return closid_free_map_len;
152	}
153
154	static int closid_init(void)
155	{
156	struct resctrl_schema *s;
157	u32 rdt_min_closid = ~`0`;
158
159	/ Monitor only platforms still call closid_init() /
160	if (list_empty(head: &resctrl_schema_all))
161	return `0`;
162
163	/ Compute rdt_min_closid across all resources /
164	list_for_each_entry(s, &resctrl_schema_all, list)
165	rdt_min_closid = min(rdt_min_closid, s->num_closid);
166
167	closid_free_map = bitmap_alloc(nbits: rdt_min_closid, GFP_KERNEL);
168	if (!closid_free_map)
169	return -ENOMEM;
170	bitmap_fill(dst: closid_free_map, nbits: rdt_min_closid);
171
172	/ RESCTRL_RESERVED_CLOSID is always reserved for the default group /
173	__clear_bit(RESCTRL_RESERVED_CLOSID, closid_free_map);
174	closid_free_map_len = rdt_min_closid;
175
176	return `0`;
177	}
178
179	static void closid_exit(void)
180	{
181	bitmap_free(bitmap: closid_free_map);
182	closid_free_map = NULL;
183	}
184
185	static int closid_alloc(void)
186	{
187	int cleanest_closid;
188	u32 closid;
189
190	lockdep_assert_held(&rdtgroup_mutex);
191
192	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
193	resctrl_is_mon_event_enabled(eventid: QOS_L3_OCCUP_EVENT_ID)) {
194	cleanest_closid = resctrl_find_cleanest_closid();
195	if (cleanest_closid < `0`)
196	return cleanest_closid;
197	closid = cleanest_closid;
198	} else {
199	closid = find_first_bit(addr: closid_free_map, size: closid_free_map_len);
200	if (closid == closid_free_map_len)
201	return -ENOSPC;
202	}
203	__clear_bit(closid, closid_free_map);
204
205	return closid;
206	}
207
208	void closid_free(int closid)
209	{
210	lockdep_assert_held(&rdtgroup_mutex);
211
212	__set_bit(closid, closid_free_map);
213	}
214
215	/**
216	* closid_allocated - test if provided closid is in use
217	* @closid: closid to be tested
218	*
219	* Return: true if @closid is currently associated with a resource group,
220	* false if @closid is free
221	*/
222	bool closid_allocated(unsigned int closid)
223	{
224	lockdep_assert_held(&rdtgroup_mutex);
225
226	return !test_bit(closid, closid_free_map);
227	}
228
229	bool closid_alloc_fixed(u32 closid)
230	{
231	return __test_and_clear_bit(closid, closid_free_map);
232	}
233
234	/**
235	* rdtgroup_mode_by_closid - Return mode of resource group with closid
236	* @closid: closid if the resource group
237	*
238	* Each resource group is associated with a @closid. Here the mode
239	* of a resource group can be queried by searching for it using its closid.
240	*
241	* Return: mode as &enum rdtgrp_mode of resource group with closid @closid
242	*/
243	enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
244	{
245	struct rdtgroup *rdtgrp;
246
247	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
248	if (rdtgrp->closid == closid)
249	return rdtgrp->mode;
250	}
251
252	return RDT_NUM_MODES;
253	}
254
255	static const char * const rdt_mode_str[] = {
256	[RDT_MODE_SHAREABLE] = "shareable",
257	[RDT_MODE_EXCLUSIVE] = "exclusive",
258	[RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
259	[RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
260	};
261
262	/**
263	* rdtgroup_mode_str - Return the string representation of mode
264	* @mode: the resource group mode as &enum rdtgroup_mode
265	*
266	* Return: string representation of valid mode, "unknown" otherwise
267	*/
268	static const char rdtgroup_mode_str(enum* rdtgrp_mode mode)
269	{
270	if (mode < RDT_MODE_SHAREABLE \|\| mode >= RDT_NUM_MODES)
271	return "unknown";
272
273	return rdt_mode_str[mode];
274	}
275
276	/ set uid and gid of rdtgroup dirs and files to that of the creator /
277	static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
278	{
279	struct iattr iattr = { .ia_valid = ATTR_UID \| ATTR_GID,
280	.ia_uid = current_fsuid(),
281	.ia_gid = current_fsgid(), };
282
283	if (uid_eq(left: iattr.ia_uid, GLOBAL_ROOT_UID) &&
284	gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
285	return `0`;
286
287	return kernfs_setattr(kn, &iattr);
288	}
289
290	static int rdtgroup_add_file(struct kernfs_node parent_kn, struct* rftype *rft)
291	{
292	struct kernfs_node *kn;
293	int ret;
294
295	kn = __kernfs_create_file(parent: parent_kn, name: rft->name, mode: rft->mode,
296	GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
297	size: `0`, ops: rft->kf_ops, priv: rft, NULL, NULL);
298	if (IS_ERR(kn))
299	return PTR_ERR(kn);
300
301	ret = rdtgroup_kn_set_ugid(kn);
302	if (ret) {
303	kernfs_remove(kn);
304	return ret;
305	}
306
307	return `0`;
308	}
309
310	static int rdtgroup_seqfile_show(struct seq_file m, void* *arg)
311	{
312	struct kernfs_open_file *of = m->private;
313	struct rftype *rft = of->kn->priv;
314
315	if (rft->seq_show)
316	return rft->seq_show(of, m, arg);
317	return `0`;
318	}
319
320	static ssize_t rdtgroup_file_write(struct kernfs_open_file of, char* *buf,
321	size_t nbytes, loff_t off)
322	{
323	struct rftype *rft = of->kn->priv;
324
325	if (rft->write)
326	return rft->write(of, buf, nbytes, off);
327
328	return -EINVAL;
329	}
330
331	static const struct kernfs_ops rdtgroup_kf_single_ops = {
332	.atomic_write_len = PAGE_SIZE,
333	.write = rdtgroup_file_write,
334	.seq_show = rdtgroup_seqfile_show,
335	};
336
337	static const struct kernfs_ops kf_mondata_ops = {
338	.atomic_write_len = PAGE_SIZE,
339	.seq_show = rdtgroup_mondata_show,
340	};
341
342	static bool is_cpu_list(struct kernfs_open_file *of)
343	{
344	struct rftype *rft = of->kn->priv;
345
346	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
347	}
348
349	static int rdtgroup_cpus_show(struct kernfs_open_file *of,
350	struct seq_file s, void* *v)
351	{
352	struct rdtgroup *rdtgrp;
353	struct cpumask *mask;
354	int ret = `0`;
355
356	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
357
358	if (rdtgrp) {
359	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
360	if (!rdtgrp->plr->d) {
361	rdt_last_cmd_clear();
362	rdt_last_cmd_puts(s: "Cache domain offline\n");
363	ret = -ENODEV;
364	} else {
365	mask = &rdtgrp->plr->d->hdr.cpu_mask;
366	seq_printf(m: s, fmt: is_cpu_list(of) ?
367	"%pbl\n" : "%pb\n",
368	cpumask_pr_args(mask));
369	}
370	} else {
371	seq_printf(m: s, fmt: is_cpu_list(of) ? "%pbl\n" : "%pb\n",
372	cpumask_pr_args(&rdtgrp->cpu_mask));
373	}
374	} else {
375	ret = -ENOENT;
376	}
377	rdtgroup_kn_unlock(kn: of->kn);
378
379	return ret;
380	}
381
382	/*
383	* Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
384	*
385	* Per task closids/rmids must have been set up before calling this function.
386	* @r may be NULL.
387	*/
388	static void
389	update_closid_rmid(const struct cpumask cpu_mask, struct* rdtgroup *r)
390	{
391	struct resctrl_cpu_defaults defaults, *p = NULL;
392
393	if (r) {
394	defaults.closid = r->closid;
395	defaults.rmid = r->mon.rmid;
396	p = &defaults;
397	}
398
399	on_each_cpu_mask(mask: cpu_mask, func: resctrl_arch_sync_cpu_closid_rmid, info: p, wait: `1`);
400	}
401
402	static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
403	cpumask_var_t tmpmask)
404	{
405	struct rdtgroup prgrp = rdtgrp->mon.parent, crgrp;
406	struct list_head *head;
407
408	/ Check whether cpus belong to parent ctrl group /
409	cpumask_andnot(dstp: tmpmask, src1p: newmask, src2p: &prgrp->cpu_mask);
410	if (!cpumask_empty(srcp: tmpmask)) {
411	rdt_last_cmd_puts(s: "Can only add CPUs to mongroup that belong to parent\n");
412	return -EINVAL;
413	}
414
415	/ Check whether cpus are dropped from this group /
416	cpumask_andnot(dstp: tmpmask, src1p: &rdtgrp->cpu_mask, src2p: newmask);
417	if (!cpumask_empty(srcp: tmpmask)) {
418	/ Give any dropped cpus to parent rdtgroup /
419	cpumask_or(dstp: &prgrp->cpu_mask, src1p: &prgrp->cpu_mask, src2p: tmpmask);
420	update_closid_rmid(cpu_mask: tmpmask, r: prgrp);
421	}
422
423	/*
424	* If we added cpus, remove them from previous group that owned them
425	* and update per-cpu rmid
426	*/
427	cpumask_andnot(dstp: tmpmask, src1p: newmask, src2p: &rdtgrp->cpu_mask);
428	if (!cpumask_empty(srcp: tmpmask)) {
429	head = &prgrp->mon.crdtgrp_list;
430	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
431	if (crgrp == rdtgrp)
432	continue;
433	cpumask_andnot(dstp: &crgrp->cpu_mask, src1p: &crgrp->cpu_mask,
434	src2p: tmpmask);
435	}
436	update_closid_rmid(cpu_mask: tmpmask, r: rdtgrp);
437	}
438
439	/ Done pushing/pulling - update this group with new mask /
440	cpumask_copy(dstp: &rdtgrp->cpu_mask, srcp: newmask);
441
442	return `0`;
443	}
444
445	static void cpumask_rdtgrp_clear(struct rdtgroup r, struct* cpumask *m)
446	{
447	struct rdtgroup *crgrp;
448
449	cpumask_andnot(dstp: &r->cpu_mask, src1p: &r->cpu_mask, src2p: m);
450	/ update the child mon group masks as well/
451	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
452	cpumask_and(dstp: &crgrp->cpu_mask, src1p: &r->cpu_mask, src2p: &crgrp->cpu_mask);
453	}
454
455	static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
456	cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
457	{
458	struct rdtgroup r, crgrp;
459	struct list_head *head;
460
461	/ Check whether cpus are dropped from this group /
462	cpumask_andnot(dstp: tmpmask, src1p: &rdtgrp->cpu_mask, src2p: newmask);
463	if (!cpumask_empty(srcp: tmpmask)) {
464	/ Can't drop from default group /
465	if (rdtgrp == &rdtgroup_default) {
466	rdt_last_cmd_puts(s: "Can't drop CPUs from default group\n");
467	return -EINVAL;
468	}
469
470	/ Give any dropped cpus to rdtgroup_default /
471	cpumask_or(dstp: &rdtgroup_default.cpu_mask,
472	src1p: &rdtgroup_default.cpu_mask, src2p: tmpmask);
473	update_closid_rmid(cpu_mask: tmpmask, r: &rdtgroup_default);
474	}
475
476	/*
477	* If we added cpus, remove them from previous group and
478	* the prev group's child groups that owned them
479	* and update per-cpu closid/rmid.
480	*/
481	cpumask_andnot(dstp: tmpmask, src1p: newmask, src2p: &rdtgrp->cpu_mask);
482	if (!cpumask_empty(srcp: tmpmask)) {
483	list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
484	if (r == rdtgrp)
485	continue;
486	cpumask_and(dstp: tmpmask1, src1p: &r->cpu_mask, src2p: tmpmask);
487	if (!cpumask_empty(srcp: tmpmask1))
488	cpumask_rdtgrp_clear(r, m: tmpmask1);
489	}
490	update_closid_rmid(cpu_mask: tmpmask, r: rdtgrp);
491	}
492
493	/ Done pushing/pulling - update this group with new mask /
494	cpumask_copy(dstp: &rdtgrp->cpu_mask, srcp: newmask);
495
496	/*
497	* Clear child mon group masks since there is a new parent mask
498	* now and update the rmid for the cpus the child lost.
499	*/
500	head = &rdtgrp->mon.crdtgrp_list;
501	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
502	cpumask_and(dstp: tmpmask, src1p: &rdtgrp->cpu_mask, src2p: &crgrp->cpu_mask);
503	update_closid_rmid(cpu_mask: tmpmask, r: rdtgrp);
504	cpumask_clear(dstp: &crgrp->cpu_mask);
505	}
506
507	return `0`;
508	}
509
510	static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
511	char *buf, size_t nbytes, loff_t off)
512	{
513	cpumask_var_t tmpmask, newmask, tmpmask1;
514	struct rdtgroup *rdtgrp;
515	int ret;
516
517	if (!buf)
518	return -EINVAL;
519
520	if (!zalloc_cpumask_var(mask: &tmpmask, GFP_KERNEL))
521	return -ENOMEM;
522	if (!zalloc_cpumask_var(mask: &newmask, GFP_KERNEL)) {
523	free_cpumask_var(mask: tmpmask);
524	return -ENOMEM;
525	}
526	if (!zalloc_cpumask_var(mask: &tmpmask1, GFP_KERNEL)) {
527	free_cpumask_var(mask: tmpmask);
528	free_cpumask_var(mask: newmask);
529	return -ENOMEM;
530	}
531
532	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
533	if (!rdtgrp) {
534	ret = -ENOENT;
535	goto unlock;
536	}
537
538	rdt_last_cmd_clear();
539
540	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED \|\|
541	rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
542	ret = -EINVAL;
543	rdt_last_cmd_puts(s: "Pseudo-locking in progress\n");
544	goto unlock;
545	}
546
547	if (is_cpu_list(of))
548	ret = cpulist_parse(buf, dstp: newmask);
549	else
550	ret = cpumask_parse(buf, dstp: newmask);
551
552	if (ret) {
553	rdt_last_cmd_puts(s: "Bad CPU list/mask\n");
554	goto unlock;
555	}
556
557	/ check that user didn't specify any offline cpus /
558	cpumask_andnot(dstp: tmpmask, src1p: newmask, cpu_online_mask);
559	if (!cpumask_empty(srcp: tmpmask)) {
560	ret = -EINVAL;
561	rdt_last_cmd_puts(s: "Can only assign online CPUs\n");
562	goto unlock;
563	}
564
565	if (rdtgrp->type == RDTCTRL_GROUP)
566	ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
567	else if (rdtgrp->type == RDTMON_GROUP)
568	ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
569	else
570	ret = -EINVAL;
571
572	unlock:
573	rdtgroup_kn_unlock(kn: of->kn);
574	free_cpumask_var(mask: tmpmask);
575	free_cpumask_var(mask: newmask);
576	free_cpumask_var(mask: tmpmask1);
577
578	return ret ?: nbytes;
579	}
580
581	/**
582	* rdtgroup_remove - the helper to remove resource group safely
583	* @rdtgrp: resource group to remove
584	*
585	* On resource group creation via a mkdir, an extra kernfs_node reference is
586	* taken to ensure that the rdtgroup structure remains accessible for the
587	* rdtgroup_kn_unlock() calls where it is removed.
588	*
589	* Drop the extra reference here, then free the rdtgroup structure.
590	*
591	* Return: void
592	*/
593	static void rdtgroup_remove(struct rdtgroup *rdtgrp)
594	{
595	kernfs_put(kn: rdtgrp->kn);
596	kfree(objp: rdtgrp);
597	}
598
599	static void _update_task_closid_rmid(void *task)
600	{
601	/*
602	* If the task is still current on this CPU, update PQR_ASSOC MSR.
603	* Otherwise, the MSR is updated when the task is scheduled in.
604	*/
605	if (task == current)
606	resctrl_arch_sched_in(tsk: task);
607	}
608
609	static void update_task_closid_rmid(struct task_struct *t)
610	{
611	if (IS_ENABLED(CONFIG_SMP) && task_curr(p: t))
612	smp_call_function_single(cpuid: task_cpu(p: t), func: _update_task_closid_rmid, info: t, wait: `1`);
613	else
614	_update_task_closid_rmid(task: t);
615	}
616
617	static bool task_in_rdtgroup(struct task_struct tsk, struct* rdtgroup *rdtgrp)
618	{
619	u32 closid, rmid = rdtgrp->mon.rmid;
620
621	if (rdtgrp->type == RDTCTRL_GROUP)
622	closid = rdtgrp->closid;
623	else if (rdtgrp->type == RDTMON_GROUP)
624	closid = rdtgrp->mon.parent->closid;
625	else
626	return false;
627
628	return resctrl_arch_match_closid(tsk, closid) &&
629	resctrl_arch_match_rmid(tsk, ignored: closid, rmid);
630	}
631
632	static int __rdtgroup_move_task(struct task_struct *tsk,
633	struct rdtgroup *rdtgrp)
634	{
635	/ If the task is already in rdtgrp, no need to move the task. /
636	if (task_in_rdtgroup(tsk, rdtgrp))
637	return `0`;
638
639	/*
640	* Set the task's closid/rmid before the PQR_ASSOC MSR can be
641	* updated by them.
642	*
643	* For ctrl_mon groups, move both closid and rmid.
644	* For monitor groups, can move the tasks only from
645	* their parent CTRL group.
646	*/
647	if (rdtgrp->type == RDTMON_GROUP &&
648	!resctrl_arch_match_closid(tsk, closid: rdtgrp->mon.parent->closid)) {
649	rdt_last_cmd_puts(s: "Can't move task to different control group\n");
650	return -EINVAL;
651	}
652
653	if (rdtgrp->type == RDTMON_GROUP)
654	resctrl_arch_set_closid_rmid(tsk, closid: rdtgrp->mon.parent->closid,
655	rmid: rdtgrp->mon.rmid);
656	else
657	resctrl_arch_set_closid_rmid(tsk, closid: rdtgrp->closid,
658	rmid: rdtgrp->mon.rmid);
659
660	/*
661	* Ensure the task's closid and rmid are written before determining if
662	* the task is current that will decide if it will be interrupted.
663	* This pairs with the full barrier between the rq->curr update and
664	* resctrl_arch_sched_in() during context switch.
665	*/
666	smp_mb();
667
668	/*
669	* By now, the task's closid and rmid are set. If the task is current
670	* on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
671	* group go into effect. If the task is not current, the MSR will be
672	* updated when the task is scheduled in.
673	*/
674	update_task_closid_rmid(t: tsk);
675
676	return `0`;
677	}
678
679	static bool is_closid_match(struct task_struct t, struct* rdtgroup *r)
680	{
681	return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
682	resctrl_arch_match_closid(tsk: t, closid: r->closid));
683	}
684
685	static bool is_rmid_match(struct task_struct t, struct* rdtgroup *r)
686	{
687	return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
688	resctrl_arch_match_rmid(tsk: t, ignored: r->mon.parent->closid,
689	rmid: r->mon.rmid));
690	}
691
692	/**
693	* rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
694	* @r: Resource group
695	*
696	* Return: 1 if tasks have been assigned to @r, 0 otherwise
697	*/
698	int rdtgroup_tasks_assigned(struct rdtgroup *r)
699	{
700	struct task_struct p, t;
701	int ret = `0`;
702
703	lockdep_assert_held(&rdtgroup_mutex);
704
705	rcu_read_lock();
706	for_each_process_thread(p, t) {
707	if (is_closid_match(t, r) \|\| is_rmid_match(t, r)) {
708	ret = `1`;
709	break;
710	}
711	}
712	rcu_read_unlock();
713
714	return ret;
715	}
716
717	static int rdtgroup_task_write_permission(struct task_struct *task,
718	struct kernfs_open_file *of)
719	{
720	const struct cred *tcred = get_task_cred(task);
721	const struct cred *cred = current_cred();
722	int ret = `0`;
723
724	/*
725	* Even if we're attaching all tasks in the thread group, we only
726	* need to check permissions on one of them.
727	*/
728	if (!uid_eq(left: cred->euid, GLOBAL_ROOT_UID) &&
729	!uid_eq(cred->euid, tcred->uid) &&
730	!uid_eq(cred->euid, tcred->suid)) {
731	rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
732	ret = -EPERM;
733	}
734
735	put_cred(tcred);
736	return ret;
737	}
738
739	static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
740	struct kernfs_open_file *of)
741	{
742	struct task_struct *tsk;
743	int ret;
744
745	rcu_read_lock();
746	if (pid) {
747	tsk = find_task_by_vpid(nr: pid);
748	if (!tsk) {
749	rcu_read_unlock();
750	rdt_last_cmd_printf(fmt: "No task %d\n", pid);
751	return -ESRCH;
752	}
753	} else {
754	tsk = current;
755	}
756
757	get_task_struct(t: tsk);
758	rcu_read_unlock();
759
760	ret = rdtgroup_task_write_permission(task: tsk, of);
761	if (!ret)
762	ret = __rdtgroup_move_task(tsk, rdtgrp);
763
764	put_task_struct(t: tsk);
765	return ret;
766	}
767
768	static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
769	char *buf, size_t nbytes, loff_t off)
770	{
771	struct rdtgroup *rdtgrp;
772	char *pid_str;
773	int ret = `0`;
774	pid_t pid;
775
776	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
777	if (!rdtgrp) {
778	rdtgroup_kn_unlock(kn: of->kn);
779	return -ENOENT;
780	}
781	rdt_last_cmd_clear();
782
783	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED \|\|
784	rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
785	ret = -EINVAL;
786	rdt_last_cmd_puts(s: "Pseudo-locking in progress\n");
787	goto unlock;
788	}
789
790	while (buf && buf[`0`] != `'\0'` && buf[`0`] != `'\n'`) {
791	pid_str = strim(strsep(&buf, ","));
792
793	if (kstrtoint(s: pid_str, base: `0`, res: &pid)) {
794	rdt_last_cmd_printf(fmt: "Task list parsing error pid %s\n", pid_str);
795	ret = -EINVAL;
796	break;
797	}
798
799	if (pid < `0`) {
800	rdt_last_cmd_printf(fmt: "Invalid pid %d\n", pid);
801	ret = -EINVAL;
802	break;
803	}
804
805	ret = rdtgroup_move_task(pid, rdtgrp, of);
806	if (ret) {
807	rdt_last_cmd_printf(fmt: "Error while processing task %d\n", pid);
808	break;
809	}
810	}
811
812	unlock:
813	rdtgroup_kn_unlock(kn: of->kn);
814
815	return ret ?: nbytes;
816	}
817
818	static void show_rdt_tasks(struct rdtgroup r, struct* seq_file *s)
819	{
820	struct task_struct p, t;
821	pid_t pid;
822
823	rcu_read_lock();
824	for_each_process_thread(p, t) {
825	if (is_closid_match(t, r) \|\| is_rmid_match(t, r)) {
826	pid = task_pid_vnr(tsk: t);
827	if (pid)
828	seq_printf(m: s, fmt: "%d\n", pid);
829	}
830	}
831	rcu_read_unlock();
832	}
833
834	static int rdtgroup_tasks_show(struct kernfs_open_file *of,
835	struct seq_file s, void* *v)
836	{
837	struct rdtgroup *rdtgrp;
838	int ret = `0`;
839
840	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
841	if (rdtgrp)
842	show_rdt_tasks(r: rdtgrp, s);
843	else
844	ret = -ENOENT;
845	rdtgroup_kn_unlock(kn: of->kn);
846
847	return ret;
848	}
849
850	static int rdtgroup_closid_show(struct kernfs_open_file *of,
851	struct seq_file s, void* *v)
852	{
853	struct rdtgroup *rdtgrp;
854	int ret = `0`;
855
856	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
857	if (rdtgrp)
858	seq_printf(m: s, fmt: "%u\n", rdtgrp->closid);
859	else
860	ret = -ENOENT;
861	rdtgroup_kn_unlock(kn: of->kn);
862
863	return ret;
864	}
865
866	static int rdtgroup_rmid_show(struct kernfs_open_file *of,
867	struct seq_file s, void* *v)
868	{
869	struct rdtgroup *rdtgrp;
870	int ret = `0`;
871
872	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
873	if (rdtgrp)
874	seq_printf(m: s, fmt: "%u\n", rdtgrp->mon.rmid);
875	else
876	ret = -ENOENT;
877	rdtgroup_kn_unlock(kn: of->kn);
878
879	return ret;
880	}
881
#ifdef CONFIG_PROC_CPU_RESCTRL
/*
 * A task can only be part of one resctrl control group and of one monitor
 * group which is associated to that control group.
 *
 * 1)   res:
 *      mon:
 *
 *    resctrl is not available.
 *
 * 2)   res:/
 *      mon:
 *
 *    Task is part of the root resctrl control group, and it is not associated
 *    to any monitor group.
 *
 * 3)  res:/
 *     mon:mon0
 *
 *    Task is part of the root resctrl control group and monitor group mon0.
 *
 * 4)  res:group0
 *     mon:
 *
 *    Task is part of resctrl control group group0, and it is not associated
 *    to any monitor group.
 *
 * 5) res:group0
 *    mon:mon1
 *
 *    Task is part of resctrl control group group0 and monitor group mon1.
 *
 * @ns and @pid are not used by this function.
 *
 * Return: 0 on success, -ENOENT if resctrl is mounted but no shareable or
 * exclusive control group matches the closid of @tsk.
 */
int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
		      struct pid *pid, struct task_struct *tsk)
{
	struct rdtgroup *rdtg;
	int ret = 0;

	mutex_lock(&rdtgroup_mutex);

	/* Return empty if resctrl has not been mounted. */
	if (!resctrl_mounted) {
		seq_puts(s, "res:\nmon:\n");
		goto unlock;
	}

	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
		struct rdtgroup *crg;

		/*
		 * Task information is only relevant for shareable
		 * and exclusive groups.
		 */
		if (rdtg->mode != RDT_MODE_SHAREABLE &&
		    rdtg->mode != RDT_MODE_EXCLUSIVE)
			continue;

		if (!resctrl_arch_match_closid(tsk, rdtg->closid))
			continue;

		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
			   rdt_kn_name(rdtg->kn));
		seq_puts(s, "mon:");
		/* Print the first child monitor group the task matches. */
		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
				    mon.crdtgrp_list) {
			if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
						     crg->mon.rmid))
				continue;
			seq_printf(s, "%s", rdt_kn_name(crg->kn));
			break;
		}
		seq_putc(s, '\n');
		goto unlock;
	}
	/*
	 * The above search should succeed. Otherwise return
	 * with an error.
	 */
	ret = -ENOENT;
unlock:
	mutex_unlock(&rdtgroup_mutex);

	return ret;
}
#endif
967
968	static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
969	struct seq_file seq, void* *v)
970	{
971	int len;
972
973	mutex_lock(&rdtgroup_mutex);
974	len = seq_buf_used(s: &last_cmd_status);
975	if (len)
976	seq_printf(m: seq, fmt: "%.*s", len, last_cmd_status_buf);
977	else
978	seq_puts(m: seq, s: "ok\n");
979	mutex_unlock(lock: &rdtgroup_mutex);
980	return `0`;
981	}
982
983	void rdt_kn_parent_priv(struct* kernfs_node *kn)
984	{
985	/*
986	* The parent pointer is only valid within RCU section since it can be
987	* replaced.
988	*/
989	guard(rcu)();
990	return rcu_dereference(kn->__parent)->priv;
991	}
992
993	static int rdt_num_closids_show(struct kernfs_open_file *of,
994	struct seq_file seq, void* *v)
995	{
996	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
997
998	seq_printf(m: seq, fmt: "%u\n", s->num_closid);
999	return `0`;
1000	}
1001
1002	static int rdt_default_ctrl_show(struct kernfs_open_file *of,
1003	struct seq_file seq, void* *v)
1004	{
1005	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1006	struct rdt_resource *r = s->res;
1007
1008	seq_printf(m: seq, fmt: "%x\n", resctrl_get_default_ctrl(r));
1009	return `0`;
1010	}
1011
1012	static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
1013	struct seq_file seq, void* *v)
1014	{
1015	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1016	struct rdt_resource *r = s->res;
1017
1018	seq_printf(m: seq, fmt: "%u\n", r->cache.min_cbm_bits);
1019	return `0`;
1020	}
1021
1022	static int rdt_shareable_bits_show(struct kernfs_open_file *of,
1023	struct seq_file seq, void* *v)
1024	{
1025	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1026	struct rdt_resource *r = s->res;
1027
1028	seq_printf(m: seq, fmt: "%x\n", r->cache.shareable_bits);
1029	return `0`;
1030	}
1031
1032	/*
1033	* rdt_bit_usage_show - Display current usage of resources
1034	*
1035	* A domain is a shared resource that can now be allocated differently. Here
1036	* we display the current regions of the domain as an annotated bitmask.
1037	* For each domain of this resource its allocation bitmask
1038	* is annotated as below to indicate the current usage of the corresponding bit:
1039	* 0 - currently unused
1040	* X - currently available for sharing and used by software and hardware
1041	* H - currently used by hardware only but available for software use
1042	* S - currently used and shareable by software only
1043	* E - currently used exclusively by one resource group
1044	* P - currently pseudo-locked by one resource group
1045	*/
1046	static int rdt_bit_usage_show(struct kernfs_open_file *of,
1047	struct seq_file seq, void* *v)
1048	{
1049	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1050	/*
1051	* Use unsigned long even though only 32 bits are used to ensure
1052	* test_bit() is used safely.
1053	*/
1054	unsigned long sw_shareable = `0`, hw_shareable = `0`;
1055	unsigned long exclusive = `0`, pseudo_locked = `0`;
1056	struct rdt_resource *r = s->res;
1057	struct rdt_ctrl_domain *dom;
1058	int i, hwb, swb, excl, psl;
1059	enum rdtgrp_mode mode;
1060	bool sep = false;
1061	u32 ctrl_val;
1062
1063	cpus_read_lock();
1064	mutex_lock(&rdtgroup_mutex);
1065	list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
1066	if (sep)
1067	seq_putc(m: seq, c: `';'`);
1068	hw_shareable = r->cache.shareable_bits;
1069	sw_shareable = `0`;
1070	exclusive = `0`;
1071	seq_printf(m: seq, fmt: "%d=", dom->hdr.id);
1072	for (i = `0`; i < closids_supported(); i++) {
1073	if (!closid_allocated(closid: i) \|\|
1074	(resctrl_arch_get_io_alloc_enabled(r) &&
1075	i == resctrl_io_alloc_closid(r)))
1076	continue;
1077	ctrl_val = resctrl_arch_get_config(r, d: dom, closid: i,
1078	type: s->conf_type);
1079	mode = rdtgroup_mode_by_closid(closid: i);
1080	switch (mode) {
1081	case RDT_MODE_SHAREABLE:
1082	sw_shareable \|= ctrl_val;
1083	break;
1084	case RDT_MODE_EXCLUSIVE:
1085	exclusive \|= ctrl_val;
1086	break;
1087	case RDT_MODE_PSEUDO_LOCKSETUP:
1088	/*
1089	* RDT_MODE_PSEUDO_LOCKSETUP is possible
1090	* here but not included since the CBM
1091	* associated with this CLOSID in this mode
1092	* is not initialized and no task or cpu can be
1093	* assigned this CLOSID.
1094	*/
1095	break;
1096	case RDT_MODE_PSEUDO_LOCKED:
1097	case RDT_NUM_MODES:
1098	WARN(`1`,
1099	"invalid mode for closid %d\n", i);
1100	break;
1101	}
1102	}
1103
1104	/*
1105	* When the "io_alloc" feature is enabled, a portion of the cache
1106	* is configured for shared use between hardware and software.
1107	* Also, when CDP is enabled the CBMs of CDP_CODE and CDP_DATA
1108	* resources are kept in sync. So, the CBMs for "io_alloc" can
1109	* be accessed through either resource.
1110	*/
1111	if (resctrl_arch_get_io_alloc_enabled(r)) {
1112	ctrl_val = resctrl_arch_get_config(r, d: dom,
1113	closid: resctrl_io_alloc_closid(r),
1114	type: s->conf_type);
1115	hw_shareable \|= ctrl_val;
1116	}
1117
1118	for (i = r->cache.cbm_len - `1`; i >= `0`; i--) {
1119	pseudo_locked = dom->plr ? dom->plr->cbm : `0`;
1120	hwb = test_bit(i, &hw_shareable);
1121	swb = test_bit(i, &sw_shareable);
1122	excl = test_bit(i, &exclusive);
1123	psl = test_bit(i, &pseudo_locked);
1124	if (hwb && swb)
1125	seq_putc(m: seq, c: `'X'`);
1126	else if (hwb && !swb)
1127	seq_putc(m: seq, c: `'H'`);
1128	else if (!hwb && swb)
1129	seq_putc(m: seq, c: `'S'`);
1130	else if (excl)
1131	seq_putc(m: seq, c: `'E'`);
1132	else if (psl)
1133	seq_putc(m: seq, c: `'P'`);
1134	else / Unused bits remain /
1135	seq_putc(m: seq, c: `'0'`);
1136	}
1137	sep = true;
1138	}
1139	seq_putc(m: seq, c: `'\n'`);
1140	mutex_unlock(lock: &rdtgroup_mutex);
1141	cpus_read_unlock();
1142	return `0`;
1143	}
1144
1145	static int rdt_min_bw_show(struct kernfs_open_file *of,
1146	struct seq_file seq, void* *v)
1147	{
1148	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1149	struct rdt_resource *r = s->res;
1150
1151	seq_printf(m: seq, fmt: "%u\n", r->membw.min_bw);
1152	return `0`;
1153	}
1154
1155	static int rdt_num_rmids_show(struct kernfs_open_file *of,
1156	struct seq_file seq, void* *v)
1157	{
1158	struct rdt_resource *r = rdt_kn_parent_priv(kn: of->kn);
1159
1160	seq_printf(m: seq, fmt: "%d\n", r->mon.num_rmid);
1161
1162	return `0`;
1163	}
1164
1165	static int rdt_mon_features_show(struct kernfs_open_file *of,
1166	struct seq_file seq, void* *v)
1167	{
1168	struct rdt_resource *r = rdt_kn_parent_priv(kn: of->kn);
1169	struct mon_evt *mevt;
1170
1171	for_each_mon_event(mevt) {
1172	if (mevt->rid != r->rid \|\| !mevt->enabled)
1173	continue;
1174	seq_printf(m: seq, fmt: "%s\n", mevt->name);
1175	if (mevt->configurable &&
1176	!resctrl_arch_mbm_cntr_assign_enabled(r))
1177	seq_printf(m: seq, fmt: "%s_config\n", mevt->name);
1178	}
1179
1180	return `0`;
1181	}
1182
1183	static int rdt_bw_gran_show(struct kernfs_open_file *of,
1184	struct seq_file seq, void* *v)
1185	{
1186	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1187	struct rdt_resource *r = s->res;
1188
1189	seq_printf(m: seq, fmt: "%u\n", r->membw.bw_gran);
1190	return `0`;
1191	}
1192
1193	static int rdt_delay_linear_show(struct kernfs_open_file *of,
1194	struct seq_file seq, void* *v)
1195	{
1196	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1197	struct rdt_resource *r = s->res;
1198
1199	seq_printf(m: seq, fmt: "%u\n", r->membw.delay_linear);
1200	return `0`;
1201	}
1202
1203	static int max_threshold_occ_show(struct kernfs_open_file *of,
1204	struct seq_file seq, void* *v)
1205	{
1206	seq_printf(m: seq, fmt: "%u\n", resctrl_rmid_realloc_threshold);
1207
1208	return `0`;
1209	}
1210
1211	static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1212	struct seq_file seq, void* *v)
1213	{
1214	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1215	struct rdt_resource *r = s->res;
1216
1217	switch (r->membw.throttle_mode) {
1218	case THREAD_THROTTLE_PER_THREAD:
1219	seq_puts(m: seq, s: "per-thread\n");
1220	return `0`;
1221	case THREAD_THROTTLE_MAX:
1222	seq_puts(m: seq, s: "max\n");
1223	return `0`;
1224	case THREAD_THROTTLE_UNDEFINED:
1225	seq_puts(m: seq, s: "undefined\n");
1226	return `0`;
1227	}
1228
1229	WARN_ON_ONCE(`1`);
1230
1231	return `0`;
1232	}
1233
1234	static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1235	char *buf, size_t nbytes, loff_t off)
1236	{
1237	unsigned int bytes;
1238	int ret;
1239
1240	ret = kstrtouint(s: buf, base: `0`, res: &bytes);
1241	if (ret)
1242	return ret;
1243
1244	if (bytes > resctrl_rmid_realloc_limit)
1245	return -EINVAL;
1246
1247	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(val: bytes);
1248
1249	return nbytes;
1250	}
1251
1252	/*
1253	* rdtgroup_mode_show - Display mode of this resource group
1254	*/
1255	static int rdtgroup_mode_show(struct kernfs_open_file *of,
1256	struct seq_file s, void* *v)
1257	{
1258	struct rdtgroup *rdtgrp;
1259
1260	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
1261	if (!rdtgrp) {
1262	rdtgroup_kn_unlock(kn: of->kn);
1263	return -ENOENT;
1264	}
1265
1266	seq_printf(m: s, fmt: "%s\n", rdtgroup_mode_str(mode: rdtgrp->mode));
1267
1268	rdtgroup_kn_unlock(kn: of->kn);
1269	return `0`;
1270	}
1271
1272	enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
1273	{
1274	switch (my_type) {
1275	case CDP_CODE:
1276	return CDP_DATA;
1277	case CDP_DATA:
1278	return CDP_CODE;
1279	default:
1280	case CDP_NONE:
1281	return CDP_NONE;
1282	}
1283	}
1284
1285	static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
1286	struct seq_file seq, void* *v)
1287	{
1288	struct resctrl_schema *s = rdt_kn_parent_priv(kn: of->kn);
1289	struct rdt_resource *r = s->res;
1290
1291	seq_printf(m: seq, fmt: "%u\n", r->cache.arch_has_sparse_bitmasks);
1292
1293	return `0`;
1294	}
1295
1296	/**
1297	* __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1298	* @r: Resource to which domain instance @d belongs.
1299	* @d: The domain instance for which @closid is being tested.
1300	* @cbm: Capacity bitmask being tested.
1301	* @closid: Intended closid for @cbm.
1302	* @type: CDP type of @r.
1303	* @exclusive: Only check if overlaps with exclusive resource groups
1304	*
1305	* Checks if provided @cbm intended to be used for @closid on domain
1306	* @d overlaps with any other closids or other hardware usage associated
1307	* with this domain. If @exclusive is true then only overlaps with
1308	* resource groups in exclusive mode will be considered. If @exclusive
1309	* is false then overlaps with any resource group or hardware entities
1310	* will be considered.
1311	*
1312	* @cbm is unsigned long, even if only 32 bits are used, to make the
1313	* bitmap functions work correctly.
1314	*
1315	* Return: false if CBM does not overlap, true if it does.
1316	*/
1317	static bool __rdtgroup_cbm_overlaps(struct rdt_resource r, struct* rdt_ctrl_domain *d,
1318	unsigned long cbm, int closid,
1319	enum resctrl_conf_type type, bool exclusive)
1320	{
1321	enum rdtgrp_mode mode;
1322	unsigned long ctrl_b;
1323	int i;
1324
1325	/ Check for any overlap with regions used by hardware directly /
1326	if (!exclusive) {
1327	ctrl_b = r->cache.shareable_bits;
1328	if (bitmap_intersects(src1: &cbm, src2: &ctrl_b, nbits: r->cache.cbm_len))
1329	return true;
1330	}
1331
1332	/ Check for overlap with other resource groups /
1333	for (i = `0`; i < closids_supported(); i++) {
1334	ctrl_b = resctrl_arch_get_config(r, d, closid: i, type);
1335	mode = rdtgroup_mode_by_closid(closid: i);
1336	if (closid_allocated(closid: i) && i != closid &&
1337	mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1338	if (bitmap_intersects(src1: &cbm, src2: &ctrl_b, nbits: r->cache.cbm_len)) {
1339	if (exclusive) {
1340	if (mode == RDT_MODE_EXCLUSIVE)
1341	return true;
1342	continue;
1343	}
1344	return true;
1345	}
1346	}
1347	}
1348
1349	return false;
1350	}
1351
1352	/**
1353	* rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1354	* @s: Schema for the resource to which domain instance @d belongs.
1355	* @d: The domain instance for which @closid is being tested.
1356	* @cbm: Capacity bitmask being tested.
1357	* @closid: Intended closid for @cbm.
1358	* @exclusive: Only check if overlaps with exclusive resource groups
1359	*
1360	* Resources that can be allocated using a CBM can use the CBM to control
1361	* the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
1362	* for overlap. Overlap test is not limited to the specific resource for
1363	* which the CBM is intended though - when dealing with CDP resources that
1364	* share the underlying hardware the overlap check should be performed on
1365	* the CDP resource sharing the hardware also.
1366	*
1367	* Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1368	* overlap test.
1369	*
1370	* Return: true if CBM overlap detected, false if there is no overlap
1371	*/
1372	bool rdtgroup_cbm_overlaps(struct resctrl_schema s, struct* rdt_ctrl_domain *d,
1373	unsigned long cbm, int closid, bool exclusive)
1374	{
1375	enum resctrl_conf_type peer_type = resctrl_peer_type(my_type: s->conf_type);
1376	struct rdt_resource *r = s->res;
1377
1378	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, type: s->conf_type,
1379	exclusive))
1380	return true;
1381
1382	if (!resctrl_arch_get_cdp_enabled(l: r->rid))
1383	return false;
1384	return __rdtgroup_cbm_overlaps(r, d, cbm, closid, type: peer_type, exclusive);
1385	}
1386
1387	/**
1388	* rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1389	* @rdtgrp: Resource group identified through its closid.
1390	*
1391	* An exclusive resource group implies that there should be no sharing of
1392	* its allocated resources. At the time this group is considered to be
1393	* exclusive this test can determine if its current schemata supports this
1394	* setting by testing for overlap with all other resource groups.
1395	*
1396	* Return: true if resource group can be exclusive, false if there is overlap
1397	* with allocations of other resource groups and thus this resource group
1398	* cannot be exclusive.
1399	*/
1400	static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1401	{
1402	int closid = rdtgrp->closid;
1403	struct rdt_ctrl_domain *d;
1404	struct resctrl_schema *s;
1405	struct rdt_resource *r;
1406	bool has_cache = false;
1407	u32 ctrl;
1408
1409	/ Walking r->domains, ensure it can't race with cpuhp /
1410	lockdep_assert_cpus_held();
1411
1412	list_for_each_entry(s, &resctrl_schema_all, list) {
1413	r = s->res;
1414	if (r->rid == RDT_RESOURCE_MBA \|\| r->rid == RDT_RESOURCE_SMBA)
1415	continue;
1416	has_cache = true;
1417	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1418	ctrl = resctrl_arch_get_config(r, d, closid,
1419	type: s->conf_type);
1420	if (rdtgroup_cbm_overlaps(s, d, cbm: ctrl, closid, exclusive: false)) {
1421	rdt_last_cmd_puts(s: "Schemata overlaps\n");
1422	return false;
1423	}
1424	}
1425	}
1426
1427	if (!has_cache) {
1428	rdt_last_cmd_puts(s: "Cannot be exclusive without CAT/CDP\n");
1429	return false;
1430	}
1431
1432	return true;
1433	}
1434
1435	/*
1436	* rdtgroup_mode_write - Modify the resource group's mode
1437	*/
1438	static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1439	char *buf, size_t nbytes, loff_t off)
1440	{
1441	struct rdtgroup *rdtgrp;
1442	enum rdtgrp_mode mode;
1443	int ret = `0`;
1444
1445	/ Valid input requires a trailing newline /
1446	if (nbytes == `0` \|\| buf[nbytes - `1`] != `'\n'`)
1447	return -EINVAL;
1448	buf[nbytes - `1`] = `'\0'`;
1449
1450	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
1451	if (!rdtgrp) {
1452	rdtgroup_kn_unlock(kn: of->kn);
1453	return -ENOENT;
1454	}
1455
1456	rdt_last_cmd_clear();
1457
1458	mode = rdtgrp->mode;
1459
1460	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) \|\|
1461	(!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) \|\|
1462	(!strcmp(buf, "pseudo-locksetup") &&
1463	mode == RDT_MODE_PSEUDO_LOCKSETUP) \|\|
1464	(!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1465	goto out;
1466
1467	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1468	rdt_last_cmd_puts(s: "Cannot change pseudo-locked group\n");
1469	ret = -EINVAL;
1470	goto out;
1471	}
1472
1473	if (!strcmp(buf, "shareable")) {
1474	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1475	ret = rdtgroup_locksetup_exit(rdtgrp);
1476	if (ret)
1477	goto out;
1478	}
1479	rdtgrp->mode = RDT_MODE_SHAREABLE;
1480	} else if (!strcmp(buf, "exclusive")) {
1481	if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1482	ret = -EINVAL;
1483	goto out;
1484	}
1485	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1486	ret = rdtgroup_locksetup_exit(rdtgrp);
1487	if (ret)
1488	goto out;
1489	}
1490	rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1491	} else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
1492	!strcmp(buf, "pseudo-locksetup")) {
1493	ret = rdtgroup_locksetup_enter(rdtgrp);
1494	if (ret)
1495	goto out;
1496	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1497	} else {
1498	rdt_last_cmd_puts(s: "Unknown or unsupported mode\n");
1499	ret = -EINVAL;
1500	}
1501
1502	out:
1503	rdtgroup_kn_unlock(kn: of->kn);
1504	return ret ?: nbytes;
1505	}
1506
1507	/**
1508	* rdtgroup_cbm_to_size - Translate CBM to size in bytes
1509	* @r: RDT resource to which @d belongs.
1510	* @d: RDT domain instance.
1511	* @cbm: bitmask for which the size should be computed.
1512	*
1513	* The bitmask provided associated with the RDT domain instance @d will be
1514	* translated into how many bytes it represents. The size in bytes is
1515	* computed by first dividing the total cache size by the CBM length to
1516	* determine how many bytes each bit in the bitmask represents. The result
1517	* is multiplied with the number of bits set in the bitmask.
1518	*
1519	* @cbm is unsigned long, even if only 32 bits are used to make the
1520	* bitmap functions work correctly.
1521	*/
1522	unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1523	struct rdt_ctrl_domain d, unsigned* long cbm)
1524	{
1525	unsigned int size = `0`;
1526	struct cacheinfo *ci;
1527	int num_b;
1528
1529	if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
1530	return size;
1531
1532	num_b = bitmap_weight(src: &cbm, nbits: r->cache.cbm_len);
1533	ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), level: r->ctrl_scope);
1534	if (ci)
1535	size = ci->size / r->cache.cbm_len * num_b;
1536
1537	return size;
1538	}
1539
1540	bool is_mba_sc(struct rdt_resource *r)
1541	{
1542	if (!r)
1543	r = resctrl_arch_get_resource(l: RDT_RESOURCE_MBA);
1544
1545	/*
1546	* The software controller support is only applicable to MBA resource.
1547	* Make sure to check for resource type.
1548	*/
1549	if (r->rid != RDT_RESOURCE_MBA)
1550	return false;
1551
1552	return r->membw.mba_sc;
1553	}
1554
1555	/*
1556	* rdtgroup_size_show - Display size in bytes of allocated regions
1557	*
1558	* The "size" file mirrors the layout of the "schemata" file, printing the
1559	* size in bytes of each region instead of the capacity bitmask.
1560	*/
1561	static int rdtgroup_size_show(struct kernfs_open_file *of,
1562	struct seq_file s, void* *v)
1563	{
1564	struct resctrl_schema *schema;
1565	enum resctrl_conf_type type;
1566	struct rdt_ctrl_domain *d;
1567	struct rdtgroup *rdtgrp;
1568	struct rdt_resource *r;
1569	unsigned int size;
1570	int ret = `0`;
1571	u32 closid;
1572	bool sep;
1573	u32 ctrl;
1574
1575	rdtgrp = rdtgroup_kn_lock_live(kn: of->kn);
1576	if (!rdtgrp) {
1577	rdtgroup_kn_unlock(kn: of->kn);
1578	return -ENOENT;
1579	}
1580
1581	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1582	if (!rdtgrp->plr->d) {
1583	rdt_last_cmd_clear();
1584	rdt_last_cmd_puts(s: "Cache domain offline\n");
1585	ret = -ENODEV;
1586	} else {
1587	seq_printf(m: s, fmt: "%*s:", max_name_width,
1588	rdtgrp->plr->s->name);
1589	size = rdtgroup_cbm_to_size(r: rdtgrp->plr->s->res,
1590	d: rdtgrp->plr->d,
1591	cbm: rdtgrp->plr->cbm);
1592	seq_printf(m: s, fmt: "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
1593	}
1594	goto out;
1595	}
1596
1597	closid = rdtgrp->closid;
1598
1599	list_for_each_entry(schema, &resctrl_schema_all, list) {
1600	r = schema->res;
1601	type = schema->conf_type;
1602	sep = false;
1603	seq_printf(m: s, fmt: "%*s:", max_name_width, schema->name);
1604	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
1605	if (sep)
1606	seq_putc(m: s, c: `';'`);
1607	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1608	size = `0`;
1609	} else {
1610	if (is_mba_sc(r))
1611	ctrl = d->mbps_val[closid];
1612	else
1613	ctrl = resctrl_arch_get_config(r, d,
1614	closid,
1615	type);
1616	if (r->rid == RDT_RESOURCE_MBA \|\|
1617	r->rid == RDT_RESOURCE_SMBA)
1618	size = ctrl;
1619	else
1620	size = rdtgroup_cbm_to_size(r, d, cbm: ctrl);
1621	}
1622	seq_printf(m: s, fmt: "%d=%u", d->hdr.id, size);
1623	sep = true;
1624	}
1625	seq_putc(m: s, c: `'\n'`);
1626	}
1627
1628	out:
1629	rdtgroup_kn_unlock(kn: of->kn);
1630
1631	return ret;
1632	}
1633
1634	static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
1635	{
1636	smp_call_function_any(mask: &mon_info->d->hdr.cpu_mask,
1637	func: resctrl_arch_mon_event_config_read, info: mon_info, wait: `1`);
1638	}
1639
1640	static int mbm_config_show(struct seq_file s, struct* rdt_resource *r, u32 evtid)
1641	{
1642	struct resctrl_mon_config_info mon_info;
1643	struct rdt_mon_domain *dom;
1644	bool sep = false;
1645
1646	cpus_read_lock();
1647	mutex_lock(&rdtgroup_mutex);
1648
1649	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
1650	if (sep)
1651	seq_puts(m: s, s: ";");
1652
1653	memset(&mon_info, `0`, sizeof(struct resctrl_mon_config_info));
1654	mon_info.r = r;
1655	mon_info.d = dom;
1656	mon_info.evtid = evtid;
1657	mondata_config_read(mon_info: &mon_info);
1658
1659	seq_printf(m: s, fmt: "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
1660	sep = true;
1661	}
1662	seq_puts(m: s, s: "\n");
1663
1664	mutex_unlock(lock: &rdtgroup_mutex);
1665	cpus_read_unlock();
1666
1667	return `0`;
1668	}
1669
1670	static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
1671	struct seq_file seq, void* *v)
1672	{
1673	struct rdt_resource *r = rdt_kn_parent_priv(kn: of->kn);
1674
1675	mbm_config_show(s: seq, r, evtid: QOS_L3_MBM_TOTAL_EVENT_ID);
1676
1677	return `0`;
1678	}
1679
1680	static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
1681	struct seq_file seq, void* *v)
1682	{
1683	struct rdt_resource *r = rdt_kn_parent_priv(kn: of->kn);
1684
1685	mbm_config_show(s: seq, r, evtid: QOS_L3_MBM_LOCAL_EVENT_ID);
1686
1687	return `0`;
1688	}
1689
1690	static void mbm_config_write_domain(struct rdt_resource *r,
1691	struct rdt_mon_domain *d, u32 evtid, u32 val)
1692	{
1693	struct resctrl_mon_config_info mon_info = {`0`};
1694
1695	/*
1696	* Read the current config value first. If both are the same then
1697	* no need to write it again.
1698	*/
1699	mon_info.r = r;
1700	mon_info.d = d;
1701	mon_info.evtid = evtid;
1702	mondata_config_read(mon_info: &mon_info);
1703	if (mon_info.mon_config == val)
1704	return;
1705
1706	mon_info.mon_config = val;
1707
1708	/*
1709	* Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
1710	* domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE
1711	* are scoped at the domain level. Writing any of these MSRs
1712	* on one CPU is observed by all the CPUs in the domain.
1713	*/
1714	smp_call_function_any(mask: &d->hdr.cpu_mask, func: resctrl_arch_mon_event_config_write,
1715	info: &mon_info, wait: `1`);
1716
1717	/*
1718	* When an Event Configuration is changed, the bandwidth counters
1719	* for all RMIDs and Events will be cleared by the hardware. The
1720	* hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
1721	* every RMID on the next read to any event for every RMID.
1722	* Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
1723	* cleared while it is tracked by the hardware. Clear the
1724	* mbm_local and mbm_total counts for all the RMIDs.
1725	*/
1726	resctrl_arch_reset_rmid_all(r, d);
1727	}
1728
1729	static int mon_config_write(struct rdt_resource r, char* *tok, u32 evtid)
1730	{
1731	char dom_str = NULL, id_str;
1732	unsigned long dom_id, val;
1733	struct rdt_mon_domain *d;
1734
1735	/ Walking r->domains, ensure it can't race with cpuhp /
1736	lockdep_assert_cpus_held();
1737
1738	next:
1739	if (!tok \|\| tok[`0`] == `'\0'`)
1740	return `0`;
1741
1742	/ Start processing the strings for each domain /
1743	dom_str = strim(strsep(&tok, ";"));
1744	id_str = strsep(&dom_str, "=");
1745
1746	if (!id_str \|\| kstrtoul(s: id_str, base: `10`, res: &dom_id)) {
1747	rdt_last_cmd_puts(s: "Missing '=' or non-numeric domain id\n");
1748	return -EINVAL;
1749	}
1750
1751	if (!dom_str \|\| kstrtoul(s: dom_str, base: `16`, res: &val)) {
1752	rdt_last_cmd_puts(s: "Non-numeric event configuration value\n");
1753	return -EINVAL;
1754	}
1755
1756	/ Value from user cannot be more than the supported set of events /
1757	if ((val & r->mon.mbm_cfg_mask) != val) {
1758	rdt_last_cmd_printf(fmt: "Invalid event configuration: max valid mask is 0x%02x\n",
1759	r->mon.mbm_cfg_mask);
1760	return -EINVAL;
1761	}
1762
1763	list_for_each_entry(d, &r->mon_domains, hdr.list) {
1764	if (d->hdr.id == dom_id) {
1765	mbm_config_write_domain(r, d, evtid, val);
1766	goto next;
1767	}
1768	}
1769
1770	return -EINVAL;
1771	}
1772
1773	static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
1774	char *buf, size_t nbytes,
1775	loff_t off)
1776	{
1777	struct rdt_resource *r = rdt_kn_parent_priv(kn: of->kn);
1778	int ret;
1779
1780	/ Valid input requires a trailing newline /
1781	if (nbytes == `0` \|\| buf[nbytes - `1`] != `'\n'`)
1782	return -EINVAL;
1783
1784	cpus_read_lock();
1785	mutex_lock(&rdtgroup_mutex);
1786
1787	rdt_last_cmd_clear();
1788
1789	buf[nbytes - `1`] = `'\0'`;
1790
1791	ret = mon_config_write(r, tok: buf, evtid: QOS_L3_MBM_TOTAL_EVENT_ID);
1792
1793	mutex_unlock(lock: &rdtgroup_mutex);
1794	cpus_read_unlock();
1795
1796	return ret ?: nbytes;
1797	}
1798
1799	static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
1800	char *buf, size_t nbytes,
1801	loff_t off)
1802	{
1803	struct rdt_resource *r = rdt_kn_parent_priv(kn: of->kn);
1804	int ret;
1805
1806	/ Valid input requires a trailing newline /
1807	if (nbytes == `0` \|\| buf[nbytes - `1`] != `'\n'`)
1808	return -EINVAL;
1809
1810	cpus_read_lock();
1811	mutex_lock(&rdtgroup_mutex);
1812
1813	rdt_last_cmd_clear();
1814
1815	buf[nbytes - `1`] = `'\0'`;
1816
1817	ret = mon_config_write(r, tok: buf, evtid: QOS_L3_MBM_LOCAL_EVENT_ID);
1818
1819	mutex_unlock(lock: &rdtgroup_mutex);
1820	cpus_read_unlock();
1821
1822	return ret ?: nbytes;
1823	}
1824
1825	/*
1826	* resctrl_bmec_files_show() — Controls the visibility of BMEC-related resctrl
1827	* files. When @show is true, the files are displayed; when false, the files
1828	* are hidden.
1829	* Don't treat kernfs_find_and_get failure as an error, since this function may
1830	* be called regardless of whether BMEC is supported or the event is enabled.
1831	*/
1832	void resctrl_bmec_files_show(struct rdt_resource r, struct* kernfs_node *l3_mon_kn,
1833	bool show)
1834	{
1835	struct kernfs_node kn_config, mon_kn = NULL;
1836	char name[`32`];
1837
1838	if (!l3_mon_kn) {
1839	sprintf(buf: name, fmt: "%s_MON", r->name);
1840	mon_kn = kernfs_find_and_get(kn: kn_info, name);
1841	if (!mon_kn)
1842	return;
1843	l3_mon_kn = mon_kn;
1844	}
1845
1846	kn_config = kernfs_find_and_get(kn: l3_mon_kn, name: "mbm_total_bytes_config");
1847	if (kn_config) {
1848	kernfs_show(kn: kn_config, show);
1849	kernfs_put(kn: kn_config);
1850	}
1851
1852	kn_config = kernfs_find_and_get(kn: l3_mon_kn, name: "mbm_local_bytes_config");
1853	if (kn_config) {
1854	kernfs_show(kn: kn_config, show);
1855	kernfs_put(kn: kn_config);
1856	}
1857
1858	/ Release the reference only if it was acquired /
1859	if (mon_kn)
1860	kernfs_put(kn: mon_kn);
1861	}
1862
1863	const char *rdtgroup_name_by_closid(u32 closid)
1864	{
1865	struct rdtgroup *rdtgrp;
1866
1867	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
1868	if (rdtgrp->closid == closid)
1869	return rdt_kn_name(kn: rdtgrp->kn);
1870	}
1871
1872	return NULL;
1873	}
1874
1875	/ rdtgroup information files for one cache resource. /
1876	static struct rftype res_common_files[] = {
1877	{
1878	.name = "last_cmd_status",
1879	.mode = `0444`,
1880	.kf_ops = &rdtgroup_kf_single_ops,
1881	.seq_show = rdt_last_cmd_status_show,
1882	.fflags = RFTYPE_TOP_INFO,
1883	},
1884	{
1885	.name = "mbm_assign_on_mkdir",
1886	.mode = `0644`,
1887	.kf_ops = &rdtgroup_kf_single_ops,
1888	.seq_show = resctrl_mbm_assign_on_mkdir_show,
1889	.write = resctrl_mbm_assign_on_mkdir_write,
1890	},
1891	{
1892	.name = "num_closids",
1893	.mode = `0444`,
1894	.kf_ops = &rdtgroup_kf_single_ops,
1895	.seq_show = rdt_num_closids_show,
1896	.fflags = RFTYPE_CTRL_INFO,
1897	},
1898	{
1899	.name = "mon_features",
1900	.mode = `0444`,
1901	.kf_ops = &rdtgroup_kf_single_ops,
1902	.seq_show = rdt_mon_features_show,
1903	.fflags = RFTYPE_MON_INFO,
1904	},
1905	{
1906	.name = "available_mbm_cntrs",
1907	.mode = `0444`,
1908	.kf_ops = &rdtgroup_kf_single_ops,
1909	.seq_show = resctrl_available_mbm_cntrs_show,
1910	},
1911	{
1912	.name = "num_rmids",
1913	.mode = `0444`,
1914	.kf_ops = &rdtgroup_kf_single_ops,
1915	.seq_show = rdt_num_rmids_show,
1916	.fflags = RFTYPE_MON_INFO,
1917	},
1918	{
1919	.name = "cbm_mask",
1920	.mode = `0444`,
1921	.kf_ops = &rdtgroup_kf_single_ops,
1922	.seq_show = rdt_default_ctrl_show,
1923	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_CACHE,
1924	},
1925	{
1926	.name = "num_mbm_cntrs",
1927	.mode = `0444`,
1928	.kf_ops = &rdtgroup_kf_single_ops,
1929	.seq_show = resctrl_num_mbm_cntrs_show,
1930	},
1931	{
1932	.name = "min_cbm_bits",
1933	.mode = `0444`,
1934	.kf_ops = &rdtgroup_kf_single_ops,
1935	.seq_show = rdt_min_cbm_bits_show,
1936	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_CACHE,
1937	},
1938	{
1939	.name = "shareable_bits",
1940	.mode = `0444`,
1941	.kf_ops = &rdtgroup_kf_single_ops,
1942	.seq_show = rdt_shareable_bits_show,
1943	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_CACHE,
1944	},
1945	{
1946	.name = "bit_usage",
1947	.mode = `0444`,
1948	.kf_ops = &rdtgroup_kf_single_ops,
1949	.seq_show = rdt_bit_usage_show,
1950	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_CACHE,
1951	},
1952	{
1953	.name = "min_bandwidth",
1954	.mode = `0444`,
1955	.kf_ops = &rdtgroup_kf_single_ops,
1956	.seq_show = rdt_min_bw_show,
1957	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_MB,
1958	},
1959	{
1960	.name = "bandwidth_gran",
1961	.mode = `0444`,
1962	.kf_ops = &rdtgroup_kf_single_ops,
1963	.seq_show = rdt_bw_gran_show,
1964	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_MB,
1965	},
1966	{
1967	.name = "delay_linear",
1968	.mode = `0444`,
1969	.kf_ops = &rdtgroup_kf_single_ops,
1970	.seq_show = rdt_delay_linear_show,
1971	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_MB,
1972	},
1973	/*
1974	* Platform specific which (if any) capabilities are provided by
1975	* thread_throttle_mode. Defer "fflags" initialization to platform
1976	* discovery.
1977	*/
1978	{
1979	.name = "thread_throttle_mode",
1980	.mode = `0444`,
1981	.kf_ops = &rdtgroup_kf_single_ops,
1982	.seq_show = rdt_thread_throttle_mode_show,
1983	},
1984	{
1985	.name = "io_alloc",
1986	.mode = `0644`,
1987	.kf_ops = &rdtgroup_kf_single_ops,
1988	.seq_show = resctrl_io_alloc_show,
1989	.write = resctrl_io_alloc_write,
1990	},
1991	{
1992	.name = "io_alloc_cbm",
1993	.mode = `0644`,
1994	.kf_ops = &rdtgroup_kf_single_ops,
1995	.seq_show = resctrl_io_alloc_cbm_show,
1996	.write = resctrl_io_alloc_cbm_write,
1997	},
1998	{
1999	.name = "max_threshold_occupancy",
2000	.mode = `0644`,
2001	.kf_ops = &rdtgroup_kf_single_ops,
2002	.write = max_threshold_occ_write,
2003	.seq_show = max_threshold_occ_show,
2004	.fflags = RFTYPE_MON_INFO \| RFTYPE_RES_CACHE,
2005	},
2006	{
2007	.name = "mbm_total_bytes_config",
2008	.mode = `0644`,
2009	.kf_ops = &rdtgroup_kf_single_ops,
2010	.seq_show = mbm_total_bytes_config_show,
2011	.write = mbm_total_bytes_config_write,
2012	},
2013	{
2014	.name = "mbm_local_bytes_config",
2015	.mode = `0644`,
2016	.kf_ops = &rdtgroup_kf_single_ops,
2017	.seq_show = mbm_local_bytes_config_show,
2018	.write = mbm_local_bytes_config_write,
2019	},
2020	{
2021	.name = "event_filter",
2022	.mode = `0644`,
2023	.kf_ops = &rdtgroup_kf_single_ops,
2024	.seq_show = event_filter_show,
2025	.write = event_filter_write,
2026	},
2027	{
2028	.name = "mbm_L3_assignments",
2029	.mode = `0644`,
2030	.kf_ops = &rdtgroup_kf_single_ops,
2031	.seq_show = mbm_L3_assignments_show,
2032	.write = mbm_L3_assignments_write,
2033	},
2034	{
2035	.name = "mbm_assign_mode",
2036	.mode = `0644`,
2037	.kf_ops = &rdtgroup_kf_single_ops,
2038	.seq_show = resctrl_mbm_assign_mode_show,
2039	.write = resctrl_mbm_assign_mode_write,
2040	.fflags = RFTYPE_MON_INFO \| RFTYPE_RES_CACHE,
2041	},
2042	{
2043	.name = "cpus",
2044	.mode = `0644`,
2045	.kf_ops = &rdtgroup_kf_single_ops,
2046	.write = rdtgroup_cpus_write,
2047	.seq_show = rdtgroup_cpus_show,
2048	.fflags = RFTYPE_BASE,
2049	},
2050	{
2051	.name = "cpus_list",
2052	.mode = `0644`,
2053	.kf_ops = &rdtgroup_kf_single_ops,
2054	.write = rdtgroup_cpus_write,
2055	.seq_show = rdtgroup_cpus_show,
2056	.flags = RFTYPE_FLAGS_CPUS_LIST,
2057	.fflags = RFTYPE_BASE,
2058	},
2059	{
2060	.name = "tasks",
2061	.mode = `0644`,
2062	.kf_ops = &rdtgroup_kf_single_ops,
2063	.write = rdtgroup_tasks_write,
2064	.seq_show = rdtgroup_tasks_show,
2065	.fflags = RFTYPE_BASE,
2066	},
2067	{
2068	.name = "mon_hw_id",
2069	.mode = `0444`,
2070	.kf_ops = &rdtgroup_kf_single_ops,
2071	.seq_show = rdtgroup_rmid_show,
2072	.fflags = RFTYPE_MON_BASE \| RFTYPE_DEBUG,
2073	},
2074	{
2075	.name = "schemata",
2076	.mode = `0644`,
2077	.kf_ops = &rdtgroup_kf_single_ops,
2078	.write = rdtgroup_schemata_write,
2079	.seq_show = rdtgroup_schemata_show,
2080	.fflags = RFTYPE_CTRL_BASE,
2081	},
2082	{
2083	.name = "mba_MBps_event",
2084	.mode = `0644`,
2085	.kf_ops = &rdtgroup_kf_single_ops,
2086	.write = rdtgroup_mba_mbps_event_write,
2087	.seq_show = rdtgroup_mba_mbps_event_show,
2088	},
2089	{
2090	.name = "mode",
2091	.mode = `0644`,
2092	.kf_ops = &rdtgroup_kf_single_ops,
2093	.write = rdtgroup_mode_write,
2094	.seq_show = rdtgroup_mode_show,
2095	.fflags = RFTYPE_CTRL_BASE,
2096	},
2097	{
2098	.name = "size",
2099	.mode = `0444`,
2100	.kf_ops = &rdtgroup_kf_single_ops,
2101	.seq_show = rdtgroup_size_show,
2102	.fflags = RFTYPE_CTRL_BASE,
2103	},
2104	{
2105	.name = "sparse_masks",
2106	.mode = `0444`,
2107	.kf_ops = &rdtgroup_kf_single_ops,
2108	.seq_show = rdt_has_sparse_bitmasks_show,
2109	.fflags = RFTYPE_CTRL_INFO \| RFTYPE_RES_CACHE,
2110	},
2111	{
2112	.name = "ctrl_hw_id",
2113	.mode = `0444`,
2114	.kf_ops = &rdtgroup_kf_single_ops,
2115	.seq_show = rdtgroup_closid_show,
2116	.fflags = RFTYPE_CTRL_BASE \| RFTYPE_DEBUG,
2117	},
2118	};
2119
2120	static int rdtgroup_add_files(struct kernfs_node kn, unsigned* long fflags)
2121	{
2122	struct rftype rfts, rft;
2123	int ret, len;
2124
2125	rfts = res_common_files;
2126	len = ARRAY_SIZE(res_common_files);
2127
2128	lockdep_assert_held(&rdtgroup_mutex);
2129
2130	if (resctrl_debug)
2131	fflags \|= RFTYPE_DEBUG;
2132
2133	for (rft = rfts; rft < rfts + len; rft++) {
2134	if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
2135	ret = rdtgroup_add_file(parent_kn: kn, rft);
2136	if (ret)
2137	goto error;
2138	}
2139	}
2140
2141	return `0`;
2142	error:
2143	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
2144	while (--rft >= rfts) {
2145	if ((fflags & rft->fflags) == rft->fflags)
2146	kernfs_remove_by_name(parent: kn, name: rft->name);
2147	}
2148	return ret;
2149	}
2150
2151	static struct rftype rdtgroup_get_rftype_by_name(const* char *name)
2152	{
2153	struct rftype rfts, rft;
2154	int len;
2155
2156	rfts = res_common_files;
2157	len = ARRAY_SIZE(res_common_files);
2158
2159	for (rft = rfts; rft < rfts + len; rft++) {
2160	if (!strcmp(rft->name, name))
2161	return rft;
2162	}
2163
2164	return NULL;
2165	}
2166
2167	static void thread_throttle_mode_init(void)
2168	{
2169	enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
2170	struct rdt_resource r_mba, r_smba;
2171
2172	r_mba = resctrl_arch_get_resource(l: RDT_RESOURCE_MBA);
2173	if (r_mba->alloc_capable &&
2174	r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
2175	throttle_mode = r_mba->membw.throttle_mode;
2176
2177	r_smba = resctrl_arch_get_resource(l: RDT_RESOURCE_SMBA);
2178	if (r_smba->alloc_capable &&
2179	r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
2180	throttle_mode = r_smba->membw.throttle_mode;
2181
2182	if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
2183	return;
2184
2185	resctrl_file_fflags_init(config: "thread_throttle_mode",
2186	RFTYPE_CTRL_INFO \| RFTYPE_RES_MB);
2187	}
2188
2189	/*
2190	* The resctrl file "io_alloc" is added using L3 resource. However, it results
2191	* in this file being visible for all cache resources (eg. L2 cache),
2192	* whether it supports "io_alloc" or not.
2193	*/
2194	static void io_alloc_init(void)
2195	{
2196	struct rdt_resource *r = resctrl_arch_get_resource(l: RDT_RESOURCE_L3);
2197
2198	if (r->cache.io_alloc_capable) {
2199	resctrl_file_fflags_init(config: "io_alloc", RFTYPE_CTRL_INFO \|
2200	RFTYPE_RES_CACHE);
2201	resctrl_file_fflags_init(config: "io_alloc_cbm",
2202	RFTYPE_CTRL_INFO \| RFTYPE_RES_CACHE);
2203	}
2204	}
2205
2206	void resctrl_file_fflags_init(const char config, unsigned* long fflags)
2207	{
2208	struct rftype *rft;
2209
2210	rft = rdtgroup_get_rftype_by_name(name: config);
2211	if (rft)
2212	rft->fflags = fflags;
2213	}
2214
2215	/**
2216	* rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
2217	* @r: The resource group with which the file is associated.
2218	* @name: Name of the file
2219	*
2220	* The permissions of named resctrl file, directory, or link are modified
2221	* to not allow read, write, or execute by any user.
2222	*
2223	* WARNING: This function is intended to communicate to the user that the
2224	* resctrl file has been locked down - that it is not relevant to the
2225	* particular state the system finds itself in. It should not be relied
2226	* on to protect from user access because after the file's permissions
2227	* are restricted the user can still change the permissions using chmod
2228	* from the command line.
2229	*
2230	* Return: 0 on success, <0 on failure.
2231	*/
2232	int rdtgroup_kn_mode_restrict(struct rdtgroup r, const* char *name)
2233	{
2234	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2235	struct kernfs_node *kn;
2236	int ret = `0`;
2237
2238	kn = kernfs_find_and_get_ns(parent: r->kn, name, NULL);
2239	if (!kn)
2240	return -ENOENT;
2241
2242	switch (kernfs_type(kn)) {
2243	case KERNFS_DIR:
2244	iattr.ia_mode = S_IFDIR;
2245	break;
2246	case KERNFS_FILE:
2247	iattr.ia_mode = S_IFREG;
2248	break;
2249	case KERNFS_LINK:
2250	iattr.ia_mode = S_IFLNK;
2251	break;
2252	}
2253
2254	ret = kernfs_setattr(kn, iattr: &iattr);
2255	kernfs_put(kn);
2256	return ret;
2257	}
2258
2259	/**
2260	* rdtgroup_kn_mode_restore - Restore user access to named resctrl file
2261	* @r: The resource group with which the file is associated.
2262	* @name: Name of the file
2263	* @mask: Mask of permissions that should be restored
2264	*
2265	* Restore the permissions of the named file. If @name is a directory the
2266	* permissions of its parent will be used.
2267	*
2268	* Return: 0 on success, <0 on failure.
2269	*/
2270	int rdtgroup_kn_mode_restore(struct rdtgroup r, const* char *name,
2271	umode_t mask)
2272	{
2273	struct iattr iattr = {.ia_valid = ATTR_MODE,};
2274	struct kernfs_node kn, parent;
2275	struct rftype rfts, rft;
2276	int ret, len;
2277
2278	rfts = res_common_files;
2279	len = ARRAY_SIZE(res_common_files);
2280
2281	for (rft = rfts; rft < rfts + len; rft++) {
2282	if (!strcmp(rft->name, name))
2283	iattr.ia_mode = rft->mode & mask;
2284	}
2285
2286	kn = kernfs_find_and_get_ns(parent: r->kn, name, NULL);
2287	if (!kn)
2288	return -ENOENT;
2289
2290	switch (kernfs_type(kn)) {
2291	case KERNFS_DIR:
2292	parent = kernfs_get_parent(kn);
2293	if (parent) {
2294	iattr.ia_mode \|= parent->mode;
2295	kernfs_put(kn: parent);
2296	}
2297	iattr.ia_mode \|= S_IFDIR;
2298	break;
2299	case KERNFS_FILE:
2300	iattr.ia_mode \|= S_IFREG;
2301	break;
2302	case KERNFS_LINK:
2303	iattr.ia_mode \|= S_IFLNK;
2304	break;
2305	}
2306
2307	ret = kernfs_setattr(kn, iattr: &iattr);
2308	kernfs_put(kn);
2309	return ret;
2310	}
2311
2312	static int resctrl_mkdir_event_configs(struct rdt_resource r, struct* kernfs_node *l3_mon_kn)
2313	{
2314	struct kernfs_node kn_subdir, kn_subdir2;
2315	struct mon_evt *mevt;
2316	int ret;
2317
2318	kn_subdir = kernfs_create_dir(parent: l3_mon_kn, name: "event_configs", mode: l3_mon_kn->mode, NULL);
2319	if (IS_ERR(ptr: kn_subdir))
2320	return PTR_ERR(ptr: kn_subdir);
2321
2322	ret = rdtgroup_kn_set_ugid(kn: kn_subdir);
2323	if (ret)
2324	return ret;
2325
2326	for_each_mon_event(mevt) {
2327	if (mevt->rid != r->rid \|\| !mevt->enabled \|\| !resctrl_is_mbm_event(eventid: mevt->evtid))
2328	continue;
2329
2330	kn_subdir2 = kernfs_create_dir(parent: kn_subdir, name: mevt->name, mode: kn_subdir->mode, priv: mevt);
2331	if (IS_ERR(ptr: kn_subdir2)) {
2332	ret = PTR_ERR(ptr: kn_subdir2);
2333	goto out;
2334	}
2335
2336	ret = rdtgroup_kn_set_ugid(kn: kn_subdir2);
2337	if (ret)
2338	goto out;
2339
2340	ret = rdtgroup_add_files(kn: kn_subdir2, RFTYPE_ASSIGN_CONFIG);
2341	if (ret)
2342	break;
2343	}
2344
2345	out:
2346	return ret;
2347	}
2348
2349	static int rdtgroup_mkdir_info_resdir(void priv, char* *name,
2350	unsigned long fflags)
2351	{
2352	struct kernfs_node *kn_subdir;
2353	struct rdt_resource *r;
2354	int ret;
2355
2356	kn_subdir = kernfs_create_dir(parent: kn_info, name,
2357	mode: kn_info->mode, priv);
2358	if (IS_ERR(ptr: kn_subdir))
2359	return PTR_ERR(ptr: kn_subdir);
2360
2361	ret = rdtgroup_kn_set_ugid(kn: kn_subdir);
2362	if (ret)
2363	return ret;
2364
2365	ret = rdtgroup_add_files(kn: kn_subdir, fflags);
2366	if (ret)
2367	return ret;
2368
2369	if ((fflags & RFTYPE_MON_INFO) == RFTYPE_MON_INFO) {
2370	r = priv;
2371	if (r->mon.mbm_cntr_assignable) {
2372	ret = resctrl_mkdir_event_configs(r, l3_mon_kn: kn_subdir);
2373	if (ret)
2374	return ret;
2375	/*
2376	* Hide BMEC related files if mbm_event mode
2377	* is enabled.
2378	*/
2379	if (resctrl_arch_mbm_cntr_assign_enabled(r))
2380	resctrl_bmec_files_show(r, l3_mon_kn: kn_subdir, show: false);
2381	}
2382	}
2383
2384	kernfs_activate(kn: kn_subdir);
2385
2386	return ret;
2387	}
2388
2389	static unsigned long fflags_from_resource(struct rdt_resource *r)
2390	{
2391	switch (r->rid) {
2392	case RDT_RESOURCE_L3:
2393	case RDT_RESOURCE_L2:
2394	return RFTYPE_RES_CACHE;
2395	case RDT_RESOURCE_MBA:
2396	case RDT_RESOURCE_SMBA:
2397	return RFTYPE_RES_MB;
2398	}
2399
2400	return WARN_ON_ONCE(`1`);
2401	}
2402
2403	static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
2404	{
2405	struct resctrl_schema *s;
2406	struct rdt_resource *r;
2407	unsigned long fflags;
2408	char name[`32`];
2409	int ret;
2410
2411	/ create the directory /
2412	kn_info = kernfs_create_dir(parent: parent_kn, name: "info", mode: parent_kn->mode, NULL);
2413	if (IS_ERR(ptr: kn_info))
2414	return PTR_ERR(ptr: kn_info);
2415
2416	ret = rdtgroup_add_files(kn: kn_info, RFTYPE_TOP_INFO);
2417	if (ret)
2418	goto out_destroy;
2419
2420	/ loop over enabled controls, these are all alloc_capable /
2421	list_for_each_entry(s, &resctrl_schema_all, list) {
2422	r = s->res;
2423	fflags = fflags_from_resource(r) \| RFTYPE_CTRL_INFO;
2424	ret = rdtgroup_mkdir_info_resdir(priv: s, name: s->name, fflags);
2425	if (ret)
2426	goto out_destroy;
2427	}
2428
2429	for_each_mon_capable_rdt_resource(r) {
2430	fflags = fflags_from_resource(r) \| RFTYPE_MON_INFO;
2431	sprintf(buf: name, fmt: "%s_MON", r->name);
2432	ret = rdtgroup_mkdir_info_resdir(priv: r, name, fflags);
2433	if (ret)
2434	goto out_destroy;
2435	}
2436
2437	ret = rdtgroup_kn_set_ugid(kn: kn_info);
2438	if (ret)
2439	goto out_destroy;
2440
2441	kernfs_activate(kn: kn_info);
2442
2443	return `0`;
2444
2445	out_destroy:
2446	kernfs_remove(kn: kn_info);
2447	return ret;
2448	}
2449
2450	static int
2451	mongroup_create_dir(struct kernfs_node parent_kn, struct* rdtgroup *prgrp,
2452	char name, struct* kernfs_node **dest_kn)
2453	{
2454	struct kernfs_node *kn;
2455	int ret;
2456
2457	/ create the directory /
2458	kn = kernfs_create_dir(parent: parent_kn, name, mode: parent_kn->mode, priv: prgrp);
2459	if (IS_ERR(ptr: kn))
2460	return PTR_ERR(ptr: kn);
2461
2462	if (dest_kn)
2463	*dest_kn = kn;
2464
2465	ret = rdtgroup_kn_set_ugid(kn);
2466	if (ret)
2467	goto out_destroy;
2468
2469	kernfs_activate(kn);
2470
2471	return `0`;
2472
2473	out_destroy:
2474	kernfs_remove(kn);
2475	return ret;
2476	}
2477
2478	static inline bool is_mba_linear(void)
2479	{
2480	return resctrl_arch_get_resource(l: RDT_RESOURCE_MBA)->membw.delay_linear;
2481	}
2482
2483	static int mba_sc_domain_allocate(struct rdt_resource r, struct* rdt_ctrl_domain *d)
2484	{
2485	u32 num_closid = resctrl_arch_get_num_closid(r);
2486	int cpu = cpumask_any(&d->hdr.cpu_mask);
2487	int i;
2488
2489	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
2490	GFP_KERNEL, cpu_to_node(cpu));
2491	if (!d->mbps_val)
2492	return -ENOMEM;
2493
2494	for (i = `0`; i < num_closid; i++)
2495	d->mbps_val[i] = MBA_MAX_MBPS;
2496
2497	return `0`;
2498	}
2499
2500	static void mba_sc_domain_destroy(struct rdt_resource *r,
2501	struct rdt_ctrl_domain *d)
2502	{
2503	kfree(objp: d->mbps_val);
2504	d->mbps_val = NULL;
2505	}
2506
2507	/*
2508	* MBA software controller is supported only if
2509	* MBM is supported and MBA is in linear scale,
2510	* and the MBM monitor scope is the same as MBA
2511	* control scope.
2512	*/
2513	static bool supports_mba_mbps(void)
2514	{
2515	struct rdt_resource *rmbm = resctrl_arch_get_resource(l: RDT_RESOURCE_L3);
2516	struct rdt_resource *r = resctrl_arch_get_resource(l: RDT_RESOURCE_MBA);
2517
2518	return (resctrl_is_mbm_enabled() &&
2519	r->alloc_capable && is_mba_linear() &&
2520	r->ctrl_scope == rmbm->mon_scope);
2521	}
2522
2523	/*
2524	* Enable or disable the MBA software controller
2525	* which helps user specify bandwidth in MBps.
2526	*/
2527	static int set_mba_sc(bool mba_sc)
2528	{
2529	struct rdt_resource *r = resctrl_arch_get_resource(l: RDT_RESOURCE_MBA);
2530	u32 num_closid = resctrl_arch_get_num_closid(r);
2531	struct rdt_ctrl_domain *d;
2532	unsigned long fflags;
2533	int i;
2534
2535	if (!supports_mba_mbps() \|\| mba_sc == is_mba_sc(r))
2536	return -EINVAL;
2537
2538	r->membw.mba_sc = mba_sc;
2539
2540	rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
2541
2542	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
2543	for (i = `0`; i < num_closid; i++)
2544	d->mbps_val[i] = MBA_MAX_MBPS;
2545	}
2546
2547	fflags = mba_sc ? RFTYPE_CTRL_BASE \| RFTYPE_MON_BASE : `0`;
2548	resctrl_file_fflags_init(config: "mba_MBps_event", fflags);
2549
2550	return `0`;
2551	}
2552
2553	/*
2554	* We don't allow rdtgroup directories to be created anywhere
2555	* except the root directory. Thus when looking for the rdtgroup
2556	* structure for a kernfs node we are either looking at a directory,
2557	* in which case the rdtgroup structure is pointed at by the "priv"
2558	* field, otherwise we have a file, and need only look to the parent
2559	* to find the rdtgroup.
2560	*/
2561	static struct rdtgroup kernfs_to_rdtgroup(struct* kernfs_node *kn)
2562	{
2563	if (kernfs_type(kn) == KERNFS_DIR) {
2564	/*
2565	* All the resource directories use "kn->priv"
2566	* to point to the "struct rdtgroup" for the
2567	* resource. "info" and its subdirectories don't
2568	* have rdtgroup structures, so return NULL here.
2569	*/
2570	if (kn == kn_info \|\|
2571	rcu_access_pointer(kn->__parent) == kn_info)
2572	return NULL;
2573	else
2574	return kn->priv;
2575	} else {
2576	return rdt_kn_parent_priv(kn);
2577	}
2578	}
2579
2580	static void rdtgroup_kn_get(struct rdtgroup rdtgrp, struct* kernfs_node *kn)
2581	{
2582	atomic_inc(v: &rdtgrp->waitcount);
2583	kernfs_break_active_protection(kn);
2584	}
2585
2586	static void rdtgroup_kn_put(struct rdtgroup rdtgrp, struct* kernfs_node *kn)
2587	{
2588	if (atomic_dec_and_test(v: &rdtgrp->waitcount) &&
2589	(rdtgrp->flags & RDT_DELETED)) {
2590	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP \|\|
2591	rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2592	rdtgroup_pseudo_lock_remove(rdtgrp);
2593	kernfs_unbreak_active_protection(kn);
2594	rdtgroup_remove(rdtgrp);
2595	} else {
2596	kernfs_unbreak_active_protection(kn);
2597	}
2598	}
2599
2600	struct rdtgroup rdtgroup_kn_lock_live(struct* kernfs_node *kn)
2601	{
2602	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2603
2604	if (!rdtgrp)
2605	return NULL;
2606
2607	rdtgroup_kn_get(rdtgrp, kn);
2608
2609	cpus_read_lock();
2610	mutex_lock(&rdtgroup_mutex);
2611
2612	/ Was this group deleted while we waited? /
2613	if (rdtgrp->flags & RDT_DELETED)
2614	return NULL;
2615
2616	return rdtgrp;
2617	}
2618
2619	void rdtgroup_kn_unlock(struct kernfs_node *kn)
2620	{
2621	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2622
2623	if (!rdtgrp)
2624	return;
2625
2626	mutex_unlock(lock: &rdtgroup_mutex);
2627	cpus_read_unlock();
2628
2629	rdtgroup_kn_put(rdtgrp, kn);
2630	}
2631
2632	static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2633	struct rdtgroup *prgrp,
2634	struct kernfs_node **mon_data_kn);
2635
2636	static void rdt_disable_ctx(void)
2637	{
2638	resctrl_arch_set_cdp_enabled(l: RDT_RESOURCE_L3, enable: false);
2639	resctrl_arch_set_cdp_enabled(l: RDT_RESOURCE_L2, enable: false);
2640	set_mba_sc(false);
2641
2642	resctrl_debug = false;
2643	}
2644
2645	static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2646	{
2647	int ret = `0`;
2648
2649	if (ctx->enable_cdpl2) {
2650	ret = resctrl_arch_set_cdp_enabled(l: RDT_RESOURCE_L2, enable: true);
2651	if (ret)
2652	goto out_done;
2653	}
2654
2655	if (ctx->enable_cdpl3) {
2656	ret = resctrl_arch_set_cdp_enabled(l: RDT_RESOURCE_L3, enable: true);
2657	if (ret)
2658	goto out_cdpl2;
2659	}
2660
2661	if (ctx->enable_mba_mbps) {
2662	ret = set_mba_sc(true);
2663	if (ret)
2664	goto out_cdpl3;
2665	}
2666
2667	if (ctx->enable_debug)
2668	resctrl_debug = true;
2669
2670	return `0`;
2671
2672	out_cdpl3:
2673	resctrl_arch_set_cdp_enabled(l: RDT_RESOURCE_L3, enable: false);
2674	out_cdpl2:
2675	resctrl_arch_set_cdp_enabled(l: RDT_RESOURCE_L2, enable: false);
2676	out_done:
2677	return ret;
2678	}
2679
2680	static int schemata_list_add(struct rdt_resource r, enum* resctrl_conf_type type)
2681	{
2682	struct resctrl_schema *s;
2683	const char *suffix = "";
2684	int ret, cl;
2685
2686	s = kzalloc(sizeof(*s), GFP_KERNEL);
2687	if (!s)
2688	return -ENOMEM;
2689
2690	s->res = r;
2691	s->num_closid = resctrl_arch_get_num_closid(r);
2692	if (resctrl_arch_get_cdp_enabled(l: r->rid))
2693	s->num_closid /= `2`;
2694
2695	s->conf_type = type;
2696	switch (type) {
2697	case CDP_CODE:
2698	suffix = "CODE";
2699	break;
2700	case CDP_DATA:
2701	suffix = "DATA";
2702	break;
2703	case CDP_NONE:
2704	suffix = "";
2705	break;
2706	}
2707
2708	ret = snprintf(buf: s->name, size: sizeof(s->name), fmt: "%s%s", r->name, suffix);
2709	if (ret >= sizeof(s->name)) {
2710	kfree(objp: s);
2711	return -EINVAL;
2712	}
2713
2714	cl = strlen(s->name);
2715
2716	/*
2717	* If CDP is supported by this resource, but not enabled,
2718	* include the suffix. This ensures the tabular format of the
2719	* schemata file does not change between mounts of the filesystem.
2720	*/
2721	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(l: r->rid))
2722	cl += `4`;
2723
2724	if (cl > max_name_width)
2725	max_name_width = cl;
2726
2727	switch (r->schema_fmt) {
2728	case RESCTRL_SCHEMA_BITMAP:
2729	s->fmt_str = "%d=%x";
2730	break;
2731	case RESCTRL_SCHEMA_RANGE:
2732	s->fmt_str = "%d=%u";
2733	break;
2734	}
2735
2736	if (WARN_ON_ONCE(!s->fmt_str)) {
2737	kfree(objp: s);
2738	return -EINVAL;
2739	}
2740
2741	INIT_LIST_HEAD(list: &s->list);
2742	list_add(new: &s->list, head: &resctrl_schema_all);
2743
2744	return `0`;
2745	}
2746
2747	static int schemata_list_create(void)
2748	{
2749	struct rdt_resource *r;
2750	int ret = `0`;
2751
2752	for_each_alloc_capable_rdt_resource(r) {
2753	if (resctrl_arch_get_cdp_enabled(l: r->rid)) {
2754	ret = schemata_list_add(r, type: CDP_CODE);
2755	if (ret)
2756	break;
2757
2758	ret = schemata_list_add(r, type: CDP_DATA);
2759	} else {
2760	ret = schemata_list_add(r, type: CDP_NONE);
2761	}
2762
2763	if (ret)
2764	break;
2765	}
2766
2767	return ret;
2768	}
2769
2770	static void schemata_list_destroy(void)
2771	{
2772	struct resctrl_schema s, tmp;
2773
2774	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2775	list_del(entry: &s->list);
2776	kfree(objp: s);
2777	}
2778	}
2779
2780	static int rdt_get_tree(struct fs_context *fc)
2781	{
2782	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2783	unsigned long flags = RFTYPE_CTRL_BASE;
2784	struct rdt_mon_domain *dom;
2785	struct rdt_resource *r;
2786	int ret;
2787
2788	cpus_read_lock();
2789	mutex_lock(&rdtgroup_mutex);
2790	/*
2791	* resctrl file system can only be mounted once.
2792	*/
2793	if (resctrl_mounted) {
2794	ret = -EBUSY;
2795	goto out;
2796	}
2797
2798	ret = rdtgroup_setup_root(ctx);
2799	if (ret)
2800	goto out;
2801
2802	ret = rdt_enable_ctx(ctx);
2803	if (ret)
2804	goto out_root;
2805
2806	ret = schemata_list_create();
2807	if (ret)
2808	goto out_schemata_free;
2809
2810	ret = closid_init();
2811	if (ret)
2812	goto out_schemata_free;
2813
2814	if (resctrl_arch_mon_capable())
2815	flags \|= RFTYPE_MON;
2816
2817	ret = rdtgroup_add_files(kn: rdtgroup_default.kn, fflags: flags);
2818	if (ret)
2819	goto out_closid_exit;
2820
2821	kernfs_activate(kn: rdtgroup_default.kn);
2822
2823	ret = rdtgroup_create_info_dir(parent_kn: rdtgroup_default.kn);
2824	if (ret < `0`)
2825	goto out_closid_exit;
2826
2827	if (resctrl_arch_mon_capable()) {
2828	ret = mongroup_create_dir(parent_kn: rdtgroup_default.kn,
2829	prgrp: &rdtgroup_default, name: "mon_groups",
2830	dest_kn: &kn_mongrp);
2831	if (ret < `0`)
2832	goto out_info;
2833
2834	rdtgroup_assign_cntrs(rdtgrp: &rdtgroup_default);
2835
2836	ret = mkdir_mondata_all(parent_kn: rdtgroup_default.kn,
2837	prgrp: &rdtgroup_default, mon_data_kn: &kn_mondata);
2838	if (ret < `0`)
2839	goto out_mongrp;
2840	rdtgroup_default.mon.mon_data_kn = kn_mondata;
2841	}
2842
2843	ret = rdt_pseudo_lock_init();
2844	if (ret)
2845	goto out_mondata;
2846
2847	ret = kernfs_get_tree(fc);
2848	if (ret < `0`)
2849	goto out_psl;
2850
2851	if (resctrl_arch_alloc_capable())
2852	resctrl_arch_enable_alloc();
2853	if (resctrl_arch_mon_capable())
2854	resctrl_arch_enable_mon();
2855
2856	if (resctrl_arch_alloc_capable() \|\| resctrl_arch_mon_capable())
2857	resctrl_mounted = true;
2858
2859	if (resctrl_is_mbm_enabled()) {
2860	r = resctrl_arch_get_resource(l: RDT_RESOURCE_L3);
2861	list_for_each_entry(dom, &r->mon_domains, hdr.list)
2862	mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
2863	RESCTRL_PICK_ANY_CPU);
2864	}
2865
2866	goto out;
2867
2868	out_psl:
2869	rdt_pseudo_lock_release();
2870	out_mondata:
2871	if (resctrl_arch_mon_capable())
2872	kernfs_remove(kn: kn_mondata);
2873	out_mongrp:
2874	if (resctrl_arch_mon_capable()) {
2875	rdtgroup_unassign_cntrs(rdtgrp: &rdtgroup_default);
2876	kernfs_remove(kn: kn_mongrp);
2877	}
2878	out_info:
2879	kernfs_remove(kn: kn_info);
2880	out_closid_exit:
2881	closid_exit();
2882	out_schemata_free:
2883	schemata_list_destroy();
2884	rdt_disable_ctx();
2885	out_root:
2886	rdtgroup_destroy_root();
2887	out:
2888	rdt_last_cmd_clear();
2889	mutex_unlock(lock: &rdtgroup_mutex);
2890	cpus_read_unlock();
2891	return ret;
2892	}
2893
/* Identifiers of the resctrl mount options, see rdt_fs_parameters[]. */
enum rdt_param {
	Opt_cdp,	/* "cdp" */
	Opt_cdpl2,	/* "cdpl2" */
	Opt_mba_mbps,	/* "mba_MBps" */
	Opt_debug,	/* "debug" */
	nr__rdt_params	/* number of options, keep last */
};
2901
2902	static const struct fs_parameter_spec rdt_fs_parameters[] = {
2903	fsparam_flag("cdp", Opt_cdp),
2904	fsparam_flag("cdpl2", Opt_cdpl2),
2905	fsparam_flag("mba_MBps", Opt_mba_mbps),
2906	fsparam_flag("debug", Opt_debug),
2907	{}
2908	};
2909
2910	static int rdt_parse_param(struct fs_context fc, struct* fs_parameter *param)
2911	{
2912	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2913	struct fs_parse_result result;
2914	const char *msg;
2915	int opt;
2916
2917	opt = fs_parse(fc, desc: rdt_fs_parameters, param, result: &result);
2918	if (opt < `0`)
2919	return opt;
2920
2921	switch (opt) {
2922	case Opt_cdp:
2923	ctx->enable_cdpl3 = true;
2924	return `0`;
2925	case Opt_cdpl2:
2926	ctx->enable_cdpl2 = true;
2927	return `0`;
2928	case Opt_mba_mbps:
2929	msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
2930	if (!supports_mba_mbps())
2931	return invalfc(fc, msg);
2932	ctx->enable_mba_mbps = true;
2933	return `0`;
2934	case Opt_debug:
2935	ctx->enable_debug = true;
2936	return `0`;
2937	}
2938
2939	return -EINVAL;
2940	}
2941
/*
 * Release the resctrl fs_context: let kernfs free its part first, then free
 * the containing struct rdt_fs_context.
 */
static void rdt_fs_context_free(struct fs_context *fc)
{
	struct rdt_fs_context *rdt_ctx = rdt_fc2context(fc);

	kernfs_free_fs_context(fc);
	kfree(rdt_ctx);
}
2949
2950	static const struct fs_context_operations rdt_fs_context_ops = {
2951	.free = rdt_fs_context_free,
2952	.parse_param = rdt_parse_param,
2953	.get_tree = rdt_get_tree,
2954	};
2955
2956	static int rdt_init_fs_context(struct fs_context *fc)
2957	{
2958	struct rdt_fs_context *ctx;
2959
2960	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
2961	if (!ctx)
2962	return -ENOMEM;
2963
2964	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2965	fc->fs_private = &ctx->kfc;
2966	fc->ops = &rdt_fs_context_ops;
2967	put_user_ns(ns: fc->user_ns);
2968	fc->user_ns = get_user_ns(ns: &init_user_ns);
2969	fc->global = true;
2970	return `0`;
2971	}
2972
2973	/*
2974	* Move tasks from one to the other group. If @from is NULL, then all tasks
2975	* in the systems are moved unconditionally (used for teardown).
2976	*
2977	* If @mask is not NULL the cpus on which moved tasks are running are set
2978	* in that mask so the update smp function call is restricted to affected
2979	* cpus.
2980	*/
2981	static void rdt_move_group_tasks(struct rdtgroup from, struct* rdtgroup *to,
2982	struct cpumask *mask)
2983	{
2984	struct task_struct p, t;
2985
2986	read_lock(&tasklist_lock);
2987	for_each_process_thread(p, t) {
2988	if (!from \|\| is_closid_match(t, r: from) \|\|
2989	is_rmid_match(t, r: from)) {
2990	resctrl_arch_set_closid_rmid(tsk: t, closid: to->closid,
2991	rmid: to->mon.rmid);
2992
2993	/*
2994	* Order the closid/rmid stores above before the loads
2995	* in task_curr(). This pairs with the full barrier
2996	* between the rq->curr update and
2997	* resctrl_arch_sched_in() during context switch.
2998	*/
2999	smp_mb();
3000
3001	/*
3002	* If the task is on a CPU, set the CPU in the mask.
3003	* The detection is inaccurate as tasks might move or
3004	* schedule before the smp function call takes place.
3005	* In such a case the function call is pointless, but
3006	* there is no other side effect.
3007	*/
3008	if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(p: t))
3009	cpumask_set_cpu(cpu: task_cpu(p: t), dstp: mask);
3010	}
3011	}
3012	read_unlock(&tasklist_lock);
3013	}
3014
3015	static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
3016	{
3017	struct rdtgroup sentry, stmp;
3018	struct list_head *head;
3019
3020	head = &rdtgrp->mon.crdtgrp_list;
3021	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
3022	rdtgroup_unassign_cntrs(rdtgrp: sentry);
3023	free_rmid(closid: sentry->closid, rmid: sentry->mon.rmid);
3024	list_del(entry: &sentry->mon.crdtgrp_list);
3025
3026	if (atomic_read(v: &sentry->waitcount) != `0`)
3027	sentry->flags = RDT_DELETED;
3028	else
3029	rdtgroup_remove(rdtgrp: sentry);
3030	}
3031	}
3032
3033	/*
3034	* Forcibly remove all of subdirectories under root.
3035	*/
3036	static void rmdir_all_sub(void)
3037	{
3038	struct rdtgroup rdtgrp, tmp;
3039
3040	/ Move all tasks to the default resource group /
3041	rdt_move_group_tasks(NULL, to: &rdtgroup_default, NULL);
3042
3043	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
3044	/ Free any child rmids /
3045	free_all_child_rdtgrp(rdtgrp);
3046
3047	/ Remove each rdtgroup other than root /
3048	if (rdtgrp == &rdtgroup_default)
3049	continue;
3050
3051	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP \|\|
3052	rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
3053	rdtgroup_pseudo_lock_remove(rdtgrp);
3054
3055	/*
3056	* Give any CPUs back to the default group. We cannot copy
3057	* cpu_online_mask because a CPU might have executed the
3058	* offline callback already, but is still marked online.
3059	*/
3060	cpumask_or(dstp: &rdtgroup_default.cpu_mask,
3061	src1p: &rdtgroup_default.cpu_mask, src2p: &rdtgrp->cpu_mask);
3062
3063	rdtgroup_unassign_cntrs(rdtgrp);
3064
3065	free_rmid(closid: rdtgrp->closid, rmid: rdtgrp->mon.rmid);
3066
3067	kernfs_remove(kn: rdtgrp->kn);
3068	list_del(entry: &rdtgrp->rdtgroup_list);
3069
3070	if (atomic_read(v: &rdtgrp->waitcount) != `0`)
3071	rdtgrp->flags = RDT_DELETED;
3072	else
3073	rdtgroup_remove(rdtgrp);
3074	}
3075	/ Notify online CPUs to update per cpu storage and PQR_ASSOC MSR /
3076	update_closid_rmid(cpu_online_mask, r: &rdtgroup_default);
3077
3078	kernfs_remove(kn: kn_info);
3079	kernfs_remove(kn: kn_mongrp);
3080	kernfs_remove(kn: kn_mondata);
3081	}
3082
3083	/**
3084	* mon_get_kn_priv() - Get the mon_data priv data for this event.
3085	*
3086	* The same values are used across the mon_data directories of all control and
3087	* monitor groups for the same event in the same domain. Keep a list of
3088	* allocated structures and re-use an existing one with the same values for
3089	* @rid, @domid, etc.
3090	*
3091	* @rid: The resource id for the event file being created.
3092	* @domid: The domain id for the event file being created.
3093	* @mevt: The type of event file being created.
3094	* @do_sum: Whether SNC summing monitors are being created.
3095	*/
3096	static struct mon_data mon_get_kn_priv(enum* resctrl_res_level rid, int domid,
3097	struct mon_evt *mevt,
3098	bool do_sum)
3099	{
3100	struct mon_data *priv;
3101
3102	lockdep_assert_held(&rdtgroup_mutex);
3103
3104	list_for_each_entry(priv, &mon_data_kn_priv_list, list) {
3105	if (priv->rid == rid && priv->domid == domid &&
3106	priv->sum == do_sum && priv->evtid == mevt->evtid)
3107	return priv;
3108	}
3109
3110	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
3111	if (!priv)
3112	return NULL;
3113
3114	priv->rid = rid;
3115	priv->domid = domid;
3116	priv->sum = do_sum;
3117	priv->evtid = mevt->evtid;
3118	list_add_tail(new: &priv->list, head: &mon_data_kn_priv_list);
3119
3120	return priv;
3121	}
3122
3123	/**
3124	* mon_put_kn_priv() - Free all allocated mon_data structures.
3125	*
3126	* Called when resctrl file system is unmounted.
3127	*/
3128	static void mon_put_kn_priv(void)
3129	{
3130	struct mon_data priv, tmp;
3131
3132	lockdep_assert_held(&rdtgroup_mutex);
3133
3134	list_for_each_entry_safe(priv, tmp, &mon_data_kn_priv_list, list) {
3135	list_del(entry: &priv->list);
3136	kfree(objp: priv);
3137	}
3138	}
3139
3140	static void resctrl_fs_teardown(void)
3141	{
3142	lockdep_assert_held(&rdtgroup_mutex);
3143
3144	/ Cleared by rdtgroup_destroy_root() /
3145	if (!rdtgroup_default.kn)
3146	return;
3147
3148	rmdir_all_sub();
3149	rdtgroup_unassign_cntrs(rdtgrp: &rdtgroup_default);
3150	mon_put_kn_priv();
3151	rdt_pseudo_lock_release();
3152	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
3153	closid_exit();
3154	schemata_list_destroy();
3155	rdtgroup_destroy_root();
3156	}
3157
3158	static void rdt_kill_sb(struct super_block *sb)
3159	{
3160	struct rdt_resource *r;
3161
3162	cpus_read_lock();
3163	mutex_lock(&rdtgroup_mutex);
3164
3165	rdt_disable_ctx();
3166
3167	/ Put everything back to default values. /
3168	for_each_alloc_capable_rdt_resource(r)
3169	resctrl_arch_reset_all_ctrls(r);
3170
3171	resctrl_fs_teardown();
3172	if (resctrl_arch_alloc_capable())
3173	resctrl_arch_disable_alloc();
3174	if (resctrl_arch_mon_capable())
3175	resctrl_arch_disable_mon();
3176	resctrl_mounted = false;
3177	kernfs_kill_sb(sb);
3178	mutex_unlock(lock: &rdtgroup_mutex);
3179	cpus_read_unlock();
3180	}
3181
3182	static struct file_system_type rdt_fs_type = {
3183	.name = "resctrl",
3184	.init_fs_context = rdt_init_fs_context,
3185	.parameters = rdt_fs_parameters,
3186	.kill_sb = rdt_kill_sb,
3187	};
3188
3189	static int mon_addfile(struct kernfs_node parent_kn, const* char *name,
3190	void *priv)
3191	{
3192	struct kernfs_node *kn;
3193	int ret = `0`;
3194
3195	kn = __kernfs_create_file(parent: parent_kn, name, mode: `0444`,
3196	GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, size: `0`,
3197	ops: &kf_mondata_ops, priv, NULL, NULL);
3198	if (IS_ERR(kn))
3199	return PTR_ERR(kn);
3200
3201	ret = rdtgroup_kn_set_ugid(kn);
3202	if (ret) {
3203	kernfs_remove(kn);
3204	return ret;
3205	}
3206
3207	return ret;
3208	}
3209
3210	static void mon_rmdir_one_subdir(struct kernfs_node pkn, char* name, char* *subname)
3211	{
3212	struct kernfs_node *kn;
3213
3214	kn = kernfs_find_and_get(kn: pkn, name);
3215	if (!kn)
3216	return;
3217	kernfs_put(kn);
3218
3219	if (kn->dir.subdirs <= `1`)
3220	kernfs_remove(kn);
3221	else
3222	kernfs_remove_by_name(parent: kn, name: subname);
3223	}
3224
3225	/*
3226	* Remove all subdirectories of mon_data of ctrl_mon groups
3227	* and monitor groups for the given domain.
3228	* Remove files and directories containing "sum" of domain data
3229	* when last domain being summed is removed.
3230	*/
3231	static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3232	struct rdt_mon_domain *d)
3233	{
3234	struct rdtgroup prgrp, crgrp;
3235	char subname[`32`];
3236	bool snc_mode;
3237	char name[`32`];
3238
3239	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3240	sprintf(buf: name, fmt: "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
3241	if (snc_mode)
3242	sprintf(buf: subname, fmt: "mon_sub_%s_%02d", r->name, d->hdr.id);
3243
3244	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3245	mon_rmdir_one_subdir(pkn: prgrp->mon.mon_data_kn, name, subname);
3246
3247	list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
3248	mon_rmdir_one_subdir(pkn: crgrp->mon.mon_data_kn, name, subname);
3249	}
3250	}
3251
3252	static int mon_add_all_files(struct kernfs_node kn, struct* rdt_mon_domain *d,
3253	struct rdt_resource r, struct* rdtgroup *prgrp,
3254	bool do_sum)
3255	{
3256	struct rmid_read rr = {`0`};
3257	struct mon_data *priv;
3258	struct mon_evt *mevt;
3259	int ret, domid;
3260
3261	for_each_mon_event(mevt) {
3262	if (mevt->rid != r->rid \|\| !mevt->enabled)
3263	continue;
3264	domid = do_sum ? d->ci_id : d->hdr.id;
3265	priv = mon_get_kn_priv(rid: r->rid, domid, mevt, do_sum);
3266	if (WARN_ON_ONCE(!priv))
3267	return -EINVAL;
3268
3269	ret = mon_addfile(parent_kn: kn, name: mevt->name, priv);
3270	if (ret)
3271	return ret;
3272
3273	if (!do_sum && resctrl_is_mbm_event(eventid: mevt->evtid))
3274	mon_event_read(rr: &rr, r, d, rdtgrp: prgrp, cpumask: &d->hdr.cpu_mask, evtid: mevt->evtid, first: true);
3275	}
3276
3277	return `0`;
3278	}
3279
3280	static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
3281	struct rdt_mon_domain *d,
3282	struct rdt_resource r, struct* rdtgroup *prgrp)
3283	{
3284	struct kernfs_node kn, ckn;
3285	char name[`32`];
3286	bool snc_mode;
3287	int ret = `0`;
3288
3289	lockdep_assert_held(&rdtgroup_mutex);
3290
3291	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
3292	sprintf(buf: name, fmt: "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
3293	kn = kernfs_find_and_get(kn: parent_kn, name);
3294	if (kn) {
3295	/*
3296	* rdtgroup_mutex will prevent this directory from being
3297	* removed. No need to keep this hold.
3298	*/
3299	kernfs_put(kn);
3300	} else {
3301	kn = kernfs_create_dir(parent: parent_kn, name, mode: parent_kn->mode, priv: prgrp);
3302	if (IS_ERR(ptr: kn))
3303	return PTR_ERR(ptr: kn);
3304
3305	ret = rdtgroup_kn_set_ugid(kn);
3306	if (ret)
3307	goto out_destroy;
3308	ret = mon_add_all_files(kn, d, r, prgrp, do_sum: snc_mode);
3309	if (ret)
3310	goto out_destroy;
3311	}
3312
3313	if (snc_mode) {
3314	sprintf(buf: name, fmt: "mon_sub_%s_%02d", r->name, d->hdr.id);
3315	ckn = kernfs_create_dir(parent: kn, name, mode: parent_kn->mode, priv: prgrp);
3316	if (IS_ERR(ptr: ckn)) {
3317	ret = -EINVAL;
3318	goto out_destroy;
3319	}
3320
3321	ret = rdtgroup_kn_set_ugid(kn: ckn);
3322	if (ret)
3323	goto out_destroy;
3324
3325	ret = mon_add_all_files(kn: ckn, d, r, prgrp, do_sum: false);
3326	if (ret)
3327	goto out_destroy;
3328	}
3329
3330	kernfs_activate(kn);
3331	return `0`;
3332
3333	out_destroy:
3334	kernfs_remove(kn);
3335	return ret;
3336	}
3337
3338	/*
3339	* Add all subdirectories of mon_data for "ctrl_mon" groups
3340	* and "monitor" groups with given domain id.
3341	*/
3342	static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
3343	struct rdt_mon_domain *d)
3344	{
3345	struct kernfs_node *parent_kn;
3346	struct rdtgroup prgrp, crgrp;
3347	struct list_head *head;
3348
3349	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
3350	parent_kn = prgrp->mon.mon_data_kn;
3351	mkdir_mondata_subdir(parent_kn, d, r, prgrp);
3352
3353	head = &prgrp->mon.crdtgrp_list;
3354	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
3355	parent_kn = crgrp->mon.mon_data_kn;
3356	mkdir_mondata_subdir(parent_kn, d, r, prgrp: crgrp);
3357	}
3358	}
3359	}
3360
3361	static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
3362	struct rdt_resource *r,
3363	struct rdtgroup *prgrp)
3364	{
3365	struct rdt_mon_domain *dom;
3366	int ret;
3367
3368	/ Walking r->domains, ensure it can't race with cpuhp /
3369	lockdep_assert_cpus_held();
3370
3371	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
3372	ret = mkdir_mondata_subdir(parent_kn, d: dom, r, prgrp);
3373	if (ret)
3374	return ret;
3375	}
3376
3377	return `0`;
3378	}
3379
3380	/*
3381	* This creates a directory mon_data which contains the monitored data.
3382	*
3383	* mon_data has one directory for each domain which are named
3384	* in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
3385	* with L3 domain looks as below:
3386	* ./mon_data:
3387	* mon_L3_00
3388	* mon_L3_01
3389	* mon_L3_02
3390	* ...
3391	*
3392	* Each domain directory has one file per event:
3393	* ./mon_L3_00/:
3394	* llc_occupancy
3395	*
3396	*/
3397	static int mkdir_mondata_all(struct kernfs_node *parent_kn,
3398	struct rdtgroup *prgrp,
3399	struct kernfs_node **dest_kn)
3400	{
3401	struct rdt_resource *r;
3402	struct kernfs_node *kn;
3403	int ret;
3404
3405	/*
3406	* Create the mon_data directory first.
3407	*/
3408	ret = mongroup_create_dir(parent_kn, prgrp, name: "mon_data", dest_kn: &kn);
3409	if (ret)
3410	return ret;
3411
3412	if (dest_kn)
3413	*dest_kn = kn;
3414
3415	/*
3416	* Create the subdirectories for each domain. Note that all events
3417	* in a domain like L3 are grouped into a resource whose domain is L3
3418	*/
3419	for_each_mon_capable_rdt_resource(r) {
3420	ret = mkdir_mondata_subdir_alldom(parent_kn: kn, r, prgrp);
3421	if (ret)
3422	goto out_destroy;
3423	}
3424
3425	return `0`;
3426
3427	out_destroy:
3428	kernfs_remove(kn);
3429	return ret;
3430	}
3431
3432	/**
3433	* cbm_ensure_valid - Enforce validity on provided CBM
3434	* @_val: Candidate CBM
3435	* @r: RDT resource to which the CBM belongs
3436	*
3437	* The provided CBM represents all cache portions available for use. This
3438	* may be represented by a bitmap that does not consist of contiguous ones
3439	* and thus be an invalid CBM.
3440	* Here the provided CBM is forced to be a valid CBM by only considering
3441	* the first set of contiguous bits as valid and clearing all bits.
3442	* The intention here is to provide a valid default CBM with which a new
3443	* resource group is initialized. The user can follow this with a
3444	* modification to the CBM if the default does not satisfy the
3445	* requirements.
3446	*/
3447	static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
3448	{
3449	unsigned int cbm_len = r->cache.cbm_len;
3450	unsigned long first_bit, zero_bit;
3451	unsigned long val;
3452
3453	if (!_val \|\| r->cache.arch_has_sparse_bitmasks)
3454	return _val;
3455
3456	val = _val;
3457	first_bit = find_first_bit(addr: &val, size: cbm_len);
3458	zero_bit = find_next_zero_bit(addr: &val, size: cbm_len, offset: first_bit);
3459
3460	/ Clear any remaining bits to ensure contiguous region /
3461	bitmap_clear(map: &val, start: zero_bit, nbits: cbm_len - zero_bit);
3462	return (u32)val;
3463	}
3464
3465	/*
3466	* Initialize cache resources per RDT domain
3467	*
3468	* Set the RDT domain up to start off with all usable allocations. That is,
3469	* all shareable and unused bits. All-zero CBM is invalid.
3470	*/
3471	static int __init_one_rdt_domain(struct rdt_ctrl_domain d, struct* resctrl_schema *s,
3472	u32 closid)
3473	{
3474	enum resctrl_conf_type peer_type = resctrl_peer_type(my_type: s->conf_type);
3475	enum resctrl_conf_type t = s->conf_type;
3476	struct resctrl_staged_config *cfg;
3477	struct rdt_resource *r = s->res;
3478	u32 used_b = `0`, unused_b = `0`;
3479	unsigned long tmp_cbm;
3480	enum rdtgrp_mode mode;
3481	u32 peer_ctl, ctrl_val;
3482	int i;
3483
3484	cfg = &d->staged_config[t];
3485	cfg->have_new_ctrl = false;
3486	cfg->new_ctrl = r->cache.shareable_bits;
3487	used_b = r->cache.shareable_bits;
3488	for (i = `0`; i < closids_supported(); i++) {
3489	if (closid_allocated(closid: i) && i != closid) {
3490	mode = rdtgroup_mode_by_closid(closid: i);
3491	if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
3492	/*
3493	* ctrl values for locksetup aren't relevant
3494	* until the schemata is written, and the mode
3495	* becomes RDT_MODE_PSEUDO_LOCKED.
3496	*/
3497	continue;
3498	/*
3499	* If CDP is active include peer domain's
3500	* usage to ensure there is no overlap
3501	* with an exclusive group.
3502	*/
3503	if (resctrl_arch_get_cdp_enabled(l: r->rid))
3504	peer_ctl = resctrl_arch_get_config(r, d, closid: i,
3505	type: peer_type);
3506	else
3507	peer_ctl = `0`;
3508	ctrl_val = resctrl_arch_get_config(r, d, closid: i,
3509	type: s->conf_type);
3510	used_b \|= ctrl_val \| peer_ctl;
3511	if (mode == RDT_MODE_SHAREABLE)
3512	cfg->new_ctrl \|= ctrl_val \| peer_ctl;
3513	}
3514	}
3515	if (d->plr && d->plr->cbm > `0`)
3516	used_b \|= d->plr->cbm;
3517	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - `1`);
3518	unused_b &= BIT_MASK(r->cache.cbm_len) - `1`;
3519	cfg->new_ctrl \|= unused_b;
3520	/*
3521	* Force the initial CBM to be valid, user can
3522	* modify the CBM based on system availability.
3523	*/
3524	cfg->new_ctrl = cbm_ensure_valid(val: cfg->new_ctrl, r);
3525	/*
3526	* Assign the u32 CBM to an unsigned long to ensure that
3527	* bitmap_weight() does not access out-of-bound memory.
3528	*/
3529	tmp_cbm = cfg->new_ctrl;
3530	if (bitmap_weight(src: &tmp_cbm, nbits: r->cache.cbm_len) < r->cache.min_cbm_bits) {
3531	rdt_last_cmd_printf(fmt: "No space on %s:%d\n", s->name, d->hdr.id);
3532	return -ENOSPC;
3533	}
3534	cfg->have_new_ctrl = true;
3535
3536	return `0`;
3537	}
3538
3539	/*
3540	* Initialize cache resources with default values.
3541	*
3542	* A new RDT group is being created on an allocation capable (CAT)
3543	* supporting system. Set this group up to start off with all usable
3544	* allocations.
3545	*
3546	* If there are no more shareable bits available on any domain then
3547	* the entire allocation will fail.
3548	*/
3549	int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
3550	{
3551	struct rdt_ctrl_domain *d;
3552	int ret;
3553
3554	list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
3555	ret = __init_one_rdt_domain(d, s, closid);
3556	if (ret < `0`)
3557	return ret;
3558	}
3559
3560	return `0`;
3561	}
3562
3563	/ Initialize MBA resource with default values. /
3564	static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
3565	{
3566	struct resctrl_staged_config *cfg;
3567	struct rdt_ctrl_domain *d;
3568
3569	list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
3570	if (is_mba_sc(r)) {
3571	d->mbps_val[closid] = MBA_MAX_MBPS;
3572	continue;
3573	}
3574
3575	cfg = &d->staged_config[CDP_NONE];
3576	cfg->new_ctrl = resctrl_get_default_ctrl(r);
3577	cfg->have_new_ctrl = true;
3578	}
3579	}
3580
3581	/ Initialize the RDT group's allocations. /
3582	static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
3583	{
3584	struct resctrl_schema *s;
3585	struct rdt_resource *r;
3586	int ret = `0`;
3587
3588	rdt_staged_configs_clear();
3589
3590	list_for_each_entry(s, &resctrl_schema_all, list) {
3591	r = s->res;
3592	if (r->rid == RDT_RESOURCE_MBA \|\|
3593	r->rid == RDT_RESOURCE_SMBA) {
3594	rdtgroup_init_mba(r, closid: rdtgrp->closid);
3595	if (is_mba_sc(r))
3596	continue;
3597	} else {
3598	ret = rdtgroup_init_cat(s, closid: rdtgrp->closid);
3599	if (ret < `0`)
3600	goto out;
3601	}
3602
3603	ret = resctrl_arch_update_domains(r, closid: rdtgrp->closid);
3604	if (ret < `0`) {
3605	rdt_last_cmd_puts(s: "Failed to initialize allocations\n");
3606	goto out;
3607	}
3608	}
3609
3610	rdtgrp->mode = RDT_MODE_SHAREABLE;
3611
3612	out:
3613	rdt_staged_configs_clear();
3614	return ret;
3615	}
3616
3617	static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
3618	{
3619	int ret;
3620
3621	if (!resctrl_arch_mon_capable())
3622	return `0`;
3623
3624	ret = alloc_rmid(closid: rdtgrp->closid);
3625	if (ret < `0`) {
3626	rdt_last_cmd_puts(s: "Out of RMIDs\n");
3627	return ret;
3628	}
3629	rdtgrp->mon.rmid = ret;
3630
3631	rdtgroup_assign_cntrs(rdtgrp);
3632
3633	ret = mkdir_mondata_all(parent_kn: rdtgrp->kn, prgrp: rdtgrp, dest_kn: &rdtgrp->mon.mon_data_kn);
3634	if (ret) {
3635	rdt_last_cmd_puts(s: "kernfs subdir error\n");
3636	rdtgroup_unassign_cntrs(rdtgrp);
3637	free_rmid(closid: rdtgrp->closid, rmid: rdtgrp->mon.rmid);
3638	return ret;
3639	}
3640
3641	return `0`;
3642	}
3643
3644	static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
3645	{
3646	if (resctrl_arch_mon_capable()) {
3647	rdtgroup_unassign_cntrs(rdtgrp: rgrp);
3648	free_rmid(closid: rgrp->closid, rmid: rgrp->mon.rmid);
3649	}
3650	}
3651
/*
 * We allow creating mon groups only within a directory called "mon_groups"
 * which is present in every ctrl_mon group. Check if this is a valid
 * "mon_groups" directory.
 *
 * 1. The directory @kn should be named "mon_groups".
 * 2. The mon group itself (@name) should "not" be named "mon_groups".
 *    This makes sure "mon_groups" directory always has a ctrl_mon group
 *    as parent.
 *
 * Return: true only if both conditions hold.
 */
static bool is_mon_groups(struct kernfs_node *kn, const char *name)
{
	return !strcmp(rdt_kn_name(kn), "mon_groups") &&
	       strcmp(name, "mon_groups") != 0;
}
3667
3668	static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
3669	const char *name, umode_t mode,
3670	enum rdt_group_type rtype, struct rdtgroup **r)
3671	{
3672	struct rdtgroup prdtgrp, rdtgrp;
3673	unsigned long files = `0`;
3674	struct kernfs_node *kn;
3675	int ret;
3676
3677	prdtgrp = rdtgroup_kn_lock_live(kn: parent_kn);
3678	if (!prdtgrp) {
3679	ret = -ENODEV;
3680	goto out_unlock;
3681	}
3682
3683	rdt_last_cmd_clear();
3684
3685	/*
3686	* Check that the parent directory for a monitor group is a "mon_groups"
3687	* directory.
3688	*/
3689	if (rtype == RDTMON_GROUP && !is_mon_groups(kn: parent_kn, name)) {
3690	ret = -EPERM;
3691	goto out_unlock;
3692	}
3693
3694	if (rtype == RDTMON_GROUP &&
3695	(prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP \|\|
3696	prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
3697	ret = -EINVAL;
3698	rdt_last_cmd_puts(s: "Pseudo-locking in progress\n");
3699	goto out_unlock;
3700	}
3701
3702	/ allocate the rdtgroup. /
3703	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
3704	if (!rdtgrp) {
3705	ret = -ENOSPC;
3706	rdt_last_cmd_puts(s: "Kernel out of memory\n");
3707	goto out_unlock;
3708	}
3709	*r = rdtgrp;
3710	rdtgrp->mon.parent = prdtgrp;
3711	rdtgrp->type = rtype;
3712	INIT_LIST_HEAD(list: &rdtgrp->mon.crdtgrp_list);
3713
3714	/ kernfs creates the directory for rdtgrp /
3715	kn = kernfs_create_dir(parent: parent_kn, name, mode, priv: rdtgrp);
3716	if (IS_ERR(ptr: kn)) {
3717	ret = PTR_ERR(ptr: kn);
3718	rdt_last_cmd_puts(s: "kernfs create error\n");
3719	goto out_free_rgrp;
3720	}
3721	rdtgrp->kn = kn;
3722
3723	/*
3724	* kernfs_remove() will drop the reference count on "kn" which
3725	* will free it. But we still need it to stick around for the
3726	* rdtgroup_kn_unlock(kn) call. Take one extra reference here,
3727	* which will be dropped by kernfs_put() in rdtgroup_remove().
3728	*/
3729	kernfs_get(kn);
3730
3731	ret = rdtgroup_kn_set_ugid(kn);
3732	if (ret) {
3733	rdt_last_cmd_puts(s: "kernfs perm error\n");
3734	goto out_destroy;
3735	}
3736
3737	if (rtype == RDTCTRL_GROUP) {
3738	files = RFTYPE_BASE \| RFTYPE_CTRL;
3739	if (resctrl_arch_mon_capable())
3740	files \|= RFTYPE_MON;
3741	} else {
3742	files = RFTYPE_BASE \| RFTYPE_MON;
3743	}
3744
3745	ret = rdtgroup_add_files(kn, fflags: files);
3746	if (ret) {
3747	rdt_last_cmd_puts(s: "kernfs fill error\n");
3748	goto out_destroy;
3749	}
3750
3751	/*
3752	* The caller unlocks the parent_kn upon success.
3753	*/
3754	return `0`;
3755
3756	out_destroy:
3757	kernfs_put(kn: rdtgrp->kn);
3758	kernfs_remove(kn: rdtgrp->kn);
3759	out_free_rgrp:
3760	kfree(objp: rdtgrp);
3761	out_unlock:
3762	rdtgroup_kn_unlock(kn: parent_kn);
3763	return ret;
3764	}
3765
3766	static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
3767	{
3768	kernfs_remove(kn: rgrp->kn);
3769	rdtgroup_remove(rdtgrp: rgrp);
3770	}
3771
3772	/*
3773	* Create a monitor group under "mon_groups" directory of a control
3774	* and monitor group(ctrl_mon). This is a resource group
3775	* to monitor a subset of tasks and cpus in its parent ctrl_mon group.
3776	*/
3777	static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
3778	const char *name, umode_t mode)
3779	{
3780	struct rdtgroup rdtgrp, prgrp;
3781	int ret;
3782
3783	ret = mkdir_rdt_prepare(parent_kn, name, mode, rtype: RDTMON_GROUP, r: &rdtgrp);
3784	if (ret)
3785	return ret;
3786
3787	prgrp = rdtgrp->mon.parent;
3788	rdtgrp->closid = prgrp->closid;
3789
3790	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3791	if (ret) {
3792	mkdir_rdt_prepare_clean(rgrp: rdtgrp);
3793	goto out_unlock;
3794	}
3795
3796	kernfs_activate(kn: rdtgrp->kn);
3797
3798	/*
3799	* Add the rdtgrp to the list of rdtgrps the parent
3800	* ctrl_mon group has to track.
3801	*/
3802	list_add_tail(new: &rdtgrp->mon.crdtgrp_list, head: &prgrp->mon.crdtgrp_list);
3803
3804	out_unlock:
3805	rdtgroup_kn_unlock(kn: parent_kn);
3806	return ret;
3807	}
3808
3809	/*
3810	* These are rdtgroups created under the root directory. Can be used
3811	* to allocate and monitor resources.
3812	*/
3813	static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
3814	const char *name, umode_t mode)
3815	{
3816	struct rdtgroup *rdtgrp;
3817	struct kernfs_node *kn;
3818	u32 closid;
3819	int ret;
3820
3821	ret = mkdir_rdt_prepare(parent_kn, name, mode, rtype: RDTCTRL_GROUP, r: &rdtgrp);
3822	if (ret)
3823	return ret;
3824
3825	kn = rdtgrp->kn;
3826	ret = closid_alloc();
3827	if (ret < `0`) {
3828	rdt_last_cmd_puts(s: "Out of CLOSIDs\n");
3829	goto out_common_fail;
3830	}
3831	closid = ret;
3832	ret = `0`;
3833
3834	rdtgrp->closid = closid;
3835
3836	ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
3837	if (ret)
3838	goto out_closid_free;
3839
3840	kernfs_activate(kn: rdtgrp->kn);
3841
3842	ret = rdtgroup_init_alloc(rdtgrp);
3843	if (ret < `0`)
3844	goto out_rmid_free;
3845
3846	list_add(new: &rdtgrp->rdtgroup_list, head: &rdt_all_groups);
3847
3848	if (resctrl_arch_mon_capable()) {
3849	/*
3850	* Create an empty mon_groups directory to hold the subset
3851	* of tasks and cpus to monitor.
3852	*/
3853	ret = mongroup_create_dir(parent_kn: kn, prgrp: rdtgrp, name: "mon_groups", NULL);
3854	if (ret) {
3855	rdt_last_cmd_puts(s: "kernfs subdir error\n");
3856	goto out_del_list;
3857	}
3858	if (is_mba_sc(NULL))
3859	rdtgrp->mba_mbps_event = mba_mbps_default_event;
3860	}
3861
3862	goto out_unlock;
3863
3864	out_del_list:
3865	list_del(entry: &rdtgrp->rdtgroup_list);
3866	out_rmid_free:
3867	mkdir_rdt_prepare_rmid_free(rgrp: rdtgrp);
3868	out_closid_free:
3869	closid_free(closid);
3870	out_common_fail:
3871	mkdir_rdt_prepare_clean(rgrp: rdtgrp);
3872	out_unlock:
3873	rdtgroup_kn_unlock(kn: parent_kn);
3874	return ret;
3875	}
3876
3877	static int rdtgroup_mkdir(struct kernfs_node parent_kn, const* char *name,
3878	umode_t mode)
3879	{
3880	/ Do not accept '\n' to avoid unparsable situation. /
3881	if (strchr(name, `'\n'`))
3882	return -EINVAL;
3883
3884	/*
3885	* If the parent directory is the root directory and RDT
3886	* allocation is supported, add a control and monitoring
3887	* subdirectory
3888	*/
3889	if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
3890	return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3891
3892	/ Else, attempt to add a monitoring subdirectory. /
3893	if (resctrl_arch_mon_capable())
3894	return rdtgroup_mkdir_mon(parent_kn, name, mode);
3895
3896	return -EPERM;
3897	}
3898
3899	static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3900	{
3901	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3902	u32 closid, rmid;
3903	int cpu;
3904
3905	/ Give any tasks back to the parent group /
3906	rdt_move_group_tasks(from: rdtgrp, to: prdtgrp, mask: tmpmask);
3907
3908	/*
3909	* Update per cpu closid/rmid of the moved CPUs first.
3910	* Note: the closid will not change, but the arch code still needs it.
3911	*/
3912	closid = prdtgrp->closid;
3913	rmid = prdtgrp->mon.rmid;
3914	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3915	resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
3916
3917	/*
3918	* Update the MSR on moved CPUs and CPUs which have moved
3919	* task running on them.
3920	*/
3921	cpumask_or(dstp: tmpmask, src1p: tmpmask, src2p: &rdtgrp->cpu_mask);
3922	update_closid_rmid(cpu_mask: tmpmask, NULL);
3923
3924	rdtgrp->flags = RDT_DELETED;
3925
3926	rdtgroup_unassign_cntrs(rdtgrp);
3927
3928	free_rmid(closid: rdtgrp->closid, rmid: rdtgrp->mon.rmid);
3929
3930	/*
3931	* Remove the rdtgrp from the parent ctrl_mon group's list
3932	*/
3933	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3934	list_del(entry: &rdtgrp->mon.crdtgrp_list);
3935
3936	kernfs_remove(kn: rdtgrp->kn);
3937
3938	return `0`;
3939	}
3940
3941	static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
3942	{
3943	rdtgrp->flags = RDT_DELETED;
3944	list_del(entry: &rdtgrp->rdtgroup_list);
3945
3946	kernfs_remove(kn: rdtgrp->kn);
3947	return `0`;
3948	}
3949
3950	static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3951	{
3952	u32 closid, rmid;
3953	int cpu;
3954
3955	/ Give any tasks back to the default group /
3956	rdt_move_group_tasks(from: rdtgrp, to: &rdtgroup_default, mask: tmpmask);
3957
3958	/ Give any CPUs back to the default group /
3959	cpumask_or(dstp: &rdtgroup_default.cpu_mask,
3960	src1p: &rdtgroup_default.cpu_mask, src2p: &rdtgrp->cpu_mask);
3961
3962	/ Update per cpu closid and rmid of the moved CPUs first /
3963	closid = rdtgroup_default.closid;
3964	rmid = rdtgroup_default.mon.rmid;
3965	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3966	resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
3967
3968	/*
3969	* Update the MSR on moved CPUs and CPUs which have moved
3970	* task running on them.
3971	*/
3972	cpumask_or(dstp: tmpmask, src1p: tmpmask, src2p: &rdtgrp->cpu_mask);
3973	update_closid_rmid(cpu_mask: tmpmask, NULL);
3974
3975	rdtgroup_unassign_cntrs(rdtgrp);
3976
3977	free_rmid(closid: rdtgrp->closid, rmid: rdtgrp->mon.rmid);
3978	closid_free(closid: rdtgrp->closid);
3979
3980	rdtgroup_ctrl_remove(rdtgrp);
3981
3982	/*
3983	* Free all the child monitor group rmids.
3984	*/
3985	free_all_child_rdtgrp(rdtgrp);
3986
3987	return `0`;
3988	}
3989
3990	static struct kernfs_node rdt_kn_parent(struct* kernfs_node *kn)
3991	{
3992	/*
3993	* Valid within the RCU section it was obtained or while rdtgroup_mutex
3994	* is held.
3995	*/
3996	return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex));
3997	}
3998
3999	static int rdtgroup_rmdir(struct kernfs_node *kn)
4000	{
4001	struct kernfs_node *parent_kn;
4002	struct rdtgroup *rdtgrp;
4003	cpumask_var_t tmpmask;
4004	int ret = `0`;
4005
4006	if (!zalloc_cpumask_var(mask: &tmpmask, GFP_KERNEL))
4007	return -ENOMEM;
4008
4009	rdtgrp = rdtgroup_kn_lock_live(kn);
4010	if (!rdtgrp) {
4011	ret = -EPERM;
4012	goto out;
4013	}
4014	parent_kn = rdt_kn_parent(kn);
4015
4016	/*
4017	* If the rdtgroup is a ctrl_mon group and parent directory
4018	* is the root directory, remove the ctrl_mon group.
4019	*
4020	* If the rdtgroup is a mon group and parent directory
4021	* is a valid "mon_groups" directory, remove the mon group.
4022	*/
4023	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
4024	rdtgrp != &rdtgroup_default) {
4025	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP \|\|
4026	rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
4027	ret = rdtgroup_ctrl_remove(rdtgrp);
4028	} else {
4029	ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
4030	}
4031	} else if (rdtgrp->type == RDTMON_GROUP &&
4032	is_mon_groups(kn: parent_kn, name: rdt_kn_name(kn))) {
4033	ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
4034	} else {
4035	ret = -EPERM;
4036	}
4037
4038	out:
4039	rdtgroup_kn_unlock(kn);
4040	free_cpumask_var(mask: tmpmask);
4041	return ret;
4042	}
4043
4044	/**
4045	* mongrp_reparent() - replace parent CTRL_MON group of a MON group
4046	* @rdtgrp: the MON group whose parent should be replaced
4047	* @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp
4048	* @cpus: cpumask provided by the caller for use during this call
4049	*
4050	* Replaces the parent CTRL_MON group for a MON group, resulting in all member
4051	* tasks' CLOSID immediately changing to that of the new parent group.
4052	* Monitoring data for the group is unaffected by this operation.
4053	*/
4054	static void mongrp_reparent(struct rdtgroup *rdtgrp,
4055	struct rdtgroup *new_prdtgrp,
4056	cpumask_var_t cpus)
4057	{
4058	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
4059
4060	WARN_ON(rdtgrp->type != RDTMON_GROUP);
4061	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
4062
4063	/ Nothing to do when simply renaming a MON group. /
4064	if (prdtgrp == new_prdtgrp)
4065	return;
4066
4067	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
4068	list_move_tail(list: &rdtgrp->mon.crdtgrp_list,
4069	head: &new_prdtgrp->mon.crdtgrp_list);
4070
4071	rdtgrp->mon.parent = new_prdtgrp;
4072	rdtgrp->closid = new_prdtgrp->closid;
4073
4074	/ Propagate updated closid to all tasks in this group. /
4075	rdt_move_group_tasks(from: rdtgrp, to: rdtgrp, mask: cpus);
4076
4077	update_closid_rmid(cpu_mask: cpus, NULL);
4078	}
4079
4080	static int rdtgroup_rename(struct kernfs_node *kn,
4081	struct kernfs_node new_parent, const* char *new_name)
4082	{
4083	struct kernfs_node *kn_parent;
4084	struct rdtgroup *new_prdtgrp;
4085	struct rdtgroup *rdtgrp;
4086	cpumask_var_t tmpmask;
4087	int ret;
4088
4089	rdtgrp = kernfs_to_rdtgroup(kn);
4090	new_prdtgrp = kernfs_to_rdtgroup(kn: new_parent);
4091	if (!rdtgrp \|\| !new_prdtgrp)
4092	return -ENOENT;
4093
4094	/ Release both kernfs active_refs before obtaining rdtgroup mutex. /
4095	rdtgroup_kn_get(rdtgrp, kn);
4096	rdtgroup_kn_get(rdtgrp: new_prdtgrp, kn: new_parent);
4097
4098	mutex_lock(&rdtgroup_mutex);
4099
4100	rdt_last_cmd_clear();
4101
4102	/*
4103	* Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
4104	* either kernfs_node is a file.
4105	*/
4106	if (kernfs_type(kn) != KERNFS_DIR \|\|
4107	kernfs_type(kn: new_parent) != KERNFS_DIR) {
4108	rdt_last_cmd_puts(s: "Source and destination must be directories");
4109	ret = -EPERM;
4110	goto out;
4111	}
4112
4113	if ((rdtgrp->flags & RDT_DELETED) \|\| (new_prdtgrp->flags & RDT_DELETED)) {
4114	ret = -ENOENT;
4115	goto out;
4116	}
4117
4118	kn_parent = rdt_kn_parent(kn);
4119	if (rdtgrp->type != RDTMON_GROUP \|\| !kn_parent \|\|
4120	!is_mon_groups(kn: kn_parent, name: rdt_kn_name(kn))) {
4121	rdt_last_cmd_puts(s: "Source must be a MON group\n");
4122	ret = -EPERM;
4123	goto out;
4124	}
4125
4126	if (!is_mon_groups(kn: new_parent, name: new_name)) {
4127	rdt_last_cmd_puts(s: "Destination must be a mon_groups subdirectory\n");
4128	ret = -EPERM;
4129	goto out;
4130	}
4131
4132	/*
4133	* If the MON group is monitoring CPUs, the CPUs must be assigned to the
4134	* current parent CTRL_MON group and therefore cannot be assigned to
4135	* the new parent, making the move illegal.
4136	*/
4137	if (!cpumask_empty(srcp: &rdtgrp->cpu_mask) &&
4138	rdtgrp->mon.parent != new_prdtgrp) {
4139	rdt_last_cmd_puts(s: "Cannot move a MON group that monitors CPUs\n");
4140	ret = -EPERM;
4141	goto out;
4142	}
4143
4144	/*
4145	* Allocate the cpumask for use in mongrp_reparent() to avoid the
4146	* possibility of failing to allocate it after kernfs_rename() has
4147	* succeeded.
4148	*/
4149	if (!zalloc_cpumask_var(mask: &tmpmask, GFP_KERNEL)) {
4150	ret = -ENOMEM;
4151	goto out;
4152	}
4153
4154	/*
4155	* Perform all input validation and allocations needed to ensure
4156	* mongrp_reparent() will succeed before calling kernfs_rename(),
4157	* otherwise it would be necessary to revert this call if
4158	* mongrp_reparent() failed.
4159	*/
4160	ret = kernfs_rename(kn, new_parent, new_name);
4161	if (!ret)
4162	mongrp_reparent(rdtgrp, new_prdtgrp, cpus: tmpmask);
4163
4164	free_cpumask_var(mask: tmpmask);
4165
4166	out:
4167	mutex_unlock(lock: &rdtgroup_mutex);
4168	rdtgroup_kn_put(rdtgrp, kn);
4169	rdtgroup_kn_put(rdtgrp: new_prdtgrp, kn: new_parent);
4170	return ret;
4171	}
4172
4173	static int rdtgroup_show_options(struct seq_file seq, struct* kernfs_root *kf)
4174	{
4175	if (resctrl_arch_get_cdp_enabled(l: RDT_RESOURCE_L3))
4176	seq_puts(m: seq, s: ",cdp");
4177
4178	if (resctrl_arch_get_cdp_enabled(l: RDT_RESOURCE_L2))
4179	seq_puts(m: seq, s: ",cdpl2");
4180
4181	if (is_mba_sc(r: resctrl_arch_get_resource(l: RDT_RESOURCE_MBA)))
4182	seq_puts(m: seq, s: ",mba_MBps");
4183
4184	if (resctrl_debug)
4185	seq_puts(m: seq, s: ",debug");
4186
4187	return `0`;
4188	}
4189
4190	static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
4191	.mkdir = rdtgroup_mkdir,
4192	.rmdir = rdtgroup_rmdir,
4193	.rename = rdtgroup_rename,
4194	.show_options = rdtgroup_show_options,
4195	};
4196
4197	static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
4198	{
4199	rdt_root = kernfs_create_root(scops: &rdtgroup_kf_syscall_ops,
4200	flags: KERNFS_ROOT_CREATE_DEACTIVATED \|
4201	KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
4202	priv: &rdtgroup_default);
4203	if (IS_ERR(ptr: rdt_root))
4204	return PTR_ERR(ptr: rdt_root);
4205
4206	ctx->kfc.root = rdt_root;
4207	rdtgroup_default.kn = kernfs_root_to_node(root: rdt_root);
4208
4209	return `0`;
4210	}
4211
4212	static void rdtgroup_destroy_root(void)
4213	{
4214	lockdep_assert_held(&rdtgroup_mutex);
4215
4216	kernfs_destroy_root(root: rdt_root);
4217	rdtgroup_default.kn = NULL;
4218	}
4219
4220	static void rdtgroup_setup_default(void)
4221	{
4222	mutex_lock(&rdtgroup_mutex);
4223
4224	rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
4225	rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
4226	rdtgroup_default.type = RDTCTRL_GROUP;
4227	INIT_LIST_HEAD(list: &rdtgroup_default.mon.crdtgrp_list);
4228
4229	list_add(new: &rdtgroup_default.rdtgroup_list, head: &rdt_all_groups);
4230
4231	mutex_unlock(lock: &rdtgroup_mutex);
4232	}
4233
4234	static void domain_destroy_mon_state(struct rdt_mon_domain *d)
4235	{
4236	int idx;
4237
4238	kfree(objp: d->cntr_cfg);
4239	bitmap_free(bitmap: d->rmid_busy_llc);
4240	for_each_mbm_idx(idx) {
4241	kfree(objp: d->mbm_states[idx]);
4242	d->mbm_states[idx] = NULL;
4243	}
4244	}
4245
4246	void resctrl_offline_ctrl_domain(struct rdt_resource r, struct* rdt_ctrl_domain *d)
4247	{
4248	mutex_lock(&rdtgroup_mutex);
4249
4250	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
4251	mba_sc_domain_destroy(r, d);
4252
4253	mutex_unlock(lock: &rdtgroup_mutex);
4254	}
4255
4256	void resctrl_offline_mon_domain(struct rdt_resource r, struct* rdt_mon_domain *d)
4257	{
4258	mutex_lock(&rdtgroup_mutex);
4259
4260	/*
4261	* If resctrl is mounted, remove all the
4262	* per domain monitor data directories.
4263	*/
4264	if (resctrl_mounted && resctrl_arch_mon_capable())
4265	rmdir_mondata_subdir_allrdtgrp(r, d);
4266
4267	if (resctrl_is_mbm_enabled())
4268	cancel_delayed_work(dwork: &d->mbm_over);
4269	if (resctrl_is_mon_event_enabled(eventid: QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) {
4270	/*
4271	* When a package is going down, forcefully
4272	* decrement rmid->ebusy. There is no way to know
4273	* that the L3 was flushed and hence may lead to
4274	* incorrect counts in rare scenarios, but leaving
4275	* the RMID as busy creates RMID leaks if the
4276	* package never comes back.
4277	*/
4278	__check_limbo(d, force_free: true);
4279	cancel_delayed_work(dwork: &d->cqm_limbo);
4280	}
4281
4282	domain_destroy_mon_state(d);
4283
4284	mutex_unlock(lock: &rdtgroup_mutex);
4285	}
4286
4287	/**
4288	* domain_setup_mon_state() - Initialise domain monitoring structures.
4289	* @r: The resource for the newly online domain.
4290	* @d: The newly online domain.
4291	*
4292	* Allocate monitor resources that belong to this domain.
4293	* Called when the first CPU of a domain comes online, regardless of whether
4294	* the filesystem is mounted.
4295	* During boot this may be called before global allocations have been made by
4296	* resctrl_mon_resource_init().
4297	*
4298	* Returns 0 for success, or -ENOMEM.
4299	*/
4300	static int domain_setup_mon_state(struct rdt_resource r, struct* rdt_mon_domain *d)
4301	{
4302	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
4303	size_t tsize = sizeof(*d->mbm_states[`0`]);
4304	enum resctrl_event_id eventid;
4305	int idx;
4306
4307	if (resctrl_is_mon_event_enabled(eventid: QOS_L3_OCCUP_EVENT_ID)) {
4308	d->rmid_busy_llc = bitmap_zalloc(nbits: idx_limit, GFP_KERNEL);
4309	if (!d->rmid_busy_llc)
4310	return -ENOMEM;
4311	}
4312
4313	for_each_mbm_event_id(eventid) {
4314	if (!resctrl_is_mon_event_enabled(eventid))
4315	continue;
4316	idx = MBM_STATE_IDX(eventid);
4317	d->mbm_states[idx] = kcalloc(idx_limit, tsize, GFP_KERNEL);
4318	if (!d->mbm_states[idx])
4319	goto cleanup;
4320	}
4321
4322	if (resctrl_is_mbm_enabled() && r->mon.mbm_cntr_assignable) {
4323	tsize = sizeof(*d->cntr_cfg);
4324	d->cntr_cfg = kcalloc(r->mon.num_mbm_cntrs, tsize, GFP_KERNEL);
4325	if (!d->cntr_cfg)
4326	goto cleanup;
4327	}
4328
4329	return `0`;
4330	cleanup:
4331	bitmap_free(bitmap: d->rmid_busy_llc);
4332	for_each_mbm_idx(idx) {
4333	kfree(objp: d->mbm_states[idx]);
4334	d->mbm_states[idx] = NULL;
4335	}
4336
4337	return -ENOMEM;
4338	}
4339
4340	int resctrl_online_ctrl_domain(struct rdt_resource r, struct* rdt_ctrl_domain *d)
4341	{
4342	int err = `0`;
4343
4344	mutex_lock(&rdtgroup_mutex);
4345
4346	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
4347	/ RDT_RESOURCE_MBA is never mon_capable /
4348	err = mba_sc_domain_allocate(r, d);
4349	}
4350
4351	mutex_unlock(lock: &rdtgroup_mutex);
4352
4353	return err;
4354	}
4355
4356	int resctrl_online_mon_domain(struct rdt_resource r, struct* rdt_mon_domain *d)
4357	{
4358	int err;
4359
4360	mutex_lock(&rdtgroup_mutex);
4361
4362	err = domain_setup_mon_state(r, d);
4363	if (err)
4364	goto out_unlock;
4365
4366	if (resctrl_is_mbm_enabled()) {
4367	INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
4368	mbm_setup_overflow_handler(dom: d, MBM_OVERFLOW_INTERVAL,
4369	RESCTRL_PICK_ANY_CPU);
4370	}
4371
4372	if (resctrl_is_mon_event_enabled(eventid: QOS_L3_OCCUP_EVENT_ID))
4373	INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
4374
4375	/*
4376	* If the filesystem is not mounted then only the default resource group
4377	* exists. Creation of its directories is deferred until mount time
4378	* by rdt_get_tree() calling mkdir_mondata_all().
4379	* If resctrl is mounted, add per domain monitor data directories.
4380	*/
4381	if (resctrl_mounted && resctrl_arch_mon_capable())
4382	mkdir_mondata_subdir_allrdtgrp(r, d);
4383
4384	out_unlock:
4385	mutex_unlock(lock: &rdtgroup_mutex);
4386
4387	return err;
4388	}
4389
4390	void resctrl_online_cpu(unsigned int cpu)
4391	{
4392	mutex_lock(&rdtgroup_mutex);
4393	/ The CPU is set in default rdtgroup after online. /
4394	cpumask_set_cpu(cpu, dstp: &rdtgroup_default.cpu_mask);
4395	mutex_unlock(lock: &rdtgroup_mutex);
4396	}
4397
4398	static void clear_childcpus(struct rdtgroup r, unsigned* int cpu)
4399	{
4400	struct rdtgroup *cr;
4401
4402	list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
4403	if (cpumask_test_and_clear_cpu(cpu, cpumask: &cr->cpu_mask))
4404	break;
4405	}
4406	}
4407
4408	static struct rdt_mon_domain get_mon_domain_from_cpu(int* cpu,
4409	struct rdt_resource *r)
4410	{
4411	struct rdt_mon_domain *d;
4412
4413	lockdep_assert_cpus_held();
4414
4415	list_for_each_entry(d, &r->mon_domains, hdr.list) {
4416	/ Find the domain that contains this CPU /
4417	if (cpumask_test_cpu(cpu, cpumask: &d->hdr.cpu_mask))
4418	return d;
4419	}
4420
4421	return NULL;
4422	}
4423
4424	void resctrl_offline_cpu(unsigned int cpu)
4425	{
4426	struct rdt_resource *l3 = resctrl_arch_get_resource(l: RDT_RESOURCE_L3);
4427	struct rdt_mon_domain *d;
4428	struct rdtgroup *rdtgrp;
4429
4430	mutex_lock(&rdtgroup_mutex);
4431	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
4432	if (cpumask_test_and_clear_cpu(cpu, cpumask: &rdtgrp->cpu_mask)) {
4433	clear_childcpus(r: rdtgrp, cpu);
4434	break;
4435	}
4436	}
4437
4438	if (!l3->mon_capable)
4439	goto out_unlock;
4440
4441	d = get_mon_domain_from_cpu(cpu, r: l3);
4442	if (d) {
4443	if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
4444	cancel_delayed_work(dwork: &d->mbm_over);
4445	mbm_setup_overflow_handler(dom: d, delay_ms: `0`, exclude_cpu: cpu);
4446	}
4447	if (resctrl_is_mon_event_enabled(eventid: QOS_L3_OCCUP_EVENT_ID) &&
4448	cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
4449	cancel_delayed_work(dwork: &d->cqm_limbo);
4450	cqm_setup_limbo_handler(dom: d, delay_ms: `0`, exclude_cpu: cpu);
4451	}
4452	}
4453
4454	out_unlock:
4455	mutex_unlock(lock: &rdtgroup_mutex);
4456	}
4457
4458	/*
4459	* resctrl_init - resctrl filesystem initialization
4460	*
4461	* Setup resctrl file system including set up root, create mount point,
4462	* register resctrl filesystem, and initialize files under root directory.
4463	*
4464	* Return: 0 on success or -errno
4465	*/
4466	int resctrl_init(void)
4467	{
4468	int ret = `0`;
4469
4470	seq_buf_init(s: &last_cmd_status, buf: last_cmd_status_buf,
4471	size: sizeof(last_cmd_status_buf));
4472
4473	rdtgroup_setup_default();
4474
4475	thread_throttle_mode_init();
4476
4477	io_alloc_init();
4478
4479	ret = resctrl_mon_resource_init();
4480	if (ret)
4481	return ret;
4482
4483	ret = sysfs_create_mount_point(parent_kobj: fs_kobj, name: "resctrl");
4484	if (ret) {
4485	resctrl_mon_resource_exit();
4486	return ret;
4487	}
4488
4489	ret = register_filesystem(&rdt_fs_type);
4490	if (ret)
4491	goto cleanup_mountpoint;
4492
4493	/*
4494	* Adding the resctrl debugfs directory here may not be ideal since
4495	* it would let the resctrl debugfs directory appear on the debugfs
4496	* filesystem before the resctrl filesystem is mounted.
4497	* It may also be ok since that would enable debugging of RDT before
4498	* resctrl is mounted.
4499	* The reason why the debugfs directory is created here and not in
4500	* rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
4501	* during the debugfs directory creation also &sb->s_type->i_mutex_key
4502	* (the lockdep class of inode->i_rwsem). Other filesystem
4503	* interactions (eg. SyS_getdents) have the lock ordering:
4504	* &sb->s_type->i_mutex_key --> &mm->mmap_lock
4505	* During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
4506	* is taken, thus creating dependency:
4507	* &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
4508	* issues considering the other two lock dependencies.
4509	* By creating the debugfs directory here we avoid a dependency
4510	* that may cause deadlock (even though file operations cannot
4511	* occur until the filesystem is mounted, but I do not know how to
4512	* tell lockdep that).
4513	*/
4514	debugfs_resctrl = debugfs_create_dir(name: "resctrl", NULL);
4515
4516	return `0`;
4517
4518	cleanup_mountpoint:
4519	sysfs_remove_mount_point(parent_kobj: fs_kobj, name: "resctrl");
4520	resctrl_mon_resource_exit();
4521
4522	return ret;
4523	}
4524
4525	static bool resctrl_online_domains_exist(void)
4526	{
4527	struct rdt_resource *r;
4528
4529	/*
4530	* Only walk capable resources to allow resctrl_arch_get_resource()
4531	* to return dummy 'not capable' resources.
4532	*/
4533	for_each_alloc_capable_rdt_resource(r) {
4534	if (!list_empty(head: &r->ctrl_domains))
4535	return true;
4536	}
4537
4538	for_each_mon_capable_rdt_resource(r) {
4539	if (!list_empty(head: &r->mon_domains))
4540	return true;
4541	}
4542
4543	return false;
4544	}
4545
4546	/**
4547	* resctrl_exit() - Remove the resctrl filesystem and free resources.
4548	*
4549	* Called by the architecture code in response to a fatal error.
4550	* Removes resctrl files and structures from kernfs to prevent further
4551	* configuration.
4552	*
4553	* When called by the architecture code, all CPUs and resctrl domains must be
4554	* offline. This ensures the limbo and overflow handlers are not scheduled to
4555	* run, meaning the data structures they access can be freed by
4556	* resctrl_mon_resource_exit().
4557	*
4558	* After resctrl_exit() returns, the architecture code should return an
4559	* error from all resctrl_arch_ functions that can do this.
4560	* resctrl_arch_get_resource() must continue to return struct rdt_resources
4561	* with the correct rid field to ensure the filesystem can be unmounted.
4562	*/
4563	void resctrl_exit(void)
4564	{
4565	cpus_read_lock();
4566	WARN_ON_ONCE(resctrl_online_domains_exist());
4567
4568	mutex_lock(&rdtgroup_mutex);
4569	resctrl_fs_teardown();
4570	mutex_unlock(lock: &rdtgroup_mutex);
4571
4572	cpus_read_unlock();
4573
4574	debugfs_remove_recursive(dentry: debugfs_resctrl);
4575	debugfs_resctrl = NULL;
4576	unregister_filesystem(&rdt_fs_type);
4577
4578	/*
4579	* Do not remove the sysfs mount point added by resctrl_init() so that
4580	* it can be used to umount resctrl.
4581	*/
4582
4583	resctrl_mon_resource_exit();
4584	}
4585

source code of linux/fs/resctrl/rdtgroup.c