// SPDX-License-Identifier: GPL-2.0
/*
 * Resource Director Technology (RDT)
 *
 * Pseudo-locking support built on top of Cache Allocation Technology (CAT)
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Reinette Chatre <reinette.chatre@intel.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cacheinfo.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/mman.h>
#include <linux/pm_qos.h>
#include <linux/resctrl.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

#include "internal.h"

/*
 * Major number assigned to and shared by all devices exposing
 * pseudo-locked regions.
 */
static unsigned int pseudo_lock_major;

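/*
 * Bitmask tracking available device minor numbers. A set bit means the
 * minor number is free: pseudo_lock_minor_get() claims the lowest free
 * bit and pseudo_lock_minor_release() returns it.
 */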
static unsigned long pseudo_lock_minor_avail = GENMASK(MINORBITS, 0);

static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
{
	const struct rdtgroup *rdtgrp;

	rdtgrp = dev_get_drvdata(dev);
	if (mode)
		*mode = 0600;
	guard(mutex)(&rdtgroup_mutex);
	return kasprintf(GFP_KERNEL, "pseudo_lock/%s", rdt_kn_name(rdtgrp->kn));
}

static const struct class pseudo_lock_class = {
	.name = "pseudo_lock",
	.devnode = pseudo_lock_devnode,
};
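
/* With udev the device nodes typically surface as /dev/pseudo_lock/<group>, mode 0600. */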

/**
 * pseudo_lock_minor_get - Obtain available minor number
 * @minor: Pointer to where new minor number will be stored
 *
 * A bitmask is used to track available minor numbers. Here the next free
 * minor number is marked as unavailable and returned.
 *
 * Return: 0 on success, <0 on failure.
 */
static int pseudo_lock_minor_get(unsigned int *minor)
{
	unsigned long first_bit;

	first_bit = find_first_bit(&pseudo_lock_minor_avail, MINORBITS);

	if (first_bit == MINORBITS)
		return -ENOSPC;

	__clear_bit(first_bit, &pseudo_lock_minor_avail);
	*minor = first_bit;

	return 0;
}

/**
 * pseudo_lock_minor_release - Return minor number to available
 * @minor: The minor number made available
 */
static void pseudo_lock_minor_release(unsigned int minor)
{
	__set_bit(minor, &pseudo_lock_minor_avail);
}

/**
 * region_find_by_minor - Locate a pseudo-lock region by inode minor number
 * @minor: The minor number of the device representing pseudo-locked region
 *
 * When the character device is accessed we need to determine which
 * pseudo-locked region it belongs to. This is done by matching the minor
 * number of the device to the pseudo-locked region it belongs to.
 *
 * Minor numbers are assigned at the time a pseudo-locked region is associated
 * with a cache instance.
 *
 * Return: On success return pointer to resource group owning the pseudo-locked
 * region, NULL on failure.
 */
static struct rdtgroup *region_find_by_minor(unsigned int minor)
{
	struct rdtgroup *rdtgrp, *rdtgrp_match = NULL;

	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
		if (rdtgrp->plr && rdtgrp->plr->minor == minor) {
			rdtgrp_match = rdtgrp;
			break;
		}
	}
	return rdtgrp_match;
}

/**
 * struct pseudo_lock_pm_req - A power management QoS request list entry
 * @list: Entry within the @pm_reqs list for a pseudo-locked region
 * @req: PM QoS request
 */
struct pseudo_lock_pm_req {
	struct list_head list;
	struct dev_pm_qos_request req;
};

static void pseudo_lock_cstates_relax(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req, *next;

	list_for_each_entry_safe(pm_req, next, &plr->pm_reqs, list) {
		dev_pm_qos_remove_request(&pm_req->req);
		list_del(&pm_req->list);
		kfree(pm_req);
	}
}

/**
 * pseudo_lock_cstates_constrain - Restrict cores from entering C6
 * @plr: Pseudo-locked region
 *
 * To prevent the cache from being affected by power management, entering
 * C6 has to be avoided. This is accomplished by requesting a latency
 * requirement lower than the lowest C6 exit latency of all supported
 * platforms as found in the cpuidle state tables in the intel_idle driver.
 * At this time it is possible to do so with a single latency requirement
 * for all supported platforms.
 *
 * Since Goldmont is supported, which is affected by X86_BUG_MONITOR,
 * the ACPI latencies need to be considered while keeping in mind that C2
 * may be set to map to deeper sleep states. In this case the latency
 * requirement needs to prevent entering C2 also.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_cstates_constrain(struct pseudo_lock_region *plr)
{
	struct pseudo_lock_pm_req *pm_req;
	int cpu;
	int ret;

	for_each_cpu(cpu, &plr->d->hdr.cpu_mask) {
		pm_req = kzalloc(sizeof(*pm_req), GFP_KERNEL);
		if (!pm_req) {
			rdt_last_cmd_puts("Failure to allocate memory for PM QoS\n");
			ret = -ENOMEM;
			goto out_err;
		}
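		/*
		 * A 30 us resume latency limit is below the C6 exit latency
		 * of the supported platforms (see the comment above), so
		 * cpuidle keeps this CPU out of C6 while the request lives.
		 */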
		ret = dev_pm_qos_add_request(get_cpu_device(cpu),
					     &pm_req->req,
					     DEV_PM_QOS_RESUME_LATENCY,
					     30);
		if (ret < 0) {
			rdt_last_cmd_printf("Failed to add latency req CPU%d\n",
					    cpu);
			kfree(pm_req);
			ret = -1;
			goto out_err;
		}
		list_add(&pm_req->list, &plr->pm_reqs);
	}

	return 0;

out_err:
	pseudo_lock_cstates_relax(plr);
	return ret;
}

/**
 * pseudo_lock_region_clear - Reset pseudo-lock region data
 * @plr: pseudo-lock region
 *
 * All content of the pseudo-locked region is reset - any memory allocated
 * is freed.
 *
 * Return: void
 */
static void pseudo_lock_region_clear(struct pseudo_lock_region *plr)
{
	plr->size = 0;
	plr->line_size = 0;
	kfree(plr->kmem);
	plr->kmem = NULL;
	plr->s = NULL;
	if (plr->d)
		plr->d->plr = NULL;
	plr->d = NULL;
	plr->cbm = 0;
	plr->debugfs_dir = NULL;
}

/**
 * pseudo_lock_region_init - Initialize pseudo-lock region information
 * @plr: pseudo-lock region
 *
 * Called after user provided a schemata to be pseudo-locked. From the
 * schemata the &struct pseudo_lock_region is on entry already initialized
 * with the resource, domain, and capacity bitmask. Here the information
 * required for pseudo-locking is deduced from this data and &struct
 * pseudo_lock_region initialized further. This information includes:
 * - size in bytes of the region to be pseudo-locked
 * - cache line size to know the stride with which data needs to be accessed
 *   to be pseudo-locked
 * - a cpu associated with the cache instance on which the pseudo-locking
 *   flow can be executed
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_init(struct pseudo_lock_region *plr)
{
	enum resctrl_scope scope = plr->s->res->ctrl_scope;
	struct cacheinfo *ci;
	int ret;

	if (WARN_ON_ONCE(scope != RESCTRL_L2_CACHE && scope != RESCTRL_L3_CACHE))
		return -ENODEV;

	/* Pick the first cpu we find that is associated with the cache. */
	plr->cpu = cpumask_first(&plr->d->hdr.cpu_mask);

	if (!cpu_online(plr->cpu)) {
		rdt_last_cmd_printf("CPU %u associated with cache not online\n",
				    plr->cpu);
		ret = -ENODEV;
		goto out_region;
	}

	ci = get_cpu_cacheinfo_level(plr->cpu, scope);
	if (ci) {
		plr->line_size = ci->coherency_line_size;
		plr->size = rdtgroup_cbm_to_size(plr->s->res, plr->d, plr->cbm);
		return 0;
	}

	ret = -1;
	rdt_last_cmd_puts("Unable to determine cache line size\n");
out_region:
	pseudo_lock_region_clear(plr);
	return ret;
}

/**
 * pseudo_lock_init - Initialize a pseudo-lock region
 * @rdtgrp: resource group to which new pseudo-locked region will belong
 *
 * A pseudo-locked region is associated with a resource group. When this
 * association is created the pseudo-locked region is initialized. The
 * details of the pseudo-locked region are not known at this time so only
 * allocation is done and association established.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_init(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr;

	plr = kzalloc(sizeof(*plr), GFP_KERNEL);
	if (!plr)
		return -ENOMEM;

	init_waitqueue_head(&plr->lock_thread_wq);
	INIT_LIST_HEAD(&plr->pm_reqs);
	rdtgrp->plr = plr;
	return 0;
}

/**
 * pseudo_lock_region_alloc - Allocate kernel memory that will be pseudo-locked
 * @plr: pseudo-lock region
 *
 * Initialize the details required to set up the pseudo-locked region and
 * allocate the contiguous memory that will be pseudo-locked to the cache.
 *
 * Return: 0 on success, <0 on failure. Descriptive error will be written
 * to last_cmd_status buffer.
 */
static int pseudo_lock_region_alloc(struct pseudo_lock_region *plr)
{
	int ret;

	ret = pseudo_lock_region_init(plr);
	if (ret < 0)
		return ret;

	/*
	 * We do not yet support contiguous regions larger than
	 * KMALLOC_MAX_SIZE.
	 */
	if (plr->size > KMALLOC_MAX_SIZE) {
		rdt_last_cmd_puts("Requested region exceeds maximum size\n");
		ret = -E2BIG;
		goto out_region;
	}

	plr->kmem = kzalloc(plr->size, GFP_KERNEL);
	if (!plr->kmem) {
		rdt_last_cmd_puts("Unable to allocate memory\n");
		ret = -ENOMEM;
		goto out_region;
	}

	ret = 0;
	goto out;
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * pseudo_lock_free - Free a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-locked region belonged
 *
 * The pseudo-locked region's resources have already been released, or not
 * yet created at this point. Now it can be freed and disassociated from the
 * resource group.
 *
 * Return: void
 */
static void pseudo_lock_free(struct rdtgroup *rdtgrp)
{
	pseudo_lock_region_clear(rdtgrp->plr);
	kfree(rdtgrp->plr);
	rdtgrp->plr = NULL;
}

/**
 * rdtgroup_monitor_in_progress - Test if monitoring in progress
 * @rdtgrp: resource group being queried
 *
 * Return: 1 if monitor groups have been created for this resource
 * group, 0 otherwise.
 */
static int rdtgroup_monitor_in_progress(struct rdtgroup *rdtgrp)
{
	return !list_empty(&rdtgrp->mon.crdtgrp_list);
}

/**
 * rdtgroup_locksetup_user_restrict - Restrict user access to group
 * @rdtgrp: resource group needing access restricted
 *
 * A resource group used for cache pseudo-locking cannot have cpus or tasks
 * assigned to it. This is communicated to the user by restricting access
 * to all the files that can be used to make such changes.
 *
 * Permissions restored with rdtgroup_locksetup_user_restore()
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restriction of access an attempt will be made to restore permissions but
 * the state of the mode of these files will be uncertain when a failure
 * occurs.
 */
static int rdtgroup_locksetup_user_restrict(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
	if (ret)
		goto err_cpus;

	if (resctrl_arch_mon_capable()) {
		ret = rdtgroup_kn_mode_restrict(rdtgrp, "mon_groups");
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
err_cpus:
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
err_tasks:
	rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_user_restore - Restore user access to group
 * @rdtgrp: resource group needing access restored
 *
 * Restore all file access previously removed using
 * rdtgroup_locksetup_user_restrict()
 *
 * Return: 0 on success, <0 on failure. If a failure occurs during the
 * restoration of access an attempt will be made to restrict permissions
 * again but the state of the mode of these files will be uncertain when
 * a failure occurs.
 */
static int rdtgroup_locksetup_user_restore(struct rdtgroup *rdtgrp)
{
	int ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "tasks", 0777);
	if (ret)
		return ret;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0777);
	if (ret)
		goto err_tasks;

	ret = rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0777);
	if (ret)
		goto err_cpus;

	if (resctrl_arch_mon_capable()) {
		ret = rdtgroup_kn_mode_restore(rdtgrp, "mon_groups", 0777);
		if (ret)
			goto err_cpus_list;
	}

	ret = 0;
	goto out;

err_cpus_list:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus_list");
err_cpus:
	rdtgroup_kn_mode_restrict(rdtgrp, "cpus");
err_tasks:
	rdtgroup_kn_mode_restrict(rdtgrp, "tasks");
out:
	return ret;
}

/**
 * rdtgroup_locksetup_enter - Resource group enters locksetup mode
 * @rdtgrp: resource group requested to enter locksetup mode
 *
 * A resource group enters locksetup mode to reflect that it would be used
 * to represent a pseudo-locked region and is in the process of being set
 * up to do so. A resource group used for a pseudo-locked region would
 * lose the closid associated with it so we cannot allow it to have any
 * tasks or cpus assigned nor permit tasks or cpus to be assigned in the
 * future. Monitoring of a pseudo-locked region is not allowed either.
 *
 * The above and more restrictions on a pseudo-locked region are checked
 * for and enforced before the resource group enters the locksetup mode.
 *
 * Returns: 0 if the resource group successfully entered locksetup mode, <0
 * on failure. On failure the last_cmd_status buffer is updated with text to
 * communicate details of failure to the user.
 */
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp)
{
	int ret;

	/*
	 * The default resource group can neither be removed nor lose the
	 * default closid associated with it.
	 */
	if (rdtgrp == &rdtgroup_default) {
		rdt_last_cmd_puts("Cannot pseudo-lock default group\n");
		return -EINVAL;
	}

	/*
	 * Cache Pseudo-locking not supported when CDP is enabled.
	 *
	 * Some things to consider if you would like to enable this
	 * support (using L3 CDP as example):
	 * - When CDP is enabled two separate resources are exposed,
	 *   L3DATA and L3CODE, but they are actually on the same cache.
	 *   The implication for pseudo-locking is that if a
	 *   pseudo-locked region is created on a domain of one
	 *   resource (eg. L3CODE), then a pseudo-locked region cannot
	 *   be created on that same domain of the other resource
	 *   (eg. L3DATA). This is because the creation of a
	 *   pseudo-locked region involves a call to wbinvd that will
	 *   affect all cache allocations on the particular domain.
	 * - Considering the previous, it may be possible to only
	 *   expose one of the CDP resources to pseudo-locking and
	 *   hide the other. For example, we could consider to only
	 *   expose L3DATA and since the L3 cache is unified it is
	 *   still possible to place instructions there and execute them.
	 * - If only one region is exposed to pseudo-locking we should
	 *   still keep in mind that availability of a portion of cache
	 *   for pseudo-locking should take into account both resources.
	 *   Similarly, if a pseudo-locked region is created in one
	 *   resource, the portion of cache used by it should be made
	 *   unavailable to all future allocations from both resources.
	 */
	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3) ||
	    resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2)) {
		rdt_last_cmd_puts("CDP enabled\n");
		return -EINVAL;
	}

	/*
	 * Not knowing the bits to disable prefetching implies that this
	 * platform does not support Cache Pseudo-Locking.
	 */
	if (resctrl_arch_get_prefetch_disable_bits() == 0) {
		rdt_last_cmd_puts("Pseudo-locking not supported\n");
		return -EINVAL;
	}

	if (rdtgroup_monitor_in_progress(rdtgrp)) {
		rdt_last_cmd_puts("Monitoring in progress\n");
		return -EINVAL;
	}

	if (rdtgroup_tasks_assigned(rdtgrp)) {
		rdt_last_cmd_puts("Tasks assigned to resource group\n");
		return -EINVAL;
	}

	if (!cpumask_empty(&rdtgrp->cpu_mask)) {
		rdt_last_cmd_puts("CPUs assigned to resource group\n");
		return -EINVAL;
	}

	if (rdtgroup_locksetup_user_restrict(rdtgrp)) {
		rdt_last_cmd_puts("Unable to modify resctrl permissions\n");
		return -EIO;
	}

	ret = pseudo_lock_init(rdtgrp);
	if (ret) {
		rdt_last_cmd_puts("Unable to init pseudo-lock region\n");
		goto out_release;
	}

	/*
	 * If this system is capable of monitoring, an RMID would have been
	 * allocated when the control group was created. This is not needed
	 * anymore when this group would be used for pseudo-locking. This
	 * is safe to call on platforms not capable of monitoring.
	 */
	free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);

	ret = 0;
	goto out;

out_release:
	rdtgroup_locksetup_user_restore(rdtgrp);
out:
	return ret;
}

/**
 * rdtgroup_locksetup_exit - Resource group exits locksetup mode
 * @rdtgrp: resource group
 *
 * When a resource group exits locksetup mode the earlier restrictions are
 * lifted.
 *
 * Return: 0 on success, <0 on failure
 */
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp)
{
	int ret;

	if (resctrl_arch_mon_capable()) {
		ret = alloc_rmid(rdtgrp->closid);
		if (ret < 0) {
			rdt_last_cmd_puts("Out of RMIDs\n");
			return ret;
		}
		rdtgrp->mon.rmid = ret;
	}

	ret = rdtgroup_locksetup_user_restore(rdtgrp);
	if (ret) {
		free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
		return ret;
	}

	pseudo_lock_free(rdtgrp);
	return 0;
}

/**
 * rdtgroup_cbm_overlaps_pseudo_locked - Test if CBM or portion is pseudo-locked
 * @d: RDT domain
 * @cbm: CBM to test
 *
 * @d represents a cache instance and @cbm a capacity bitmask that is
 * considered for it. Determine if @cbm overlaps with any existing
 * pseudo-locked region on @d.
 *
 * @cbm is unsigned long, even if only 32 bits are used, to make the
 * bitmap functions work correctly.
 *
 * Return: true if @cbm overlaps with pseudo-locked region on @d, false
 * otherwise.
 */
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_ctrl_domain *d, unsigned long cbm)
{
	unsigned int cbm_len;
	unsigned long cbm_b;

	if (d->plr) {
		cbm_len = d->plr->s->res->cache.cbm_len;
		cbm_b = d->plr->cbm;
		if (bitmap_intersects(&cbm, &cbm_b, cbm_len))
			return true;
	}
	return false;
}

/**
 * rdtgroup_pseudo_locked_in_hierarchy - Pseudo-locked region in cache hierarchy
 * @d: RDT domain under test
 *
 * The setup of a pseudo-locked region affects all cache instances within
 * the hierarchy of the region. It is thus essential to know if any
 * pseudo-locked regions exist within a cache hierarchy to prevent any
 * attempts to create new pseudo-locked regions in the same hierarchy.
 *
 * Return: true if a pseudo-locked region exists in the hierarchy of @d or
 *         if it is not possible to test due to memory allocation issue,
 *         false otherwise.
 */
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_ctrl_domain *d)
{
	struct rdt_ctrl_domain *d_i;
	cpumask_var_t cpu_with_psl;
	struct rdt_resource *r;
	bool ret = false;

	/* Walking r->domains, ensure it can't race with cpuhp */
	lockdep_assert_cpus_held();

	if (!zalloc_cpumask_var(&cpu_with_psl, GFP_KERNEL))
		return true;

	/*
	 * First determine which cpus have pseudo-locked regions
	 * associated with them.
	 */
	for_each_alloc_capable_rdt_resource(r) {
		list_for_each_entry(d_i, &r->ctrl_domains, hdr.list) {
			if (d_i->plr)
				cpumask_or(cpu_with_psl, cpu_with_psl,
					   &d_i->hdr.cpu_mask);
		}
	}

	/*
	 * Next test if new pseudo-locked region would intersect with
	 * existing region.
	 */
	if (cpumask_intersects(&d->hdr.cpu_mask, cpu_with_psl))
		ret = true;

	free_cpumask_var(cpu_with_psl);
	return ret;
}

/**
 * pseudo_lock_measure_cycles - Trigger latency measure to pseudo-locked region
 * @rdtgrp: Resource group to which the pseudo-locked region belongs.
 * @sel: Selector of which measurement to perform on a pseudo-locked region:
 *       1 for memory access latency, 2 for L2 cache residency, 3 for L3
 *       cache residency.
 *
 * The measurement of latency to access a pseudo-locked region should be
 * done from a cpu that is associated with that pseudo-locked region.
 * Determine which cpu is associated with this region, start a thread on
 * that cpu to perform the measurement, and wait for that thread to complete.
 *
 * Return: 0 on success, <0 on failure
 */
static int pseudo_lock_measure_cycles(struct rdtgroup *rdtgrp, int sel)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int cpu;
	int ret = -1;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out;
	}

	if (!plr->d) {
		ret = -ENODEV;
		goto out;
	}

	plr->thread_done = 0;
	cpu = cpumask_first(&plr->d->hdr.cpu_mask);
	if (!cpu_online(cpu)) {
		ret = -ENODEV;
		goto out;
	}

	plr->cpu = cpu;

	if (sel == 1)
		thread = kthread_run_on_cpu(resctrl_arch_measure_cycles_lat_fn,
					    plr, cpu, "pseudo_lock_measure/%u");
	else if (sel == 2)
		thread = kthread_run_on_cpu(resctrl_arch_measure_l2_residency,
					    plr, cpu, "pseudo_lock_measure/%u");
	else if (sel == 3)
		thread = kthread_run_on_cpu(resctrl_arch_measure_l3_residency,
					    plr, cpu, "pseudo_lock_measure/%u");
	else
		goto out;

	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		goto out;
	}

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0)
		goto out;

	ret = 0;

out:
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
	return ret;
}

static ssize_t pseudo_lock_measure_trigger(struct file *file,
					   const char __user *user_buf,
					   size_t count, loff_t *ppos)
{
	struct rdtgroup *rdtgrp = file->private_data;
	size_t buf_size;
	char buf[32];
	int ret;
	int sel;

	buf_size = min(count, (sizeof(buf) - 1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	buf[buf_size] = '\0';
	ret = kstrtoint(buf, 10, &sel);
	if (ret == 0) {
		if (sel != 1 && sel != 2 && sel != 3)
			return -EINVAL;
		ret = pseudo_lock_measure_cycles(rdtgrp, sel);
		if (ret == 0)
			ret = count;
	}

	return ret;
}

static const struct file_operations pseudo_measure_fops = {
	.write = pseudo_lock_measure_trigger,
	.open = simple_open,
	.llseek = default_llseek,
};
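
/*
 * Usage sketch (path assumes debugfs is mounted at /sys/kernel/debug):
 * writing "1", "2" or "3" to the region's pseudo_lock_measure file, e.g.
 *
 *	echo 1 > /sys/kernel/debug/resctrl/<group>/pseudo_lock_measure
 *
 * triggers the latency, L2 residency or L3 residency measurement
 * respectively, with results typically reported via trace events.
 */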

/**
 * rdtgroup_pseudo_lock_create - Create a pseudo-locked region
 * @rdtgrp: resource group to which pseudo-lock region belongs
 *
 * Called when a resource group in the pseudo-locksetup mode receives a
 * valid schemata that should be pseudo-locked. Since the resource group is
 * in pseudo-locksetup mode the &struct pseudo_lock_region has already been
 * allocated and initialized with the essential information. If a failure
 * occurs the resource group remains in the pseudo-locksetup mode with the
 * &struct pseudo_lock_region associated with it, but cleared from all
 * information and ready for the user to re-attempt pseudo-locking by
 * writing the schemata again.
 *
 * Return: 0 if the pseudo-locked region was successfully pseudo-locked, <0
 * on failure. Descriptive error will be written to last_cmd_status buffer.
 */
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;
	struct task_struct *thread;
	unsigned int new_minor;
	struct device *dev;
	char *kn_name __free(kfree) = NULL;
	int ret;

	ret = pseudo_lock_region_alloc(plr);
	if (ret < 0)
		return ret;

	ret = pseudo_lock_cstates_constrain(plr);
	if (ret < 0) {
		ret = -EINVAL;
		goto out_region;
	}

	kn_name = kstrdup(rdt_kn_name(rdtgrp->kn), GFP_KERNEL);
	if (!kn_name) {
		ret = -ENOMEM;
		goto out_cstates;
	}

	plr->thread_done = 0;

	thread = kthread_run_on_cpu(resctrl_arch_pseudo_lock_fn, plr,
				    plr->cpu, "pseudo_lock/%u");
	if (IS_ERR(thread)) {
		ret = PTR_ERR(thread);
		rdt_last_cmd_printf("Locking thread returned error %d\n", ret);
		goto out_cstates;
	}

	ret = wait_event_interruptible(plr->lock_thread_wq,
				       plr->thread_done == 1);
	if (ret < 0) {
		/*
		 * If the thread does not get on the CPU for whatever
		 * reason and the process which sets up the region is
		 * interrupted then this will leave the thread in runnable
		 * state and once it gets on the CPU it will dereference
		 * the cleared, but not freed, plr struct resulting in an
		 * empty pseudo-locking loop.
		 */
		rdt_last_cmd_puts("Locking thread interrupted\n");
		goto out_cstates;
	}

	ret = pseudo_lock_minor_get(&new_minor);
	if (ret < 0) {
		rdt_last_cmd_puts("Unable to obtain a new minor number\n");
		goto out_cstates;
	}

	/*
	 * Unlock access but do not release the reference. The
	 * pseudo-locked region will still be here on return.
	 *
	 * The mutex has to be released temporarily to avoid a potential
	 * deadlock with the mm->mmap_lock which is obtained in the
	 * device_create() and debugfs_create_dir() callpath below as well as
	 * before the mmap() callback is called.
	 */
	mutex_unlock(&rdtgroup_mutex);

	if (!IS_ERR_OR_NULL(debugfs_resctrl)) {
		plr->debugfs_dir = debugfs_create_dir(kn_name, debugfs_resctrl);
		if (!IS_ERR_OR_NULL(plr->debugfs_dir))
			debugfs_create_file("pseudo_lock_measure", 0200,
					    plr->debugfs_dir, rdtgrp,
					    &pseudo_measure_fops);
	}

	dev = device_create(&pseudo_lock_class, NULL,
			    MKDEV(pseudo_lock_major, new_minor),
			    rdtgrp, "%s", kn_name);

	mutex_lock(&rdtgroup_mutex);

	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		rdt_last_cmd_printf("Failed to create character device: %d\n",
				    ret);
		goto out_debugfs;
	}

	/* We released the mutex - check if group was removed while we did so */
	if (rdtgrp->flags & RDT_DELETED) {
		ret = -ENODEV;
		goto out_device;
	}

	plr->minor = new_minor;

	rdtgrp->mode = RDT_MODE_PSEUDO_LOCKED;
	closid_free(rdtgrp->closid);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus", 0444);
	rdtgroup_kn_mode_restore(rdtgrp, "cpus_list", 0444);

	ret = 0;
	goto out;

out_device:
	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, new_minor));
out_debugfs:
	debugfs_remove_recursive(plr->debugfs_dir);
	pseudo_lock_minor_release(new_minor);
out_cstates:
	pseudo_lock_cstates_relax(plr);
out_region:
	pseudo_lock_region_clear(plr);
out:
	return ret;
}

/**
 * rdtgroup_pseudo_lock_remove - Remove a pseudo-locked region
 * @rdtgrp: resource group to which the pseudo-locked region belongs
 *
 * The removal of a pseudo-locked region can be initiated when the resource
 * group is removed via a "rmdir" from user space or on unmount of the
 * resctrl filesystem. On removal the resource group does not go back to
 * pseudo-locksetup mode before it is removed, instead it is removed
 * directly. There is thus asymmetry with the creation where the
 * &struct pseudo_lock_region is removed here while it was not created in
 * rdtgroup_pseudo_lock_create().
 *
 * Return: void
 */
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp)
{
	struct pseudo_lock_region *plr = rdtgrp->plr;

	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
		/*
		 * Default group cannot be a pseudo-locked region so we can
		 * free closid here.
		 */
		closid_free(rdtgrp->closid);
		goto free;
	}

	pseudo_lock_cstates_relax(plr);
	debugfs_remove_recursive(rdtgrp->plr->debugfs_dir);
	device_destroy(&pseudo_lock_class, MKDEV(pseudo_lock_major, plr->minor));
	pseudo_lock_minor_release(plr->minor);

free:
	pseudo_lock_free(rdtgrp);
}

static int pseudo_lock_dev_open(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = region_find_by_minor(iminor(inode));
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	filp->private_data = rdtgrp;
	atomic_inc(&rdtgrp->waitcount);
	/* Perform a non-seekable open - llseek is not supported */
	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	mutex_unlock(&rdtgroup_mutex);

	return 0;
}

static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
{
	struct rdtgroup *rdtgrp;

	mutex_lock(&rdtgroup_mutex);
	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}
	filp->private_data = NULL;
	atomic_dec(&rdtgrp->waitcount);
	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
{
	/* Not supported */
	return -EINVAL;
}

static const struct vm_operations_struct pseudo_mmap_ops = {
	.mremap = pseudo_lock_dev_mremap,
};

static int pseudo_lock_dev_mmap_prepare(struct vm_area_desc *desc)
{
	unsigned long off = desc->pgoff << PAGE_SHIFT;
	unsigned long vsize = vma_desc_size(desc);
	struct file *filp = desc->file;
	struct pseudo_lock_region *plr;
	struct rdtgroup *rdtgrp;
	unsigned long physical;
	unsigned long psize;

	mutex_lock(&rdtgroup_mutex);

	rdtgrp = filp->private_data;
	WARN_ON(!rdtgrp);
	if (!rdtgrp) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	plr = rdtgrp->plr;

	if (!plr->d) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENODEV;
	}

	/*
	 * Task is required to run with affinity to the cpus associated
	 * with the pseudo-locked region. If this is not the case the task
	 * may be scheduled elsewhere and invalidate entries in the
	 * pseudo-locked region.
	 */
	if (!cpumask_subset(current->cpus_ptr, &plr->d->hdr.cpu_mask)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	physical = __pa(plr->kmem) >> PAGE_SHIFT;
	psize = plr->size - off;

	if (off > plr->size) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	/*
	 * Ensure changes are carried directly to the memory being mapped,
	 * do not allow copy-on-write mapping.
	 */
	if (!(desc->vm_flags & VM_SHARED)) {
		mutex_unlock(&rdtgroup_mutex);
		return -EINVAL;
	}

	if (vsize > psize) {
		mutex_unlock(&rdtgroup_mutex);
		return -ENOSPC;
	}

	memset(plr->kmem + off, 0, vsize);

	desc->vm_ops = &pseudo_mmap_ops;
	mmap_action_remap_full(desc, physical + desc->pgoff);

	mutex_unlock(&rdtgroup_mutex);
	return 0;
}

static const struct file_operations pseudo_lock_dev_fops = {
	.owner = THIS_MODULE,
	.read = NULL,
	.write = NULL,
	.open = pseudo_lock_dev_open,
	.release = pseudo_lock_dev_release,
	.mmap_prepare = pseudo_lock_dev_mmap_prepare,
};
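
/*
 * Expected userspace flow (a sketch derived from the checks above): pin the
 * task to the CPUs of the pseudo-locked region's cache instance, open the
 * character device, then mmap() it with MAP_SHARED. The mapping is refused
 * if the task's affinity extends beyond the region's CPUs or if a private
 * (copy-on-write) mapping is requested.
 */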

int rdt_pseudo_lock_init(void)
{
	int ret;

	ret = register_chrdev(0, "pseudo_lock", &pseudo_lock_dev_fops);
	if (ret < 0)
		return ret;

	pseudo_lock_major = ret;

	ret = class_register(&pseudo_lock_class);
	if (ret) {
		unregister_chrdev(pseudo_lock_major, "pseudo_lock");
		return ret;
	}

	return 0;
}

void rdt_pseudo_lock_release(void)
{
	class_unregister(&pseudo_lock_class);
	unregister_chrdev(pseudo_lock_major, "pseudo_lock");
	pseudo_lock_major = 0;
}