bpf_trace.c source code [linux/kernel/trace/bpf_trace.c]

1	// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
5	#include <linux/kernel.h>
6	#include <linux/types.h>
7	#include <linux/slab.h>
8	#include <linux/bpf.h>
9	#include <linux/bpf_verifier.h>
10	#include <linux/bpf_perf_event.h>
11	#include <linux/btf.h>
12	#include <linux/filter.h>
13	#include <linux/uaccess.h>
14	#include <linux/ctype.h>
15	#include <linux/kprobes.h>
16	#include <linux/spinlock.h>
17	#include <linux/syscalls.h>
18	#include <linux/error-injection.h>
19	#include <linux/btf_ids.h>
20	#include <linux/bpf_lsm.h>
21	#include <linux/fprobe.h>
22	#include <linux/bsearch.h>
23	#include <linux/sort.h>
24	#include <linux/key.h>
25	#include <linux/namei.h>
26
27	#include <net/bpf_sk_storage.h>
28
29	#include <uapi/linux/bpf.h>
30	#include <uapi/linux/btf.h>
31
32	#include <asm/tlb.h>
33
34	#include "trace_probe.h"
35	#include "trace.h"
36
37	#define CREATE_TRACE_POINTS
38	#include "bpf_trace.h"
39
/*
 * Fetch an RCU-protected pointer without an RCU read-side section;
 * lockdep is told the access is legal while bpf_event_mutex is held.
 */
#define bpf_event_rcu_dereference(p)					\
	rcu_dereference_protected(p, lockdep_is_held(&bpf_event_mutex))

/*
 * Both limits are 2^20. Their use sites are outside this section; by name
 * they cap the number of entries of a multi-uprobe / multi-kprobe attach.
 */
#define MAX_UPROBE_MULTI_CNT (1U << 20)
#define MAX_KPROBE_MULTI_CNT (1U << 20)
45
46	#ifdef CONFIG_MODULES
47	struct bpf_trace_module {
48	struct module *module;
49	struct list_head list;
50	};
51
52	static LIST_HEAD(bpf_trace_modules);
53	static DEFINE_MUTEX(bpf_module_mutex);
54
55	static struct bpf_raw_event_map bpf_get_raw_tracepoint_module(const* char *name)
56	{
57	struct bpf_raw_event_map btp, ret = NULL;
58	struct bpf_trace_module *btm;
59	unsigned int i;
60
61	mutex_lock(&bpf_module_mutex);
62	list_for_each_entry(btm, &bpf_trace_modules, list) {
63	for (i = `0`; i < btm->module->num_bpf_raw_events; ++i) {
64	btp = &btm->module->bpf_raw_events[i];
65	if (!strcmp(btp->tp->name, name)) {
66	if (try_module_get(module: btm->module))
67	ret = btp;
68	goto out;
69	}
70	}
71	}
72	out:
73	mutex_unlock(lock: &bpf_module_mutex);
74	return ret;
75	}
76	#else
77	static struct bpf_raw_event_map bpf_get_raw_tracepoint_module(const* char *name)
78	{
79	return NULL;
80	}
81	#endif /* CONFIG_MODULES */
82
83	u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
84	u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
85
86	static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
87	u64 flags, const struct btf **btf,
88	s32 *btf_id);
89	static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx);
90	static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
91
92	static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx);
93	static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx);
94
95	/**
96	* trace_call_bpf - invoke BPF program
97	* @call: tracepoint event
98	* @ctx: opaque context pointer
99	*
100	* kprobe handlers execute BPF programs via this helper.
101	* Can be used from static tracepoints in the future.
102	*
103	* Return: BPF programs always return an integer which is interpreted by
104	* kprobe handler as:
105	* 0 - return from kprobe (event is filtered out)
106	* 1 - store kprobe event into ring buffer
107	* Other values are reserved and currently alias to 1
108	*/
109	unsigned int trace_call_bpf(struct trace_event_call call, void* *ctx)
110	{
111	unsigned int ret;
112
113	cant_sleep();
114
115	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != `1`)) {
116	/*
117	* since some bpf program is already running on this cpu,
118	* don't call into another bpf program (same or different)
119	* and don't send kprobe event into ring-buffer,
120	* so return zero here
121	*/
122	rcu_read_lock();
123	bpf_prog_inc_misses_counters(rcu_dereference(call->prog_array));
124	rcu_read_unlock();
125	ret = `0`;
126	goto out;
127	}
128
129	/*
130	* Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
131	* to all call sites, we did a bpf_prog_array_valid() there to check
132	* whether call->prog_array is empty or not, which is
133	* a heuristic to speed up execution.
134	*
135	* If bpf_prog_array_valid() fetched prog_array was
136	* non-NULL, we go into trace_call_bpf() and do the actual
137	* proper rcu_dereference() under RCU lock.
138	* If it turns out that prog_array is NULL then, we bail out.
139	* For the opposite, if the bpf_prog_array_valid() fetched pointer
140	* was NULL, you'll skip the prog_array with the risk of missing
141	* out of events when it was updated in between this and the
142	* rcu_dereference() which is accepted risk.
143	*/
144	rcu_read_lock();
145	ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
146	ctx, run_prog: bpf_prog_run);
147	rcu_read_unlock();
148
149	out:
150	__this_cpu_dec(bpf_prog_active);
151
152	return ret;
153	}
154
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
/*
 * bpf_override_return() helper: store @rc as the return value in @regs and
 * call override_function_with_return() on them. Always returns 0.
 */
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}

static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
#endif
171
172	static __always_inline int
173	bpf_probe_read_user_common(void dst, u32 size, const* void __user *unsafe_ptr)
174	{
175	int ret;
176
177	ret = copy_from_user_nofault(dst, src: unsafe_ptr, size);
178	if (unlikely(ret < `0`))
179	memset(dst, `0`, size);
180	return ret;
181	}
182
183	BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
184	const void __user *, unsafe_ptr)
185	{
186	return bpf_probe_read_user_common(dst, size, unsafe_ptr);
187	}
188
189	const struct bpf_func_proto bpf_probe_read_user_proto = {
190	.func = bpf_probe_read_user,
191	.gpl_only = true,
192	.ret_type = RET_INTEGER,
193	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
194	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
195	.arg3_type = ARG_ANYTHING,
196	};
197
198	static __always_inline int
199	bpf_probe_read_user_str_common(void *dst, u32 size,
200	const void __user *unsafe_ptr)
201	{
202	int ret;
203
204	/*
205	* NB: We rely on strncpy_from_user() not copying junk past the NUL
206	* terminator into `dst`.
207	*
208	* strncpy_from_user() does long-sized strides in the fast path. If the
209	* strncpy does not mask out the bytes after the NUL in `unsafe_ptr`,
210	* then there could be junk after the NUL in `dst`. If user takes `dst`
211	* and keys a hash map with it, then semantically identical strings can
212	* occupy multiple entries in the map.
213	*/
214	ret = strncpy_from_user_nofault(dst, unsafe_addr: unsafe_ptr, count: size);
215	if (unlikely(ret < `0`))
216	memset(dst, `0`, size);
217	return ret;
218	}
219
220	BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
221	const void __user *, unsafe_ptr)
222	{
223	return bpf_probe_read_user_str_common(dst, size, unsafe_ptr);
224	}
225
226	const struct bpf_func_proto bpf_probe_read_user_str_proto = {
227	.func = bpf_probe_read_user_str,
228	.gpl_only = true,
229	.ret_type = RET_INTEGER,
230	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
231	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
232	.arg3_type = ARG_ANYTHING,
233	};
234
235	BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
236	const void *, unsafe_ptr)
237	{
238	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
239	}
240
241	const struct bpf_func_proto bpf_probe_read_kernel_proto = {
242	.func = bpf_probe_read_kernel,
243	.gpl_only = true,
244	.ret_type = RET_INTEGER,
245	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
246	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
247	.arg3_type = ARG_ANYTHING,
248	};
249
250	static __always_inline int
251	bpf_probe_read_kernel_str_common(void dst, u32 size, const* void *unsafe_ptr)
252	{
253	int ret;
254
255	/*
256	* The strncpy_from_kernel_nofault() call will likely not fill the
257	* entire buffer, but that's okay in this circumstance as we're probing
258	* arbitrary memory anyway similar to bpf_probe_read_*() and might
259	* as well probe the stack. Thus, memory is explicitly cleared
260	* only in error case, so that improper users ignoring return
261	* code altogether don't copy garbage; otherwise length of string
262	* is returned that can be used for bpf_perf_event_output() et al.
263	*/
264	ret = strncpy_from_kernel_nofault(dst, unsafe_addr: unsafe_ptr, count: size);
265	if (unlikely(ret < `0`))
266	memset(dst, `0`, size);
267	return ret;
268	}
269
270	BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
271	const void *, unsafe_ptr)
272	{
273	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
274	}
275
276	const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
277	.func = bpf_probe_read_kernel_str,
278	.gpl_only = true,
279	.ret_type = RET_INTEGER,
280	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
281	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
282	.arg3_type = ARG_ANYTHING,
283	};
284
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
/*
 * bpf_probe_read_compat(): pick the user or kernel read path from the
 * numeric value of @unsafe_ptr. Addresses below TASK_SIZE go through the
 * user-space reader, everything else through the kernel reader.
 */
BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_proto = {
	.func		= bpf_probe_read_compat,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

/*
 * bpf_probe_read_compat_str(): string variant of the helper above, with
 * the same TASK_SIZE split between the user and kernel string readers.
 */
BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	if ((unsigned long)unsafe_ptr < TASK_SIZE) {
		return bpf_probe_read_user_str_common(dst, size,
				(__force void __user *)unsafe_ptr);
	}
	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr);
}

static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
	.func		= bpf_probe_read_compat_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};
#endif /* CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE */
324
325	BPF_CALL_3(bpf_probe_write_user, void __user , unsafe_ptr, const* void *, src,
326	u32, size)
327	{
328	/*
329	* Ensure we're in user context which is safe for the helper to
330	* run. This helper has no business in a kthread.
331	*
332	* access_ok() should prevent writing to non-user memory, but in
333	* some situations (nommu, temporary switch, etc) access_ok() does
334	* not provide enough validation, hence the check on KERNEL_DS.
335	*
336	* nmi_uaccess_okay() ensures the probe is not run in an interim
337	* state, when the task or mm are switched. This is specifically
338	* required to prevent the use of temporary mm.
339	*/
340
341	if (unlikely(in_interrupt() \|\|
342	current->flags & (PF_KTHREAD \| PF_EXITING)))
343	return -EPERM;
344	if (unlikely(!nmi_uaccess_okay()))
345	return -EPERM;
346
347	return copy_to_user_nofault(dst: unsafe_ptr, src, size);
348	}
349
350	static const struct bpf_func_proto bpf_probe_write_user_proto = {
351	.func = bpf_probe_write_user,
352	.gpl_only = true,
353	.ret_type = RET_INTEGER,
354	.arg1_type = ARG_ANYTHING,
355	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
356	.arg3_type = ARG_CONST_SIZE,
357	};
358
359	#define MAX_TRACE_PRINTK_VARARGS 3
360	#define BPF_TRACE_PRINTK_SIZE 1024
361
362	BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
363	u64, arg2, u64, arg3)
364	{
365	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
366	struct bpf_bprintf_data data = {
367	.get_bin_args = true,
368	.get_buf = true,
369	};
370	int ret;
371
372	ret = bpf_bprintf_prepare(fmt, fmt_size, raw_args: args,
373	MAX_TRACE_PRINTK_VARARGS, data: &data);
374	if (ret < `0`)
375	return ret;
376
377	ret = bstr_printf(buf: data.buf, MAX_BPRINTF_BUF, fmt, bin_buf: data.bin_args);
378
379	trace_bpf_trace_printk(bpf_string: data.buf);
380
381	bpf_bprintf_cleanup(data: &data);
382
383	return ret;
384	}
385
386	static const struct bpf_func_proto bpf_trace_printk_proto = {
387	.func = bpf_trace_printk,
388	.gpl_only = true,
389	.ret_type = RET_INTEGER,
390	.arg1_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
391	.arg2_type = ARG_CONST_SIZE,
392	};
393
394	static void __set_printk_clr_event(struct work_struct *work)
395	{
396	/*
397	* This program might be calling bpf_trace_printk,
398	* so enable the associated bpf_trace/bpf_trace_printk event.
399	* Repeat this each time as it is possible a user has
400	* disabled bpf_trace_printk events. By loading a program
401	* calling bpf_trace_printk() however the user has expressed
402	* the intent to see such events.
403	*/
404	if (trace_set_clr_event(system: "bpf_trace", event: "bpf_trace_printk", set: `1`))
405	pr_warn_ratelimited("could not enable bpf_trace_printk events");
406	}
407	static DECLARE_WORK(set_printk_work, __set_printk_clr_event);
408
409	const struct bpf_func_proto bpf_get_trace_printk_proto(void*)
410	{
411	schedule_work(work: &set_printk_work);
412	return &bpf_trace_printk_proto;
413	}
414
415	BPF_CALL_4(bpf_trace_vprintk, char , fmt, u32, fmt_size, const* void *, args,
416	u32, data_len)
417	{
418	struct bpf_bprintf_data data = {
419	.get_bin_args = true,
420	.get_buf = true,
421	};
422	int ret, num_args;
423
424	if (data_len & `7` \|\| data_len > MAX_BPRINTF_VARARGS * `8` \|\|
425	(data_len && !args))
426	return -EINVAL;
427	num_args = data_len / `8`;
428
429	ret = bpf_bprintf_prepare(fmt, fmt_size, raw_args: args, num_args, data: &data);
430	if (ret < `0`)
431	return ret;
432
433	ret = bstr_printf(buf: data.buf, MAX_BPRINTF_BUF, fmt, bin_buf: data.bin_args);
434
435	trace_bpf_trace_printk(bpf_string: data.buf);
436
437	bpf_bprintf_cleanup(data: &data);
438
439	return ret;
440	}
441
442	static const struct bpf_func_proto bpf_trace_vprintk_proto = {
443	.func = bpf_trace_vprintk,
444	.gpl_only = true,
445	.ret_type = RET_INTEGER,
446	.arg1_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
447	.arg2_type = ARG_CONST_SIZE,
448	.arg3_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
449	.arg4_type = ARG_CONST_SIZE_OR_ZERO,
450	};
451
452	const struct bpf_func_proto bpf_get_trace_vprintk_proto(void*)
453	{
454	schedule_work(work: &set_printk_work);
455	return &bpf_trace_vprintk_proto;
456	}
457
458	BPF_CALL_5(bpf_seq_printf, struct seq_file , m, char* *, fmt, u32, fmt_size,
459	const void *, args, u32, data_len)
460	{
461	struct bpf_bprintf_data data = {
462	.get_bin_args = true,
463	};
464	int err, num_args;
465
466	if (data_len & `7` \|\| data_len > MAX_BPRINTF_VARARGS * `8` \|\|
467	(data_len && !args))
468	return -EINVAL;
469	num_args = data_len / `8`;
470
471	err = bpf_bprintf_prepare(fmt, fmt_size, raw_args: args, num_args, data: &data);
472	if (err < `0`)
473	return err;
474
475	seq_bprintf(m, f: fmt, binary: data.bin_args);
476
477	bpf_bprintf_cleanup(data: &data);
478
479	return seq_has_overflowed(m) ? -EOVERFLOW : `0`;
480	}
481
482	BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)
483
484	static const struct bpf_func_proto bpf_seq_printf_proto = {
485	.func = bpf_seq_printf,
486	.gpl_only = true,
487	.ret_type = RET_INTEGER,
488	.arg1_type = ARG_PTR_TO_BTF_ID,
489	.arg1_btf_id = &btf_seq_file_ids[`0`],
490	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
491	.arg3_type = ARG_CONST_SIZE,
492	.arg4_type = ARG_PTR_TO_MEM \| PTR_MAYBE_NULL \| MEM_RDONLY,
493	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
494	};
495
496	BPF_CALL_3(bpf_seq_write, struct seq_file , m, const* void *, data, u32, len)
497	{
498	return seq_write(seq: m, data, len) ? -EOVERFLOW : `0`;
499	}
500
501	static const struct bpf_func_proto bpf_seq_write_proto = {
502	.func = bpf_seq_write,
503	.gpl_only = true,
504	.ret_type = RET_INTEGER,
505	.arg1_type = ARG_PTR_TO_BTF_ID,
506	.arg1_btf_id = &btf_seq_file_ids[`0`],
507	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
508	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
509	};
510
511	BPF_CALL_4(bpf_seq_printf_btf, struct seq_file , m, struct* btf_ptr *, ptr,
512	u32, btf_ptr_size, u64, flags)
513	{
514	const struct btf *btf;
515	s32 btf_id;
516	int ret;
517
518	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, btf: &btf, btf_id: &btf_id);
519	if (ret)
520	return ret;
521
522	return btf_type_seq_show_flags(btf, type_id: btf_id, obj: ptr->ptr, m, flags);
523	}
524
525	static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
526	.func = bpf_seq_printf_btf,
527	.gpl_only = true,
528	.ret_type = RET_INTEGER,
529	.arg1_type = ARG_PTR_TO_BTF_ID,
530	.arg1_btf_id = &btf_seq_file_ids[`0`],
531	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
532	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
533	.arg4_type = ARG_ANYTHING,
534	};
535
536	static __always_inline int
537	get_map_perf_counter(struct bpf_map *map, u64 flags,
538	u64 value, u64 enabled, u64 *running)
539	{
540	struct bpf_array array = container_of(map, struct* bpf_array, map);
541	unsigned int cpu = smp_processor_id();
542	u64 index = flags & BPF_F_INDEX_MASK;
543	struct bpf_event_entry *ee;
544
545	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
546	return -EINVAL;
547	if (index == BPF_F_CURRENT_CPU)
548	index = cpu;
549	if (unlikely(index >= array->map.max_entries))
550	return -E2BIG;
551
552	ee = READ_ONCE(array->ptrs[index]);
553	if (!ee)
554	return -ENOENT;
555
556	return perf_event_read_local(event: ee->event, value, enabled, running);
557	}
558
559	BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
560	{
561	u64 value = `0`;
562	int err;
563
564	err = get_map_perf_counter(map, flags, value: &value, NULL, NULL);
565	/*
566	* this api is ugly since we miss [-22..-2] range of valid
567	* counter values, but that's uapi
568	*/
569	if (err)
570	return err;
571	return value;
572	}
573
574	const struct bpf_func_proto bpf_perf_event_read_proto = {
575	.func = bpf_perf_event_read,
576	.gpl_only = true,
577	.ret_type = RET_INTEGER,
578	.arg1_type = ARG_CONST_MAP_PTR,
579	.arg2_type = ARG_ANYTHING,
580	};
581
582	BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
583	struct bpf_perf_event_value *, buf, u32, size)
584	{
585	int err = -EINVAL;
586
587	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
588	goto clear;
589	err = get_map_perf_counter(map, flags, value: &buf->counter, enabled: &buf->enabled,
590	running: &buf->running);
591	if (unlikely(err))
592	goto clear;
593	return `0`;
594	clear:
595	memset(buf, `0`, size);
596	return err;
597	}
598
599	static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
600	.func = bpf_perf_event_read_value,
601	.gpl_only = true,
602	.ret_type = RET_INTEGER,
603	.arg1_type = ARG_CONST_MAP_PTR,
604	.arg2_type = ARG_ANYTHING,
605	.arg3_type = ARG_PTR_TO_UNINIT_MEM,
606	.arg4_type = ARG_CONST_SIZE,
607	};
608
609	const struct bpf_func_proto bpf_get_perf_event_read_value_proto(void*)
610	{
611	return &bpf_perf_event_read_value_proto;
612	}
613
614	static __always_inline u64
615	__bpf_perf_event_output(struct pt_regs regs, struct* bpf_map *map,
616	u64 flags, struct perf_raw_record *raw,
617	struct perf_sample_data *sd)
618	{
619	struct bpf_array array = container_of(map, struct* bpf_array, map);
620	unsigned int cpu = smp_processor_id();
621	u64 index = flags & BPF_F_INDEX_MASK;
622	struct bpf_event_entry *ee;
623	struct perf_event *event;
624
625	if (index == BPF_F_CURRENT_CPU)
626	index = cpu;
627	if (unlikely(index >= array->map.max_entries))
628	return -E2BIG;
629
630	ee = READ_ONCE(array->ptrs[index]);
631	if (!ee)
632	return -ENOENT;
633
634	event = ee->event;
635	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE \|\|
636	event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
637	return -EINVAL;
638
639	if (unlikely(event->oncpu != cpu))
640	return -EOPNOTSUPP;
641
642	perf_sample_save_raw_data(data: sd, event, raw);
643
644	return perf_event_output(event, data: sd, regs);
645	}
646
647	/*
648	* Support executing tracepoints in normal, irq, and nmi context that each call
649	* bpf_perf_event_output
650	*/
651	struct bpf_trace_sample_data {
652	struct perf_sample_data sds[`3`];
653	};
654
655	static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds);
656	static DEFINE_PER_CPU(int, bpf_trace_nest_level);
657	BPF_CALL_5(bpf_perf_event_output, struct pt_regs , regs, struct* bpf_map *, map,
658	u64, flags, void *, data, u64, size)
659	{
660	struct bpf_trace_sample_data *sds;
661	struct perf_raw_record raw = {
662	.frag = {
663	.size = size,
664	.data = data,
665	},
666	};
667	struct perf_sample_data *sd;
668	int nest_level, err;
669
670	preempt_disable();
671	sds = this_cpu_ptr(&bpf_trace_sds);
672	nest_level = this_cpu_inc_return(bpf_trace_nest_level);
673
674	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
675	err = -EBUSY;
676	goto out;
677	}
678
679	sd = &sds->sds[nest_level - `1`];
680
681	if (unlikely(flags & ~(BPF_F_INDEX_MASK))) {
682	err = -EINVAL;
683	goto out;
684	}
685
686	perf_sample_data_init(data: sd, addr: `0`, period: `0`);
687
688	err = __bpf_perf_event_output(regs, map, flags, raw: &raw, sd);
689	out:
690	this_cpu_dec(bpf_trace_nest_level);
691	preempt_enable();
692	return err;
693	}
694
695	static const struct bpf_func_proto bpf_perf_event_output_proto = {
696	.func = bpf_perf_event_output,
697	.gpl_only = true,
698	.ret_type = RET_INTEGER,
699	.arg1_type = ARG_PTR_TO_CTX,
700	.arg2_type = ARG_CONST_MAP_PTR,
701	.arg3_type = ARG_ANYTHING,
702	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
703	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
704	};
705
706	static DEFINE_PER_CPU(int, bpf_event_output_nest_level);
707	struct bpf_nested_pt_regs {
708	struct pt_regs regs[`3`];
709	};
710	static DEFINE_PER_CPU(struct bpf_nested_pt_regs, bpf_pt_regs);
711	static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
712
713	u64 bpf_event_output(struct bpf_map map, u64 flags, void* *meta, u64 meta_size,
714	void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
715	{
716	struct perf_raw_frag frag = {
717	.copy = ctx_copy,
718	.size = ctx_size,
719	.data = ctx,
720	};
721	struct perf_raw_record raw = {
722	.frag = {
723	{
724	.next = ctx_size ? &frag : NULL,
725	},
726	.size = meta_size,
727	.data = meta,
728	},
729	};
730	struct perf_sample_data *sd;
731	struct pt_regs *regs;
732	int nest_level;
733	u64 ret;
734
735	preempt_disable();
736	nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
737
738	if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
739	ret = -EBUSY;
740	goto out;
741	}
742	sd = this_cpu_ptr(&bpf_misc_sds.sds[nest_level - `1`]);
743	regs = this_cpu_ptr(&bpf_pt_regs.regs[nest_level - `1`]);
744
745	perf_fetch_caller_regs(regs);
746	perf_sample_data_init(data: sd, addr: `0`, period: `0`);
747
748	ret = __bpf_perf_event_output(regs, map, flags, raw: &raw, sd);
749	out:
750	this_cpu_dec(bpf_event_output_nest_level);
751	preempt_enable();
752	return ret;
753	}
754
755	BPF_CALL_0(bpf_get_current_task)
756	{
757	return (long) current;
758	}
759
760	const struct bpf_func_proto bpf_get_current_task_proto = {
761	.func = bpf_get_current_task,
762	.gpl_only = true,
763	.ret_type = RET_INTEGER,
764	};
765
766	BPF_CALL_0(bpf_get_current_task_btf)
767	{
768	return (unsigned long) current;
769	}
770
771	const struct bpf_func_proto bpf_get_current_task_btf_proto = {
772	.func = bpf_get_current_task_btf,
773	.gpl_only = true,
774	.ret_type = RET_PTR_TO_BTF_ID_TRUSTED,
775	.ret_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
776	};
777
778	BPF_CALL_1(bpf_task_pt_regs, struct task_struct *, task)
779	{
780	return (unsigned long) task_pt_regs(task);
781	}
782
783	BTF_ID_LIST_SINGLE(bpf_task_pt_regs_ids, struct, pt_regs)
784
785	const struct bpf_func_proto bpf_task_pt_regs_proto = {
786	.func = bpf_task_pt_regs,
787	.gpl_only = true,
788	.arg1_type = ARG_PTR_TO_BTF_ID,
789	.arg1_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
790	.ret_type = RET_PTR_TO_BTF_ID,
791	.ret_btf_id = &bpf_task_pt_regs_ids[`0`],
792	};
793
794	struct send_signal_irq_work {
795	struct irq_work irq_work;
796	struct task_struct *task;
797	u32 sig;
798	enum pid_type type;
799	bool has_siginfo;
800	struct kernel_siginfo info;
801	};
802
803	static DEFINE_PER_CPU(struct send_signal_irq_work, send_signal_work);
804
805	static void do_bpf_send_signal(struct irq_work *entry)
806	{
807	struct send_signal_irq_work *work;
808	struct kernel_siginfo *siginfo;
809
810	work = container_of(entry, struct send_signal_irq_work, irq_work);
811	siginfo = work->has_siginfo ? &work->info : SEND_SIG_PRIV;
812
813	group_send_sig_info(sig: work->sig, info: siginfo, p: work->task, type: work->type);
814	put_task_struct(t: work->task);
815	}
816
817	static int bpf_send_signal_common(u32 sig, enum pid_type type, struct task_struct *task, u64 value)
818	{
819	struct send_signal_irq_work *work = NULL;
820	struct kernel_siginfo info;
821	struct kernel_siginfo *siginfo;
822
823	if (!task) {
824	task = current;
825	siginfo = SEND_SIG_PRIV;
826	} else {
827	clear_siginfo(info: &info);
828	info.si_signo = sig;
829	info.si_errno = `0`;
830	info.si_code = SI_KERNEL;
831	info.si_pid = `0`;
832	info.si_uid = `0`;
833	info.si_value.sival_ptr = (void )(unsigned* long)value;
834	siginfo = &info;
835	}
836
837	/ Similar to bpf_probe_write_user, task needs to be*
838	* in a sound condition and kernel memory access be
839	* permitted in order to send signal to the current
840	* task.
841	*/
842	if (unlikely(task->flags & (PF_KTHREAD \| PF_EXITING)))
843	return -EPERM;
844	if (unlikely(!nmi_uaccess_okay()))
845	return -EPERM;
846	/ Task should not be pid=1 to avoid kernel panic. /
847	if (unlikely(is_global_init(task)))
848	return -EPERM;
849
850	if (preempt_count() != `0` \|\| irqs_disabled()) {
851	/ Do an early check on signal validity. Otherwise,*
852	* the error is lost in deferred irq_work.
853	*/
854	if (unlikely(!valid_signal(sig)))
855	return -EINVAL;
856
857	work = this_cpu_ptr(&send_signal_work);
858	if (irq_work_is_busy(work: &work->irq_work))
859	return -EBUSY;
860
861	/ Add the current task, which is the target of sending signal,*
862	* to the irq_work. The current task may change when queued
863	* irq works get executed.
864	*/
865	work->task = get_task_struct(t: task);
866	work->has_siginfo = siginfo == &info;
867	if (work->has_siginfo)
868	copy_siginfo(to: &work->info, from: &info);
869	work->sig = sig;
870	work->type = type;
871	irq_work_queue(work: &work->irq_work);
872	return `0`;
873	}
874
875	return group_send_sig_info(sig, info: siginfo, p: task, type);
876	}
877
878	BPF_CALL_1(bpf_send_signal, u32, sig)
879	{
880	return bpf_send_signal_common(sig, type: PIDTYPE_TGID, NULL, value: `0`);
881	}
882
883	const struct bpf_func_proto bpf_send_signal_proto = {
884	.func = bpf_send_signal,
885	.gpl_only = false,
886	.ret_type = RET_INTEGER,
887	.arg1_type = ARG_ANYTHING,
888	};
889
890	BPF_CALL_1(bpf_send_signal_thread, u32, sig)
891	{
892	return bpf_send_signal_common(sig, type: PIDTYPE_PID, NULL, value: `0`);
893	}
894
895	const struct bpf_func_proto bpf_send_signal_thread_proto = {
896	.func = bpf_send_signal_thread,
897	.gpl_only = false,
898	.ret_type = RET_INTEGER,
899	.arg1_type = ARG_ANYTHING,
900	};
901
902	BPF_CALL_3(bpf_d_path, const struct path , path, char* *, buf, u32, sz)
903	{
904	struct path copy;
905	long len;
906	char *p;
907
908	if (!sz)
909	return `0`;
910
911	/*
912	* The path pointer is verified as trusted and safe to use,
913	* but let's double check it's valid anyway to workaround
914	* potentially broken verifier.
915	*/
916	len = copy_from_kernel_nofault(dst: &copy, src: path, size: sizeof(*path));
917	if (len < `0`)
918	return len;
919
920	p = d_path(&copy, buf, sz);
921	if (IS_ERR(ptr: p)) {
922	len = PTR_ERR(ptr: p);
923	} else {
924	len = buf + sz - p;
925	memmove(buf, p, len);
926	}
927
928	return len;
929	}
930
931	BTF_SET_START(btf_allowlist_d_path)
932	#ifdef CONFIG_SECURITY
933	BTF_ID(func, security_file_permission)
934	BTF_ID(func, security_inode_getattr)
935	BTF_ID(func, security_file_open)
936	#endif
937	#ifdef CONFIG_SECURITY_PATH
938	BTF_ID(func, security_path_truncate)
939	#endif
940	BTF_ID(func, vfs_truncate)
941	BTF_ID(func, vfs_fallocate)
942	BTF_ID(func, dentry_open)
943	BTF_ID(func, vfs_getattr)
944	BTF_ID(func, filp_close)
945	BTF_SET_END(btf_allowlist_d_path)
946
947	static bool bpf_d_path_allowed(const struct bpf_prog *prog)
948	{
949	if (prog->type == BPF_PROG_TYPE_TRACING &&
950	prog->expected_attach_type == BPF_TRACE_ITER)
951	return true;
952
953	if (prog->type == BPF_PROG_TYPE_LSM)
954	return bpf_lsm_is_sleepable_hook(btf_id: prog->aux->attach_btf_id);
955
956	return btf_id_set_contains(set: &btf_allowlist_d_path,
957	id: prog->aux->attach_btf_id);
958	}
959
960	BTF_ID_LIST_SINGLE(bpf_d_path_btf_ids, struct, path)
961
962	static const struct bpf_func_proto bpf_d_path_proto = {
963	.func = bpf_d_path,
964	.gpl_only = false,
965	.ret_type = RET_INTEGER,
966	.arg1_type = ARG_PTR_TO_BTF_ID,
967	.arg1_btf_id = &bpf_d_path_btf_ids[`0`],
968	.arg2_type = ARG_PTR_TO_MEM \| MEM_WRITE,
969	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
970	.allowed = bpf_d_path_allowed,
971	};
972
973	#define BTF_F_ALL (BTF_F_COMPACT \| BTF_F_NONAME \| \
974	BTF_F_PTR_RAW \| BTF_F_ZERO)
975
976	static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
977	u64 flags, const struct btf **btf,
978	s32 *btf_id)
979	{
980	const struct btf_type *t;
981
982	if (unlikely(flags & ~(BTF_F_ALL)))
983	return -EINVAL;
984
985	if (btf_ptr_size != sizeof(struct btf_ptr))
986	return -EINVAL;
987
988	*btf = bpf_get_btf_vmlinux();
989
990	if (IS_ERR_OR_NULL(ptr: *btf))
991	return IS_ERR(ptr: btf) ? PTR_ERR(ptr: btf) : -EINVAL;
992
993	if (ptr->type_id > `0`)
994	*btf_id = ptr->type_id;
995	else
996	return -EINVAL;
997
998	if (*btf_id > `0`)
999	t = btf_type_by_id(btf: btf, type_id: btf_id);
1000	if (*btf_id <= `0` \|\| !t)
1001	return -ENOENT;
1002
1003	return `0`;
1004	}
1005
1006	BPF_CALL_5(bpf_snprintf_btf, char , str, u32, str_size, struct* btf_ptr *, ptr,
1007	u32, btf_ptr_size, u64, flags)
1008	{
1009	const struct btf *btf;
1010	s32 btf_id;
1011	int ret;
1012
1013	ret = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, btf: &btf, btf_id: &btf_id);
1014	if (ret)
1015	return ret;
1016
1017	return btf_type_snprintf_show(btf, type_id: btf_id, obj: ptr->ptr, buf: str, len: str_size,
1018	flags);
1019	}
1020
1021	const struct bpf_func_proto bpf_snprintf_btf_proto = {
1022	.func = bpf_snprintf_btf,
1023	.gpl_only = false,
1024	.ret_type = RET_INTEGER,
1025	.arg1_type = ARG_PTR_TO_MEM,
1026	.arg2_type = ARG_CONST_SIZE,
1027	.arg3_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
1028	.arg4_type = ARG_CONST_SIZE,
1029	.arg5_type = ARG_ANYTHING,
1030	};
1031
1032	BPF_CALL_1(bpf_get_func_ip_tracing, void *, ctx)
1033	{
1034	/ This helper call is inlined by verifier. /
1035	return ((u64 *)ctx)[-`2`];
1036	}
1037
1038	static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
1039	.func = bpf_get_func_ip_tracing,
1040	.gpl_only = true,
1041	.ret_type = RET_INTEGER,
1042	.arg1_type = ARG_PTR_TO_CTX,
1043	};
1044
/*
 * Translate an fentry address into the function entry address.
 *
 * With CONFIG_X86_KERNEL_IBT, if the ENDBR_INSN_SIZE bytes just before
 * @fentry_ip are an ENDBR instruction, step back over it. Otherwise the
 * address is returned unchanged.
 */
static inline unsigned long get_entry_ip(unsigned long fentry_ip)
{
#ifdef CONFIG_X86_KERNEL_IBT
	if (is_endbr((void *)(fentry_ip - ENDBR_INSN_SIZE)))
		fentry_ip -= ENDBR_INSN_SIZE;
#endif
	return fentry_ip;
}
1053
1054	BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
1055	{
1056	struct bpf_trace_run_ctx *run_ctx __maybe_unused;
1057	struct kprobe *kp;
1058
1059	#ifdef CONFIG_UPROBES
1060	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1061	if (run_ctx->is_uprobe)
1062	return ((struct uprobe_dispatch_data *)current->utask->vaddr)->bp_addr;
1063	#endif
1064
1065	kp = kprobe_running();
1066
1067	if (!kp \|\| !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
1068	return `0`;
1069
1070	return get_entry_ip(fentry_ip: (uintptr_t)kp->addr);
1071	}
1072
1073	static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
1074	.func = bpf_get_func_ip_kprobe,
1075	.gpl_only = true,
1076	.ret_type = RET_INTEGER,
1077	.arg1_type = ARG_PTR_TO_CTX,
1078	};
1079
1080	BPF_CALL_1(bpf_get_func_ip_kprobe_multi, struct pt_regs *, regs)
1081	{
1082	return bpf_kprobe_multi_entry_ip(current->bpf_ctx);
1083	}
1084
1085	static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe_multi = {
1086	.func = bpf_get_func_ip_kprobe_multi,
1087	.gpl_only = false,
1088	.ret_type = RET_INTEGER,
1089	.arg1_type = ARG_PTR_TO_CTX,
1090	};
1091
1092	BPF_CALL_1(bpf_get_attach_cookie_kprobe_multi, struct pt_regs *, regs)
1093	{
1094	return bpf_kprobe_multi_cookie(current->bpf_ctx);
1095	}
1096
1097	static const struct bpf_func_proto bpf_get_attach_cookie_proto_kmulti = {
1098	.func = bpf_get_attach_cookie_kprobe_multi,
1099	.gpl_only = false,
1100	.ret_type = RET_INTEGER,
1101	.arg1_type = ARG_PTR_TO_CTX,
1102	};
1103
1104	BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs)
1105	{
1106	return bpf_uprobe_multi_entry_ip(current->bpf_ctx);
1107	}
1108
1109	static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = {
1110	.func = bpf_get_func_ip_uprobe_multi,
1111	.gpl_only = false,
1112	.ret_type = RET_INTEGER,
1113	.arg1_type = ARG_PTR_TO_CTX,
1114	};
1115
1116	BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs)
1117	{
1118	return bpf_uprobe_multi_cookie(current->bpf_ctx);
1119	}
1120
1121	static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = {
1122	.func = bpf_get_attach_cookie_uprobe_multi,
1123	.gpl_only = false,
1124	.ret_type = RET_INTEGER,
1125	.arg1_type = ARG_PTR_TO_CTX,
1126	};
1127
1128	BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx)
1129	{
1130	struct bpf_trace_run_ctx *run_ctx;
1131
1132	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1133	return run_ctx->bpf_cookie;
1134	}
1135
1136	static const struct bpf_func_proto bpf_get_attach_cookie_proto_trace = {
1137	.func = bpf_get_attach_cookie_trace,
1138	.gpl_only = false,
1139	.ret_type = RET_INTEGER,
1140	.arg1_type = ARG_PTR_TO_CTX,
1141	};
1142
1143	BPF_CALL_1(bpf_get_attach_cookie_pe, struct bpf_perf_event_data_kern *, ctx)
1144	{
1145	return ctx->event->bpf_cookie;
1146	}
1147
1148	static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
1149	.func = bpf_get_attach_cookie_pe,
1150	.gpl_only = false,
1151	.ret_type = RET_INTEGER,
1152	.arg1_type = ARG_PTR_TO_CTX,
1153	};
1154
1155	BPF_CALL_1(bpf_get_attach_cookie_tracing, void *, ctx)
1156	{
1157	struct bpf_trace_run_ctx *run_ctx;
1158
1159	run_ctx = container_of(current->bpf_ctx, struct bpf_trace_run_ctx, run_ctx);
1160	return run_ctx->bpf_cookie;
1161	}
1162
1163	static const struct bpf_func_proto bpf_get_attach_cookie_proto_tracing = {
1164	.func = bpf_get_attach_cookie_tracing,
1165	.gpl_only = false,
1166	.ret_type = RET_INTEGER,
1167	.arg1_type = ARG_PTR_TO_CTX,
1168	};
1169
1170	BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
1171	{
1172	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
1173	u32 entry_cnt = size / br_entry_size;
1174
1175	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);
1176
1177	if (unlikely(flags))
1178	return -EINVAL;
1179
1180	if (!entry_cnt)
1181	return -ENOENT;
1182
1183	return entry_cnt * br_entry_size;
1184	}
1185
1186	const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
1187	.func = bpf_get_branch_snapshot,
1188	.gpl_only = true,
1189	.ret_type = RET_INTEGER,
1190	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
1191	.arg2_type = ARG_CONST_SIZE_OR_ZERO,
1192	};
1193
1194	BPF_CALL_3(get_func_arg, void , ctx, u32, n, u64 , value)
1195	{
1196	/ This helper call is inlined by verifier. /
1197	u64 nr_args = ((u64 *)ctx)[-`1`];
1198
1199	if ((u64) n >= nr_args)
1200	return -EINVAL;
1201	value = ((u64 )ctx)[n];
1202	return `0`;
1203	}
1204
1205	static const struct bpf_func_proto bpf_get_func_arg_proto = {
1206	.func = get_func_arg,
1207	.ret_type = RET_INTEGER,
1208	.arg1_type = ARG_PTR_TO_CTX,
1209	.arg2_type = ARG_ANYTHING,
1210	.arg3_type = ARG_PTR_TO_FIXED_SIZE_MEM \| MEM_UNINIT \| MEM_WRITE \| MEM_ALIGNED,
1211	.arg3_size = sizeof(u64),
1212	};
1213
1214	BPF_CALL_2(get_func_ret, void , ctx, u64 , value)
1215	{
1216	/ This helper call is inlined by verifier. /
1217	u64 nr_args = ((u64 *)ctx)[-`1`];
1218
1219	value = ((u64 )ctx)[nr_args];
1220	return `0`;
1221	}
1222
1223	static const struct bpf_func_proto bpf_get_func_ret_proto = {
1224	.func = get_func_ret,
1225	.ret_type = RET_INTEGER,
1226	.arg1_type = ARG_PTR_TO_CTX,
1227	.arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM \| MEM_UNINIT \| MEM_WRITE \| MEM_ALIGNED,
1228	.arg2_size = sizeof(u64),
1229	};
1230
1231	BPF_CALL_1(get_func_arg_cnt, void *, ctx)
1232	{
1233	/ This helper call is inlined by verifier. /
1234	return ((u64 *)ctx)[-`1`];
1235	}
1236
1237	static const struct bpf_func_proto bpf_get_func_arg_cnt_proto = {
1238	.func = get_func_arg_cnt,
1239	.ret_type = RET_INTEGER,
1240	.arg1_type = ARG_PTR_TO_CTX,
1241	};
1242
1243	static const struct bpf_func_proto *
1244	bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1245	{
1246	const struct bpf_func_proto *func_proto;
1247
1248	switch (func_id) {
1249	case BPF_FUNC_get_smp_processor_id:
1250	return &bpf_get_smp_processor_id_proto;
1251	#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
1252	case BPF_FUNC_probe_read:
1253	return security_locked_down(what: LOCKDOWN_BPF_READ_KERNEL) < `0` ?
1254	NULL : &bpf_probe_read_compat_proto;
1255	case BPF_FUNC_probe_read_str:
1256	return security_locked_down(what: LOCKDOWN_BPF_READ_KERNEL) < `0` ?
1257	NULL : &bpf_probe_read_compat_str_proto;
1258	#endif
1259	case BPF_FUNC_get_func_ip:
1260	return &bpf_get_func_ip_proto_tracing;
1261	default:
1262	break;
1263	}
1264
1265	func_proto = bpf_base_func_proto(func_id, prog);
1266	if (func_proto)
1267	return func_proto;
1268
1269	if (!bpf_token_capable(token: prog->aux->token, CAP_SYS_ADMIN))
1270	return NULL;
1271
1272	switch (func_id) {
1273	case BPF_FUNC_probe_write_user:
1274	return security_locked_down(what: LOCKDOWN_BPF_WRITE_USER) < `0` ?
1275	NULL : &bpf_probe_write_user_proto;
1276	default:
1277	return NULL;
1278	}
1279	}
1280
1281	static bool is_kprobe_multi(const struct bpf_prog *prog)
1282	{
1283	return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI \|\|
1284	prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1285	}
1286
1287	static inline bool is_kprobe_session(const struct bpf_prog *prog)
1288	{
1289	return prog->expected_attach_type == BPF_TRACE_KPROBE_SESSION;
1290	}
1291
1292	static inline bool is_uprobe_multi(const struct bpf_prog *prog)
1293	{
1294	return prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI \|\|
1295	prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1296	}
1297
1298	static inline bool is_uprobe_session(const struct bpf_prog *prog)
1299	{
1300	return prog->expected_attach_type == BPF_TRACE_UPROBE_SESSION;
1301	}
1302
1303	static const struct bpf_func_proto *
1304	kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1305	{
1306	switch (func_id) {
1307	case BPF_FUNC_perf_event_output:
1308	return &bpf_perf_event_output_proto;
1309	case BPF_FUNC_get_stackid:
1310	return &bpf_get_stackid_proto;
1311	case BPF_FUNC_get_stack:
1312	return prog->sleepable ? &bpf_get_stack_sleepable_proto : &bpf_get_stack_proto;
1313	#ifdef CONFIG_BPF_KPROBE_OVERRIDE
1314	case BPF_FUNC_override_return:
1315	return &bpf_override_return_proto;
1316	#endif
1317	case BPF_FUNC_get_func_ip:
1318	if (is_kprobe_multi(prog))
1319	return &bpf_get_func_ip_proto_kprobe_multi;
1320	if (is_uprobe_multi(prog))
1321	return &bpf_get_func_ip_proto_uprobe_multi;
1322	return &bpf_get_func_ip_proto_kprobe;
1323	case BPF_FUNC_get_attach_cookie:
1324	if (is_kprobe_multi(prog))
1325	return &bpf_get_attach_cookie_proto_kmulti;
1326	if (is_uprobe_multi(prog))
1327	return &bpf_get_attach_cookie_proto_umulti;
1328	return &bpf_get_attach_cookie_proto_trace;
1329	default:
1330	return bpf_tracing_func_proto(func_id, prog);
1331	}
1332	}
1333
1334	/ bpf+kprobe programs can access fields of 'struct pt_regs' /
1335	static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1336	const struct bpf_prog *prog,
1337	struct bpf_insn_access_aux *info)
1338	{
1339	if (off < `0` \|\| off >= sizeof(struct pt_regs))
1340	return false;
1341	if (off % size != `0`)
1342	return false;
1343	/*
1344	* Assertion for 32 bit to make sure last 8 byte access
1345	* (BPF_DW) to the last 4 byte member is disallowed.
1346	*/
1347	if (off + size > sizeof(struct pt_regs))
1348	return false;
1349
1350	if (type == BPF_WRITE)
1351	prog->aux->kprobe_write_ctx = true;
1352
1353	return true;
1354	}
1355
1356	const struct bpf_verifier_ops kprobe_verifier_ops = {
1357	.get_func_proto = kprobe_prog_func_proto,
1358	.is_valid_access = kprobe_prog_is_valid_access,
1359	};
1360
1361	const struct bpf_prog_ops kprobe_prog_ops = {
1362	};
1363
1364	BPF_CALL_5(bpf_perf_event_output_tp, void , tp_buff, struct* bpf_map *, map,
1365	u64, flags, void *, data, u64, size)
1366	{
1367	struct pt_regs regs = (struct pt_regs **)tp_buff;
1368
1369	/*
1370	* r1 points to perf tracepoint buffer where first 8 bytes are hidden
1371	* from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
1372	* from there and call the same bpf_perf_event_output() helper inline.
1373	*/
1374	return ____bpf_perf_event_output(regs, map, flags, data, size);
1375	}
1376
1377	static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
1378	.func = bpf_perf_event_output_tp,
1379	.gpl_only = true,
1380	.ret_type = RET_INTEGER,
1381	.arg1_type = ARG_PTR_TO_CTX,
1382	.arg2_type = ARG_CONST_MAP_PTR,
1383	.arg3_type = ARG_ANYTHING,
1384	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
1385	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
1386	};
1387
1388	BPF_CALL_3(bpf_get_stackid_tp, void , tp_buff, struct* bpf_map *, map,
1389	u64, flags)
1390	{
1391	struct pt_regs regs = (struct pt_regs **)tp_buff;
1392
1393	/*
1394	* Same comment as in bpf_perf_event_output_tp(), only that this time
1395	* the other helper's function body cannot be inlined due to being
1396	* external, thus we need to call raw helper function.
1397	*/
1398	return bpf_get_stackid(r1: (unsigned long) regs, r2: (unsigned long) map,
1399	r3: flags, r4: `0`, r5: `0`);
1400	}
1401
1402	static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
1403	.func = bpf_get_stackid_tp,
1404	.gpl_only = true,
1405	.ret_type = RET_INTEGER,
1406	.arg1_type = ARG_PTR_TO_CTX,
1407	.arg2_type = ARG_CONST_MAP_PTR,
1408	.arg3_type = ARG_ANYTHING,
1409	};
1410
1411	BPF_CALL_4(bpf_get_stack_tp, void , tp_buff, void* *, buf, u32, size,
1412	u64, flags)
1413	{
1414	struct pt_regs regs = (struct pt_regs **)tp_buff;
1415
1416	return bpf_get_stack(r1: (unsigned long) regs, r2: (unsigned long) buf,
1417	r3: (unsigned long) size, r4: flags, r5: `0`);
1418	}
1419
1420	static const struct bpf_func_proto bpf_get_stack_proto_tp = {
1421	.func = bpf_get_stack_tp,
1422	.gpl_only = true,
1423	.ret_type = RET_INTEGER,
1424	.arg1_type = ARG_PTR_TO_CTX,
1425	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
1426	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
1427	.arg4_type = ARG_ANYTHING,
1428	};
1429
1430	static const struct bpf_func_proto *
1431	tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1432	{
1433	switch (func_id) {
1434	case BPF_FUNC_perf_event_output:
1435	return &bpf_perf_event_output_proto_tp;
1436	case BPF_FUNC_get_stackid:
1437	return &bpf_get_stackid_proto_tp;
1438	case BPF_FUNC_get_stack:
1439	return &bpf_get_stack_proto_tp;
1440	case BPF_FUNC_get_attach_cookie:
1441	return &bpf_get_attach_cookie_proto_trace;
1442	default:
1443	return bpf_tracing_func_proto(func_id, prog);
1444	}
1445	}
1446
1447	static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1448	const struct bpf_prog *prog,
1449	struct bpf_insn_access_aux *info)
1450	{
1451	if (off < sizeof(void *) \|\| off >= PERF_MAX_TRACE_SIZE)
1452	return false;
1453	if (type != BPF_READ)
1454	return false;
1455	if (off % size != `0`)
1456	return false;
1457
1458	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
1459	return true;
1460	}
1461
1462	const struct bpf_verifier_ops tracepoint_verifier_ops = {
1463	.get_func_proto = tp_prog_func_proto,
1464	.is_valid_access = tp_prog_is_valid_access,
1465	};
1466
1467	const struct bpf_prog_ops tracepoint_prog_ops = {
1468	};
1469
1470	BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
1471	struct bpf_perf_event_value *, buf, u32, size)
1472	{
1473	int err = -EINVAL;
1474
1475	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
1476	goto clear;
1477	err = perf_event_read_local(event: ctx->event, value: &buf->counter, enabled: &buf->enabled,
1478	running: &buf->running);
1479	if (unlikely(err))
1480	goto clear;
1481	return `0`;
1482	clear:
1483	memset(buf, `0`, size);
1484	return err;
1485	}
1486
1487	static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
1488	.func = bpf_perf_prog_read_value,
1489	.gpl_only = true,
1490	.ret_type = RET_INTEGER,
1491	.arg1_type = ARG_PTR_TO_CTX,
1492	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
1493	.arg3_type = ARG_CONST_SIZE,
1494	};
1495
1496	BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
1497	void *, buf, u32, size, u64, flags)
1498	{
1499	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
1500	struct perf_branch_stack *br_stack = ctx->data->br_stack;
1501	u32 to_copy;
1502
1503	if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
1504	return -EINVAL;
1505
1506	if (unlikely(!(ctx->data->sample_flags & PERF_SAMPLE_BRANCH_STACK)))
1507	return -ENOENT;
1508
1509	if (unlikely(!br_stack))
1510	return -ENOENT;
1511
1512	if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
1513	return br_stack->nr * br_entry_size;
1514
1515	if (!buf \|\| (size % br_entry_size != `0`))
1516	return -EINVAL;
1517
1518	to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
1519	memcpy(buf, br_stack->entries, to_copy);
1520
1521	return to_copy;
1522	}
1523
1524	static const struct bpf_func_proto bpf_read_branch_records_proto = {
1525	.func = bpf_read_branch_records,
1526	.gpl_only = true,
1527	.ret_type = RET_INTEGER,
1528	.arg1_type = ARG_PTR_TO_CTX,
1529	.arg2_type = ARG_PTR_TO_MEM_OR_NULL,
1530	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
1531	.arg4_type = ARG_ANYTHING,
1532	};
1533
1534	static const struct bpf_func_proto *
1535	pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1536	{
1537	switch (func_id) {
1538	case BPF_FUNC_perf_event_output:
1539	return &bpf_perf_event_output_proto_tp;
1540	case BPF_FUNC_get_stackid:
1541	return &bpf_get_stackid_proto_pe;
1542	case BPF_FUNC_get_stack:
1543	return &bpf_get_stack_proto_pe;
1544	case BPF_FUNC_perf_prog_read_value:
1545	return &bpf_perf_prog_read_value_proto;
1546	case BPF_FUNC_read_branch_records:
1547	return &bpf_read_branch_records_proto;
1548	case BPF_FUNC_get_attach_cookie:
1549	return &bpf_get_attach_cookie_proto_pe;
1550	default:
1551	return bpf_tracing_func_proto(func_id, prog);
1552	}
1553	}
1554
1555	/*
1556	* bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
1557	* to avoid potential recursive reuse issue when/if tracepoints are added
1558	* inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack.
1559	*
1560	* Since raw tracepoints run despite bpf_prog_active, support concurrent usage
1561	* in normal, irq, and nmi context.
1562	*/
1563	struct bpf_raw_tp_regs {
1564	struct pt_regs regs[`3`];
1565	};
1566	static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs);
1567	static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level);
1568	static struct pt_regs get_bpf_raw_tp_regs(void*)
1569	{
1570	struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs);
1571	int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level);
1572
1573	if (nest_level > ARRAY_SIZE(tp_regs->regs)) {
1574	this_cpu_dec(bpf_raw_tp_nest_level);
1575	return ERR_PTR(error: -EBUSY);
1576	}
1577
1578	return &tp_regs->regs[nest_level - `1`];
1579	}
1580
1581	static void put_bpf_raw_tp_regs(void)
1582	{
1583	this_cpu_dec(bpf_raw_tp_nest_level);
1584	}
1585
1586	BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
1587	struct bpf_map , map, u64, flags, void* *, data, u64, size)
1588	{
1589	struct pt_regs *regs = get_bpf_raw_tp_regs();
1590	int ret;
1591
1592	if (IS_ERR(ptr: regs))
1593	return PTR_ERR(ptr: regs);
1594
1595	perf_fetch_caller_regs(regs);
1596	ret = ____bpf_perf_event_output(regs, map, flags, data, size);
1597
1598	put_bpf_raw_tp_regs();
1599	return ret;
1600	}
1601
1602	static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
1603	.func = bpf_perf_event_output_raw_tp,
1604	.gpl_only = true,
1605	.ret_type = RET_INTEGER,
1606	.arg1_type = ARG_PTR_TO_CTX,
1607	.arg2_type = ARG_CONST_MAP_PTR,
1608	.arg3_type = ARG_ANYTHING,
1609	.arg4_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
1610	.arg5_type = ARG_CONST_SIZE_OR_ZERO,
1611	};
1612
/* Helper prototypes defined outside this file. */
extern const struct bpf_func_proto bpf_skb_output_proto;
extern const struct bpf_func_proto bpf_xdp_output_proto;
extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto;
1616
1617	BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
1618	struct bpf_map *, map, u64, flags)
1619	{
1620	struct pt_regs *regs = get_bpf_raw_tp_regs();
1621	int ret;
1622
1623	if (IS_ERR(ptr: regs))
1624	return PTR_ERR(ptr: regs);
1625
1626	perf_fetch_caller_regs(regs);
1627	/ similar to bpf_perf_event_output_tp, but pt_regs fetched differently /
1628	ret = bpf_get_stackid(r1: (unsigned long) regs, r2: (unsigned long) map,
1629	r3: flags, r4: `0`, r5: `0`);
1630	put_bpf_raw_tp_regs();
1631	return ret;
1632	}
1633
1634	static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
1635	.func = bpf_get_stackid_raw_tp,
1636	.gpl_only = true,
1637	.ret_type = RET_INTEGER,
1638	.arg1_type = ARG_PTR_TO_CTX,
1639	.arg2_type = ARG_CONST_MAP_PTR,
1640	.arg3_type = ARG_ANYTHING,
1641	};
1642
1643	BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
1644	void *, buf, u32, size, u64, flags)
1645	{
1646	struct pt_regs *regs = get_bpf_raw_tp_regs();
1647	int ret;
1648
1649	if (IS_ERR(ptr: regs))
1650	return PTR_ERR(ptr: regs);
1651
1652	perf_fetch_caller_regs(regs);
1653	ret = bpf_get_stack(r1: (unsigned long) regs, r2: (unsigned long) buf,
1654	r3: (unsigned long) size, r4: flags, r5: `0`);
1655	put_bpf_raw_tp_regs();
1656	return ret;
1657	}
1658
1659	static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
1660	.func = bpf_get_stack_raw_tp,
1661	.gpl_only = true,
1662	.ret_type = RET_INTEGER,
1663	.arg1_type = ARG_PTR_TO_CTX,
1664	.arg2_type = ARG_PTR_TO_MEM \| MEM_RDONLY,
1665	.arg3_type = ARG_CONST_SIZE_OR_ZERO,
1666	.arg4_type = ARG_ANYTHING,
1667	};
1668
1669	static const struct bpf_func_proto *
1670	raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1671	{
1672	switch (func_id) {
1673	case BPF_FUNC_perf_event_output:
1674	return &bpf_perf_event_output_proto_raw_tp;
1675	case BPF_FUNC_get_stackid:
1676	return &bpf_get_stackid_proto_raw_tp;
1677	case BPF_FUNC_get_stack:
1678	return &bpf_get_stack_proto_raw_tp;
1679	case BPF_FUNC_get_attach_cookie:
1680	return &bpf_get_attach_cookie_proto_tracing;
1681	default:
1682	return bpf_tracing_func_proto(func_id, prog);
1683	}
1684	}
1685
1686	const struct bpf_func_proto *
1687	tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1688	{
1689	const struct bpf_func_proto *fn;
1690
1691	switch (func_id) {
1692	#ifdef CONFIG_NET
1693	case BPF_FUNC_skb_output:
1694	return &bpf_skb_output_proto;
1695	case BPF_FUNC_xdp_output:
1696	return &bpf_xdp_output_proto;
1697	case BPF_FUNC_skc_to_tcp6_sock:
1698	return &bpf_skc_to_tcp6_sock_proto;
1699	case BPF_FUNC_skc_to_tcp_sock:
1700	return &bpf_skc_to_tcp_sock_proto;
1701	case BPF_FUNC_skc_to_tcp_timewait_sock:
1702	return &bpf_skc_to_tcp_timewait_sock_proto;
1703	case BPF_FUNC_skc_to_tcp_request_sock:
1704	return &bpf_skc_to_tcp_request_sock_proto;
1705	case BPF_FUNC_skc_to_udp6_sock:
1706	return &bpf_skc_to_udp6_sock_proto;
1707	case BPF_FUNC_skc_to_unix_sock:
1708	return &bpf_skc_to_unix_sock_proto;
1709	case BPF_FUNC_skc_to_mptcp_sock:
1710	return &bpf_skc_to_mptcp_sock_proto;
1711	case BPF_FUNC_sk_storage_get:
1712	return &bpf_sk_storage_get_tracing_proto;
1713	case BPF_FUNC_sk_storage_delete:
1714	return &bpf_sk_storage_delete_tracing_proto;
1715	case BPF_FUNC_sock_from_file:
1716	return &bpf_sock_from_file_proto;
1717	case BPF_FUNC_get_socket_cookie:
1718	return &bpf_get_socket_ptr_cookie_proto;
1719	case BPF_FUNC_xdp_get_buff_len:
1720	return &bpf_xdp_get_buff_len_trace_proto;
1721	#endif
1722	case BPF_FUNC_seq_printf:
1723	return prog->expected_attach_type == BPF_TRACE_ITER ?
1724	&bpf_seq_printf_proto :
1725	NULL;
1726	case BPF_FUNC_seq_write:
1727	return prog->expected_attach_type == BPF_TRACE_ITER ?
1728	&bpf_seq_write_proto :
1729	NULL;
1730	case BPF_FUNC_seq_printf_btf:
1731	return prog->expected_attach_type == BPF_TRACE_ITER ?
1732	&bpf_seq_printf_btf_proto :
1733	NULL;
1734	case BPF_FUNC_d_path:
1735	return &bpf_d_path_proto;
1736	case BPF_FUNC_get_func_arg:
1737	return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_proto : NULL;
1738	case BPF_FUNC_get_func_ret:
1739	return bpf_prog_has_trampoline(prog) ? &bpf_get_func_ret_proto : NULL;
1740	case BPF_FUNC_get_func_arg_cnt:
1741	return bpf_prog_has_trampoline(prog) ? &bpf_get_func_arg_cnt_proto : NULL;
1742	case BPF_FUNC_get_attach_cookie:
1743	if (prog->type == BPF_PROG_TYPE_TRACING &&
1744	prog->expected_attach_type == BPF_TRACE_RAW_TP)
1745	return &bpf_get_attach_cookie_proto_tracing;
1746	return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto_tracing : NULL;
1747	default:
1748	fn = raw_tp_prog_func_proto(func_id, prog);
1749	if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
1750	fn = bpf_iter_get_func_proto(func_id, prog);
1751	return fn;
1752	}
1753	}
1754
1755	static bool raw_tp_prog_is_valid_access(int off, int size,
1756	enum bpf_access_type type,
1757	const struct bpf_prog *prog,
1758	struct bpf_insn_access_aux *info)
1759	{
1760	return bpf_tracing_ctx_access(off, size, type);
1761	}
1762
1763	static bool tracing_prog_is_valid_access(int off, int size,
1764	enum bpf_access_type type,
1765	const struct bpf_prog *prog,
1766	struct bpf_insn_access_aux *info)
1767	{
1768	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
1769	}
1770
1771	int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
1772	const union bpf_attr *kattr,
1773	union bpf_attr __user *uattr)
1774	{
1775	return -ENOTSUPP;
1776	}
1777
1778	const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
1779	.get_func_proto = raw_tp_prog_func_proto,
1780	.is_valid_access = raw_tp_prog_is_valid_access,
1781	};
1782
1783	const struct bpf_prog_ops raw_tracepoint_prog_ops = {
1784	#ifdef CONFIG_NET
1785	.test_run = bpf_prog_test_run_raw_tp,
1786	#endif
1787	};
1788
1789	const struct bpf_verifier_ops tracing_verifier_ops = {
1790	.get_func_proto = tracing_prog_func_proto,
1791	.is_valid_access = tracing_prog_is_valid_access,
1792	};
1793
1794	const struct bpf_prog_ops tracing_prog_ops = {
1795	.test_run = bpf_prog_test_run_tracing,
1796	};
1797
1798	static bool raw_tp_writable_prog_is_valid_access(int off, int size,
1799	enum bpf_access_type type,
1800	const struct bpf_prog *prog,
1801	struct bpf_insn_access_aux *info)
1802	{
1803	if (off == `0`) {
1804	if (size != sizeof(u64) \|\| type != BPF_READ)
1805	return false;
1806	info->reg_type = PTR_TO_TP_BUFFER;
1807	}
1808	return raw_tp_prog_is_valid_access(off, size, type, prog, info);
1809	}
1810
1811	const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
1812	.get_func_proto = raw_tp_prog_func_proto,
1813	.is_valid_access = raw_tp_writable_prog_is_valid_access,
1814	};
1815
1816	const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
1817	};
1818
1819	static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
1820	const struct bpf_prog *prog,
1821	struct bpf_insn_access_aux *info)
1822	{
1823	const int size_u64 = sizeof(u64);
1824
1825	if (off < `0` \|\| off >= sizeof(struct bpf_perf_event_data))
1826	return false;
1827	if (type != BPF_READ)
1828	return false;
1829	if (off % size != `0`) {
1830	if (sizeof(unsigned long) != `4`)
1831	return false;
1832	if (size != `8`)
1833	return false;
1834	if (off % size != `4`)
1835	return false;
1836	}
1837
1838	switch (off) {
1839	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
1840	bpf_ctx_record_field_size(aux: info, size: size_u64);
1841	if (!bpf_ctx_narrow_access_ok(off, size, size_default: size_u64))
1842	return false;
1843	break;
1844	case bpf_ctx_range(struct bpf_perf_event_data, addr):
1845	bpf_ctx_record_field_size(aux: info, size: size_u64);
1846	if (!bpf_ctx_narrow_access_ok(off, size, size_default: size_u64))
1847	return false;
1848	break;
1849	default:
1850	if (size != sizeof(long))
1851	return false;
1852	}
1853
1854	return true;
1855	}
1856
1857	static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
1858	const struct bpf_insn *si,
1859	struct bpf_insn *insn_buf,
1860	struct bpf_prog prog, u32 target_size)
1861	{
1862	struct bpf_insn *insn = insn_buf;
1863
1864	switch (si->off) {
1865	case offsetof(struct bpf_perf_event_data, sample_period):
1866	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_perf_event_data_kern,
1867	data), si->dst_reg, si->src_reg,
1868	offsetof(struct bpf_perf_event_data_kern, data));
1869	*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1870	bpf_target_off(struct perf_sample_data, period, `8`,
1871	target_size));
1872	break;
1873	case offsetof(struct bpf_perf_event_data, addr):
1874	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_perf_event_data_kern,
1875	data), si->dst_reg, si->src_reg,
1876	offsetof(struct bpf_perf_event_data_kern, data));
1877	*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
1878	bpf_target_off(struct perf_sample_data, addr, `8`,
1879	target_size));
1880	break;
1881	default:
1882	insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct* bpf_perf_event_data_kern,
1883	regs), si->dst_reg, si->src_reg,
1884	offsetof(struct bpf_perf_event_data_kern, regs));
1885	insn++ = BPF_LDX_MEM(BPF_SIZEOF(long*), si->dst_reg, si->dst_reg,
1886	si->off);
1887	break;
1888	}
1889
1890	return insn - insn_buf;
1891	}
1892
1893	const struct bpf_verifier_ops perf_event_verifier_ops = {
1894	.get_func_proto = pe_prog_func_proto,
1895	.is_valid_access = pe_prog_is_valid_access,
1896	.convert_ctx_access = pe_prog_convert_ctx_access,
1897	};
1898
1899	const struct bpf_prog_ops perf_event_prog_ops = {
1900	};
1901
/* Held while a perf event's program and its tp_event prog_array are read or changed. */
static DEFINE_MUTEX(bpf_event_mutex);

/* Upper bound on programs per prog_array, and on ids returned by a query. */
#define BPF_TRACE_MAX_PROGS 64
1905
1906	int perf_event_attach_bpf_prog(struct perf_event *event,
1907	struct bpf_prog *prog,
1908	u64 bpf_cookie)
1909	{
1910	struct bpf_prog_array *old_array;
1911	struct bpf_prog_array *new_array;
1912	int ret = -EEXIST;
1913
1914	/*
1915	* Kprobe override only works if they are on the function entry,
1916	* and only if they are on the opt-in list.
1917	*/
1918	if (prog->kprobe_override &&
1919	(!trace_kprobe_on_func_entry(call: event->tp_event) \|\|
1920	!trace_kprobe_error_injectable(call: event->tp_event)))
1921	return -EINVAL;
1922
1923	mutex_lock(&bpf_event_mutex);
1924
1925	if (event->prog)
1926	goto unlock;
1927
1928	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1929	if (old_array &&
1930	bpf_prog_array_length(progs: old_array) >= BPF_TRACE_MAX_PROGS) {
1931	ret = -E2BIG;
1932	goto unlock;
1933	}
1934
1935	ret = bpf_prog_array_copy(old_array, NULL, include_prog: prog, bpf_cookie, new_array: &new_array);
1936	if (ret < `0`)
1937	goto unlock;
1938
1939	/ set the new array to event->tp_event and set event->prog /
1940	event->prog = prog;
1941	event->bpf_cookie = bpf_cookie;
1942	rcu_assign_pointer(event->tp_event->prog_array, new_array);
1943	bpf_prog_array_free_sleepable(progs: old_array);
1944
1945	unlock:
1946	mutex_unlock(lock: &bpf_event_mutex);
1947	return ret;
1948	}
1949
1950	void perf_event_detach_bpf_prog(struct perf_event *event)
1951	{
1952	struct bpf_prog_array *old_array;
1953	struct bpf_prog_array *new_array;
1954	struct bpf_prog *prog = NULL;
1955	int ret;
1956
1957	mutex_lock(&bpf_event_mutex);
1958
1959	if (!event->prog)
1960	goto unlock;
1961
1962	old_array = bpf_event_rcu_dereference(event->tp_event->prog_array);
1963	if (!old_array)
1964	goto put;
1965
1966	ret = bpf_prog_array_copy(old_array, exclude_prog: event->prog, NULL, bpf_cookie: `0`, new_array: &new_array);
1967	if (ret < `0`) {
1968	bpf_prog_array_delete_safe(progs: old_array, old_prog: event->prog);
1969	} else {
1970	rcu_assign_pointer(event->tp_event->prog_array, new_array);
1971	bpf_prog_array_free_sleepable(progs: old_array);
1972	}
1973
1974	put:
1975	prog = event->prog;
1976	event->prog = NULL;
1977
1978	unlock:
1979	mutex_unlock(lock: &bpf_event_mutex);
1980
1981	if (prog) {
1982	/*
1983	* It could be that the bpf_prog is not sleepable (and will be freed
1984	* via normal RCU), but is called from a point that supports sleepable
1985	* programs and uses tasks-trace-RCU.
1986	*/
1987	synchronize_rcu_tasks_trace();
1988
1989	bpf_prog_put(prog);
1990	}
1991	}
1992
1993	int perf_event_query_prog_array(struct perf_event event, void* __user *info)
1994	{
1995	struct perf_event_query_bpf __user *uquery = info;
1996	struct perf_event_query_bpf query = {};
1997	struct bpf_prog_array *progs;
1998	u32 *ids, prog_cnt, ids_len;
1999	int ret;
2000
2001	if (!perfmon_capable())
2002	return -EPERM;
2003	if (event->attr.type != PERF_TYPE_TRACEPOINT)
2004	return -EINVAL;
2005	if (copy_from_user(to: &query, from: uquery, n: sizeof(query)))
2006	return -EFAULT;
2007
2008	ids_len = query.ids_len;
2009	if (ids_len > BPF_TRACE_MAX_PROGS)
2010	return -E2BIG;
2011	ids = kcalloc(ids_len, sizeof(u32), GFP_USER \| __GFP_NOWARN);
2012	if (!ids)
2013	return -ENOMEM;
2014	/*
2015	* The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
2016	* is required when user only wants to check for uquery->prog_cnt.
2017	* There is no need to check for it since the case is handled
2018	* gracefully in bpf_prog_array_copy_info.
2019	*/
2020
2021	mutex_lock(&bpf_event_mutex);
2022	progs = bpf_event_rcu_dereference(event->tp_event->prog_array);
2023	ret = bpf_prog_array_copy_info(array: progs, prog_ids: ids, request_cnt: ids_len, prog_cnt: &prog_cnt);
2024	mutex_unlock(lock: &bpf_event_mutex);
2025
2026	if (copy_to_user(to: &uquery->prog_cnt, from: &prog_cnt, n: sizeof(prog_cnt)) \|\|
2027	copy_to_user(to: uquery->ids, from: ids, n: ids_len * sizeof(u32)))
2028	ret = -EFAULT;
2029
2030	kfree(objp: ids);
2031	return ret;
2032	}
2033
2034	extern struct bpf_raw_event_map __start__bpf_raw_tp[];
2035	extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
2036
2037	struct bpf_raw_event_map bpf_get_raw_tracepoint(const* char *name)
2038	{
2039	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
2040
2041	for (; btp < __stop__bpf_raw_tp; btp++) {
2042	if (!strcmp(btp->tp->name, name))
2043	return btp;
2044	}
2045
2046	return bpf_get_raw_tracepoint_module(name);
2047	}
2048
2049	void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
2050	{
2051	struct module *mod;
2052
2053	guard(rcu)();
2054	mod = __module_address(addr: (unsigned long)btp);
2055	module_put(module: mod);
2056	}
2057
2058	static __always_inline
2059	void __bpf_trace_run(struct bpf_raw_tp_link link, u64 args)
2060	{
2061	struct bpf_prog *prog = link->link.prog;
2062	struct bpf_run_ctx *old_run_ctx;
2063	struct bpf_trace_run_ctx run_ctx;
2064
2065	cant_sleep();
2066	if (unlikely(this_cpu_inc_return(*(prog->active)) != `1`)) {
2067	bpf_prog_inc_misses_counter(prog);
2068	goto out;
2069	}
2070
2071	run_ctx.bpf_cookie = link->cookie;
2072	old_run_ctx = bpf_set_run_ctx(new_ctx: &run_ctx.run_ctx);
2073
2074	rcu_read_lock();
2075	(void) bpf_prog_run(prog, ctx: args);
2076	rcu_read_unlock();
2077
2078	bpf_reset_run_ctx(old_ctx: old_run_ctx);
2079	out:
2080	this_cpu_dec(*(prog->active));
2081	}
2082
/*
 * Preprocessor helpers that apply a macro to the first N items of a list,
 * joining the expansions with a delimiter.
 *
 * UNPACK strips the parentheses around a delimiter such as __DL_COM.
 * REPEAT_<N> emits fn(first), the delimiter, then REPEAT_<N-1> on the
 * remaining items. REPEAT(cnt, ...) picks the REPEAT_<cnt> variant.
 */
#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(fn, dl, first, ...)	fn(first)
#define REPEAT_2(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_1(fn, dl, __VA_ARGS__)
#define REPEAT_3(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_2(fn, dl, __VA_ARGS__)
#define REPEAT_4(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_3(fn, dl, __VA_ARGS__)
#define REPEAT_5(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_4(fn, dl, __VA_ARGS__)
#define REPEAT_6(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_5(fn, dl, __VA_ARGS__)
#define REPEAT_7(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_6(fn, dl, __VA_ARGS__)
#define REPEAT_8(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_7(fn, dl, __VA_ARGS__)
#define REPEAT_9(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_8(fn, dl, __VA_ARGS__)
#define REPEAT_10(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_9(fn, dl, __VA_ARGS__)
#define REPEAT_11(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_10(fn, dl, __VA_ARGS__)
#define REPEAT_12(fn, dl, first, ...)	fn(first) UNPACK dl REPEAT_11(fn, dl, __VA_ARGS__)
#define REPEAT(cnt, fn, dl, ...)	REPEAT_##cnt(fn, dl, __VA_ARGS__)

/* Per-index expansions: a u64 parameter declaration and its copy into args[]. */
#define SARG(idx)	u64 arg##idx
#define COPY(idx)	args[idx] = arg##idx

/* Parenthesized delimiters, unwrapped by UNPACK. */
#define __DL_COM	(,)
#define __DL_SEM	(;)

/* Index list handed to REPEAT(). */
#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
2105
2106	#define BPF_TRACE_DEFN_x(x) \
2107	void bpf_trace_run##x(struct bpf_raw_tp_link *link, \
2108	REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
2109	{ \
2110	u64 args[x]; \
2111	REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
2112	__bpf_trace_run(link, args); \
2113	} \
2114	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
2115	BPF_TRACE_DEFN_x(`1`);
2116	BPF_TRACE_DEFN_x(`2`);
2117	BPF_TRACE_DEFN_x(`3`);
2118	BPF_TRACE_DEFN_x(`4`);
2119	BPF_TRACE_DEFN_x(`5`);
2120	BPF_TRACE_DEFN_x(`6`);
2121	BPF_TRACE_DEFN_x(`7`);
2122	BPF_TRACE_DEFN_x(`8`);
2123	BPF_TRACE_DEFN_x(`9`);
2124	BPF_TRACE_DEFN_x(`10`);
2125	BPF_TRACE_DEFN_x(`11`);
2126	BPF_TRACE_DEFN_x(`12`);
2127
2128	int bpf_probe_register(struct bpf_raw_event_map btp, struct* bpf_raw_tp_link *link)
2129	{
2130	struct tracepoint *tp = btp->tp;
2131	struct bpf_prog *prog = link->link.prog;
2132
2133	/*
2134	* check that program doesn't access arguments beyond what's
2135	* available in this tracepoint
2136	*/
2137	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
2138	return -EINVAL;
2139
2140	if (prog->aux->max_tp_access > btp->writable_size)
2141	return -EINVAL;
2142
2143	return tracepoint_probe_register_may_exist(tp, probe: (void *)btp->bpf_func, data: link);
2144	}
2145
2146	int bpf_probe_unregister(struct bpf_raw_event_map btp, struct* bpf_raw_tp_link *link)
2147	{
2148	return tracepoint_probe_unregister(tp: btp->tp, probe: (void *)btp->bpf_func, data: link);
2149	}
2150
2151	int bpf_get_perf_event_info(const struct perf_event event, u32 prog_id,
2152	u32 fd_type, const* char **buf,
2153	u64 probe_offset, u64 probe_addr,
2154	unsigned long *missed)
2155	{
2156	bool is_tracepoint, is_syscall_tp;
2157	struct bpf_prog *prog;
2158	int flags, err = `0`;
2159
2160	prog = event->prog;
2161	if (!prog)
2162	return -ENOENT;
2163
2164	/ not supporting BPF_PROG_TYPE_PERF_EVENT yet /
2165	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
2166	return -EOPNOTSUPP;
2167
2168	*prog_id = prog->aux->id;
2169	flags = event->tp_event->flags;
2170	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
2171	is_syscall_tp = is_syscall_trace_event(tp_event: event->tp_event);
2172
2173	if (is_tracepoint \|\| is_syscall_tp) {
2174	*buf = is_tracepoint ? event->tp_event->tp->name
2175	: event->tp_event->name;
2176	/ We allow NULL pointer for tracepoint /
2177	if (fd_type)
2178	*fd_type = BPF_FD_TYPE_TRACEPOINT;
2179	if (probe_offset)
2180	*probe_offset = `0x0`;
2181	if (probe_addr)
2182	*probe_addr = `0x0`;
2183	} else {
2184	/ kprobe/uprobe /
2185	err = -EOPNOTSUPP;
2186	#ifdef CONFIG_KPROBE_EVENTS
2187	if (flags & TRACE_EVENT_FL_KPROBE)
2188	err = bpf_get_kprobe_info(event, fd_type, symbol: buf,
2189	probe_offset, probe_addr, missed,
2190	perf_type_tracepoint: event->attr.type == PERF_TYPE_TRACEPOINT);
2191	#endif
2192	#ifdef CONFIG_UPROBE_EVENTS
2193	if (flags & TRACE_EVENT_FL_UPROBE)
2194	err = bpf_get_uprobe_info(event, fd_type, filename: buf,
2195	probe_offset, probe_addr,
2196	perf_type_tracepoint: event->attr.type == PERF_TYPE_TRACEPOINT);
2197	#endif
2198	}
2199
2200	return err;
2201	}
2202
2203	static int __init send_signal_irq_work_init(void)
2204	{
2205	int cpu;
2206	struct send_signal_irq_work *work;
2207
2208	for_each_possible_cpu(cpu) {
2209	work = per_cpu_ptr(&send_signal_work, cpu);
2210	init_irq_work(work: &work->irq_work, func: do_bpf_send_signal);
2211	}
2212	return `0`;
2213	}
2214
2215	subsys_initcall(send_signal_irq_work_init);
2216
2217	#ifdef CONFIG_MODULES
2218	static int bpf_event_notify(struct notifier_block nb, unsigned* long op,
2219	void *module)
2220	{
2221	struct bpf_trace_module btm, tmp;
2222	struct module *mod = module;
2223	int ret = `0`;
2224
2225	if (mod->num_bpf_raw_events == `0` \|\|
2226	(op != MODULE_STATE_COMING && op != MODULE_STATE_GOING))
2227	goto out;
2228
2229	mutex_lock(&bpf_module_mutex);
2230
2231	switch (op) {
2232	case MODULE_STATE_COMING:
2233	btm = kzalloc(sizeof(*btm), GFP_KERNEL);
2234	if (btm) {
2235	btm->module = module;
2236	list_add(new: &btm->list, head: &bpf_trace_modules);
2237	} else {
2238	ret = -ENOMEM;
2239	}
2240	break;
2241	case MODULE_STATE_GOING:
2242	list_for_each_entry_safe(btm, tmp, &bpf_trace_modules, list) {
2243	if (btm->module == module) {
2244	list_del(entry: &btm->list);
2245	kfree(objp: btm);
2246	break;
2247	}
2248	}
2249	break;
2250	}
2251
2252	mutex_unlock(lock: &bpf_module_mutex);
2253
2254	out:
2255	return notifier_from_errno(err: ret);
2256	}
2257
2258	static struct notifier_block bpf_module_nb = {
2259	.notifier_call = bpf_event_notify,
2260	};
2261
2262	static int __init bpf_event_init(void)
2263	{
2264	register_module_notifier(nb: &bpf_module_nb);
2265	return `0`;
2266	}
2267
2268	fs_initcall(bpf_event_init);
2269	#endif /* CONFIG_MODULES */
2270
2271	struct bpf_session_run_ctx {
2272	struct bpf_run_ctx run_ctx;
2273	bool is_return;
2274	void *data;
2275	};
2276
2277	#ifdef CONFIG_FPROBE
2278	struct bpf_kprobe_multi_link {
2279	struct bpf_link link;
2280	struct fprobe fp;
2281	unsigned long *addrs;
2282	u64 *cookies;
2283	u32 cnt;
2284	u32 mods_cnt;
2285	struct module **mods;
2286	};
2287
2288	struct bpf_kprobe_multi_run_ctx {
2289	struct bpf_session_run_ctx session_ctx;
2290	struct bpf_kprobe_multi_link *link;
2291	unsigned long entry_ip;
2292	};
2293
/* Symbol names copied in from user space by copy_user_syms(). */
struct user_syms {
	const char **syms;	/* one pointer per symbol, pointing into buf */
	char *buf;		/* backing storage for the NUL-terminated names */
};
2298
2299	#ifndef CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS
2300	static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
2301	#define bpf_kprobe_multi_pt_regs_ptr() this_cpu_ptr(&bpf_kprobe_multi_pt_regs)
2302	#else
2303	#define bpf_kprobe_multi_pt_regs_ptr() (NULL)
2304	#endif
2305
/*
 * Translate @fentry_ip with ftrace_get_symaddr(); when that yields 0, fall
 * back to @fentry_ip itself.
 */
static unsigned long ftrace_get_entry_ip(unsigned long fentry_ip)
{
	unsigned long sym_addr = ftrace_get_symaddr(fentry_ip);

	if (sym_addr)
		return sym_addr;

	return fentry_ip;
}
2312
2313	static int copy_user_syms(struct user_syms us, unsigned* long __user *usyms, u32 cnt)
2314	{
2315	unsigned long __user usymbol;
2316	const char **syms = NULL;
2317	char buf = NULL, p;
2318	int err = -ENOMEM;
2319	unsigned int i;
2320
2321	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
2322	if (!syms)
2323	goto error;
2324
2325	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
2326	if (!buf)
2327	goto error;
2328
2329	for (p = buf, i = `0`; i < cnt; i++) {
2330	if (__get_user(usymbol, usyms + i)) {
2331	err = -EFAULT;
2332	goto error;
2333	}
2334	err = strncpy_from_user(dst: p, src: (const char __user *) usymbol, KSYM_NAME_LEN);
2335	if (err == KSYM_NAME_LEN)
2336	err = -E2BIG;
2337	if (err < `0`)
2338	goto error;
2339	syms[i] = p;
2340	p += err + `1`;
2341	}
2342
2343	us->syms = syms;
2344	us->buf = buf;
2345	return `0`;
2346
2347	error:
2348	if (err) {
2349	kvfree(addr: syms);
2350	kvfree(addr: buf);
2351	}
2352	return err;
2353	}
2354
2355	static void kprobe_multi_put_modules(struct module **mods, u32 cnt)
2356	{
2357	u32 i;
2358
2359	for (i = `0`; i < cnt; i++)
2360	module_put(module: mods[i]);
2361	}
2362
2363	static void free_user_syms(struct user_syms *us)
2364	{
2365	kvfree(addr: us->syms);
2366	kvfree(addr: us->buf);
2367	}
2368
2369	static void bpf_kprobe_multi_link_release(struct bpf_link *link)
2370	{
2371	struct bpf_kprobe_multi_link *kmulti_link;
2372
2373	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2374	unregister_fprobe(fp: &kmulti_link->fp);
2375	kprobe_multi_put_modules(mods: kmulti_link->mods, cnt: kmulti_link->mods_cnt);
2376	}
2377
2378	static void bpf_kprobe_multi_link_dealloc(struct bpf_link *link)
2379	{
2380	struct bpf_kprobe_multi_link *kmulti_link;
2381
2382	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2383	kvfree(addr: kmulti_link->addrs);
2384	kvfree(addr: kmulti_link->cookies);
2385	kfree(objp: kmulti_link->mods);
2386	kfree(objp: kmulti_link);
2387	}
2388
2389	static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
2390	struct bpf_link_info *info)
2391	{
2392	u64 __user *ucookies = u64_to_user_ptr(info->kprobe_multi.cookies);
2393	u64 __user *uaddrs = u64_to_user_ptr(info->kprobe_multi.addrs);
2394	struct bpf_kprobe_multi_link *kmulti_link;
2395	u32 ucount = info->kprobe_multi.count;
2396	int err = `0`, i;
2397
2398	if (!uaddrs ^ !ucount)
2399	return -EINVAL;
2400	if (ucookies && !ucount)
2401	return -EINVAL;
2402
2403	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2404	info->kprobe_multi.count = kmulti_link->cnt;
2405	info->kprobe_multi.flags = kmulti_link->link.flags;
2406	info->kprobe_multi.missed = kmulti_link->fp.nmissed;
2407
2408	if (!uaddrs)
2409	return `0`;
2410	if (ucount < kmulti_link->cnt)
2411	err = -ENOSPC;
2412	else
2413	ucount = kmulti_link->cnt;
2414
2415	if (ucookies) {
2416	if (kmulti_link->cookies) {
2417	if (copy_to_user(to: ucookies, from: kmulti_link->cookies, n: ucount * sizeof(u64)))
2418	return -EFAULT;
2419	} else {
2420	for (i = `0`; i < ucount; i++) {
2421	if (put_user(`0`, ucookies + i))
2422	return -EFAULT;
2423	}
2424	}
2425	}
2426
2427	if (kallsyms_show_value(current_cred())) {
2428	if (copy_to_user(to: uaddrs, from: kmulti_link->addrs, n: ucount * sizeof(u64)))
2429	return -EFAULT;
2430	} else {
2431	for (i = `0`; i < ucount; i++) {
2432	if (put_user(`0`, uaddrs + i))
2433	return -EFAULT;
2434	}
2435	}
2436	return err;
2437	}
2438
2439	#ifdef CONFIG_PROC_FS
2440	static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link,
2441	struct seq_file *seq)
2442	{
2443	struct bpf_kprobe_multi_link *kmulti_link;
2444
2445	kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link);
2446
2447	seq_printf(m: seq,
2448	fmt: "kprobe_cnt:\t%u\n"
2449	"missed:\t%lu\n",
2450	kmulti_link->cnt,
2451	kmulti_link->fp.nmissed);
2452
2453	seq_printf(m: seq, fmt: "%s\t %s\n", "cookie", "func");
2454	for (int i = `0`; i < kmulti_link->cnt; i++) {
2455	seq_printf(m: seq,
2456	fmt: "%llu\t %pS\n",
2457	kmulti_link->cookies[i],
2458	(void *)kmulti_link->addrs[i]);
2459	}
2460	}
2461	#endif
2462
2463	static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
2464	.release = bpf_kprobe_multi_link_release,
2465	.dealloc_deferred = bpf_kprobe_multi_link_dealloc,
2466	.fill_link_info = bpf_kprobe_multi_link_fill_link_info,
2467	#ifdef CONFIG_PROC_FS
2468	.show_fdinfo = bpf_kprobe_multi_show_fdinfo,
2469	#endif
2470	};
2471
2472	static void bpf_kprobe_multi_cookie_swap(void a, void* b, int* size, const void *priv)
2473	{
2474	const struct bpf_kprobe_multi_link *link = priv;
2475	unsigned long addr_a = a, addr_b = b;
2476	u64 cookie_a, cookie_b;
2477
2478	cookie_a = link->cookies + (addr_a - link->addrs);
2479	cookie_b = link->cookies + (addr_b - link->addrs);
2480
2481	/ swap addr_a/addr_b and cookie_a/cookie_b values /
2482	swap(addr_a, addr_b);
2483	swap(cookie_a, cookie_b);
2484	}
2485
/*
 * Three-way comparison of the two unsigned long values that @a and @b point
 * to: returns -1, 0 or 1. Used as a bsearch()/sort comparison callback.
 */
static int bpf_kprobe_multi_addrs_cmp(const void *a, const void *b)
{
	const unsigned long *addr_a = a, *addr_b = b;

	if (*addr_a == *addr_b)
		return 0;
	return *addr_a < *addr_b ? -1 : 1;
}
2494
/*
 * sort_r() comparison callback: orders addresses exactly like
 * bpf_kprobe_multi_addrs_cmp(); @priv is unused.
 */
static int bpf_kprobe_multi_cookie_cmp(const void *a, const void *b, const void *priv)
{
	return bpf_kprobe_multi_addrs_cmp(a, b);
}
2499
2500	static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2501	{
2502	struct bpf_kprobe_multi_run_ctx *run_ctx;
2503	struct bpf_kprobe_multi_link *link;
2504	u64 *cookie, entry_ip;
2505	unsigned long *addr;
2506
2507	if (WARN_ON_ONCE(!ctx))
2508	return `0`;
2509	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2510	session_ctx.run_ctx);
2511	link = run_ctx->link;
2512	if (!link->cookies)
2513	return `0`;
2514	entry_ip = run_ctx->entry_ip;
2515	addr = bsearch(key: &entry_ip, base: link->addrs, num: link->cnt, size: sizeof(entry_ip),
2516	cmp: bpf_kprobe_multi_addrs_cmp);
2517	if (!addr)
2518	return `0`;
2519	cookie = link->cookies + (addr - link->addrs);
2520	return *cookie;
2521	}
2522
2523	static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
2524	{
2525	struct bpf_kprobe_multi_run_ctx *run_ctx;
2526
2527	run_ctx = container_of(current->bpf_ctx, struct bpf_kprobe_multi_run_ctx,
2528	session_ctx.run_ctx);
2529	return run_ctx->entry_ip;
2530	}
2531
2532	static __always_inline int
2533	kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
2534	unsigned long entry_ip, struct ftrace_regs *fregs,
2535	bool is_return, void *data)
2536	{
2537	struct bpf_kprobe_multi_run_ctx run_ctx = {
2538	.session_ctx = {
2539	.is_return = is_return,
2540	.data = data,
2541	},
2542	.link = link,
2543	.entry_ip = entry_ip,
2544	};
2545	struct bpf_run_ctx *old_run_ctx;
2546	struct pt_regs *regs;
2547	int err;
2548
2549	/*
2550	* graph tracer framework ensures we won't migrate, so there is no need
2551	* to use migrate_disable for bpf_prog_run again. The check here just for
2552	* __this_cpu_inc_return.
2553	*/
2554	cant_sleep();
2555
2556	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != `1`)) {
2557	bpf_prog_inc_misses_counter(prog: link->link.prog);
2558	err = `1`;
2559	goto out;
2560	}
2561
2562	rcu_read_lock();
2563	regs = ftrace_partial_regs(fregs, bpf_kprobe_multi_pt_regs_ptr());
2564	old_run_ctx = bpf_set_run_ctx(new_ctx: &run_ctx.session_ctx.run_ctx);
2565	err = bpf_prog_run(prog: link->link.prog, ctx: regs);
2566	bpf_reset_run_ctx(old_ctx: old_run_ctx);
2567	rcu_read_unlock();
2568
2569	out:
2570	__this_cpu_dec(bpf_prog_active);
2571	return err;
2572	}
2573
2574	static int
2575	kprobe_multi_link_handler(struct fprobe fp, unsigned* long fentry_ip,
2576	unsigned long ret_ip, struct ftrace_regs *fregs,
2577	void *data)
2578	{
2579	struct bpf_kprobe_multi_link *link;
2580	int err;
2581
2582	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2583	err = kprobe_multi_link_prog_run(link, entry_ip: ftrace_get_entry_ip(fentry_ip),
2584	fregs, is_return: false, data);
2585	return is_kprobe_session(prog: link->link.prog) ? err : `0`;
2586	}
2587
2588	static void
2589	kprobe_multi_link_exit_handler(struct fprobe fp, unsigned* long fentry_ip,
2590	unsigned long ret_ip, struct ftrace_regs *fregs,
2591	void *data)
2592	{
2593	struct bpf_kprobe_multi_link *link;
2594
2595	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
2596	kprobe_multi_link_prog_run(link, entry_ip: ftrace_get_entry_ip(fentry_ip),
2597	fregs, is_return: true, data);
2598	}
2599
/*
 * sort_r() comparison callback for an array of C strings: @a and @b point at
 * array elements (const char *), which are compared with strcmp(). @priv is
 * unused.
 */
static int symbols_cmp_r(const void *a, const void *b, const void *priv)
{
	const char **str_a = (const char **) a;
	const char **str_b = (const char **) b;

	return strcmp(*str_a, *str_b);
}
2607
2608	struct multi_symbols_sort {
2609	const char **funcs;
2610	u64 *cookies;
2611	};
2612
2613	static void symbols_swap_r(void a, void* b, int* size, const void *priv)
2614	{
2615	const struct multi_symbols_sort *data = priv;
2616	const char name_a = a, name_b = b;
2617
2618	swap(name_a, name_b);
2619
2620	/ If defined, swap also related cookies. /
2621	if (data->cookies) {
2622	u64 cookie_a, cookie_b;
2623
2624	cookie_a = data->cookies + (name_a - data->funcs);
2625	cookie_b = data->cookies + (name_b - data->funcs);
2626	swap(cookie_a, cookie_b);
2627	}
2628	}
2629
/* Growable array of module pointers, filled by add_module(). */
struct modules_array {
	struct module **mods;	/* storage, grown with krealloc_array() */
	int mods_cnt;		/* entries in use */
	int mods_cap;		/* entries allocated */
};
2635
2636	static int add_module(struct modules_array arr, struct* module *mod)
2637	{
2638	struct module **mods;
2639
2640	if (arr->mods_cnt == arr->mods_cap) {
2641	arr->mods_cap = max(`16`, arr->mods_cap * `3` / `2`);
2642	mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
2643	if (!mods)
2644	return -ENOMEM;
2645	arr->mods = mods;
2646	}
2647
2648	arr->mods[arr->mods_cnt] = mod;
2649	arr->mods_cnt++;
2650	return `0`;
2651	}
2652
2653	static bool has_module(struct modules_array arr, struct* module *mod)
2654	{
2655	int i;
2656
2657	for (i = arr->mods_cnt - `1`; i >= `0`; i--) {
2658	if (arr->mods[i] == mod)
2659	return true;
2660	}
2661	return false;
2662	}
2663
2664	static int get_modules_for_addrs(struct module **mods, unsigned* long *addrs, u32 addrs_cnt)
2665	{
2666	struct modules_array arr = {};
2667	u32 i, err = `0`;
2668
2669	for (i = `0`; i < addrs_cnt; i++) {
2670	bool skip_add = false;
2671	struct module *mod;
2672
2673	scoped_guard(rcu) {
2674	mod = __module_address(addr: addrs[i]);
2675	/ Either no module or it's already stored /
2676	if (!mod \|\| has_module(arr: &arr, mod)) {
2677	skip_add = true;
2678	break; / scoped_guard /
2679	}
2680	if (!try_module_get(module: mod))
2681	err = -EINVAL;
2682	}
2683	if (skip_add)
2684	continue;
2685	if (err)
2686	break;
2687	err = add_module(arr: &arr, mod);
2688	if (err) {
2689	module_put(module: mod);
2690	break;
2691	}
2692	}
2693
2694	/ We return either err < 0 in case of error, ... /
2695	if (err) {
2696	kprobe_multi_put_modules(mods: arr.mods, cnt: arr.mods_cnt);
2697	kfree(objp: arr.mods);
2698	return err;
2699	}
2700
2701	/ or number of modules found if everything is ok. /
2702	*mods = arr.mods;
2703	return arr.mods_cnt;
2704	}
2705
2706	static int addrs_check_error_injection_list(unsigned long *addrs, u32 cnt)
2707	{
2708	u32 i;
2709
2710	for (i = `0`; i < cnt; i++) {
2711	if (!within_error_injection_list(addr: addrs[i]))
2712	return -EINVAL;
2713	}
2714	return `0`;
2715	}
2716
2717	int bpf_kprobe_multi_link_attach(const union bpf_attr attr, struct* bpf_prog *prog)
2718	{
2719	struct bpf_kprobe_multi_link *link = NULL;
2720	struct bpf_link_primer link_primer;
2721	void __user *ucookies;
2722	unsigned long *addrs;
2723	u32 flags, cnt, size;
2724	void __user *uaddrs;
2725	u64 *cookies = NULL;
2726	void __user *usyms;
2727	int err;
2728
2729	/ no support for 32bit archs yet /
2730	if (sizeof(u64) != sizeof(void *))
2731	return -EOPNOTSUPP;
2732
2733	if (attr->link_create.flags)
2734	return -EINVAL;
2735
2736	if (!is_kprobe_multi(prog))
2737	return -EINVAL;
2738
2739	/ Writing to context is not allowed for kprobes. /
2740	if (prog->aux->kprobe_write_ctx)
2741	return -EINVAL;
2742
2743	flags = attr->link_create.kprobe_multi.flags;
2744	if (flags & ~BPF_F_KPROBE_MULTI_RETURN)
2745	return -EINVAL;
2746
2747	uaddrs = u64_to_user_ptr(attr->link_create.kprobe_multi.addrs);
2748	usyms = u64_to_user_ptr(attr->link_create.kprobe_multi.syms);
2749	if (!!uaddrs == !!usyms)
2750	return -EINVAL;
2751
2752	cnt = attr->link_create.kprobe_multi.cnt;
2753	if (!cnt)
2754	return -EINVAL;
2755	if (cnt > MAX_KPROBE_MULTI_CNT)
2756	return -E2BIG;
2757
2758	size = cnt * sizeof(*addrs);
2759	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2760	if (!addrs)
2761	return -ENOMEM;
2762
2763	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
2764	if (ucookies) {
2765	cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
2766	if (!cookies) {
2767	err = -ENOMEM;
2768	goto error;
2769	}
2770	if (copy_from_user(to: cookies, from: ucookies, n: size)) {
2771	err = -EFAULT;
2772	goto error;
2773	}
2774	}
2775
2776	if (uaddrs) {
2777	if (copy_from_user(to: addrs, from: uaddrs, n: size)) {
2778	err = -EFAULT;
2779	goto error;
2780	}
2781	} else {
2782	struct multi_symbols_sort data = {
2783	.cookies = cookies,
2784	};
2785	struct user_syms us;
2786
2787	err = copy_user_syms(us: &us, usyms, cnt);
2788	if (err)
2789	goto error;
2790
2791	if (cookies)
2792	data.funcs = us.syms;
2793
2794	sort_r(base: us.syms, num: cnt, size: sizeof(*us.syms), cmp_func: symbols_cmp_r,
2795	swap_func: symbols_swap_r, priv: &data);
2796
2797	err = ftrace_lookup_symbols(sorted_syms: us.syms, cnt, addrs);
2798	free_user_syms(us: &us);
2799	if (err)
2800	goto error;
2801	}
2802
2803	if (prog->kprobe_override && addrs_check_error_injection_list(addrs, cnt)) {
2804	err = -EINVAL;
2805	goto error;
2806	}
2807
2808	link = kzalloc(sizeof(*link), GFP_KERNEL);
2809	if (!link) {
2810	err = -ENOMEM;
2811	goto error;
2812	}
2813
2814	bpf_link_init(link: &link->link, type: BPF_LINK_TYPE_KPROBE_MULTI,
2815	ops: &bpf_kprobe_multi_link_lops, prog, attach_type: attr->link_create.attach_type);
2816
2817	err = bpf_link_prime(link: &link->link, primer: &link_primer);
2818	if (err)
2819	goto error;
2820
2821	if (!(flags & BPF_F_KPROBE_MULTI_RETURN))
2822	link->fp.entry_handler = kprobe_multi_link_handler;
2823	if ((flags & BPF_F_KPROBE_MULTI_RETURN) \|\| is_kprobe_session(prog))
2824	link->fp.exit_handler = kprobe_multi_link_exit_handler;
2825	if (is_kprobe_session(prog))
2826	link->fp.entry_data_size = sizeof(u64);
2827
2828	link->addrs = addrs;
2829	link->cookies = cookies;
2830	link->cnt = cnt;
2831	link->link.flags = flags;
2832
2833	if (cookies) {
2834	/*
2835	* Sorting addresses will trigger sorting cookies as well
2836	* (check bpf_kprobe_multi_cookie_swap). This way we can
2837	* find cookie based on the address in bpf_get_attach_cookie
2838	* helper.
2839	*/
2840	sort_r(base: addrs, num: cnt, size: sizeof(*addrs),
2841	cmp_func: bpf_kprobe_multi_cookie_cmp,
2842	swap_func: bpf_kprobe_multi_cookie_swap,
2843	priv: link);
2844	}
2845
2846	err = get_modules_for_addrs(mods: &link->mods, addrs, addrs_cnt: cnt);
2847	if (err < `0`) {
2848	bpf_link_cleanup(primer: &link_primer);
2849	return err;
2850	}
2851	link->mods_cnt = err;
2852
2853	err = register_fprobe_ips(fp: &link->fp, addrs, num: cnt);
2854	if (err) {
2855	kprobe_multi_put_modules(mods: link->mods, cnt: link->mods_cnt);
2856	bpf_link_cleanup(primer: &link_primer);
2857	return err;
2858	}
2859
2860	return bpf_link_settle(primer: &link_primer);
2861
2862	error:
2863	kfree(objp: link);
2864	kvfree(addr: addrs);
2865	kvfree(addr: cookies);
2866	return err;
2867	}
2868	#else /* !CONFIG_FPROBE */
2869	int bpf_kprobe_multi_link_attach(const union bpf_attr attr, struct* bpf_prog *prog)
2870	{
2871	return -EOPNOTSUPP;
2872	}
2873	static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx)
2874	{
2875	return `0`;
2876	}
2877	static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
2878	{
2879	return `0`;
2880	}
2881	#endif
2882
2883	#ifdef CONFIG_UPROBES
2884	struct bpf_uprobe_multi_link;
2885
2886	struct bpf_uprobe {
2887	struct bpf_uprobe_multi_link *link;
2888	loff_t offset;
2889	unsigned long ref_ctr_offset;
2890	u64 cookie;
2891	struct uprobe *uprobe;
2892	struct uprobe_consumer consumer;
2893	bool session;
2894	};
2895
2896	struct bpf_uprobe_multi_link {
2897	struct path path;
2898	struct bpf_link link;
2899	u32 cnt;
2900	struct bpf_uprobe *uprobes;
2901	struct task_struct *task;
2902	};
2903
2904	struct bpf_uprobe_multi_run_ctx {
2905	struct bpf_session_run_ctx session_ctx;
2906	unsigned long entry_ip;
2907	struct bpf_uprobe *uprobe;
2908	};
2909
2910	static void bpf_uprobe_unregister(struct bpf_uprobe *uprobes, u32 cnt)
2911	{
2912	u32 i;
2913
2914	for (i = `0`; i < cnt; i++)
2915	uprobe_unregister_nosync(uprobe: uprobes[i].uprobe, uc: &uprobes[i].consumer);
2916
2917	if (cnt)
2918	uprobe_unregister_sync();
2919	}
2920
2921	static void bpf_uprobe_multi_link_release(struct bpf_link *link)
2922	{
2923	struct bpf_uprobe_multi_link *umulti_link;
2924
2925	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
2926	bpf_uprobe_unregister(uprobes: umulti_link->uprobes, cnt: umulti_link->cnt);
2927	if (umulti_link->task)
2928	put_task_struct(t: umulti_link->task);
2929	path_put(&umulti_link->path);
2930	}
2931
2932	static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
2933	{
2934	struct bpf_uprobe_multi_link *umulti_link;
2935
2936	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
2937	kvfree(addr: umulti_link->uprobes);
2938	kfree(objp: umulti_link);
2939	}
2940
2941	static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
2942	struct bpf_link_info *info)
2943	{
2944	u64 __user *uref_ctr_offsets = u64_to_user_ptr(info->uprobe_multi.ref_ctr_offsets);
2945	u64 __user *ucookies = u64_to_user_ptr(info->uprobe_multi.cookies);
2946	u64 __user *uoffsets = u64_to_user_ptr(info->uprobe_multi.offsets);
2947	u64 __user *upath = u64_to_user_ptr(info->uprobe_multi.path);
2948	u32 upath_size = info->uprobe_multi.path_size;
2949	struct bpf_uprobe_multi_link *umulti_link;
2950	u32 ucount = info->uprobe_multi.count;
2951	int err = `0`, i;
2952	char p, buf;
2953	long left = `0`;
2954
2955	if (!upath ^ !upath_size)
2956	return -EINVAL;
2957
2958	if ((uoffsets \|\| uref_ctr_offsets \|\| ucookies) && !ucount)
2959	return -EINVAL;
2960
2961	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
2962	info->uprobe_multi.count = umulti_link->cnt;
2963	info->uprobe_multi.flags = umulti_link->link.flags;
2964	info->uprobe_multi.pid = umulti_link->task ?
2965	task_pid_nr_ns(tsk: umulti_link->task, ns: task_active_pid_ns(current)) : `0`;
2966
2967	upath_size = upath_size ? min_t(u32, upath_size, PATH_MAX) : PATH_MAX;
2968	buf = kmalloc(upath_size, GFP_KERNEL);
2969	if (!buf)
2970	return -ENOMEM;
2971	p = d_path(&umulti_link->path, buf, upath_size);
2972	if (IS_ERR(ptr: p)) {
2973	kfree(objp: buf);
2974	return PTR_ERR(ptr: p);
2975	}
2976	upath_size = buf + upath_size - p;
2977
2978	if (upath)
2979	left = copy_to_user(to: upath, from: p, n: upath_size);
2980	kfree(objp: buf);
2981	if (left)
2982	return -EFAULT;
2983	info->uprobe_multi.path_size = upath_size;
2984
2985	if (!uoffsets && !ucookies && !uref_ctr_offsets)
2986	return `0`;
2987
2988	if (ucount < umulti_link->cnt)
2989	err = -ENOSPC;
2990	else
2991	ucount = umulti_link->cnt;
2992
2993	for (i = `0`; i < ucount; i++) {
2994	if (uoffsets &&
2995	put_user(umulti_link->uprobes[i].offset, uoffsets + i))
2996	return -EFAULT;
2997	if (uref_ctr_offsets &&
2998	put_user(umulti_link->uprobes[i].ref_ctr_offset, uref_ctr_offsets + i))
2999	return -EFAULT;
3000	if (ucookies &&
3001	put_user(umulti_link->uprobes[i].cookie, ucookies + i))
3002	return -EFAULT;
3003	}
3004
3005	return err;
3006	}
3007
3008	#ifdef CONFIG_PROC_FS
3009	static void bpf_uprobe_multi_show_fdinfo(const struct bpf_link *link,
3010	struct seq_file *seq)
3011	{
3012	struct bpf_uprobe_multi_link *umulti_link;
3013	char p, buf;
3014	pid_t pid;
3015
3016	umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
3017
3018	buf = kmalloc(PATH_MAX, GFP_KERNEL);
3019	if (!buf)
3020	return;
3021
3022	p = d_path(&umulti_link->path, buf, PATH_MAX);
3023	if (IS_ERR(ptr: p)) {
3024	kfree(objp: buf);
3025	return;
3026	}
3027
3028	pid = umulti_link->task ?
3029	task_pid_nr_ns(tsk: umulti_link->task, ns: task_active_pid_ns(current)) : `0`;
3030	seq_printf(m: seq,
3031	fmt: "uprobe_cnt:\t%u\n"
3032	"pid:\t%u\n"
3033	"path:\t%s\n",
3034	umulti_link->cnt, pid, p);
3035
3036	seq_printf(m: seq, fmt: "%s\t %s\t %s\n", "cookie", "offset", "ref_ctr_offset");
3037	for (int i = `0`; i < umulti_link->cnt; i++) {
3038	seq_printf(m: seq,
3039	fmt: "%llu\t %#llx\t %#lx\n",
3040	umulti_link->uprobes[i].cookie,
3041	umulti_link->uprobes[i].offset,
3042	umulti_link->uprobes[i].ref_ctr_offset);
3043	}
3044
3045	kfree(objp: buf);
3046	}
3047	#endif
3048
3049	static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
3050	.release = bpf_uprobe_multi_link_release,
3051	.dealloc_deferred = bpf_uprobe_multi_link_dealloc,
3052	.fill_link_info = bpf_uprobe_multi_link_fill_link_info,
3053	#ifdef CONFIG_PROC_FS
3054	.show_fdinfo = bpf_uprobe_multi_show_fdinfo,
3055	#endif
3056	};
3057
3058	static int uprobe_prog_run(struct bpf_uprobe *uprobe,
3059	unsigned long entry_ip,
3060	struct pt_regs *regs,
3061	bool is_return, void *data)
3062	{
3063	struct bpf_uprobe_multi_link *link = uprobe->link;
3064	struct bpf_uprobe_multi_run_ctx run_ctx = {
3065	.session_ctx = {
3066	.is_return = is_return,
3067	.data = data,
3068	},
3069	.entry_ip = entry_ip,
3070	.uprobe = uprobe,
3071	};
3072	struct bpf_prog *prog = link->link.prog;
3073	bool sleepable = prog->sleepable;
3074	struct bpf_run_ctx *old_run_ctx;
3075	int err;
3076
3077	if (link->task && !same_thread_group(current, p2: link->task))
3078	return `0`;
3079
3080	if (sleepable)
3081	rcu_read_lock_trace();
3082	else
3083	rcu_read_lock();
3084
3085	migrate_disable();
3086
3087	old_run_ctx = bpf_set_run_ctx(new_ctx: &run_ctx.session_ctx.run_ctx);
3088	err = bpf_prog_run(prog: link->link.prog, ctx: regs);
3089	bpf_reset_run_ctx(old_ctx: old_run_ctx);
3090
3091	migrate_enable();
3092
3093	if (sleepable)
3094	rcu_read_unlock_trace();
3095	else
3096	rcu_read_unlock();
3097	return err;
3098	}
3099
3100	static bool
3101	uprobe_multi_link_filter(struct uprobe_consumer con, struct* mm_struct *mm)
3102	{
3103	struct bpf_uprobe *uprobe;
3104
3105	uprobe = container_of(con, struct bpf_uprobe, consumer);
3106	return uprobe->link->task->mm == mm;
3107	}
3108
3109	static int
3110	uprobe_multi_link_handler(struct uprobe_consumer con, struct* pt_regs *regs,
3111	__u64 *data)
3112	{
3113	struct bpf_uprobe *uprobe;
3114	int ret;
3115
3116	uprobe = container_of(con, struct bpf_uprobe, consumer);
3117	ret = uprobe_prog_run(uprobe, entry_ip: instruction_pointer(regs), regs, is_return: false, data);
3118	if (uprobe->session)
3119	return ret ? UPROBE_HANDLER_IGNORE : `0`;
3120	return `0`;
3121	}
3122
3123	static int
3124	uprobe_multi_link_ret_handler(struct uprobe_consumer con, unsigned* long func, struct pt_regs *regs,
3125	__u64 *data)
3126	{
3127	struct bpf_uprobe *uprobe;
3128
3129	uprobe = container_of(con, struct bpf_uprobe, consumer);
3130	uprobe_prog_run(uprobe, entry_ip: func, regs, is_return: true, data);
3131	return `0`;
3132	}
3133
3134	static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3135	{
3136	struct bpf_uprobe_multi_run_ctx *run_ctx;
3137
3138	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3139	session_ctx.run_ctx);
3140	return run_ctx->entry_ip;
3141	}
3142
3143	static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3144	{
3145	struct bpf_uprobe_multi_run_ctx *run_ctx;
3146
3147	run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx,
3148	session_ctx.run_ctx);
3149	return run_ctx->uprobe->cookie;
3150	}
3151
3152	int bpf_uprobe_multi_link_attach(const union bpf_attr attr, struct* bpf_prog *prog)
3153	{
3154	struct bpf_uprobe_multi_link *link = NULL;
3155	unsigned long __user *uref_ctr_offsets;
3156	struct bpf_link_primer link_primer;
3157	struct bpf_uprobe *uprobes = NULL;
3158	struct task_struct *task = NULL;
3159	unsigned long __user *uoffsets;
3160	u64 __user *ucookies;
3161	void __user *upath;
3162	u32 flags, cnt, i;
3163	struct path path;
3164	char *name;
3165	pid_t pid;
3166	int err;
3167
3168	/ no support for 32bit archs yet /
3169	if (sizeof(u64) != sizeof(void *))
3170	return -EOPNOTSUPP;
3171
3172	if (attr->link_create.flags)
3173	return -EINVAL;
3174
3175	if (!is_uprobe_multi(prog))
3176	return -EINVAL;
3177
3178	flags = attr->link_create.uprobe_multi.flags;
3179	if (flags & ~BPF_F_UPROBE_MULTI_RETURN)
3180	return -EINVAL;
3181
3182	/*
3183	* path, offsets and cnt are mandatory,
3184	* ref_ctr_offsets and cookies are optional
3185	*/
3186	upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path);
3187	uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets);
3188	cnt = attr->link_create.uprobe_multi.cnt;
3189	pid = attr->link_create.uprobe_multi.pid;
3190
3191	if (!upath \|\| !uoffsets \|\| !cnt \|\| pid < `0`)
3192	return -EINVAL;
3193	if (cnt > MAX_UPROBE_MULTI_CNT)
3194	return -E2BIG;
3195
3196	uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets);
3197	ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies);
3198
3199	name = strndup_user(upath, PATH_MAX);
3200	if (IS_ERR(ptr: name)) {
3201	err = PTR_ERR(ptr: name);
3202	return err;
3203	}
3204
3205	err = kern_path(name, LOOKUP_FOLLOW, &path);
3206	kfree(objp: name);
3207	if (err)
3208	return err;
3209
3210	if (!d_is_reg(dentry: path.dentry)) {
3211	err = -EBADF;
3212	goto error_path_put;
3213	}
3214
3215	if (pid) {
3216	rcu_read_lock();
3217	task = get_pid_task(pid: find_vpid(nr: pid), PIDTYPE_TGID);
3218	rcu_read_unlock();
3219	if (!task) {
3220	err = -ESRCH;
3221	goto error_path_put;
3222	}
3223	}
3224
3225	err = -ENOMEM;
3226
3227	link = kzalloc(sizeof(*link), GFP_KERNEL);
3228	uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL);
3229
3230	if (!uprobes \|\| !link)
3231	goto error_free;
3232
3233	for (i = `0`; i < cnt; i++) {
3234	if (__get_user(uprobes[i].offset, uoffsets + i)) {
3235	err = -EFAULT;
3236	goto error_free;
3237	}
3238	if (uprobes[i].offset < `0`) {
3239	err = -EINVAL;
3240	goto error_free;
3241	}
3242	if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) {
3243	err = -EFAULT;
3244	goto error_free;
3245	}
3246	if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) {
3247	err = -EFAULT;
3248	goto error_free;
3249	}
3250
3251	uprobes[i].link = link;
3252
3253	if (!(flags & BPF_F_UPROBE_MULTI_RETURN))
3254	uprobes[i].consumer.handler = uprobe_multi_link_handler;
3255	if (flags & BPF_F_UPROBE_MULTI_RETURN \|\| is_uprobe_session(prog))
3256	uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler;
3257	if (is_uprobe_session(prog))
3258	uprobes[i].session = true;
3259	if (pid)
3260	uprobes[i].consumer.filter = uprobe_multi_link_filter;
3261	}
3262
3263	link->cnt = cnt;
3264	link->uprobes = uprobes;
3265	link->path = path;
3266	link->task = task;
3267	link->link.flags = flags;
3268
3269	bpf_link_init(link: &link->link, type: BPF_LINK_TYPE_UPROBE_MULTI,
3270	ops: &bpf_uprobe_multi_link_lops, prog, attach_type: attr->link_create.attach_type);
3271
3272	for (i = `0`; i < cnt; i++) {
3273	uprobes[i].uprobe = uprobe_register(inode: d_real_inode(dentry: link->path.dentry),
3274	offset: uprobes[i].offset,
3275	ref_ctr_offset: uprobes[i].ref_ctr_offset,
3276	uc: &uprobes[i].consumer);
3277	if (IS_ERR(ptr: uprobes[i].uprobe)) {
3278	err = PTR_ERR(ptr: uprobes[i].uprobe);
3279	link->cnt = i;
3280	goto error_unregister;
3281	}
3282	}
3283
3284	err = bpf_link_prime(link: &link->link, primer: &link_primer);
3285	if (err)
3286	goto error_unregister;
3287
3288	return bpf_link_settle(primer: &link_primer);
3289
3290	error_unregister:
3291	bpf_uprobe_unregister(uprobes, cnt: link->cnt);
3292
3293	error_free:
3294	kvfree(addr: uprobes);
3295	kfree(objp: link);
3296	if (task)
3297	put_task_struct(t: task);
3298	error_path_put:
3299	path_put(&path);
3300	return err;
3301	}
3302	#else /* !CONFIG_UPROBES */
3303	int bpf_uprobe_multi_link_attach(const union bpf_attr attr, struct* bpf_prog *prog)
3304	{
3305	return -EOPNOTSUPP;
3306	}
3307	static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx)
3308	{
3309	return `0`;
3310	}
3311	static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx)
3312	{
3313	return `0`;
3314	}
3315	#endif /* CONFIG_UPROBES */
3316
3317	__bpf_kfunc_start_defs();
3318
3319	__bpf_kfunc bool bpf_session_is_return(void)
3320	{
3321	struct bpf_session_run_ctx *session_ctx;
3322
3323	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3324	return session_ctx->is_return;
3325	}
3326
3327	__bpf_kfunc __u64 bpf_session_cookie(void*)
3328	{
3329	struct bpf_session_run_ctx *session_ctx;
3330
3331	session_ctx = container_of(current->bpf_ctx, struct bpf_session_run_ctx, run_ctx);
3332	return session_ctx->data;
3333	}
3334
3335	__bpf_kfunc_end_defs();
3336
3337	BTF_KFUNCS_START(kprobe_multi_kfunc_set_ids)
3338	BTF_ID_FLAGS(func, bpf_session_is_return)
3339	BTF_ID_FLAGS(func, bpf_session_cookie)
3340	BTF_KFUNCS_END(kprobe_multi_kfunc_set_ids)
3341
3342	static int bpf_kprobe_multi_filter(const struct bpf_prog *prog, u32 kfunc_id)
3343	{
3344	if (!btf_id_set8_contains(set: &kprobe_multi_kfunc_set_ids, id: kfunc_id))
3345	return `0`;
3346
3347	if (!is_kprobe_session(prog) && !is_uprobe_session(prog))
3348	return -EACCES;
3349
3350	return `0`;
3351	}
3352
3353	static const struct btf_kfunc_id_set bpf_kprobe_multi_kfunc_set = {
3354	.owner = THIS_MODULE,
3355	.set = &kprobe_multi_kfunc_set_ids,
3356	.filter = bpf_kprobe_multi_filter,
3357	};
3358
3359	static int __init bpf_kprobe_multi_kfuncs_init(void)
3360	{
3361	return register_btf_kfunc_id_set(prog_type: BPF_PROG_TYPE_KPROBE, s: &bpf_kprobe_multi_kfunc_set);
3362	}
3363
3364	late_initcall(bpf_kprobe_multi_kfuncs_init);
3365
3366	typedef int (copy_fn_t)(void* dst, const* void src, u32 size, struct* task_struct *tsk);
3367
3368	/*
3369	* The __always_inline is to make sure the compiler doesn't
3370	* generate indirect calls into callbacks, which is expensive,
3371	* on some kernel configurations. This allows compiler to put
3372	* direct calls into all the specific callback implementations
3373	* (copy_user_data_sleepable, copy_user_data_nofault, and so on)
3374	*/
3375	static __always_inline int __bpf_dynptr_copy_str(struct bpf_dynptr *dptr, u64 doff, u64 size,
3376	const void *unsafe_src,
3377	copy_fn_t str_copy_fn,
3378	struct task_struct *tsk)
3379	{
3380	struct bpf_dynptr_kern *dst;
3381	u64 chunk_sz, off;
3382	void *dst_slice;
3383	int cnt, err;
3384	char buf[`256`];
3385
3386	dst_slice = bpf_dynptr_slice_rdwr(p: dptr, offset: doff, NULL, buffer__szk: size);
3387	if (likely(dst_slice))
3388	return str_copy_fn(dst_slice, unsafe_src, size, tsk);
3389
3390	dst = (struct bpf_dynptr_kern *)dptr;
3391	if (bpf_dynptr_check_off_len(ptr: dst, offset: doff, len: size))
3392	return -E2BIG;
3393
3394	for (off = `0`; off < size; off += chunk_sz - `1`) {
3395	chunk_sz = min_t(u64, sizeof(buf), size - off);
3396	/ Expect str_copy_fn to return count of copied bytes, including*
3397	* zero terminator. Next iteration increment off by chunk_sz - 1 to
3398	* overwrite NUL.
3399	*/
3400	cnt = str_copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
3401	if (cnt < `0`)
3402	return cnt;
3403	err = __bpf_dynptr_write(dst, offset: doff + off, src: buf, len: cnt, flags: `0`);
3404	if (err)
3405	return err;
3406	if (cnt < chunk_sz \|\| chunk_sz == `1`) / we are done /
3407	return off + cnt;
3408	}
3409	return off;
3410	}
3411
3412	static __always_inline int __bpf_dynptr_copy(const struct bpf_dynptr *dptr, u64 doff,
3413	u64 size, const void *unsafe_src,
3414	copy_fn_t copy_fn, struct task_struct *tsk)
3415	{
3416	struct bpf_dynptr_kern *dst;
3417	void *dst_slice;
3418	char buf[`256`];
3419	u64 off, chunk_sz;
3420	int err;
3421
3422	dst_slice = bpf_dynptr_slice_rdwr(p: dptr, offset: doff, NULL, buffer__szk: size);
3423	if (likely(dst_slice))
3424	return copy_fn(dst_slice, unsafe_src, size, tsk);
3425
3426	dst = (struct bpf_dynptr_kern *)dptr;
3427	if (bpf_dynptr_check_off_len(ptr: dst, offset: doff, len: size))
3428	return -E2BIG;
3429
3430	for (off = `0`; off < size; off += chunk_sz) {
3431	chunk_sz = min_t(u64, sizeof(buf), size - off);
3432	err = copy_fn(buf, unsafe_src + off, chunk_sz, tsk);
3433	if (err)
3434	return err;
3435	err = __bpf_dynptr_write(dst, offset: doff + off, src: buf, len: chunk_sz, flags: `0`);
3436	if (err)
3437	return err;
3438	}
3439	return `0`;
3440	}
3441
3442	static __always_inline int copy_user_data_nofault(void dst, const* void *unsafe_src,
3443	u32 size, struct task_struct *tsk)
3444	{
3445	return copy_from_user_nofault(dst, src: (const void __user *)unsafe_src, size);
3446	}
3447
3448	static __always_inline int copy_user_data_sleepable(void dst, const* void *unsafe_src,
3449	u32 size, struct task_struct *tsk)
3450	{
3451	int ret;
3452
3453	if (!tsk) { / Read from the current task /
3454	ret = copy_from_user(to: dst, from: (const void __user *)unsafe_src, n: size);
3455	if (ret)
3456	return -EFAULT;
3457	return `0`;
3458	}
3459
3460	ret = access_process_vm(tsk, addr: (unsigned long)unsafe_src, buf: dst, len: size, gup_flags: `0`);
3461	if (ret != size)
3462	return -EFAULT;
3463	return `0`;
3464	}
3465
3466	static __always_inline int copy_kernel_data_nofault(void dst, const* void *unsafe_src,
3467	u32 size, struct task_struct *tsk)
3468	{
3469	return copy_from_kernel_nofault(dst, src: unsafe_src, size);
3470	}
3471
3472	static __always_inline int copy_user_str_nofault(void dst, const* void *unsafe_src,
3473	u32 size, struct task_struct *tsk)
3474	{
3475	return strncpy_from_user_nofault(dst, unsafe_addr: (const void __user *)unsafe_src, count: size);
3476	}
3477
3478	static __always_inline int copy_user_str_sleepable(void dst, const* void *unsafe_src,
3479	u32 size, struct task_struct *tsk)
3480	{
3481	int ret;
3482
3483	if (unlikely(size == `0`))
3484	return `0`;
3485
3486	if (tsk) {
3487	ret = copy_remote_vm_str(tsk, addr: (unsigned long)unsafe_src, buf: dst, len: size, gup_flags: `0`);
3488	} else {
3489	ret = strncpy_from_user(dst, src: (const void __user *)unsafe_src, count: size - `1`);
3490	/ strncpy_from_user does not guarantee NUL termination /
3491	if (ret >= `0`)
3492	((char *)dst)[ret] = `'\0'`;
3493	}
3494
3495	if (ret < `0`)
3496	return ret;
3497	return ret + `1`;
3498	}
3499
3500	static __always_inline int copy_kernel_str_nofault(void dst, const* void *unsafe_src,
3501	u32 size, struct task_struct *tsk)
3502	{
3503	return strncpy_from_kernel_nofault(dst, unsafe_addr: unsafe_src, count: size);
3504	}
3505
3506	__bpf_kfunc_start_defs();
3507
3508	__bpf_kfunc int bpf_send_signal_task(struct task_struct task, int* sig, enum pid_type type,
3509	u64 value)
3510	{
3511	if (type != PIDTYPE_PID && type != PIDTYPE_TGID)
3512	return -EINVAL;
3513
3514	return bpf_send_signal_common(sig, type, task, value);
3515	}
3516
3517	__bpf_kfunc int bpf_probe_read_user_dynptr(struct bpf_dynptr *dptr, u64 off,
3518	u64 size, const void __user *unsafe_ptr__ign)
3519	{
3520	return __bpf_dynptr_copy(dptr, doff: off, size, unsafe_src: (const void *)unsafe_ptr__ign,
3521	copy_fn: copy_user_data_nofault, NULL);
3522	}
3523
3524	__bpf_kfunc int bpf_probe_read_kernel_dynptr(struct bpf_dynptr *dptr, u64 off,
3525	u64 size, const void *unsafe_ptr__ign)
3526	{
3527	return __bpf_dynptr_copy(dptr, doff: off, size, unsafe_src: unsafe_ptr__ign,
3528	copy_fn: copy_kernel_data_nofault, NULL);
3529	}
3530
3531	__bpf_kfunc int bpf_probe_read_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
3532	u64 size, const void __user *unsafe_ptr__ign)
3533	{
3534	return __bpf_dynptr_copy_str(dptr, doff: off, size, unsafe_src: (const void *)unsafe_ptr__ign,
3535	str_copy_fn: copy_user_str_nofault, NULL);
3536	}
3537
3538	__bpf_kfunc int bpf_probe_read_kernel_str_dynptr(struct bpf_dynptr *dptr, u64 off,
3539	u64 size, const void *unsafe_ptr__ign)
3540	{
3541	return __bpf_dynptr_copy_str(dptr, doff: off, size, unsafe_src: unsafe_ptr__ign,
3542	str_copy_fn: copy_kernel_str_nofault, NULL);
3543	}
3544
3545	__bpf_kfunc int bpf_copy_from_user_dynptr(struct bpf_dynptr *dptr, u64 off,
3546	u64 size, const void __user *unsafe_ptr__ign)
3547	{
3548	return __bpf_dynptr_copy(dptr, doff: off, size, unsafe_src: (const void *)unsafe_ptr__ign,
3549	copy_fn: copy_user_data_sleepable, NULL);
3550	}
3551
3552	__bpf_kfunc int bpf_copy_from_user_str_dynptr(struct bpf_dynptr *dptr, u64 off,
3553	u64 size, const void __user *unsafe_ptr__ign)
3554	{
3555	return __bpf_dynptr_copy_str(dptr, doff: off, size, unsafe_src: (const void *)unsafe_ptr__ign,
3556	str_copy_fn: copy_user_str_sleepable, NULL);
3557	}
3558
3559	__bpf_kfunc int bpf_copy_from_user_task_dynptr(struct bpf_dynptr *dptr, u64 off,
3560	u64 size, const void __user *unsafe_ptr__ign,
3561	struct task_struct *tsk)
3562	{
3563	return __bpf_dynptr_copy(dptr, doff: off, size, unsafe_src: (const void *)unsafe_ptr__ign,
3564	copy_fn: copy_user_data_sleepable, tsk);
3565	}
3566
3567	__bpf_kfunc int bpf_copy_from_user_task_str_dynptr(struct bpf_dynptr *dptr, u64 off,
3568	u64 size, const void __user *unsafe_ptr__ign,
3569	struct task_struct *tsk)
3570	{
3571	return __bpf_dynptr_copy_str(dptr, doff: off, size, unsafe_src: (const void *)unsafe_ptr__ign,
3572	str_copy_fn: copy_user_str_sleepable, tsk);
3573	}
3574
3575	__bpf_kfunc_end_defs();
3576

/* End of linux/kernel/trace/bpf_trace.c */