// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "filetable.h"
#include "io_uring.h"
#include "tctx.h"
#include "sqpoll.h"
#include "uring_cmd.h"
#include "poll.h"
#include "timeout.h"
#include "waitid.h"
#include "futex.h"
#include "cancel.h"

struct io_cancel {
	struct file *file;
	u64 addr;
	u32 flags;
	s32 fd;
	u8 opcode;
};

#define CANCEL_FLAGS	(IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \
			 IORING_ASYNC_CANCEL_ANY | IORING_ASYNC_CANCEL_FD_FIXED | \
			 IORING_ASYNC_CANCEL_USERDATA | IORING_ASYNC_CANCEL_OP)

/*
 * Returns true if the request matches the criteria outlined by 'cd'.
 */
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd)
{
	bool match_user_data = cd->flags & IORING_ASYNC_CANCEL_USERDATA;

	if (req->ctx != cd->ctx)
		return false;

	if (!(cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP)))
		match_user_data = true;

	if (cd->flags & IORING_ASYNC_CANCEL_ANY)
		goto check_seq;
	if (cd->flags & IORING_ASYNC_CANCEL_FD) {
		if (req->file != cd->file)
			return false;
	}
	if (cd->flags & IORING_ASYNC_CANCEL_OP) {
		if (req->opcode != cd->opcode)
			return false;
	}
	if (match_user_data && req->cqe.user_data != cd->data)
		return false;
	if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
check_seq:
		if (io_cancel_match_sequence(req, cd->seq))
			return false;
	}

	return true;
}

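/*
 * io-wq match callback: check whether a queued work item's request matches
 * the cancelation criteria passed in 'data' (a struct io_cancel_data).
 */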
static bool io_cancel_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_cancel_data *cd = data;

	return io_cancel_req_match(req, cd);
}

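/*
 * Try to cancel matching requests on the given task's io-wq. Maps the
 * io_wq_cancel_cb() result to 0 (canceled), -EALREADY (found but already
 * running) or -ENOENT (no match).
 */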
static int io_async_cancel_one(struct io_uring_task *tctx,
			       struct io_cancel_data *cd)
{
	enum io_wq_cancel cancel_ret;
	int ret = 0;
	bool all;

	if (!tctx || !tctx->io_wq)
		return -ENOENT;

	all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all);
	switch (cancel_ret) {
	case IO_WQ_CANCEL_OK:
		ret = 0;
		break;
	case IO_WQ_CANCEL_RUNNING:
		ret = -EALREADY;
		break;
	case IO_WQ_CANCEL_NOTFOUND:
		ret = -ENOENT;
		break;
	}

	return ret;
}

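/*
 * Try to find and cancel a request matching 'cd': first via the task's
 * io-wq, then poll, waitid and futex, and finally timeouts (unless the
 * match is by file descriptor).
 */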
int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
		  unsigned issue_flags)
{
	struct io_ring_ctx *ctx = cd->ctx;
	int ret;

	WARN_ON_ONCE(!io_wq_current_is_worker() && tctx != current->io_uring);

	ret = io_async_cancel_one(tctx, cd);
	/*
	 * Fall through even for -EALREADY, as we may have a poll armed
	 * that needs unarming.
	 */
	if (!ret)
		return 0;

	ret = io_poll_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_waitid_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	ret = io_futex_cancel(ctx, cd, issue_flags);
	if (ret != -ENOENT)
		return ret;

	spin_lock(&ctx->completion_lock);
	if (!(cd->flags & IORING_ASYNC_CANCEL_FD))
		ret = io_timeout_cancel(ctx, cd);
	spin_unlock(&ctx->completion_lock);
	return ret;
}

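/*
 * Prepare IORING_OP_ASYNC_CANCEL: read the target user_data, cancel flags,
 * fd and opcode from the SQE and reject invalid flag combinations.
 */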
int io_async_cancel_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);

	if (unlikely(req->flags & REQ_F_BUFFER_SELECT))
		return -EINVAL;
	if (sqe->off || sqe->splice_fd_in)
		return -EINVAL;

	cancel->addr = READ_ONCE(sqe->addr);
	cancel->flags = READ_ONCE(sqe->cancel_flags);
	if (cancel->flags & ~CANCEL_FLAGS)
		return -EINVAL;
	if (cancel->flags & IORING_ASYNC_CANCEL_FD) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->fd = READ_ONCE(sqe->fd);
	}
	if (cancel->flags & IORING_ASYNC_CANCEL_OP) {
		if (cancel->flags & IORING_ASYNC_CANCEL_ANY)
			return -EINVAL;
		cancel->opcode = READ_ONCE(sqe->len);
	}

	return 0;
}

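/*
 * Core async cancel loop. First try the issuing task's own context; if
 * nothing (more) is found there, fall back to the slow path and try the
 * io-wq of every task attached to the ring. With ALL/ANY set the return
 * value is the number of canceled requests, otherwise it's the result of
 * the first match (or -ENOENT).
 */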
static int __io_async_cancel(struct io_cancel_data *cd,
			     struct io_uring_task *tctx,
			     unsigned int issue_flags)
{
	bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY);
	struct io_ring_ctx *ctx = cd->ctx;
	struct io_tctx_node *node;
	int ret, nr = 0;

	do {
		ret = io_try_cancel(tctx, cd, issue_flags);
		if (ret == -ENOENT)
			break;
		if (!all)
			return ret;
		nr++;
	} while (1);

	/* slow path, try all io-wq's */
	__set_current_state(TASK_RUNNING);
	io_ring_submit_lock(ctx, issue_flags);
	mutex_lock(&ctx->tctx_lock);
	ret = -ENOENT;
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		ret = io_async_cancel_one(node->task->io_uring, cd);
		if (ret != -ENOENT) {
			if (!all)
				break;
			nr++;
		}
	}
	mutex_unlock(&ctx->tctx_lock);
	io_ring_submit_unlock(ctx, issue_flags);
	return all ? nr : ret;
}

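/*
 * Issue IORING_OP_ASYNC_CANCEL: build the cancelation descriptor from the
 * prepared request, resolve the target file if fd matching was requested,
 * and post the result as the request's CQE.
 */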
int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_cancel *cancel = io_kiocb_to_cmd(req, struct io_cancel);
	struct io_cancel_data cd = {
		.ctx = req->ctx,
		.data = cancel->addr,
		.flags = cancel->flags,
		.opcode = cancel->opcode,
		.seq = atomic_inc_return(&req->ctx->cancel_seq),
	};
	struct io_uring_task *tctx = req->tctx;
	int ret;

	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
		if (req->flags & REQ_F_FIXED_FILE ||
		    cd.flags & IORING_ASYNC_CANCEL_FD_FIXED) {
			req->flags |= REQ_F_FIXED_FILE;
			req->file = io_file_get_fixed(req, cancel->fd,
						      issue_flags);
		} else {
			req->file = io_file_get_normal(req, cancel->fd);
		}
		if (!req->file) {
			ret = -EBADF;
			goto done;
		}
		cd.file = req->file;
	}

	ret = __io_async_cancel(&cd, tctx, issue_flags);
done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_COMPLETE;
}

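/*
 * One pass of synchronous cancelation: re-resolve a fixed-file target (the
 * caller may have dropped the uring_lock since the last pass) and run the
 * regular async cancel machinery with the lock held.
 */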
static int __io_sync_cancel(struct io_uring_task *tctx,
			    struct io_cancel_data *cd, int fd)
{
	struct io_ring_ctx *ctx = cd->ctx;

	/* fixed must be grabbed every time since we drop the uring_lock */
	if ((cd->flags & IORING_ASYNC_CANCEL_FD) &&
	    (cd->flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		struct io_rsrc_node *node;

		node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
		if (unlikely(!node))
			return -EBADF;
		cd->file = io_slot_file(node);
		if (!cd->file)
			return -EBADF;
	}

	return __io_async_cancel(cd, tctx, 0);
}

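/*
 * Synchronous cancelation, entered through ring registration
 * (IORING_REGISTER_SYNC_CANCEL). Retries until the target is gone, the
 * caller is interrupted, or the user supplied timeout expires.
 */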
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
	__must_hold(&ctx->uring_lock)
{
	struct io_cancel_data cd = {
		.ctx = ctx,
		.seq = atomic_inc_return(&ctx->cancel_seq),
	};
	ktime_t timeout = KTIME_MAX;
	struct io_uring_sync_cancel_reg sc;
	struct file *file = NULL;
	DEFINE_WAIT(wait);
	int ret, i;

	if (copy_from_user(&sc, arg, sizeof(sc)))
		return -EFAULT;
	if (sc.flags & ~CANCEL_FLAGS)
		return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad); i++)
		if (sc.pad[i])
			return -EINVAL;
	for (i = 0; i < ARRAY_SIZE(sc.pad2); i++)
		if (sc.pad2[i])
			return -EINVAL;

	cd.data = sc.addr;
	cd.flags = sc.flags;
	cd.opcode = sc.opcode;

	/* we can grab a normal file descriptor upfront */
	if ((cd.flags & IORING_ASYNC_CANCEL_FD) &&
	   !(cd.flags & IORING_ASYNC_CANCEL_FD_FIXED)) {
		file = fget(sc.fd);
		if (!file)
			return -EBADF;
		cd.file = file;
	}

	ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

	/* found something, done! */
	if (ret != -EALREADY)
		goto out;

	if (sc.timeout.tv_sec != -1UL || sc.timeout.tv_nsec != -1UL) {
		struct timespec64 ts = {
			.tv_sec = sc.timeout.tv_sec,
			.tv_nsec = sc.timeout.tv_nsec
		};

		timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
	}

	/*
	 * Keep looking until we get -ENOENT. We'll get woken every time a
	 * request completes and will retry the cancelation.
	 */
	do {
		cd.seq = atomic_inc_return(&ctx->cancel_seq);

		prepare_to_wait(&ctx->cq_wait, &wait, TASK_INTERRUPTIBLE);

		ret = __io_sync_cancel(current->io_uring, &cd, sc.fd);

		mutex_unlock(&ctx->uring_lock);
		if (ret != -EALREADY)
			break;

		ret = io_run_task_work_sig(ctx);
		if (ret < 0)
			break;
		ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS);
		if (!ret) {
			ret = -ETIME;
			break;
		}
		mutex_lock(&ctx->uring_lock);
	} while (1);

	finish_wait(&ctx->cq_wait, &wait);
	mutex_lock(&ctx->uring_lock);

	if (ret == -ENOENT || ret > 0)
		ret = 0;
out:
	if (file)
		fput(file);
	return ret;
}

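/*
 * Walk a cancelation hash list and cancel every request that belongs to
 * @tctx (or every request if @cancel_all), using the opcode-specific
 * 'cancel' callback. Returns true if anything was found.
 */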
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
			  struct hlist_head *list, bool cancel_all,
			  bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;

	lockdep_assert_held(&ctx->uring_lock);

	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_match_task_safe(req, tctx, cancel_all))
			continue;
		hlist_del_init(&req->hash_node);
		if (cancel(req))
			found = true;
	}

	return found;
}

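/*
 * Cancel requests on a hash list that match the criteria in 'cd'. Stops
 * after the first match unless IORING_ASYNC_CANCEL_ALL is set. Returns the
 * number of canceled requests, or -ENOENT if none matched.
 */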
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		     unsigned int issue_flags, struct hlist_head *list,
		     bool (*cancel)(struct io_kiocb *))
{
	struct hlist_node *tmp;
	struct io_kiocb *req;
	int nr = 0;

	io_ring_submit_lock(ctx, issue_flags);
	hlist_for_each_entry_safe(req, tmp, list, hash_node) {
		if (!io_cancel_req_match(req, cd))
			continue;
		if (cancel(req))
			nr++;
		if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
			break;
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return nr ?: -ENOENT;
}

static bool io_match_linked(struct io_kiocb *head)
{
	struct io_kiocb *req;

	io_for_each_link(req, head) {
		if (req->flags & REQ_F_INFLIGHT)
			return true;
	}
	return false;
}

/*
 * As io_match_task() but protected against racing with linked timeouts.
 * User must not hold timeout_lock.
 */
bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
			bool cancel_all)
{
	bool matched;

	if (tctx && head->tctx != tctx)
		return false;
	if (cancel_all)
		return true;

	if (head->flags & REQ_F_LINK_TIMEOUT) {
		struct io_ring_ctx *ctx = head->ctx;

		/* protect against races with linked timeouts */
		raw_spin_lock_irq(&ctx->timeout_lock);
		matched = io_match_linked(head);
		raw_spin_unlock_irq(&ctx->timeout_lock);
	} else {
		matched = io_match_linked(head);
	}
	return matched;
}

void __io_uring_cancel(bool cancel_all)
{
	io_uring_unreg_ringfd();
	io_uring_cancel_generic(cancel_all, NULL);
}

struct io_task_cancel {
	struct io_uring_task *tctx;
	bool all;
};

static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
	struct io_task_cancel *cancel = data;

	return io_match_task_safe(req, cancel->tctx, cancel->all);
}

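/*
 * Cancel deferred (drained) requests: everything queued up to and including
 * the last entry matching @tctx is taken off ctx->defer_list and failed
 * with -ECANCELED via task_work. Returns true if anything was canceled.
 */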
static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all)
{
	struct io_defer_entry *de;
	LIST_HEAD(list);

	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
		if (io_match_task_safe(de->req, tctx, cancel_all)) {
			list_cut_position(&list, &ctx->defer_list, &de->list);
			break;
		}
	}
	if (list_empty(&list))
		return false;

	while (!list_empty(&list)) {
		de = list_first_entry(&list, struct io_defer_entry, list);
		list_del_init(&de->list);
		ctx->nr_drained -= io_linked_nr(de->req);
		io_req_task_queue_fail(de->req, -ECANCELED);
		kfree(de);
	}
	return true;
}

__cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
{
	struct io_kiocb *req = container_of(work, struct io_kiocb, work);

	return req->ctx == data;
}

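/*
 * Cancel any io-wq work for this ring across every task that has attached
 * to it. Returns true if at least one matching work item was found.
 */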
static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
{
	struct io_tctx_node *node;
	enum io_wq_cancel cret;
	bool ret = false;

	mutex_lock(&ctx->uring_lock);
	mutex_lock(&ctx->tctx_lock);
	list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
		struct io_uring_task *tctx = node->task->io_uring;

		/*
		 * io_wq will stay alive while we hold uring_lock, because it's
		 * killed after ctx nodes, which requires taking the lock.
		 */
		if (!tctx || !tctx->io_wq)
			continue;
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}
	mutex_unlock(&ctx->tctx_lock);
	mutex_unlock(&ctx->uring_lock);

	return ret;
}

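/*
 * Cancel all requests associated with @tctx (or with the whole ring if
 * @tctx is NULL): io-wq work, iopoll, deferred, poll, waitid, futex,
 * uring_cmd and timeout requests. Returns true if anything was found, so
 * the caller can keep iterating until everything is gone.
 */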
__cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
					 struct io_uring_task *tctx,
					 bool cancel_all, bool is_sqpoll_thread)
{
	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
	enum io_wq_cancel cret;
	bool ret = false;

	/* set it so io_req_local_work_add() would wake us up */
	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
		atomic_set(&ctx->cq_wait_nr, 1);
		smp_mb();
	}

	/* failed during ring init, it couldn't have issued any requests */
	if (!ctx->rings)
		return false;

	if (!tctx) {
		ret |= io_uring_try_cancel_iowq(ctx);
	} else if (tctx->io_wq) {
		/*
		 * Cancels requests of all rings, not only @ctx, but
		 * it's fine as the task is in exit/exec.
		 */
		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
				       &cancel, true);
		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
	}

	/* SQPOLL thread does its own polling */
	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
	    is_sqpoll_thread) {
		while (!wq_list_empty(&ctx->iopoll_list)) {
			io_iopoll_try_reap_events(ctx);
			ret = true;
			cond_resched();
		}
	}

	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
	    io_allowed_defer_tw_run(ctx))
		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
	mutex_lock(&ctx->uring_lock);
	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
	mutex_unlock(&ctx->uring_lock);
	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
	if (tctx)
		ret |= io_run_task_work() > 0;
	else
		ret |= flush_delayed_work(&ctx->fallback_work);
	return ret;
}

static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked)
{
	if (tracked)
		return atomic_read(&tctx->inflight_tracked);
	return percpu_counter_sum(&tctx->inflight);
}

/*
 * Find any io_uring ctx that this task has registered or done IO on, and cancel
 * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation.
 */
__cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
{
	struct io_uring_task *tctx = current->io_uring;
	struct io_ring_ctx *ctx;
	struct io_tctx_node *node;
	unsigned long index;
	s64 inflight;
	DEFINE_WAIT(wait);

	WARN_ON_ONCE(sqd && sqpoll_task_locked(sqd) != current);

	if (!current->io_uring)
		return;
	if (tctx->io_wq)
		io_wq_exit_start(tctx->io_wq);

	atomic_inc(&tctx->in_cancel);
	do {
		bool loop = false;

		io_uring_drop_tctx_refs(current);
		if (!tctx_inflight(tctx, !cancel_all))
			break;

		/* read completions before cancelations */
		inflight = tctx_inflight(tctx, false);
		if (!inflight)
			break;

		if (!sqd) {
			xa_for_each(&tctx->xa, index, node) {
				/* sqpoll task will cancel all its requests */
				if (node->ctx->sq_data)
					continue;
				loop |= io_uring_try_cancel_requests(node->ctx,
							current->io_uring,
							cancel_all, false);
			}
		} else {
			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
				loop |= io_uring_try_cancel_requests(ctx,
							current->io_uring,
							cancel_all, true);
		}

		if (loop) {
			cond_resched();
			continue;
		}

		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
		io_run_task_work();
		io_uring_drop_tctx_refs(current);
		xa_for_each(&tctx->xa, index, node) {
			if (io_local_work_pending(node->ctx)) {
				WARN_ON_ONCE(node->ctx->submitter_task &&
					     node->ctx->submitter_task != current);
				goto end_wait;
			}
		}
		/*
		 * If we've seen completions, retry without waiting. This
		 * avoids a race where a completion comes in before we did
		 * prepare_to_wait().
		 */
		if (inflight == tctx_inflight(tctx, !cancel_all))
			schedule();
end_wait:
		finish_wait(&tctx->wait, &wait);
	} while (1);

	io_uring_clean_tctx(tctx);
	if (cancel_all) {
		/*
		 * We shouldn't run task_works after cancel, so just leave
		 * ->in_cancel set for normal exit.
		 */
		atomic_dec(&tctx->in_cancel);
		/* for exec all current's requests should be gone, kill tctx */
		__io_uring_free(current);
	}
}