/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/dma-fence-array.h>
#include <linux/dma-fence-chain.h>
#include <linux/irq_work.h>
#include <linux/prefetch.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>

#include <drm/drm_print.h>

#include "gem/i915_gem_context.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
#include "gt/intel_engine.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_reset.h"
#include "gt/intel_ring.h"
#include "gt/intel_rps.h"

#include "i915_active.h"
#include "i915_config.h"
#include "i915_deps.h"
#include "i915_driver.h"
#include "i915_drv.h"
#include "i915_trace.h"

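/*
 * Callbacks attached to rq->execute_cb fire when the request is first
 * submitted to HW (or found already inflight); each one releases the
 * i915_sw_fence of a waiter that asked to be coupled to our *execution*
 * rather than our completion.
 */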
struct execute_cb {
	struct irq_work work;
	struct i915_sw_fence *fence;
};

static struct kmem_cache *slab_requests;
static struct kmem_cache *slab_execute_cbs;

static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return dev_name(to_request(fence)->i915->drm.dev);
}

static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
	const struct i915_gem_context *ctx;

	/*
	 * The timeline struct (as part of the ppgtt underneath a context)
	 * may be freed when the request is no longer in use by the GPU.
	 * We could extend the life of a context to beyond that of all
	 * fences, possibly keeping the hw resource around indefinitely,
	 * or we just give them a false name. Since
	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
	 * lie seems justifiable.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

	ctx = i915_request_gem_context(to_request(fence));
	if (!ctx)
		return "[" DRIVER_NAME "]";

	return ctx->name;
}

static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

static bool i915_fence_enable_signaling(struct dma_fence *fence)
{
	return i915_request_enable_breadcrumb(to_request(fence));
}

static signed long i915_fence_wait(struct dma_fence *fence,
				   bool interruptible,
				   signed long timeout)
{
	return i915_request_wait_timeout(to_request(fence),
					 interruptible | I915_WAIT_PRIORITY,
					 timeout);
}

struct kmem_cache *i915_request_slab_cache(void)
{
	return slab_requests;
}

static void i915_fence_release(struct dma_fence *fence)
{
	struct i915_request *rq = to_request(fence);

	GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT &&
		   rq->guc_prio != GUC_PRIO_FINI);

	i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
	if (rq->batch_res) {
		i915_vma_resource_put(rq->batch_res);
		rq->batch_res = NULL;
	}

	/*
	 * The request is put onto a RCU freelist (i.e. the address
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
	i915_sw_fence_fini(&rq->submit);
	i915_sw_fence_fini(&rq->semaphore);

	/*
	 * Keep one request on each engine for reserved use under mempressure.
	 *
	 * We do not hold a reference to the engine here and so have to be
	 * very careful in what rq->engine we poke. The virtual engine is
	 * referenced via the rq->context and we released that ref during
	 * i915_request_retire(), ergo we must not dereference a virtual
	 * engine here. Not that we would want to, as the only consumer of
	 * the reserved engine->request_pool is the power management parking,
	 * which must-not-fail, and that is only run on the physical engines.
	 *
	 * Since the request must have been executed to have completed,
	 * we know that it will have been processed by the HW and will
	 * not be unsubmitted again, so rq->engine and rq->execution_mask
	 * at this point is stable. rq->execution_mask will be a single
	 * bit if the last and _only_ engine it could execute on was a
	 * physical engine, if it's multiple bits then it started on and
	 * could still be on a virtual engine. Thus if the mask is not a
	 * power-of-two we assume that rq->engine may still be a virtual
	 * engine and so a dangling invalid pointer that we cannot dereference.
	 *
	 * For example, consider the flow of a bonded request through a virtual
	 * engine. The request is created with a wide engine mask (all engines
	 * that we might execute on). On processing the bond, the request mask
	 * is reduced to one or more engines. If the request is subsequently
	 * bound to a single engine, it will then be constrained to only
	 * execute on that engine and never returned to the virtual engine
	 * after timeslicing away, see __unwind_incomplete_requests(). Thus we
	 * know that if the rq->execution_mask is a single bit, rq->engine
	 * can be a physical engine with the exact corresponding mask.
	 */
	if (is_power_of_2(rq->execution_mask) &&
	    !cmpxchg(&rq->engine->request_pool, NULL, rq))
		return;

	kmem_cache_free(slab_requests, rq);
}

const struct dma_fence_ops i915_fence_ops = {
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};

static void irq_execute_cb(struct irq_work *wrk)
{
	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);

	i915_sw_fence_complete(cb->fence);
	kmem_cache_free(slab_execute_cbs, cb);
}

static __always_inline void
__notify_execute_cb(struct i915_request *rq, bool (*fn)(struct irq_work *wrk))
{
	struct execute_cb *cb, *cn;

	if (llist_empty(&rq->execute_cb))
		return;

	llist_for_each_entry_safe(cb, cn,
				  llist_del_all(&rq->execute_cb),
				  work.node.llist)
		fn(&cb->work);
}

static void __notify_execute_cb_irq(struct i915_request *rq)
{
	__notify_execute_cb(rq, irq_work_queue);
}

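/*
 * Run the callback synchronously instead of queueing it as irq_work;
 * used when the caller knows it is safe to execute the callbacks in
 * place. The bool return exists only to match the irq_work_queue()
 * prototype and is ignored by __notify_execute_cb().
 */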
static bool irq_work_imm(struct irq_work *wrk)
{
	wrk->func(wrk);
	return false;
}

void i915_request_notify_execute_cb_imm(struct i915_request *rq)
{
	__notify_execute_cb(rq, irq_work_imm);
}

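/*
 * Overwrite the user payload of a request with @val, taking care of a
 * wrap of the ring: e.g. for a 4096 byte ring with infix == 4032 and
 * postfix == 64, we fill [4032, 4096) and then [0, 64). (Illustrative
 * numbers only.)
 */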
static void __i915_request_fill(struct i915_request *rq, u8 val)
{
	void *vaddr = rq->ring->vaddr;
	u32 head;

	head = rq->infix;
	if (rq->postfix < head) {
		memset(vaddr + head, val, rq->ring->size - head);
		head = 0;
	}
	memset(vaddr + head, val, rq->postfix - head);
}

/**
 * i915_request_active_engine
 * @rq: request to inspect
 * @active: pointer in which to return the active engine
 *
 * Writes the currently active engine to the @active pointer if the request
 * is active and not yet completed.
 *
 * Returns true if the request was active, false otherwise.
 */
bool
i915_request_active_engine(struct i915_request *rq,
			   struct intel_engine_cs **active)
{
	struct intel_engine_cs *engine, *locked;
	bool ret = false;

	/*
	 * Serialise with __i915_request_submit() so that it sees
	 * the is-banned? state, or we know the request is already inflight.
	 *
	 * Note that rq->engine is unstable, and so we double
	 * check that we have acquired the lock on the final engine.
	 */
	locked = READ_ONCE(rq->engine);
	spin_lock_irq(&locked->sched_engine->lock);
	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
		spin_unlock(&locked->sched_engine->lock);
		locked = engine;
		spin_lock(&locked->sched_engine->lock);
	}

	if (i915_request_is_active(rq)) {
		if (!__i915_request_is_complete(rq))
			*active = locked;
		ret = true;
	}

	spin_unlock_irq(&locked->sched_engine->lock);

	return ret;
}

static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
{
	struct i915_request *rq =
		container_of(hrtimer, struct i915_request, watchdog.timer);
	struct intel_gt *gt = rq->engine->gt;

	if (!i915_request_completed(rq)) {
		if (llist_add(&rq->watchdog.link, &gt->watchdog.list))
			queue_work(gt->i915->unordered_wq, &gt->watchdog.work);
	} else {
		i915_request_put(rq);
	}

	return HRTIMER_NORESTART;
}

static void __rq_init_watchdog(struct i915_request *rq)
{
	struct i915_request_watchdog *wdg = &rq->watchdog;

	hrtimer_setup(&wdg->timer, __rq_watchdog_expired, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
}

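/*
 * Arm the per-request watchdog. We take a reference for the timer, and
 * the NSEC_PER_MSEC passed to hrtimer_start_range_ns() is slack: the
 * expiry may fire up to ~1ms late so it can be coalesced with other
 * timers in the same window.
 */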
static void __rq_arm_watchdog(struct i915_request *rq)
{
	struct i915_request_watchdog *wdg = &rq->watchdog;
	struct intel_context *ce = rq->context;

	if (!ce->watchdog.timeout_us)
		return;

	i915_request_get(rq);

	hrtimer_start_range_ns(&wdg->timer,
			       ns_to_ktime(ce->watchdog.timeout_us *
					   NSEC_PER_USEC),
			       NSEC_PER_MSEC,
			       HRTIMER_MODE_REL);
}

static void __rq_cancel_watchdog(struct i915_request *rq)
{
	struct i915_request_watchdog *wdg = &rq->watchdog;

	if (hrtimer_try_to_cancel(&wdg->timer) > 0)
		i915_request_put(rq);
}

#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)

/**
 * i915_request_free_capture_list - Free a capture list
 * @capture: Pointer to the first list item or NULL
 */
void i915_request_free_capture_list(struct i915_capture_list *capture)
{
	while (capture) {
		struct i915_capture_list *next = capture->next;

		i915_vma_resource_put(capture->vma_res);
		kfree(capture);
		capture = next;
	}
}

#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list)

#define clear_capture_list(_rq) ((_rq)->capture_list = NULL)

#else

#define i915_request_free_capture_list(_a) do {} while (0)

#define assert_capture_list_is_null(_a) do {} while (0)

#define clear_capture_list(_rq) do {} while (0)

#endif

bool i915_request_retire(struct i915_request *rq)
{
	if (!__i915_request_is_complete(rq))
		return false;

	RQ_TRACE(rq, "\n");

	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
	trace_i915_request_retire(rq);
	i915_request_mark_complete(rq);

	__rq_cancel_watchdog(rq);

	/*
	 * We know the GPU must have read the request to have
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
	GEM_BUG_ON(!list_is_first(&rq->link,
				  &i915_request_timeline(rq)->requests));
	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		/* Poison before we release our space in the ring */
		__i915_request_fill(rq, POISON_FREE);
	rq->ring->head = rq->postfix;

	if (!i915_request_signaled(rq)) {
		spin_lock_irq(&rq->lock);
		dma_fence_signal_locked(&rq->fence);
		spin_unlock_irq(&rq->lock);
	}

	if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags))
		intel_rps_dec_waiters(&rq->engine->gt->rps);

	/*
	 * We only loosely track inflight requests across preemption,
	 * and so we may find ourselves attempting to retire a _completed_
	 * request that we have removed from the HW and put back on a run
	 * queue.
	 *
	 * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
	 * after removing the breadcrumb and signaling it, so that we do not
	 * inadvertently attach the breadcrumb to a completed request.
	 */
	rq->engine->remove_active_request(rq);
	GEM_BUG_ON(!llist_empty(&rq->execute_cb));

	__list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */

	intel_context_exit(rq->context);
	intel_context_unpin(rq->context);

	i915_sched_node_fini(&rq->sched);
	i915_request_put(rq);

	return true;
}

void i915_request_retire_upto(struct i915_request *rq)
{
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_request *tmp;

	RQ_TRACE(rq, "\n");
	GEM_BUG_ON(!__i915_request_is_complete(rq));

	do {
		tmp = list_first_entry(&tl->requests, typeof(*tmp), link);
		GEM_BUG_ON(!i915_request_completed(tmp));
	} while (i915_request_retire(tmp) && tmp != rq);
}

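/*
 * Snapshot the ELSP port array for @engine. The submission tasklet may
 * overwrite execlists->active at any time, so callers must treat the
 * result as a racy hint and re-read each port with READ_ONCE().
 */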
static struct i915_request * const *
__engine_active(struct intel_engine_cs *engine)
{
	return READ_ONCE(engine->execlists.active);
}

static bool __request_in_flight(const struct i915_request *signal)
{
	struct i915_request * const *port, *rq;
	bool inflight = false;

	if (!i915_request_is_ready(signal))
		return false;

	/*
	 * Even if we have unwound the request, it may still be on
	 * the GPU (preempt-to-busy). If that request is inside an
	 * unpreemptible critical section, it will not be removed. Some
	 * GPU functions may even be stuck waiting for the paired request
	 * (__await_execution) to be submitted and cannot be preempted
	 * until the bond is executing.
	 *
	 * As we know that there are always preemption points between
	 * requests, we know that only the currently executing request
	 * may be still active even though we have cleared the flag.
	 * However, we can't rely on our tracking of ELSP[0] to know
	 * which request is currently active and so may be stuck, as
	 * the tracking may be an event behind. Instead assume that
	 * if the context is still inflight, then it is still active
	 * even if the active flag has been cleared.
	 *
	 * To further complicate matters, if there is a pending promotion,
	 * the HW may either perform a context switch to the second inflight
	 * execlists, or it may switch to the pending set of execlists. In
	 * the case of the latter, it may send the ACK and we process the
	 * event copying the pending[] over top of inflight[], _overwriting_
	 * our *active. Since this implies the HW is arbitrating and not
	 * stuck in *active, we do not worry about complete accuracy, but
	 * we do require no read/write tearing of the pointer [the read of
	 * the pointer must be valid, even as the array is being overwritten,
	 * for which we require the writes to avoid tearing.]
	 *
	 * Note that the read of *execlists->active may race with the promotion
	 * of execlists->pending[] to execlists->inflight[], overwriting
	 * the value at *execlists->active. This is fine. The promotion implies
	 * that we received an ACK from the HW, and so the context is not
	 * stuck -- if we do not see ourselves in *active, the inflight status
	 * is valid. If instead we see ourselves being copied into *active,
	 * we are inflight and may signal the callback.
	 */
	if (!intel_context_inflight(signal->context))
		return false;

	rcu_read_lock();
	for (port = __engine_active(signal->engine);
	     (rq = READ_ONCE(*port)); /* may race with promotion of pending[] */
	     port++) {
		if (rq->context == signal->context) {
			inflight = i915_seqno_passed(rq->fence.seqno,
						     signal->fence.seqno);
			break;
		}
	}
	rcu_read_unlock();

	return inflight;
}

static int
__await_execution(struct i915_request *rq,
		  struct i915_request *signal,
		  gfp_t gfp)
{
	struct execute_cb *cb;

	if (i915_request_is_active(signal))
		return 0;

	cb = kmem_cache_alloc(slab_execute_cbs, gfp);
	if (!cb)
		return -ENOMEM;

	cb->fence = &rq->submit;
	i915_sw_fence_await(cb->fence);
	init_irq_work(&cb->work, irq_execute_cb);

	/*
	 * Register the callback first, then see if the signaler is already
	 * active. This ensures that if we race with the
	 * __notify_execute_cb from i915_request_submit() and we are not
	 * included in that list, we get a second bite of the cherry and
	 * execute it ourselves. After this point, a future
	 * i915_request_submit() will notify us.
	 *
	 * In i915_request_retire() we set the ACTIVE bit on a completed
	 * request (then flush the execute_cb). So by registering the
	 * callback first, then checking the ACTIVE bit, we serialise with
	 * the completed/retired request.
	 */
	if (llist_add(&cb->work.node.llist, &signal->execute_cb)) {
		if (i915_request_is_active(signal) ||
		    __request_in_flight(signal))
			i915_request_notify_execute_cb_imm(signal);
	}

	return 0;
}

static bool fatal_error(int error)
{
	switch (error) {
	case 0: /* not an error! */
	case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
	case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
		return false;
	default:
		return true;
	}
}

void __i915_request_skip(struct i915_request *rq)
{
	GEM_BUG_ON(!fatal_error(rq->fence.error));

	if (rq->infix == rq->postfix)
		return;

	RQ_TRACE(rq, "error: %d\n", rq->fence.error);

	/*
	 * As this request likely depends on state from the lost
	 * context, clear out all the user operations leaving the
	 * breadcrumb at the end (so we get the fence notifications).
	 */
	__i915_request_fill(rq, 0);
	rq->infix = rq->postfix;
}

bool i915_request_set_error_once(struct i915_request *rq, int error)
{
	int old;

	GEM_BUG_ON(!IS_ERR_VALUE((long)error));

	if (i915_request_signaled(rq))
		return false;

	old = READ_ONCE(rq->fence.error);
	do {
		if (fatal_error(old))
			return false;
	} while (!try_cmpxchg(&rq->fence.error, &old, error));

	return true;
}

struct i915_request *i915_request_mark_eio(struct i915_request *rq)
{
	if (__i915_request_is_complete(rq))
		return NULL;

	GEM_BUG_ON(i915_request_signaled(rq));

	/* As soon as the request is completed, it may be retired */
	rq = i915_request_get(rq);

	i915_request_set_error_once(rq, -EIO);
	i915_request_mark_complete(rq);

	return rq;
}

bool __i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	bool result = false;

	RQ_TRACE(request, "\n");

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->sched_engine->lock);

	/*
	 * With the advent of preempt-to-busy, we frequently encounter
	 * requests that we have unsubmitted from HW, but left running
	 * until the next ack and so have completed in the meantime. On
	 * resubmission of that completed request, we can skip
	 * updating the payload, and execlists can even skip submitting
	 * the request.
	 *
	 * We must remove the request from the caller's priority queue,
	 * and the caller must only call us when the request is in their
	 * priority queue, under the sched_engine->lock. This ensures that the
	 * request has *not* yet been retired and we can safely move
	 * the request into the engine->active.list where it will be
	 * dropped upon retiring. (Otherwise, if we resubmit a *retired*
	 * request, this would be a horrible use-after-free.)
	 */
	if (__i915_request_is_complete(request)) {
		list_del_init(&request->sched.link);
		goto active;
	}

	if (unlikely(!intel_context_is_schedulable(request->context)))
		i915_request_set_error_once(request, -EIO);

	if (unlikely(fatal_error(request->fence.error)))
		__i915_request_skip(request);

	/*
	 * Are we using semaphores when the gpu is already saturated?
	 *
	 * Using semaphores incurs a cost in having the GPU poll a
	 * memory location, busywaiting for it to change. The continual
	 * memory reads can have a noticeable impact on the rest of the
	 * system with the extra bus traffic, stalling the cpu as it too
	 * tries to access memory across the bus (perf stat -e bus-cycles).
	 *
	 * If we installed a semaphore on this request and we only submit
	 * the request after the signaler completed, that indicates the
	 * system is overloaded and using semaphores at this time only
	 * increases the amount of work we are doing. If so, we disable
	 * further use of semaphores until we are idle again, whence we
	 * optimistically try again.
	 */
	if (request->sched.semaphores &&
	    i915_sw_fence_signaled(&request->semaphore))
		engine->saturated |= request->sched.semaphores;

	engine->emit_fini_breadcrumb(request,
				     request->ring->vaddr + request->postfix);

	trace_i915_request_execute(request);
	if (engine->bump_serial)
		engine->bump_serial(engine);
	else
		engine->serial++;

	result = true;

	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	engine->add_active_request(request);
active:
	clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
	set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);

	/*
	 * XXX Rollback bonded-execution on __i915_request_unsubmit()?
	 *
	 * In the future, perhaps when we have an active time-slicing scheduler,
	 * it will be interesting to unsubmit parallel execution and remove
	 * busywaits from the GPU until their master is restarted. This is
	 * quite hairy, we have to carefully rollback the fence and do a
	 * preempt-to-idle cycle on the target engine, all the while the
	 * master execute_cb may refire.
	 */
	__notify_execute_cb_irq(request);

	/* We may be recursing from the signal callback of another i915 fence */
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_enable_breadcrumb(request);

	return result;
}

void i915_request_submit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->sched_engine->lock, flags);

	__i915_request_submit(request);

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */
	RQ_TRACE(request, "\n");

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->sched_engine->lock);

	/*
	 * Before we remove this breadcrumb from the signal list, we have
	 * to ensure that a concurrent dma_fence_enable_signaling() does not
	 * attach itself. We first mark the request as no longer active and
	 * make sure that is visible to other cores, and then remove the
	 * breadcrumb if attached.
	 */
	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_cancel_breadcrumb(request);

	/* We've already spun, don't charge on resubmitting. */
	if (request->sched.semaphores && __i915_request_has_started(request))
		request->sched.semaphores = 0;

	/*
	 * We don't need to wake_up any waiters on request->execute, they
	 * will get woken by any other event or us re-adding this request
	 * to the engine timeline (__i915_request_submit()). The waiters
	 * should be quite adept at finding that the request now has a new
	 * global_seqno from the one they went to sleep on.
	 */
}

void i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
	spin_lock_irqsave(&engine->sched_engine->lock, flags);

	__i915_request_unsubmit(request);

	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}

void i915_request_cancel(struct i915_request *rq, int error)
{
	if (!i915_request_set_error_once(rq, error))
		return;

	set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);

	intel_context_cancel_request(rq->context, rq);
}

static int
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *request =
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
		trace_i915_request_submit(request);

		if (unlikely(fence->error))
			i915_request_set_error_once(request, fence->error);
		else
			__rq_arm_watchdog(request);

		/*
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged(). We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
		 */
		rcu_read_lock();
		request->engine->submit_request(request);
		rcu_read_unlock();
		break;

	case FENCE_FREE:
		i915_request_put(request);
		break;
	}

	return NOTIFY_DONE;
}

static int
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
{
	struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);

	switch (state) {
	case FENCE_COMPLETE:
		break;

	case FENCE_FREE:
		i915_request_put(rq);
		break;
	}

	return NOTIFY_DONE;
}

static void retire_requests(struct intel_timeline *tl)
{
	struct i915_request *rq, *rn;

	list_for_each_entry_safe(rq, rn, &tl->requests, link)
		if (!i915_request_retire(rq))
			break;
}

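/*
 * Slow path for request allocation: when we cannot block, dip into the
 * per-engine reserve; otherwise retire the oldest request on the
 * timeline, and failing that wait out an RCU grace period so the slab
 * can recycle freed requests before trying the cache one last time.
 */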
static noinline struct i915_request *
request_alloc_slow(struct intel_timeline *tl,
		   struct i915_request **rsvd,
		   gfp_t gfp)
{
	struct i915_request *rq;

	/* If we cannot wait, dip into our reserves */
	if (!gfpflags_allow_blocking(gfp)) {
		rq = xchg(rsvd, NULL);
		if (!rq) /* Use the normal failure path for one final WARN */
			goto out;

		return rq;
	}

	if (list_empty(&tl->requests))
		goto out;

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	i915_request_retire(rq);

	rq = kmem_cache_alloc(slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (rq)
		return rq;

	/* Ratelimit ourselves to prevent oom from malicious clients */
	rq = list_last_entry(&tl->requests, typeof(*rq), link);
	cond_synchronize_rcu(rq->rcustate);

	/* Retire our old requests in the hope that we free some */
	retire_requests(tl);

out:
	return kmem_cache_alloc(slab_requests, gfp);
}

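/*
 * One-time construction as a request first enters the slab cache.
 * Because requests are recycled through an RCU freelist (see
 * i915_fence_release()), only state that must survive reuse (the lock,
 * sw fences, scheduling node and lists) is initialised here; per-use
 * state is reset in __i915_request_create().
 */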
static void __i915_request_ctor(void *arg)
{
	struct i915_request *rq = arg;

	spin_lock_init(&rq->lock);
	i915_sched_node_init(&rq->sched);
	i915_sw_fence_init(&rq->submit, submit_notify);
	i915_sw_fence_init(&rq->semaphore, semaphore_notify);

	clear_capture_list(rq);
	rq->batch_res = NULL;

	init_llist_head(&rq->execute_cb);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#define clear_batch_ptr(_rq) ((_rq)->batch = NULL)
#else
#define clear_batch_ptr(_a) do {} while (0)
#endif

struct i915_request *
__i915_request_create(struct intel_context *ce, gfp_t gfp)
{
	struct intel_timeline *tl = ce->timeline;
	struct i915_request *rq;
	u32 seqno;
	int ret;

	might_alloc(gfp);

	/* Check that the caller provided an already pinned context */
	__intel_context_pin(ce);

	/*
	 * Beware: Dragons be flying overhead.
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is the request we are writing to here, may be in the process
	 * of being read by __i915_active_request_get_rcu(). As such,
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we chase the request->engine pointer,
	 * read the request->global_seqno and increment the reference count.
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to matches the active
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
	rq = kmem_cache_alloc(slab_requests,
			      gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
		rq = request_alloc_slow(tl, &ce->engine->request_pool, gfp);
		if (!rq) {
			ret = -ENOMEM;
			goto err_unreserve;
		}
	}

	rq->context = ce;
	rq->engine = ce->engine;
	rq->ring = ce->ring;
	rq->execution_mask = ce->engine->mask;
	rq->i915 = ce->engine->i915;

	ret = intel_timeline_get_seqno(tl, rq, &seqno);
	if (ret)
		goto err_free;

	dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock,
		       tl->fence_context, seqno);

	RCU_INIT_POINTER(rq->timeline, tl);
	rq->hwsp_seqno = tl->hwsp_seqno;
	GEM_BUG_ON(__i915_request_is_complete(rq));

	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */

	rq->guc_prio = GUC_PRIO_INIT;

	/* We bump the ref for the fence chain */
	i915_sw_fence_reinit(&i915_request_get(rq)->submit);
	i915_sw_fence_reinit(&i915_request_get(rq)->semaphore);

	i915_sched_node_reinit(&rq->sched);

	/* No zalloc, everything must be cleared after use */
	clear_batch_ptr(rq);
	__rq_init_watchdog(rq);
	assert_capture_list_is_null(rq);
	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
	GEM_BUG_ON(rq->batch_res);

	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
	 * i915_request_add() call can't fail. Note that the reserve may need
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 *
	 * Note that due to how we add reserved_space to intel_ring_begin()
	 * we need to double our request to ensure that if we need to wrap
	 * around inside i915_request_add() there is sufficient space at
	 * the beginning of the ring as well.
	 */
	rq->reserved_space =
		2 * rq->engine->emit_fini_breadcrumb_dw * sizeof(u32);

	/*
	 * Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	rq->head = rq->ring->emit;

	ret = rq->engine->request_alloc(rq);
	if (ret)
		goto err_unwind;

	rq->infix = rq->ring->emit; /* end of header; start of user payload */

	intel_context_mark_active(ce);
	list_add_tail_rcu(&rq->link, &tl->requests);

	return rq;

err_unwind:
	ce->ring->emit = rq->head;

	/* Make sure we didn't add ourselves to external state before freeing */
	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));

err_free:
	kmem_cache_free(slab_requests, rq);
err_unreserve:
	intel_context_unpin(ce);
	return ERR_PTR(ret);
}

struct i915_request *
i915_request_create(struct intel_context *ce)
{
	struct i915_request *rq;
	struct intel_timeline *tl;

	tl = intel_context_timeline_lock(ce);
	if (IS_ERR(tl))
		return ERR_CAST(tl);

	/* Move our oldest request to the slab-cache (if not in use!) */
	rq = list_first_entry(&tl->requests, typeof(*rq), link);
	if (!list_is_last(&rq->link, &tl->requests))
		i915_request_retire(rq);

	intel_context_enter(ce);
	rq = __i915_request_create(ce, GFP_KERNEL);
	intel_context_exit(ce); /* active reference transferred to request */
	if (IS_ERR(rq))
		goto err_unlock;

	/* Check that we do not interrupt ourselves with a new request */
	rq->cookie = lockdep_pin_lock(&tl->mutex);

	return rq;

err_unlock:
	intel_context_timeline_unlock(tl);
	return rq;
}

static int
i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
{
	struct dma_fence *fence;
	int err;

	if (i915_request_timeline(rq) == rcu_access_pointer(signal->timeline))
		return 0;

	if (i915_request_started(signal))
		return 0;

	/*
	 * The caller holds a reference on @signal, but we do not serialise
	 * against it being retired and removed from the lists.
	 *
	 * We do not hold a reference to the request before @signal, and
	 * so must be very careful to ensure that it is not _recycled_ as
	 * we follow the link backwards.
	 */
	fence = NULL;
	rcu_read_lock();
	do {
		struct list_head *pos = READ_ONCE(signal->link.prev);
		struct i915_request *prev;

		/* Confirm signal has not been retired, the link is valid */
		if (unlikely(__i915_request_has_started(signal)))
			break;

		/* Is signal the earliest request on its timeline? */
		if (pos == &rcu_dereference(signal->timeline)->requests)
			break;

		/*
		 * Peek at the request before us in the timeline. That
		 * request will only be valid before it is retired, so
		 * after acquiring a reference to it, confirm that it is
		 * still part of the signaler's timeline.
		 */
		prev = list_entry(pos, typeof(*prev), link);
		if (!i915_request_get_rcu(prev))
			break;

		/* After the strong barrier, confirm prev is still attached */
		if (unlikely(READ_ONCE(prev->link.next) != &signal->link)) {
			i915_request_put(prev);
			break;
		}

		fence = &prev->fence;
	} while (0);
	rcu_read_unlock();
	if (!fence)
		return 0;

	err = 0;
	if (!intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
		err = i915_sw_fence_await_dma_fence(&rq->submit,
						    fence, 0,
						    I915_FENCE_GFP);
	dma_fence_put(fence);

	return err;
}

static intel_engine_mask_t
already_busywaiting(struct i915_request *rq)
{
	/*
	 * Polling a semaphore causes bus traffic, delaying other users of
	 * both the GPU and CPU. We want to limit the impact on others,
	 * while taking advantage of early submission to reduce GPU
	 * latency. Therefore we restrict ourselves to not using more
	 * than one semaphore from each source, and not using a semaphore
	 * if we have detected the engine is saturated (i.e. would not be
	 * submitted early and cause bus traffic reading an already passed
	 * semaphore).
	 *
	 * See the are-we-too-late? check in __i915_request_submit().
	 */
	return rq->sched.semaphores | READ_ONCE(rq->engine->saturated);
}

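/*
 * Emit a MI_SEMAPHORE_WAIT into @to's ring, polling for the signaler's
 * HWSP value to reach @seqno. In dwords: the command, the compare
 * value, then the lower and upper halves of the GGTT address of the
 * signaler's HWSP; on gen12+ a token dword plus a MI_NOOP of padding
 * are appended, matching the len calculation below.
 */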
static int
__emit_semaphore_wait(struct i915_request *to,
		      struct i915_request *from,
		      u32 seqno)
{
	const int has_token = GRAPHICS_VER(to->engine->i915) >= 12;
	u32 hwsp_offset;
	int len, err;
	u32 *cs;

	GEM_BUG_ON(GRAPHICS_VER(to->engine->i915) < 8);
	GEM_BUG_ON(i915_request_has_initial_breadcrumb(to));

	/* We need to pin the signaler's HWSP until we are finished reading. */
	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
	if (err)
		return err;

	len = 4;
	if (has_token)
		len += 2;

	cs = intel_ring_begin(to, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * Using greater-than-or-equal here means we have to worry
	 * about seqno wraparound. To side step that issue, we swap
	 * the timeline HWSP upon wrapping, so that everyone listening
	 * for the old (pre-wrap) values do not see the much smaller
	 * (post-wrap) values than they were expecting (and so wait
	 * forever).
	 */
	*cs++ = (MI_SEMAPHORE_WAIT |
		 MI_SEMAPHORE_GLOBAL_GTT |
		 MI_SEMAPHORE_POLL |
		 MI_SEMAPHORE_SAD_GTE_SDD) +
		has_token;
	*cs++ = seqno;
	*cs++ = hwsp_offset;
	*cs++ = 0;
	if (has_token) {
		*cs++ = 0;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(to, cs);
	return 0;
}

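/*
 * A semaphore busywaits on an address in the global GTT, so the
 * signaler's HWSP is only reachable if both requests share the same
 * GGTT; cross-GT waits must fall back to ordinary fence signaling.
 */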
static bool
can_use_semaphore_wait(struct i915_request *to, struct i915_request *from)
{
	return to->engine->gt->ggtt == from->engine->gt->ggtt;
}

static int
emit_semaphore_wait(struct i915_request *to,
		    struct i915_request *from,
		    gfp_t gfp)
{
	const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask;
	struct i915_sw_fence *wait = &to->submit;

	if (!can_use_semaphore_wait(to, from))
		goto await_fence;

	if (!intel_context_use_semaphores(to->context))
		goto await_fence;

	if (i915_request_has_initial_breadcrumb(to))
		goto await_fence;

	/*
	 * If this or its dependents are waiting on an external fence
	 * that may fail catastrophically, then we want to avoid using
	 * semaphores as they bypass the fence signaling metadata, and we
	 * lose the fence->error propagation.
	 */
	if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN)
		goto await_fence;

	/* Just emit the first semaphore we see as request space is limited. */
	if (already_busywaiting(to) & mask)
		goto await_fence;

	if (i915_request_await_start(to, from) < 0)
		goto await_fence;

	/* Only submit our spinner after the signaler is running! */
	if (__await_execution(to, from, gfp))
		goto await_fence;

	if (__emit_semaphore_wait(to, from, from->fence.seqno))
		goto await_fence;

	to->sched.semaphores |= mask;
	wait = &to->semaphore;

await_fence:
	return i915_sw_fence_await_dma_fence(wait,
					     &from->fence, 0,
					     I915_FENCE_GFP);
}

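/*
 * A request "starts" once its predecessor on the same timeline has been
 * signaled, so a record against fence->seqno - 1 tracks a wait for
 * @fence to begin execution rather than to complete.
 */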
static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
					  struct dma_fence *fence)
{
	return __intel_timeline_sync_is_later(tl,
					      fence->context,
					      fence->seqno - 1);
}

static int intel_timeline_sync_set_start(struct intel_timeline *tl,
					 const struct dma_fence *fence)
{
	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
}

static int
__i915_request_await_execution(struct i915_request *to,
			       struct i915_request *from)
{
	int err;

	GEM_BUG_ON(intel_context_is_barrier(from->context));

	/* Submit both requests at the same time */
	err = __await_execution(to, from, I915_FENCE_GFP);
	if (err)
		return err;

	/* Squash repeated dependencies to the same timelines */
	if (intel_timeline_sync_has_start(i915_request_timeline(to),
					  &from->fence))
		return 0;

	/*
	 * Wait until the start of this request.
	 *
	 * The execution cb fires when we submit the request to HW. But in
	 * many cases this may be long before the request itself is ready to
	 * run (consider that we submit 2 requests for the same context, where
	 * the request of interest is behind an indefinite spinner). So we hook
	 * up to both to reduce our queues and keep the execution lag minimised
	 * in the worst case, though we hope that the await_start is elided.
	 */
	err = i915_request_await_start(to, from);
	if (err < 0)
		return err;

	/*
	 * Ensure both start together [after all semaphores in signal]
	 *
	 * Now that we are queued to the HW at roughly the same time (thanks
	 * to the execute cb) and are ready to run at roughly the same time
	 * (thanks to the await start), our signaler may still be indefinitely
	 * delayed by waiting on a semaphore from a remote engine. If our
	 * signaler depends on a semaphore, so indirectly do we, and we do not
	 * want to start our payload until our signaler also starts theirs.
	 * So we wait.
	 *
	 * However, there is also a second condition for which we need to wait
	 * for the precise start of the signaler. Consider that the signaler
	 * was submitted in a chain of requests following another context
	 * (with just an ordinary intra-engine fence dependency between the
	 * two). In this case the signaler is queued to HW, but not for
	 * immediate execution, and so we must wait until it reaches the
	 * active slot.
	 */
	if (can_use_semaphore_wait(to, from) &&
	    intel_engine_has_semaphores(to->engine) &&
	    !i915_request_has_initial_breadcrumb(to)) {
		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
		if (err < 0)
			return err;
	}

	/* Couple the dependency tree for PI on this exposed to->fence */
	if (to->engine->sched_engine->schedule) {
		err = i915_sched_node_add_dependency(&to->sched,
						     &from->sched,
						     I915_DEPENDENCY_WEAK);
		if (err < 0)
			return err;
	}

	return intel_timeline_sync_set_start(i915_request_timeline(to),
					     &from->fence);
}

static void mark_external(struct i915_request *rq)
{
	/*
	 * The downside of using semaphores is that we lose metadata passing
	 * along the signaling chain. This is particularly nasty when we
	 * need to pass along a fatal error such as EFAULT or EDEADLK. For
	 * fatal errors we want to scrub the request before it is executed,
	 * which means that we cannot preload the request onto HW and have
	 * it wait upon a semaphore.
	 */
	rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN;
}

static int
__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
{
	mark_external(rq);
	return i915_sw_fence_await_dma_fence(&rq->submit, fence,
					     i915_fence_context_timeout(rq->i915,
									fence->context),
					     I915_FENCE_GFP);
}

static int
i915_request_await_external(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence *iter;
	int err = 0;

	if (!to_dma_fence_chain(fence))
		return __i915_request_await_external(rq, fence);

	dma_fence_chain_for_each(iter, fence) {
		struct dma_fence_chain *chain = to_dma_fence_chain(iter);

		if (!dma_fence_is_i915(chain->fence)) {
			err = __i915_request_await_external(rq, iter);
			break;
		}

		err = i915_request_await_dma_fence(rq, chain->fence);
		if (err < 0)
			break;
	}

	dma_fence_put(iter);
	return err;
}

static inline bool is_parallel_rq(struct i915_request *rq)
{
	return intel_context_is_parallel(rq->context);
}

static inline struct intel_context *request_to_parent(struct i915_request *rq)
{
	return intel_context_to_parent(rq->context);
}

static bool is_same_parallel_context(struct i915_request *to,
				     struct i915_request *from)
{
	if (is_parallel_rq(to))
		return request_to_parent(to) == request_to_parent(from);

	return false;
}

int
i915_request_await_execution(struct i915_request *rq,
			     struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		/* XXX Error for signal-on-any fence arrays */

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		if (fence->context == rq->fence.context)
			continue;

		/*
		 * We don't squash repeated fence dependencies here as we
		 * want to run our callback in all cases.
		 */

		if (dma_fence_is_i915(fence)) {
			if (is_same_parallel_context(rq, to_request(fence)))
				continue;
			ret = __i915_request_await_execution(rq,
							     to_request(fence));
		} else {
			ret = i915_request_await_external(rq, fence);
		}
		if (ret < 0)
			return ret;
	} while (--nchild);

	return 0;
}

static int
await_request_submit(struct i915_request *to, struct i915_request *from)
{
	/*
	 * If we are waiting on a virtual engine, then it may be
	 * constrained to execute on a single engine *prior* to submission.
	 * When it is submitted, it will be first submitted to the virtual
	 * engine and then passed to the physical engine. We cannot allow
	 * the waiter to be submitted immediately to the physical engine
	 * as it may then bypass the virtual request.
	 */
	if (to->engine == READ_ONCE(from->engine))
		return i915_sw_fence_await_sw_fence_gfp(&to->submit,
							&from->submit,
							I915_FENCE_GFP);
	else
		return __i915_request_await_execution(to, from);
}

static int
i915_request_await_request(struct i915_request *to, struct i915_request *from)
{
	int ret;

	GEM_BUG_ON(to == from);
	GEM_BUG_ON(to->timeline == from->timeline);

	if (i915_request_completed(from)) {
		i915_sw_fence_set_error_once(&to->submit, from->fence.error);
		return 0;
	}

	if (to->engine->sched_engine->schedule) {
		ret = i915_sched_node_add_dependency(&to->sched,
						     &from->sched,
						     I915_DEPENDENCY_EXTERNAL);
		if (ret < 0)
			return ret;
	}

	if (!intel_engine_uses_guc(to->engine) &&
	    is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask)))
		ret = await_request_submit(to, from);
	else
		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
	if (ret < 0)
		return ret;

	return 0;
}

int
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
{
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
	int ret;

	/*
	 * Note that if the fence-array was created in signal-on-any mode,
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}

	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;

		/*
		 * Requests on the same timeline are explicitly ordered, along
		 * with their dependencies, by i915_request_add() which ensures
		 * that requests are submitted in-order through each ring.
		 */
		if (fence->context == rq->fence.context)
			continue;

		/* Squash repeated waits to the same timelines */
		if (fence->context &&
		    intel_timeline_sync_is_later(i915_request_timeline(rq),
						 fence))
			continue;

		if (dma_fence_is_i915(fence)) {
			if (is_same_parallel_context(rq, to_request(fence)))
				continue;
			ret = i915_request_await_request(rq, to_request(fence));
		} else {
			ret = i915_request_await_external(rq, fence);
		}
		if (ret < 0)
			return ret;

		/* Record the latest fence used against each timeline */
		if (fence->context)
			intel_timeline_sync_set(i915_request_timeline(rq),
						fence);
	} while (--nchild);

	return 0;
}

/**
 * i915_request_await_deps - set this request to (async) wait upon a struct
 * i915_deps dma_fence collection
 * @rq: request we are wishing to use
 * @deps: The struct i915_deps containing the dependencies.
 *
 * Returns 0 if successful, negative error code on error.
 */
int i915_request_await_deps(struct i915_request *rq, const struct i915_deps *deps)
{
	int i, err;

	for (i = 0; i < deps->num_deps; ++i) {
		err = i915_request_await_dma_fence(rq, deps->fences[i]);
		if (err)
			return err;
	}

	return 0;
}

/**
 * i915_request_await_object - set this request to (async) wait upon a bo
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
 * @write: whether the wait is on behalf of a writer
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
{
	struct dma_resv_iter cursor;
	struct dma_fence *fence;
	int ret = 0;

	dma_resv_for_each_fence(&cursor, obj->base.resv,
				dma_resv_usage_rw(write), fence) {
		ret = i915_request_await_dma_fence(to, fence);
		if (ret)
			break;
	}

	return ret;
}

static void i915_request_await_huc(struct i915_request *rq)
{
	struct intel_huc *huc = &rq->context->engine->gt->uc.huc;

	/* don't stall kernel submissions! */
	if (!rcu_access_pointer(rq->context->gem_context))
		return;

	if (intel_huc_wait_required(huc))
		i915_sw_fence_await_sw_fence(&rq->submit,
					     &huc->delayed_load.fence,
					     &rq->hucq);
}

static struct i915_request *
__i915_request_ensure_parallel_ordering(struct i915_request *rq,
					struct intel_timeline *timeline)
{
	struct i915_request *prev;

	GEM_BUG_ON(!is_parallel_rq(rq));

	prev = request_to_parent(rq)->parallel.last_rq;
	if (prev) {
		if (!__i915_request_is_complete(prev)) {
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);

			if (rq->engine->sched_engine->schedule)
				__i915_sched_node_add_dependency(&rq->sched,
								 &prev->sched,
								 &rq->dep,
								 0);
		}
		i915_request_put(prev);
	}

	request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);

	/*
	 * Callers have to put the reference to the returned request,
	 * potentially taken by __i915_active_fence_set(), once it is
	 * no longer needed.
	 */
	return to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));
}

static struct i915_request *
__i915_request_ensure_ordering(struct i915_request *rq,
			       struct intel_timeline *timeline)
{
	struct i915_request *prev;

	GEM_BUG_ON(is_parallel_rq(rq));

	prev = to_request(__i915_active_fence_set(&timeline->last_request,
						  &rq->fence));

	if (prev && !__i915_request_is_complete(prev)) {
		bool uses_guc = intel_engine_uses_guc(rq->engine);
		bool pow2 = is_power_of_2(READ_ONCE(prev->engine)->mask |
					  rq->engine->mask);
		bool same_context = prev->context == rq->context;

		/*
		 * The requests are supposed to be kept in order. However,
		 * we need to be wary in case the timeline->last_request
		 * is used as a barrier for external modification to this
		 * context.
		 */
		GEM_BUG_ON(same_context &&
			   i915_seqno_passed(prev->fence.seqno,
					     rq->fence.seqno));

		if ((same_context && uses_guc) || (!uses_guc && pow2))
			i915_sw_fence_await_sw_fence(&rq->submit,
						     &prev->submit,
						     &rq->submitq);
		else
			__i915_sw_fence_await_dma_fence(&rq->submit,
							&prev->fence,
							&rq->dmaq);
		if (rq->engine->sched_engine->schedule)
			__i915_sched_node_add_dependency(&rq->sched,
							 &prev->sched,
							 &rq->dep,
							 0);
	}

	/*
	 * Callers have to put the reference to prev, potentially taken
	 * by __i915_active_fence_set(), once it is no longer needed.
	 */
	return prev;
}

static struct i915_request *
__i915_request_add_to_timeline(struct i915_request *rq)
{
	struct intel_timeline *timeline = i915_request_timeline(rq);
	struct i915_request *prev;

	/*
	 * Media workloads may require HuC, so stall them until HuC loading is
	 * complete. Note that HuC not being loaded when a user submission
	 * arrives can only happen when HuC is loaded via GSC and in that case
	 * we still expect the window between us starting to accept submissions
	 * and HuC loading completion to be small (a few hundred ms).
	 */
	if (rq->engine->class == VIDEO_DECODE_CLASS)
		i915_request_await_huc(rq);

	/*
	 * Dependency tracking and request ordering along the timeline
	 * is special cased so that we can eliminate redundant ordering
	 * operations while building the request (we know that the timeline
	 * itself is ordered, and here we guarantee it).
	 *
	 * As we know we will need to emit tracking along the timeline,
	 * we embed the hooks into our request struct -- at the cost of
	 * having to have specialised no-allocation interfaces (which will
	 * be beneficial elsewhere).
	 *
	 * A second benefit to open-coding i915_request_await_request is
	 * that we can apply a slight variant of the rules specialised
	 * for timelines that jump between engines (such as virtual engines).
	 * If we consider the case of virtual engine, we must emit a dma-fence
	 * to prevent scheduling of the second request until the first is
	 * complete (to maximise our greedy late load balancing) and this
	 * precludes optimising to use semaphores for serialisation of a
	 * single timeline across engines.
	 *
	 * We do not order parallel submission requests on the timeline as each
	 * parallel submission context has its own timeline and the ordering
	 * rules for parallel requests are that they must be submitted in the
	 * order received from the execbuf IOCTL. So rather than using the
	 * timeline we store a pointer to last request submitted in the
	 * relationship in the gem context and insert a submission fence
	 * between that request and request passed into this function or
	 * alternatively we use completion fence if gem context has a single
	 * timeline and this is the first submission of an execbuf IOCTL.
	 */
	if (likely(!is_parallel_rq(rq)))
		prev = __i915_request_ensure_ordering(rq, timeline);
	else
		prev = __i915_request_ensure_parallel_ordering(rq, timeline);
	if (prev)
		i915_request_put(prev);

	/*
	 * Make sure that no request gazumped us - if it was allocated after
	 * our i915_request_alloc() and called __i915_request_add() before
	 * us, the timeline will hold its seqno which is later than ours.
	 */
	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);

	return prev;
}

/*
 * NB: This function is not allowed to fail. Doing so would mean that the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
struct i915_request *__i915_request_commit(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_ring *ring = rq->ring;
	u32 *cs;

	RQ_TRACE(rq, "\n");

	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	GEM_BUG_ON(rq->reserved_space > ring->space);
	rq->reserved_space = 0;
	rq->emitted_jiffies = jiffies;

	/*
	 * Record the position of the start of the breadcrumb so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the ring's HEAD.
	 */
	cs = intel_ring_begin(rq, engine->emit_fini_breadcrumb_dw);
	GEM_BUG_ON(IS_ERR(cs));
	rq->postfix = intel_ring_offset(rq, cs);

	return __i915_request_add_to_timeline(rq);
}

void __i915_request_queue_bh(struct i915_request *rq)
{
	i915_sw_fence_commit(&rq->semaphore);
	i915_sw_fence_commit(&rq->submit);
}

void __i915_request_queue(struct i915_request *rq,
			  const struct i915_sched_attr *attr)
{
	/*
	 * Let the backend know a new request has arrived that may need
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
	if (attr && rq->engine->sched_engine->schedule)
		rq->engine->sched_engine->schedule(rq, attr);

	local_bh_disable();
	__i915_request_queue_bh(rq);
	local_bh_enable(); /* kick tasklets */
}

void i915_request_add(struct i915_request *rq)
{
	struct intel_timeline * const tl = i915_request_timeline(rq);
	struct i915_sched_attr attr = {};
	struct i915_gem_context *ctx;

	lockdep_assert_held(&tl->mutex);
	lockdep_unpin_lock(&tl->mutex, rq->cookie);

	trace_i915_request_add(rq);
	__i915_request_commit(rq);

	/* XXX placeholder for selftests */
	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx)
		attr = ctx->sched;
	rcu_read_unlock();

	__i915_request_queue(rq, &attr);

	mutex_unlock(&tl->mutex);
}

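/*
 * A minimal usage sketch (illustrative only; "ce" is assumed to be a
 * pinned intel_context and emit_payload() a hypothetical helper that
 * writes commands into the ring):
 *
 *	struct i915_request *rq;
 *
 *	rq = i915_request_create(ce);	// acquires the timeline mutex
 *	if (IS_ERR(rq))
 *		return PTR_ERR(rq);
 *
 *	err = emit_payload(rq);
 *	i915_request_add(rq);		// commits and drops the mutex
 */
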
static unsigned long local_clock_ns(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * local_clock() cheaply provides an approximate timestamp, in
	 * nanoseconds, for the current CPU, and the subsequent calculations
	 * are kept in the same units. The principal source of timing error
	 * here is from the simple truncation to unsigned long.
	 *
	 * Note that local_clock() is only defined with respect to the
	 * current CPU; the comparisons are no longer valid if we switch
	 * CPUs. Instead of blocking preemption for the entire busywait, we
	 * can detect the CPU switch and use that as an indicator of system
	 * load and a reason to stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock();
	put_cpu();

	return t;
}

static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int this_cpu;

	if (time_after(local_clock_ns(&this_cpu), timeout))
		return true;

	return this_cpu != cpu;
}

static bool __i915_spin_request(struct i915_request * const rq, int state)
{
	unsigned long timeout_ns;
	unsigned int cpu;

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the context and so we can
	 * tell if the request has been started. If the request is not even
	 * running yet, it is a fair assumption that it will not complete
	 * within our relatively short timeout.
	 */
	if (!i915_request_is_running(rq))
		return false;

	/*
	 * When waiting for high frequency requests, e.g. during synchronous
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quickly as possible.
	 * However, if it is a slow request, we want to sleep as quickly as
	 * possible. The tradeoff between waiting and sleeping is roughly the
	 * time it takes to sleep on a request, on the order of a microsecond.
	 */

	timeout_ns = READ_ONCE(rq->engine->props.max_busywait_duration_ns);
	timeout_ns += local_clock_ns(&cpu);
	do {
		if (dma_fence_is_signaled(&rq->fence))
			return true;

		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_ns, cpu))
			break;

		cpu_relax();
	} while (!need_resched());

	return false;
}

struct request_wait {
	struct dma_fence_cb cb;
	struct task_struct *tsk;
};

static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct request_wait *wait = container_of(cb, typeof(*wait), cb);

	wake_up_process(fetch_and_zero(&wait->tsk));
}

/**
 * i915_request_wait_timeout - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait_timeout() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero, or -ETIME if the request is unfinished after the timeout expires.
 * If the timeout is 0, it will return 1 if the fence is signaled.
 *
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 *
 * NOTE: This function has the same wait semantics as dma-fence.
 */
long i915_request_wait_timeout(struct i915_request *rq,
			       unsigned int flags,
			       long timeout)
{
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
	struct request_wait wait;

	might_sleep();
	GEM_BUG_ON(timeout < 0);

	if (dma_fence_is_signaled(&rq->fence))
		return timeout ?: 1;

	if (!timeout)
		return -ETIME;

	trace_i915_request_wait_begin(rq, flags);

	/*
	 * We must never wait on the GPU while holding a lock as we
	 * may need to perform a GPU reset. So while we don't need to
	 * serialise wait/reset with an explicit lock, we do want
	 * lockdep to detect potential dependency cycles.
	 */
	mutex_acquire(&rq->engine->gt->reset.mutex.dep_map, 0, 0, _THIS_IP_);

	/*
	 * Optimistic spin before touching IRQs.
	 *
	 * We may use a rather large value here to offset the penalty of
	 * switching away from the active task. Frequently, the client will
	 * wait upon an old swapbuffer to throttle itself to remain within a
	 * frame of the gpu. If the client is running in lockstep with the gpu,
	 * then it should not be waiting long at all, and a sleep now will incur
	 * extra scheduler latency in producing the next frame. To try to
	 * avoid adding the cost of enabling/disabling the interrupt to the
	 * short wait, we first spin to see if the request would have completed
	 * in the time taken to setup the interrupt.
	 *
	 * We need up to 5us to enable the irq, and up to 20us to hide the
	 * scheduler latency of a context switch, ignoring the secondary
	 * impacts from a context switch such as cache eviction.
	 *
	 * The scheme used for low-latency IO is called "hybrid interrupt
	 * polling". The suggestion there is to sleep until just before you
	 * expect to be woken by the device interrupt and then poll for its
	 * completion. That requires having a good predictor for the request
	 * duration, which we currently lack.
	 */
	if (CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT &&
	    __i915_spin_request(rq, state))
		goto out;

	/*
	 * This client is about to stall waiting for the GPU. In many cases
	 * this is undesirable and limits the throughput of the system, as
	 * many clients cannot continue processing user input/output whilst
	 * blocked. RPS autotuning may take tens of milliseconds to respond
	 * to the GPU load and thus incurs additional latency for the client.
	 * We can circumvent that by promoting the GPU frequency to maximum
	 * before we sleep. This makes the GPU throttle up much more quickly
	 * (good for benchmarks and user experience, e.g. window animations),
	 * but at a cost of spending more power processing the workload
	 * (bad for battery).
	 */
	if (flags & I915_WAIT_PRIORITY && !i915_request_started(rq))
		intel_rps_boost(rq);

	wait.tsk = current;
	if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
		goto out;

	/*
	 * Flush the submission tasklet, but only if it may help this request.
	 *
	 * We sometimes experience some latency between the HW interrupts and
	 * tasklet execution (mostly due to ksoftirqd latency, but it can also
	 * be due to lazy CS events), so let's run the tasklet manually if there
	 * is a chance it may submit this request. If the request is not ready
	 * to run, as it is waiting for other fences to be signaled, flushing
	 * the tasklet is busy work without any advantage for this client.
	 *
	 * If the HW is being lazy, this is the last chance before we go to
	 * sleep to catch any pending events. We will check periodically in
	 * the heartbeat to flush the submission tasklets as a last resort
	 * for unhappy HW.
	 */
	if (i915_request_is_ready(rq))
		__intel_engine_flush_submission(rq->engine, false);

	for (;;) {
		set_current_state(state);

		if (dma_fence_is_signaled(&rq->fence))
			break;

		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
			break;
		}

		if (!timeout) {
			timeout = -ETIME;
			break;
		}

		timeout = io_schedule_timeout(timeout);
	}
	__set_current_state(TASK_RUNNING);

	if (READ_ONCE(wait.tsk))
		dma_fence_remove_callback(&rq->fence, &wait.cb);
	GEM_BUG_ON(!list_empty(&wait.cb.node));

out:
	mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
	trace_i915_request_wait_end(rq);
	return timeout;
}

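/*
 * A minimal usage sketch for a bounded, interruptible wait (names local
 * to this comment):
 *
 *	long t = i915_request_wait_timeout(rq, I915_WAIT_INTERRUPTIBLE,
 *					   msecs_to_jiffies(100));
 *	if (t < 0)
 *		return t;	// -ETIME or -ERESTARTSYS
 *	// otherwise the request completed, with t jiffies of budget left
 */
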
/**
 * i915_request_wait - wait until execution of request has finished
 * @rq: the request to wait upon
 * @flags: how to wait
 * @timeout: how long to wait in jiffies
 *
 * i915_request_wait() waits for the request to be completed, for a
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
 *
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
 *
 * NOTE: This function behaves differently from dma-fence wait semantics for
 * timeout = 0. It returns 0 on success, and -ETIME if not signaled.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
{
	long ret = i915_request_wait_timeout(rq, flags, timeout);

	if (!ret)
		return -ETIME;

	if (ret > 0 && !timeout)
		return 0;

	return ret;
}

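/*
 * To contrast the timeout = 0 semantics of the two entry points
 * (illustrative only):
 *
 *	i915_request_wait_timeout(rq, 0, 0);	// 1 if signaled, else -ETIME
 *	i915_request_wait(rq, 0, 0);		// 0 if signaled, else -ETIME
 */
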
static int print_sched_attr(const struct i915_sched_attr *attr,
			    char *buf, int x, int len)
{
	if (attr->priority == I915_PRIORITY_INVALID)
		return x;

	x += snprintf(buf + x, len - x,
		      " prio=%d", attr->priority);

	return x;
}

static char queue_status(const struct i915_request *rq)
{
	if (i915_request_is_active(rq))
		return 'E';

	if (i915_request_is_ready(rq))
		return intel_engine_is_virtual(rq->engine) ? 'V' : 'R';

	return 'U';
}

static const char *run_status(const struct i915_request *rq)
{
	if (__i915_request_is_complete(rq))
		return "!";

	if (__i915_request_has_started(rq))
		return "*";

	if (!i915_sw_fence_signaled(&rq->semaphore))
		return "&";

	return "";
}

static const char *fence_status(const struct i915_request *rq)
{
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return "+";

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
		return "-";

	return "";
}

void i915_request_show(struct drm_printer *m,
		       const struct i915_request *rq,
		       const char *prefix,
		       int indent)
{
	const char __rcu *timeline;
	char buf[80] = "";
	int x = 0;

	/*
	 * The prefix is used to show the queue status, for which we use
	 * the following flags:
	 *
	 *  U [Unready]
	 *    - initial status upon being submitted by the user
	 *
	 *    - the request is not ready for execution as it is waiting
	 *      for external fences
	 *
	 *  R [Ready]
	 *    - all fences the request was waiting on have been signaled,
	 *      and the request is now ready for execution and will be
	 *      in a backend queue
	 *
	 *    - a ready request may still need to wait on semaphores
	 *      [internal fences]
	 *
	 *  V [Ready/virtual]
	 *    - same as ready, but queued over multiple backends
	 *
	 *  E [Executing]
	 *    - the request has been transferred from the backend queue and
	 *      submitted for execution on HW
	 *
	 *    - a completed request may still be regarded as executing, its
	 *      status may not be updated until it is retired and removed
	 *      from the lists
	 */

	x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));

	rcu_read_lock();
	timeline = dma_fence_timeline_name((struct dma_fence *)&rq->fence);
	drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n",
		   prefix, indent, "                ",
		   queue_status(rq),
		   rq->fence.context, rq->fence.seqno,
		   run_status(rq),
		   fence_status(rq),
		   buf,
		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
		   rcu_dereference(timeline));
	rcu_read_unlock();
}

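/*
 * Example of a resulting line (values purely illustrative):
 *
 *	E 15f2:14*-  prio=2 @ 18ms: ctx-name
 *
 * i.e. an executing ('E') request 15f2:14 which has started ('*') and has
 * signaling enabled ('-'), at priority 2, emitted 18ms ago on timeline
 * "ctx-name".
 */
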
static bool engine_match_ring(struct intel_engine_cs *engine, struct i915_request *rq)
{
	u32 ring = ENGINE_READ(engine, RING_START);

	return ring == i915_ggtt_offset(rq->ring->vma);
}

static bool match_ring(struct i915_request *rq)
{
	struct intel_engine_cs *engine;
	bool found;
	int i;

	if (!intel_engine_is_virtual(rq->engine))
		return engine_match_ring(rq->engine, rq);

	found = false;
	i = 0;
	while ((engine = intel_engine_get_sibling(rq->engine, i++))) {
		found = engine_match_ring(engine, rq);
		if (found)
			break;
	}

	return found;
}

enum i915_request_state i915_test_request_state(struct i915_request *rq)
{
	if (i915_request_completed(rq))
		return I915_REQUEST_COMPLETE;

	if (!i915_request_started(rq))
		return I915_REQUEST_PENDING;

	if (match_ring(rq))
		return I915_REQUEST_ACTIVE;

	return I915_REQUEST_QUEUED;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif

void i915_request_module_exit(void)
{
	kmem_cache_destroy(slab_execute_cbs);
	kmem_cache_destroy(slab_requests);
}

int __init i915_request_module_init(void)
{
	slab_requests =
		kmem_cache_create("i915_request",
				  sizeof(struct i915_request),
				  __alignof__(struct i915_request),
				  SLAB_HWCACHE_ALIGN |
				  SLAB_RECLAIM_ACCOUNT |
				  SLAB_TYPESAFE_BY_RCU,
				  __i915_request_ctor);
	if (!slab_requests)
		return -ENOMEM;

	slab_execute_cbs = KMEM_CACHE(execute_cb,
				      SLAB_HWCACHE_ALIGN |
				      SLAB_RECLAIM_ACCOUNT |
				      SLAB_TYPESAFE_BY_RCU);
	if (!slab_execute_cbs)
		goto err_requests;

	return 0;

err_requests:
	kmem_cache_destroy(slab_requests);
	return -ENOMEM;
}
