// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_guc_submit.h"

#include <linux/bitfield.h>
#include <linux/bitmap.h>
#include <linux/circ_buf.h>
#include <linux/delay.h>
#include <linux/dma-fence-array.h>
#include <linux/math64.h>

#include <drm/drm_managed.h>

#include "abi/guc_actions_abi.h"
#include "abi/guc_actions_slpc_abi.h"
#include "abi/guc_klvs_abi.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_force_wake.h"
#include "xe_gpu_scheduler.h"
#include "xe_gt.h"
#include "xe_gt_clock.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_map.h"
#include "xe_mocs.h"
#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
#include "xe_uc_fw.h"
#include "xe_vm.h"

static struct xe_guc *
exec_queue_to_guc(struct xe_exec_queue *q)
{
	return &q->gt->uc.guc;
}
/*
 * Helpers for engine state, using an atomic as some of the bits can transition
 * at the same time (e.g. a suspend can be happening at the same time as a
 * schedule engine done message being processed).
 */
#define EXEC_QUEUE_STATE_REGISTERED		(1 << 0)
#define EXEC_QUEUE_STATE_ENABLED		(1 << 1)
#define EXEC_QUEUE_STATE_PENDING_ENABLE		(1 << 2)
#define EXEC_QUEUE_STATE_PENDING_DISABLE	(1 << 3)
#define EXEC_QUEUE_STATE_DESTROYED		(1 << 4)
#define EXEC_QUEUE_STATE_SUSPENDED		(1 << 5)
#define EXEC_QUEUE_STATE_RESET			(1 << 6)
#define EXEC_QUEUE_STATE_KILLED			(1 << 7)
#define EXEC_QUEUE_STATE_WEDGED			(1 << 8)
#define EXEC_QUEUE_STATE_BANNED			(1 << 9)
#define EXEC_QUEUE_STATE_CHECK_TIMEOUT		(1 << 10)
#define EXEC_QUEUE_STATE_EXTRA_REF		(1 << 11)
#define EXEC_QUEUE_STATE_PENDING_RESUME		(1 << 12)
#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT	(1 << 13)
static bool exec_queue_registered(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_REGISTERED;
}

static void set_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static void clear_exec_queue_registered(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_REGISTERED, &q->guc->state);
}

static bool exec_queue_enabled(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_ENABLED;
}

static void set_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static void clear_exec_queue_enabled(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_ENABLED, &q->guc->state);
}

static bool exec_queue_pending_enable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_ENABLE;
}

static void set_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static void clear_exec_queue_pending_enable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_ENABLE, &q->guc->state);
}

static bool exec_queue_pending_disable(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_DISABLE;
}

static void set_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static void clear_exec_queue_pending_disable(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_DISABLE, &q->guc->state);
}

static bool exec_queue_destroyed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_DESTROYED;
}

static void set_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}

static bool exec_queue_banned(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
}

static void set_exec_queue_banned(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_BANNED, &q->guc->state);
}

static bool exec_queue_suspended(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_SUSPENDED;
}

static void set_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static void clear_exec_queue_suspended(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_SUSPENDED, &q->guc->state);
}

static bool exec_queue_reset(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_RESET;
}

static void set_exec_queue_reset(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_RESET, &q->guc->state);
}

static bool exec_queue_killed(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_KILLED;
}

static void set_exec_queue_killed(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_KILLED, &q->guc->state);
}

static bool exec_queue_wedged(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_WEDGED;
}

static void set_exec_queue_wedged(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_WEDGED, &q->guc->state);
}

static bool exec_queue_check_timeout(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_CHECK_TIMEOUT;
}

static void set_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static void clear_exec_queue_check_timeout(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_CHECK_TIMEOUT, &q->guc->state);
}

static bool exec_queue_extra_ref(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_EXTRA_REF;
}

static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
}

static void clear_exec_queue_extra_ref(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
}

static bool exec_queue_pending_resume(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
}

static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
}

static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
{
	return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT;
}

static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
{
	atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
}

static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
{
	atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
}

static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
	return (atomic_read(&q->guc->state) &
		(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_KILLED |
		 EXEC_QUEUE_STATE_BANNED));
}

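/*
 * drmm release action: wait for every GuC context ID to be released (the
 * lookup xarray going empty) and drain any queued async destroys before
 * tearing down the lookup table itself.
 */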
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int ret;

	ret = wait_event_timeout(guc->submission_state.fini_wq,
				 xa_empty(&guc->submission_state.exec_queue_lookup),
				 HZ * 5);

	drain_workqueue(xe->destroy_wq);

	xe_gt_assert(gt, ret);

	xa_destroy(&guc->submission_state.exec_queue_lookup);
}

static void guc_submit_wedged_fini(void *arg)
{
	struct xe_guc *guc = arg;
	struct xe_exec_queue *q;
	unsigned long index;

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
		if (exec_queue_wedged(q)) {
			mutex_unlock(&guc->submission_state.lock);
			xe_exec_queue_put(q);
			mutex_lock(&guc->submission_state.lock);
		}
	}
	mutex_unlock(&guc->submission_state.lock);
}

static const struct xe_exec_queue_ops guc_exec_queue_ops;

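/*
 * Prime lockdep with the fs_reclaim -> submission_state.lock ordering up
 * front, so that any later allocation made while holding the lock is flagged
 * as a potential reclaim deadlock.
 */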
static void primelockdep(struct xe_guc *guc)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);

	mutex_lock(&guc->submission_state.lock);
	mutex_unlock(&guc->submission_state.lock);

	fs_reclaim_release(GFP_KERNEL);
}

/**
 * xe_guc_submit_init() - Initialize GuC submission.
 * @guc: the &xe_guc to initialize
 * @num_ids: number of GuC context IDs to use
 *
 * The bare-metal or PF driver can pass ~0 as &num_ids to indicate that all
 * GuC context IDs supported by the GuC firmware should be used for submission.
 *
 * Only VF drivers have to provide an explicit number of GuC context IDs
 * that they can use for submission.
 *
 * Return: 0 on success or a negative error code on failure.
 */
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
{
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_gt *gt = guc_to_gt(guc);
	int err;

	err = drmm_mutex_init(&xe->drm, &guc->submission_state.lock);
	if (err)
		return err;

	err = xe_guc_id_mgr_init(&guc->submission_state.idm, num_ids);
	if (err)
		return err;

	gt->exec_queue_ops = &guc_exec_queue_ops;

	xa_init(&guc->submission_state.exec_queue_lookup);

	init_waitqueue_head(&guc->submission_state.fini_wq);

	primelockdep(guc);

	guc->submission_state.initialized = true;

	return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}

/*
 * Given that we want to guarantee enough RCS throughput to avoid missing
 * frames, we set the yield policy to 20% of each 80ms interval.
 */
#define RC_YIELD_DURATION 80	/* in ms */
#define RC_YIELD_RATIO 20	/* in percent */
static u32 *emit_render_compute_yield_klv(u32 *emit)
{
	*emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
	*emit++ = RC_YIELD_DURATION;
	*emit++ = RC_YIELD_RATIO;

	return emit;
}

#define SCHEDULING_POLICY_MAX_DWORDS 16
static int guc_init_global_schedule_policy(struct xe_guc *guc)
{
	u32 data[SCHEDULING_POLICY_MAX_DWORDS];
	u32 *emit = data;
	u32 count = 0;
	int ret;

	if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
		return 0;

	*emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;

	if (CCS_MASK(guc_to_gt(guc)))
		emit = emit_render_compute_yield_klv(emit);

	count = emit - data;
	if (count > 1) {
		xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);

		ret = xe_guc_ct_send_block(&guc->ct, data, count);
		if (ret < 0) {
			xe_gt_err(guc_to_gt(guc),
				  "failed to enable GuC scheduling policies: %pe\n",
				  ERR_PTR(ret));
			return ret;
		}
	}

	return 0;
}

int xe_guc_submit_enable(struct xe_guc *guc)
{
	int ret;

	ret = guc_init_global_schedule_policy(guc);
	if (ret)
		return ret;

	guc->submission_state.enabled = true;

	return 0;
}

void xe_guc_submit_disable(struct xe_guc *guc)
{
	guc->submission_state.enabled = false;
}

static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
	int i;

	lockdep_assert_held(&guc->submission_state.lock);

	for (i = 0; i < xa_count; ++i)
		xa_erase(&guc->submission_state.exec_queue_lookup, q->guc->id + i);

	xe_guc_id_mgr_release_locked(&guc->submission_state.idm,
				     q->guc->id, q->width);

	if (xa_empty(&guc->submission_state.exec_queue_lookup))
		wake_up(&guc->submission_state.fini_wq);
}

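/*
 * Reserve a contiguous range of q->width GuC context IDs and publish the
 * queue in the lookup xarray under each ID; on any xa_store() failure the
 * partially populated entries and the reserved IDs are released again.
 */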
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	int ret;
	int i;

	/*
	 * Must use GFP_NOWAIT as this lock is in the dma fence signalling path;
	 * worst case the user gets -ENOMEM on engine create and has to try again.
	 *
	 * FIXME: Have caller pre-alloc or post-alloc with GFP_KERNEL to prevent
	 * failure.
	 */
	lockdep_assert_held(&guc->submission_state.lock);

	ret = xe_guc_id_mgr_reserve_locked(&guc->submission_state.idm,
					   q->width);
	if (ret < 0)
		return ret;

	q->guc->id = ret;

	for (i = 0; i < q->width; ++i) {
		ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
				      q->guc->id + i, q, GFP_NOWAIT));
		if (ret)
			goto err_release;
	}

	return 0;

err_release:
	__release_guc_id(guc, q, i);

	return ret;
}

static void release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{
	mutex_lock(&guc->submission_state.lock);
	__release_guc_id(guc, q, q->width);
	mutex_unlock(&guc->submission_state.lock);
}

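/* Helpers to build an UPDATE_CONTEXT_POLICIES H2G message, one KLV at a time */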
struct exec_queue_policy {
	u32 count;
	struct guc_update_exec_queue_policy h2g;
};

static u32 __guc_exec_queue_policy_action_size(struct exec_queue_policy *policy)
{
	size_t bytes = sizeof(policy->h2g.header) +
		       (sizeof(policy->h2g.klv[0]) * policy->count);

	return bytes / sizeof(u32);
}

static void __guc_exec_queue_policy_start_klv(struct exec_queue_policy *policy,
					      u16 guc_id)
{
	policy->h2g.header.action =
		XE_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES;
	policy->h2g.header.guc_id = guc_id;
	policy->count = 0;
}

#define MAKE_EXEC_QUEUE_POLICY_ADD(func, id) \
static void __guc_exec_queue_policy_add_##func(struct exec_queue_policy *policy, \
					       u32 data) \
{ \
	XE_WARN_ON(policy->count >= GUC_CONTEXT_POLICIES_KLV_NUM_IDS); \
\
	policy->h2g.klv[policy->count].kl = \
		FIELD_PREP(GUC_KLV_0_KEY, \
			   GUC_CONTEXT_POLICIES_KLV_ID_##id) | \
		FIELD_PREP(GUC_KLV_0_LEN, 1); \
	policy->h2g.klv[policy->count].value = data; \
	policy->count++; \
}

MAKE_EXEC_QUEUE_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
MAKE_EXEC_QUEUE_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
MAKE_EXEC_QUEUE_POLICY_ADD(priority, SCHEDULING_PRIORITY)
MAKE_EXEC_QUEUE_POLICY_ADD(slpc_exec_queue_freq_req, SLPM_GT_FREQUENCY)
#undef MAKE_EXEC_QUEUE_POLICY_ADD

static const int xe_exec_queue_prio_to_guc[] = {
	[XE_EXEC_QUEUE_PRIORITY_LOW] = GUC_CLIENT_PRIORITY_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_NORMAL] = GUC_CLIENT_PRIORITY_KMD_NORMAL,
	[XE_EXEC_QUEUE_PRIORITY_HIGH] = GUC_CLIENT_PRIORITY_HIGH,
	[XE_EXEC_QUEUE_PRIORITY_KERNEL] = GUC_CLIENT_PRIORITY_KMD_HIGH,
};

static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;
	enum xe_exec_queue_priority prio = q->sched_props.priority;
	u32 timeslice_us = q->sched_props.timeslice_us;
	u32 slpc_exec_queue_freq_req = 0;
	u32 preempt_timeout_us = q->sched_props.preempt_timeout_us;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (q->flags & EXEC_QUEUE_FLAG_LOW_LATENCY)
		slpc_exec_queue_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]);
	__guc_exec_queue_policy_add_execution_quantum(&policy, timeslice_us);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, preempt_timeout_us);
	__guc_exec_queue_policy_add_slpc_exec_queue_freq_req(&policy,
							     slpc_exec_queue_freq_req);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_exec_queue *q)
{
	struct exec_queue_policy policy;

	__guc_exec_queue_policy_start_klv(&policy, q->guc->id);
	__guc_exec_queue_policy_add_preemption_timeout(&policy, 1);

	xe_guc_ct_send(&guc->ct, (u32 *)&policy.h2g,
		       __guc_exec_queue_policy_action_size(&policy), 0, 0);
}

#define parallel_read(xe_, map_, field_) \
	xe_map_rd_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_)
#define parallel_write(xe_, map_, field_, val_) \
	xe_map_wr_field(xe_, &map_, 0, struct guc_submit_parallel_scratch, \
			field_, val_)

static void __register_mlrc_exec_queue(struct xe_guc *guc,
				       struct xe_exec_queue *q,
				       struct guc_ctxt_registration_info *info)
{
#define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2)
	u32 action[MAX_MLRC_REG_SIZE];
	int len = 0;
	int i;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q));

	action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
	action[len++] = info->flags;
	action[len++] = info->context_idx;
	action[len++] = info->engine_class;
	action[len++] = info->engine_submit_mask;
	action[len++] = info->wq_desc_lo;
	action[len++] = info->wq_desc_hi;
	action[len++] = info->wq_base_lo;
	action[len++] = info->wq_base_hi;
	action[len++] = info->wq_size;
	action[len++] = q->width;
	action[len++] = info->hwlrca_lo;
	action[len++] = info->hwlrca_hi;

	for (i = 1; i < q->width; ++i) {
		struct xe_lrc *lrc = q->lrc[i];

		action[len++] = lower_32_bits(xe_lrc_descriptor(lrc));
		action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
	}

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), q->width ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
	xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE

	xe_guc_ct_send(&guc->ct, action, len, 0, 0);
}

static void __register_exec_queue(struct xe_guc *guc,
				  struct guc_ctxt_registration_info *info)
{
	u32 action[] = {
		XE_GUC_ACTION_REGISTER_CONTEXT,
		info->flags,
		info->context_idx,
		info->engine_class,
		info->engine_submit_mask,
		info->wq_desc_lo,
		info->wq_desc_hi,
		info->wq_base_lo,
		info->wq_base_hi,
		info->wq_size,
		info->hwlrca_lo,
		info->hwlrca_hi,
	};

	/* explicitly checks some fields that we might fixup later */
	xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
	xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
		     action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}

static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct xe_lrc *lrc = q->lrc[0];
	struct guc_ctxt_registration_info info;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);

	memset(&info, 0, sizeof(info));
	info.context_idx = q->guc->id;
	info.engine_class = xe_engine_class_to_guc_class(q->class);
	info.engine_submit_mask = q->logical_mask;
	info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
	info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
	info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
		     FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);

	if (xe_exec_queue_is_parallel(q)) {
		u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
		struct iosys_map map = xe_lrc_parallel_map(lrc);

		info.wq_desc_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_desc_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq_desc));
		info.wq_base_lo = lower_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_base_hi = upper_32_bits(ggtt_addr +
			offsetof(struct guc_submit_parallel_scratch, wq[0]));
		info.wq_size = WQ_SIZE;

		q->guc->wqi_head = 0;
		q->guc->wqi_tail = 0;
		xe_map_memset(xe, &map, 0, 0, PARALLEL_SCRATCH_SIZE - WQ_SIZE);
		parallel_write(xe, map, wq_desc.wq_status, WQ_STATUS_ACTIVE);
	}

	/*
	 * We must keep a reference for LR engines while they are registered
	 * with the GuC: their jobs signal immediately, and we can't destroy an
	 * engine the GuC still holds a reference to.
	 */
	if (xe_exec_queue_is_lr(q))
		xe_exec_queue_get(q);

	set_exec_queue_registered(q);
	trace_xe_exec_queue_register(q);
	if (xe_exec_queue_is_parallel(q))
		__register_mlrc_exec_queue(guc, q, &info);
	else
		__register_exec_queue(guc, &info);
	init_policies(guc, q);
}

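/* Bytes left in the parallel work queue before the write offset wraps to 0 */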
static u32 wq_space_until_wrap(struct xe_exec_queue *q)
{
	return (WQ_SIZE - q->guc->wqi_tail);
}

static bool vf_recovery(struct xe_guc *guc)
{
	return xe_gt_recovery_pending(guc_to_gt(guc));
}

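/*
 * Sleep for roughly @delay_ms: short delays use usleep_range() with a little
 * slack, since msleep() can overshoot badly for small values; anything over
 * 20 ms just uses msleep().
 */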
static inline void relaxed_ms_sleep(unsigned int delay_ms)
{
	unsigned long min_us, max_us;

	if (!delay_ms)
		return;

	if (delay_ms > 20) {
		msleep(delay_ms);
		return;
	}

	min_us = mul_u32_u32(delay_ms, 1000);
	max_us = min_us + 500;

	usleep_range(min_us, max_us);
}

static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	unsigned int sleep_period_ms = 1, sleep_total_ms = 0;

#define AVAILABLE_SPACE \
	CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
	if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
		q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
		if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
			if (sleep_total_ms > 2000) {
				xe_gt_reset_async(q->gt);
				return -ENODEV;
			}

			msleep(sleep_period_ms);
			sleep_total_ms += sleep_period_ms;
			if (sleep_period_ms < 64)
				sleep_period_ms <<= 1;
			goto try_again;
		}
	}
#undef AVAILABLE_SPACE

	return 0;
}

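/*
 * Pad the remainder of the work queue with a NOOP item so the next real item
 * can start at offset 0, since work queue items are written contiguously and
 * never split across the wrap point.
 */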
static int wq_noop_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
	u32 len_dw = wq_space_until_wrap(q) / sizeof(u32) - 1;

	if (wq_wait_for_space(q, wq_space_until_wrap(q)))
		return -ENODEV;

	xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw));

	parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)],
		       FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
		       FIELD_PREP(WQ_LEN_MASK, len_dw));
	q->guc->wqi_tail = 0;

	return 0;
}

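/*
 * Append a MULTI_LRC work item: a header, the leading LRC descriptor, the
 * guc_id plus leading ring tail, then the ring tail of every other LRC in
 * the parallel queue.
 */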
static void wq_item_append(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);
	struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
#define WQ_HEADER_SIZE 4	/* Includes 1 LRC address too */
	u32 wqi[XE_HW_ENGINE_MAX_INSTANCE + (WQ_HEADER_SIZE - 1)];
	u32 wqi_size = (q->width + (WQ_HEADER_SIZE - 1)) * sizeof(u32);
	u32 len_dw = (wqi_size / sizeof(u32)) - 1;
	int i = 0, j;

	if (wqi_size > wq_space_until_wrap(q)) {
		if (wq_noop_append(q))
			return;
	}
	if (wq_wait_for_space(q, wqi_size))
		return;

	wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
		   FIELD_PREP(WQ_LEN_MASK, len_dw);
	wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
	wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
		   FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
	wqi[i++] = 0;
	for (j = 1; j < q->width; ++j) {
		struct xe_lrc *lrc = q->lrc[j];

		wqi[i++] = lrc->ring.tail / sizeof(u64);
	}

	xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32));

	iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch,
				      wq[q->guc->wqi_tail / sizeof(u32)]));
	xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size);
	q->guc->wqi_tail += wqi_size;
	xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE);

	xe_device_wmb(xe);

	map = xe_lrc_parallel_map(q->lrc[0]);
	parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}

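/* resume_time value while a scheduling enable is still in flight to the GuC */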
#define RESUME_PENDING	~0x0ull
static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_lrc *lrc = q->lrc[0];
	u32 action[3];
	u32 g2h_len = 0;
	u32 num_g2h = 0;
	int len = 0;
	bool extra_submit = false;

	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));

	if (!job->restore_replay || job->last_replay) {
		if (xe_exec_queue_is_parallel(q))
			wq_item_append(q);
		else
			xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
		job->last_replay = false;
	}

	if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
		return;

	if (!exec_queue_enabled(q) && !exec_queue_suspended(q)) {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
		action[len++] = q->guc->id;
		action[len++] = GUC_CONTEXT_ENABLE;
		g2h_len = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET;
		num_g2h = 1;
		if (xe_exec_queue_is_parallel(q))
			extra_submit = true;

		q->guc->resume_time = RESUME_PENDING;
		set_exec_queue_pending_enable(q);
		set_exec_queue_enabled(q);
		trace_xe_exec_queue_scheduling_enable(q);
	} else {
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);
	}

	xe_guc_ct_send(&guc->ct, action, len, g2h_len, num_g2h);

	if (extra_submit) {
		len = 0;
		action[len++] = XE_GUC_ACTION_SCHED_CONTEXT;
		action[len++] = q->guc->id;
		trace_xe_exec_queue_submit(q);

		xe_guc_ct_send(&guc->ct, action, len, 0, 0);
	}
}

static struct dma_fence *
guc_exec_queue_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged =
		exec_queue_killed_or_banned_or_wedged(q);

	xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
		     exec_queue_banned(q) || exec_queue_suspended(q));

	trace_xe_sched_job_run(job);

	if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
		if (!exec_queue_registered(q))
			register_exec_queue(q, GUC_CONTEXT_NORMAL);
		if (!job->restore_replay)
			q->ring_ops->emit_job(job);
		submit_exec_queue(q, job);
		job->restore_replay = false;
	}

	/*
	 * We don't care about job-fence ordering in LR VMs because these fences
	 * are never exported; they are used solely to keep jobs on the pending
	 * list. Once a queue enters an error state, there's no need to track
	 * them.
	 */
	if (killed_or_banned_or_wedged && lr)
		xe_sched_job_set_error(job, -ECANCELED);

	return job->fence;
}

static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	trace_xe_sched_job_free(job);
	xe_sched_job_put(job);
}

int xe_guc_read_stopped(struct xe_guc *guc)
{
	return atomic_read(&guc->submission_state.stopped);
}

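/* Declare the 3-dword H2G payload for a scheduling enable/disable request */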
#define MAKE_SCHED_CONTEXT_ACTION(q, enable_disable) \
	u32 action[] = { \
		XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET, \
		q->guc->id, \
		GUC_CONTEXT_##enable_disable, \
	}

static void disable_scheduling_deregister(struct xe_guc *guc,
					  struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	int ret;

	set_min_preemption_timeout(guc, q);
	smp_rmb();
	ret = wait_event_timeout(guc->ct.wq,
				 (!exec_queue_pending_enable(q) &&
				  !exec_queue_pending_disable(q)) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc),
				 HZ * 5);
	if (!ret && !vf_recovery(guc)) {
		struct xe_gpu_scheduler *sched = &q->guc->sched;

		xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
		xe_sched_submission_start(sched);
		xe_gt_reset_async(q->gt);
		if (!xe_exec_queue_is_lr(q))
			xe_sched_tdr_queue_imm(sched);
		return;
	}

	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_scheduling_disable(q);

	/*
	 * Reserve space for both G2H here, as the 2nd G2H is sent from a G2H
	 * handler and we are not allowed to reserve G2H space in handlers.
	 */
	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET +
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 2);
}

static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	/* Wake up the xe_wait_user_fence ioctl if the exec queue is reset */
	wake_up_all(&xe->ufence_wq);

	if (xe_exec_queue_is_lr(q))
		queue_work(guc_to_gt(guc)->ordered_wq, &q->guc->lr_tdr);
	else
		xe_sched_tdr_queue_imm(&q->guc->sched);
}

/**
 * xe_guc_submit_wedge() - Wedge GuC submission
 * @guc: the GuC object
 *
 * Save the state of exec queues registered with the GuC by taking a ref to
 * each queue. Register a cleanup action to drop the refs upon driver unload.
 */
void xe_guc_submit_wedge(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	struct xe_exec_queue *q;
	unsigned long index;
	int err;

	xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);

	/*
	 * If the device is being wedged even before submission_state is
	 * initialized, there's nothing to do here.
	 */
	if (!guc->submission_state.initialized)
		return;

	err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
				       guc_submit_wedged_fini, guc);
	if (err) {
		xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2, although device is wedged\n");
		return;
	}

	mutex_lock(&guc->submission_state.lock);
	xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
		if (xe_exec_queue_get_unless_zero(q))
			set_exec_queue_wedged(q);
	mutex_unlock(&guc->submission_state.lock);
}

static bool guc_submit_hint_wedged(struct xe_guc *guc)
{
	struct xe_device *xe = guc_to_xe(guc);

	if (xe->wedged.mode != 2)
		return false;

	if (xe_device_wedged(xe))
		return true;

	xe_device_declare_wedged(xe);

	return true;
}

static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, lr_tdr);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_gpu_scheduler *sched = &ge->sched;
	struct xe_sched_job *job;
	bool wedged = false;

	xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));

	if (vf_recovery(guc))
		return;

	trace_xe_exec_queue_lr_cleanup(q);

	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Kill the run_job / process_msg entry points */
	xe_sched_submission_stop(sched);

	/*
	 * Engine state now mostly stable, disable scheduling / deregister if
	 * needed. This cleanup routine might be called multiple times, where
	 * the actual async engine deregister drops the final engine ref.
	 * Calling disable_scheduling_deregister will mark the engine as
	 * destroyed and fire off the CT requests to disable scheduling /
	 * deregister, which we only want to do once. We also don't want to mark
	 * the engine as pending_disable again as this may race with the
	 * xe_guc_deregister_done_handler() which treats it as an unexpected
	 * state.
	 */
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		struct xe_guc *guc = exec_queue_to_guc(q);
		int ret;

		set_exec_queue_banned(q);
		disable_scheduling_deregister(guc, q);

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 */
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			return;

		if (!ret) {
			xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
				   q->guc->id);
			xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n",
				       q->guc->id);
			xe_sched_submission_start(sched);
			xe_gt_reset_async(q->gt);
			return;
		}
	}

	if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
		xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);

	xe_hw_fence_irq_stop(q->fence_irq);

	xe_sched_submission_start(sched);

	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(job, -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	xe_hw_fence_irq_start(q->fence_irq);
}

#define ADJUST_FIVE_PERCENT(__t)	mul_u64_u32_div(__t, 105, 100)

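/*
 * Sample the LRC context timestamps to compute how long the job has actually
 * been running on the hardware, treating it as timed out once that time comes
 * within 5% of the configured job timeout. Jobs that never started are
 * invalidated after repeated TDR passes.
 */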
static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
{
	struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
	u32 ctx_timestamp, ctx_job_timestamp;
	u32 timeout_ms = q->sched_props.job_timeout_ms;
	u32 diff;
	u64 running_time_ms;

	if (!xe_sched_job_started(job)) {
		xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
			   xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			   q->guc->id);

		return xe_sched_invalidate_job(job, 2);
	}

	ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
	ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);

	/*
	 * The counter wraps at ~223s at the usual 19.2MHz; be paranoid and
	 * catch possible overflows with a high timeout.
	 */
	xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);

	diff = ctx_timestamp - ctx_job_timestamp;

	/*
	 * Treat the job as timed out if it is within 5% of the limit, to
	 * account for GuC scheduling latency.
	 */
	running_time_ms =
		ADJUST_FIVE_PERCENT(xe_gt_clock_interval_to_ms(gt, diff));

	xe_gt_dbg(gt,
		  "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, running_time_ms=%llu, timeout_ms=%u, diff=0x%08x",
		  xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
		  q->guc->id, running_time_ms, timeout_ms, diff);

	return running_time_ms >= timeout_ms;
}

static void enable_scheduling(struct xe_exec_queue *q)
{
	MAKE_SCHED_CONTEXT_ACTION(q, ENABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);
	int ret;

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));

	set_exec_queue_pending_enable(q);
	set_exec_queue_enabled(q);
	trace_xe_exec_queue_scheduling_enable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);

	ret = wait_event_timeout(guc->ct.wq,
				 !exec_queue_pending_enable(q) ||
				 xe_guc_read_stopped(guc) ||
				 vf_recovery(guc), HZ * 5);
	if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
		xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
		set_exec_queue_banned(q);
		xe_gt_reset_async(q->gt);
		if (!xe_exec_queue_is_lr(q))
			xe_sched_tdr_queue_imm(&q->guc->sched);
	}
}

static void disable_scheduling(struct xe_exec_queue *q, bool immediate)
{
	MAKE_SCHED_CONTEXT_ACTION(q, DISABLE);
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	if (immediate)
		set_min_preemption_timeout(guc, q);
	clear_exec_queue_enabled(q);
	set_exec_queue_pending_disable(q);
	trace_xe_exec_queue_scheduling_disable(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_SCHED_CONTEXT_MODE_SET, 1);
}

static void __deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q)
{
	u32 action[] = {
		XE_GUC_ACTION_DEREGISTER_CONTEXT,
		q->guc->id,
	};

	xe_gt_assert(guc_to_gt(guc), !exec_queue_destroyed(q));
	xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q));
	xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q));

	set_exec_queue_destroyed(q);
	trace_xe_exec_queue_deregister(q);

	xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action),
		       G2H_LEN_DW_DEREGISTER_CONTEXT, 1);
}

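/*
 * TDR (timeout detection and recovery) handler: stop submission, capture
 * error state, disable scheduling to sample the context timestamp, then
 * either re-enable the queue (false positive) or ban it, signal its fences
 * and deregister it from the GuC.
 */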
static enum drm_gpu_sched_stat
guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_sched_job *tmp_job;
	struct xe_exec_queue *q = job->q;
	struct xe_gpu_scheduler *sched = &q->guc->sched;
	struct xe_guc *guc = exec_queue_to_guc(q);
	const char *process_name = "no process";
	struct xe_device *xe = guc_to_xe(guc);
	unsigned int fw_ref;
	int err = -ETIME;
	pid_t pid = -1;
	int i = 0;
	bool wedged = false, skip_timeout_check;

	xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_lr(q));

	/*
	 * The TDR has fired before the free job worker. Common if the exec
	 * queue was immediately closed after the last fence signaled. Add the
	 * job back to the pending list so it can be freed, and kick the
	 * scheduler to ensure the free job is not lost.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
	    vf_recovery(guc))
		return DRM_GPU_SCHED_STAT_NO_HANG;

	/* Kill the run_job entry point */
	xe_sched_submission_stop(sched);

	/* Must check all state after stopping scheduler */
	skip_timeout_check = exec_queue_reset(q) ||
		exec_queue_killed_or_banned_or_wedged(q) ||
		exec_queue_destroyed(q);

	/*
	 * If a devcoredump hasn't been captured and the GuC capture for the
	 * job is not ready, do a manual capture first and decide later if we
	 * need to use it.
	 */
	if (!exec_queue_killed(q) && !xe->devcoredump.captured &&
	    !xe_guc_capture_get_matching_and_lock(q)) {
		/* take force wake before engine register manual capture */
		fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
		if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL))
			xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n");

		xe_engine_snapshot_capture_for_queue(q);

		xe_force_wake_put(gt_to_fw(q->gt), fw_ref);
	}

	/*
	 * XXX: Sampling the timeout doesn't work in wedged mode as we have to
	 * modify scheduling state to read the timestamp. We could read the
	 * timestamp from a register to accumulate the current running time,
	 * but this doesn't work for SRIOV. For now assume timeouts in wedged
	 * mode are genuine timeouts.
	 */
	if (!exec_queue_killed(q))
		wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));

	/* Engine state now stable, disable scheduling to check timestamp */
	if (!wedged && exec_queue_registered(q)) {
		int ret;

		if (exec_queue_reset(q))
			err = -EIO;

		if (!exec_queue_destroyed(q)) {
			/*
			 * Wait for any pending G2H to flush out before
			 * modifying state
			 */
			ret = wait_event_timeout(guc->ct.wq,
						 (!exec_queue_pending_enable(q) &&
						  !exec_queue_pending_disable(q)) ||
						 xe_guc_read_stopped(guc) ||
						 vf_recovery(guc), HZ * 5);
			if (vf_recovery(guc))
				goto handle_vf_resume;
			if (!ret || xe_guc_read_stopped(guc))
				goto trigger_reset;

			/*
			 * The flag communicates to the G2H handler that the
			 * schedule disable originated from a timeout check.
			 * The G2H handler then avoids triggering cleanup or
			 * deregistering the exec queue.
			 */
			set_exec_queue_check_timeout(q);
			disable_scheduling(q, skip_timeout_check);
		}

		/*
		 * Must wait for scheduling to be disabled before signalling
		 * any fences; if the GT is broken the GT reset code should
		 * signal us.
		 *
		 * FIXME: Tests can generate a ton of 0x6000 (IOMMU CAT fault
		 * error) messages which can cause the schedule disable to get
		 * lost. If this occurs, trigger a GT reset to recover.
		 */
		smp_rmb();
		ret = wait_event_timeout(guc->ct.wq,
					 !exec_queue_pending_disable(q) ||
					 xe_guc_read_stopped(guc) ||
					 vf_recovery(guc), HZ * 5);
		if (vf_recovery(guc))
			goto handle_vf_resume;
		if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
			if (!ret)
				xe_gt_warn(guc_to_gt(guc),
					   "Schedule disable failed to respond, guc_id=%d",
					   q->guc->id);
			xe_devcoredump(q, job,
				       "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d",
				       q->guc->id, ret, xe_guc_read_stopped(guc));
			set_exec_queue_extra_ref(q);
			xe_exec_queue_get(q);	/* GT reset owns this */
			set_exec_queue_banned(q);
			xe_gt_reset_async(q->gt);
			xe_sched_tdr_queue_imm(sched);
			goto rearm;
		}
	}

	/*
	 * Check whether the job actually timed out; if not, re-enable
	 * scheduling and restart job execution and the TDR.
	 */
	if (!wedged && !skip_timeout_check && !check_timeout(q, job) &&
	    !exec_queue_reset(q) && exec_queue_registered(q)) {
		clear_exec_queue_check_timeout(q);
		goto sched_enable;
	}

	if (q->vm && q->vm->xef) {
		process_name = q->vm->xef->process_name;
		pid = q->vm->xef->pid;
	}

	if (!exec_queue_killed(q))
		xe_gt_notice(guc_to_gt(guc),
			     "Timedout job: seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx in %s [%d]",
			     xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			     q->guc->id, q->flags, process_name, pid);

	trace_xe_sched_job_timedout(job);

	if (!exec_queue_killed(q))
		xe_devcoredump(q, job,
			       "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx",
			       xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
			       q->guc->id, q->flags);

	/*
	 * Kernel jobs should never fail, nor should VM jobs; if they do,
	 * something has gone wrong and the GT needs a reset.
	 */
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_KERNEL,
		   "Kernel-submitted job timed out\n");
	xe_gt_WARN(q->gt, q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q),
		   "VM job timed out on non-killed execqueue\n");
	if (!wedged && (q->flags & EXEC_QUEUE_FLAG_KERNEL ||
			(q->flags & EXEC_QUEUE_FLAG_VM && !exec_queue_killed(q)))) {
		if (!xe_sched_invalidate_job(job, 2)) {
			clear_exec_queue_check_timeout(q);
			xe_gt_reset_async(q->gt);
			goto rearm;
		}
	}

	/* Finish cleaning up exec queue via deregister */
	set_exec_queue_banned(q);
	if (!wedged && exec_queue_registered(q) && !exec_queue_destroyed(q)) {
		set_exec_queue_extra_ref(q);
		xe_exec_queue_get(q);
		__deregister_exec_queue(guc, q);
	}

	/* Stop fence signaling */
	xe_hw_fence_irq_stop(q->fence_irq);

	/*
	 * Fence state now stable, stop / start scheduler which cleans up any
	 * fences that are complete
	 */
	xe_sched_add_pending_job(sched, job);
	xe_sched_submission_start(sched);

	xe_guc_exec_queue_trigger_cleanup(q);

	/* Mark all outstanding jobs as bad, thus completing them */
	spin_lock(&sched->base.job_list_lock);
	list_for_each_entry(tmp_job, &sched->base.pending_list, drm.list)
		xe_sched_job_set_error(tmp_job, !i++ ? err : -ECANCELED);
	spin_unlock(&sched->base.job_list_lock);

	/* Start fence signaling */
	xe_hw_fence_irq_start(q->fence_irq);

	return DRM_GPU_SCHED_STAT_RESET;

sched_enable:
	set_exec_queue_pending_tdr_exit(q);
	enable_scheduling(q);
rearm:
	/*
	 * XXX: Ideally we would adjust the timeout based on the current
	 * execution time, but there is currently no easy way to do so in the
	 * DRM scheduler. With some thought, do this in a follow up.
	 */
	xe_sched_submission_start(sched);
handle_vf_resume:
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

static void guc_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_guc_exec_queue *ge = q->guc;
	struct xe_guc *guc = exec_queue_to_guc(q);

	release_guc_id(guc, q);
	xe_sched_entity_fini(&ge->entity);
	xe_sched_fini(&ge->sched);

	/*
	 * RCU free due to the sched being exported via DRM scheduler fences
	 * (timeline name).
	 */
	kfree_rcu(ge, rcu);
}

static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_guc_exec_queue *ge =
		container_of(w, struct xe_guc_exec_queue, destroy_async);
	struct xe_exec_queue *q = ge->q;
	struct xe_guc *guc = exec_queue_to_guc(q);

	xe_pm_runtime_get(guc_to_xe(guc));
	trace_xe_exec_queue_destroy(q);

	if (xe_exec_queue_is_lr(q))
		cancel_work_sync(&ge->lr_tdr);
	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&ge->sched.base.work_tdr);

	xe_exec_queue_fini(q);

	xe_pm_runtime_put(guc_to_xe(guc));
}

static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
	struct xe_guc *guc = exec_queue_to_guc(q);
	struct xe_device *xe = guc_to_xe(guc);

	INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);

	/* We must block on kernel engines so slabs are empty on driver unload */
	if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
		__guc_exec_queue_destroy_async(&q->guc->destroy_async);
	else
		queue_work(xe->destroy_wq, &q->guc->destroy_async);
}

| 1505 | static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q) |
| 1506 | { |
| 1507 | /* |
| 1508 | * Might be called from within the GPU scheduler; this needs to be async
| 1509 | * as we fini the scheduler when the engine is fini'd, and the scheduler
| 1510 | * can't complete its own fini from within itself (circular dependency).
| 1511 | * Async resolves this, and we don't really care when everything is
| 1512 | * fini'd, just that it is.
| 1513 | */ |
| 1514 | guc_exec_queue_destroy_async(q); |
| 1515 | } |
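|      | /*
|      | * A sketch of the async-destroy pattern described above, with
|      | * hypothetical names: teardown that cannot run in the caller's context
|      | * (here, the scheduler's own call stack) is handed off to a work item.
|      | */
|      | struct example {
|      | struct work_struct destroy_work;
|      | };
|      |
|      | static void example_destroy_work(struct work_struct *w)
|      | {
|      | struct example *e = container_of(w, struct example, destroy_work);
|      |
|      | kfree(e); /* safe: no longer on the scheduler's call stack */
|      | }
|      |
|      | static void example_destroy(struct example *e)
|      | {
|      | INIT_WORK(&e->destroy_work, example_destroy_work);
|      | schedule_work(&e->destroy_work);
|      | }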
| 1516 | |
| 1517 | static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) |
| 1518 | { |
| 1519 | struct xe_exec_queue *q = msg->private_data; |
| 1520 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1521 | |
| 1522 | xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); |
| 1523 | trace_xe_exec_queue_cleanup_entity(q); |
| 1524 | |
| 1525 | /* |
| 1526 | * Expected state transitions for cleanup: |
| 1527 | * - If the exec queue is registered and GuC firmware is running, we must first |
| 1528 | * disable scheduling and deregister the queue to ensure proper teardown and |
| 1529 | * resource release in the GuC, then destroy the exec queue on driver side. |
| 1530 | * - If the GuC is already stopped (e.g., during driver unload or GPU reset), |
| 1531 | * we cannot expect a response for the deregister request. In this case, |
| 1532 | * it is safe to directly destroy the exec queue on driver side, as the GuC |
| 1533 | * will not process further requests and all resources must be cleaned up locally. |
| 1534 | */ |
| 1535 | if (exec_queue_registered(q) && xe_uc_fw_is_running(uc_fw: &guc->fw)) |
| 1536 | disable_scheduling_deregister(guc, q); |
| 1537 | else |
| 1538 | __guc_exec_queue_destroy(guc, q); |
| 1539 | } |
| 1540 | |
| 1541 | static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q) |
| 1542 | { |
| 1543 | return !exec_queue_killed_or_banned_or_wedged(q) && exec_queue_registered(q); |
| 1544 | } |
| 1545 | |
| 1546 | static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *msg) |
| 1547 | { |
| 1548 | struct xe_exec_queue *q = msg->private_data; |
| 1549 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1550 | |
| 1551 | if (guc_exec_queue_allowed_to_change_state(q)) |
| 1552 | init_policies(guc, q); |
| 1553 | kfree(objp: msg); |
| 1554 | } |
| 1555 | |
| 1556 | static void __suspend_fence_signal(struct xe_exec_queue *q) |
| 1557 | { |
| 1558 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1559 | struct xe_device *xe = guc_to_xe(guc); |
| 1560 | |
| 1561 | if (!q->guc->suspend_pending) |
| 1562 | return; |
| 1563 | |
| 1564 | WRITE_ONCE(q->guc->suspend_pending, false); |
| 1565 | |
| 1566 | /* |
| 1567 | * We use a GuC shared wait queue for VFs because the VF resfix start |
| 1568 | * interrupt must be able to wake all instances of suspend_wait. This |
| 1569 | * prevents the VF migration worker from being starved during |
| 1570 | * scheduling. |
| 1571 | */ |
| 1572 | if (IS_SRIOV_VF(xe)) |
| 1573 | wake_up_all(&guc->ct.wq); |
| 1574 | else |
| 1575 | wake_up(&q->guc->suspend_wait); |
| 1576 | } |
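|      | /*
|      | * A sketch of the wake-up split above, illustrative names only: a
|      | * broadcast wake on a shared queue forces every waiter to re-evaluate
|      | * its own condition, while a private queue with one logical waiter needs
|      | * only a single wake.
|      | */
|      | static void example_signal(wait_queue_head_t *wq, bool shared, bool *pending)
|      | {
|      | WRITE_ONCE(*pending, false);
|      | if (shared)
|      | wake_up_all(wq); /* all waiters recheck their conditions */
|      | else
|      | wake_up(wq); /* single logical waiter */
|      | }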
| 1577 | |
| 1578 | static void suspend_fence_signal(struct xe_exec_queue *q) |
| 1579 | { |
| 1580 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1581 | |
| 1582 | xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || |
| 1583 | xe_guc_read_stopped(guc)); |
| 1584 | xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); |
| 1585 | |
| 1586 | __suspend_fence_signal(q); |
| 1587 | } |
| 1588 | |
| 1589 | static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg) |
| 1590 | { |
| 1591 | struct xe_exec_queue *q = msg->private_data; |
| 1592 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1593 | |
| 1594 | if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) && |
| 1595 | exec_queue_enabled(q)) { |
| 1596 | wait_event(guc->ct.wq, vf_recovery(guc) || |
| 1597 | ((q->guc->resume_time != RESUME_PENDING || |
| 1598 | xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q))); |
| 1599 | |
| 1600 | if (!xe_guc_read_stopped(guc)) { |
| 1601 | s64 since_resume_ms = |
| 1602 | ktime_ms_delta(later: ktime_get(), |
| 1603 | earlier: q->guc->resume_time); |
| 1604 | s64 wait_ms = q->vm->preempt.min_run_period_ms - |
| 1605 | since_resume_ms; |
| 1606 | |
| 1607 | if (wait_ms > 0 && q->guc->resume_time) |
| 1608 | relaxed_ms_sleep(delay_ms: wait_ms); |
| 1609 | |
| 1610 | set_exec_queue_suspended(q); |
| 1611 | disable_scheduling(q, immediate: false); |
| 1612 | } |
| 1613 | } else if (q->guc->suspend_pending) { |
| 1614 | set_exec_queue_suspended(q); |
| 1615 | suspend_fence_signal(q); |
| 1616 | } |
| 1617 | } |
| 1618 | |
| 1619 | static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) |
| 1620 | { |
| 1621 | struct xe_exec_queue *q = msg->private_data; |
| 1622 | |
| 1623 | if (guc_exec_queue_allowed_to_change_state(q)) { |
| 1624 | clear_exec_queue_suspended(q); |
| 1625 | if (!exec_queue_enabled(q)) { |
| 1626 | q->guc->resume_time = RESUME_PENDING; |
| 1627 | set_exec_queue_pending_resume(q); |
| 1628 | enable_scheduling(q); |
| 1629 | } |
| 1630 | } else { |
| 1631 | clear_exec_queue_suspended(q); |
| 1632 | } |
| 1633 | } |
| 1634 | |
| 1635 | #define CLEANUP 1 /* Non-zero values to catch uninitialized msg */ |
| 1636 | #define SET_SCHED_PROPS 2 |
| 1637 | #define SUSPEND 3 |
| 1638 | #define RESUME 4 |
| 1639 | #define OPCODE_MASK 0xf |
| 1640 | #define MSG_LOCKED BIT(8) |
| 1641 | #define MSG_HEAD BIT(9) |
| 1642 | |
| 1643 | static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) |
| 1644 | { |
| 1645 | struct xe_device *xe = guc_to_xe(guc: exec_queue_to_guc(q: msg->private_data)); |
| 1646 | |
| 1647 | trace_xe_sched_msg_recv(msg); |
| 1648 | |
| 1649 | switch (msg->opcode) { |
| 1650 | case CLEANUP: |
| 1651 | __guc_exec_queue_process_msg_cleanup(msg); |
| 1652 | break; |
| 1653 | case SET_SCHED_PROPS: |
| 1654 | __guc_exec_queue_process_msg_set_sched_props(msg); |
| 1655 | break; |
| 1656 | case SUSPEND: |
| 1657 | __guc_exec_queue_process_msg_suspend(msg); |
| 1658 | break; |
| 1659 | case RESUME: |
| 1660 | __guc_exec_queue_process_msg_resume(msg); |
| 1661 | break; |
| 1662 | default: |
| 1663 | XE_WARN_ON("Unknown message type" ); |
| 1664 | } |
| 1665 | |
| 1666 | xe_pm_runtime_put(xe); |
| 1667 | } |
| 1668 | |
| 1669 | static const struct drm_sched_backend_ops drm_sched_ops = { |
| 1670 | .run_job = guc_exec_queue_run_job, |
| 1671 | .free_job = guc_exec_queue_free_job, |
| 1672 | .timedout_job = guc_exec_queue_timedout_job, |
| 1673 | }; |
| 1674 | |
| 1675 | static const struct xe_sched_backend_ops xe_sched_ops = { |
| 1676 | .process_msg = guc_exec_queue_process_msg, |
| 1677 | }; |
| 1678 | |
| 1679 | static int guc_exec_queue_init(struct xe_exec_queue *q) |
| 1680 | { |
| 1681 | struct xe_gpu_scheduler *sched; |
| 1682 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1683 | struct xe_guc_exec_queue *ge; |
| 1684 | long timeout; |
| 1685 | int err, i; |
| 1686 | |
| 1687 | xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); |
| 1688 | |
| 1689 | ge = kzalloc(sizeof(*ge), GFP_KERNEL); |
| 1690 | if (!ge) |
| 1691 | return -ENOMEM; |
| 1692 | |
| 1693 | q->guc = ge; |
| 1694 | ge->q = q; |
| 1695 | init_rcu_head(head: &ge->rcu); |
| 1696 | init_waitqueue_head(&ge->suspend_wait); |
| 1697 | |
| 1698 | for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i) |
| 1699 | INIT_LIST_HEAD(list: &ge->static_msgs[i].link); |
| 1700 | |
| 1701 | timeout = (q->vm && xe_vm_in_lr_mode(vm: q->vm)) ? MAX_SCHEDULE_TIMEOUT : |
| 1702 | msecs_to_jiffies(m: q->sched_props.job_timeout_ms); |
| 1703 | err = xe_sched_init(sched: &ge->sched, ops: &drm_sched_ops, xe_ops: &xe_sched_ops, |
| 1704 | NULL, hw_submission: xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, hang_limit: 64, |
| 1705 | timeout, timeout_wq: guc_to_gt(guc)->ordered_wq, NULL, |
| 1706 | name: q->name, gt_to_xe(q->gt)->drm.dev); |
| 1707 | if (err) |
| 1708 | goto err_free; |
| 1709 | |
| 1710 | sched = &ge->sched; |
| 1711 | err = xe_sched_entity_init(entity: &ge->entity, sched); |
| 1712 | if (err) |
| 1713 | goto err_sched; |
| 1714 | |
| 1715 | if (xe_exec_queue_is_lr(q)) |
| 1716 | INIT_WORK(&q->guc->lr_tdr, xe_guc_exec_queue_lr_cleanup); |
| 1717 | |
| 1718 | mutex_lock(&guc->submission_state.lock); |
| 1719 | |
| 1720 | err = alloc_guc_id(guc, q); |
| 1721 | if (err) |
| 1722 | goto err_entity; |
| 1723 | |
| 1724 | q->entity = &ge->entity; |
| 1725 | |
| 1726 | if (xe_guc_read_stopped(guc) || vf_recovery(guc)) |
| 1727 | xe_sched_stop(sched); |
| 1728 | |
| 1729 | mutex_unlock(lock: &guc->submission_state.lock); |
| 1730 | |
| 1731 | xe_exec_queue_assign_name(q, instance: q->guc->id); |
| 1732 | |
| 1733 | trace_xe_exec_queue_create(q); |
| 1734 | |
| 1735 | return 0; |
| 1736 | |
| 1737 | err_entity: |
| 1738 | mutex_unlock(lock: &guc->submission_state.lock); |
| 1739 | xe_sched_entity_fini(entity: &ge->entity); |
| 1740 | err_sched: |
| 1741 | xe_sched_fini(sched: &ge->sched); |
| 1742 | err_free: |
| 1743 | kfree(objp: ge); |
| 1744 | |
| 1745 | return err; |
| 1746 | } |
| 1747 | |
| 1748 | static void guc_exec_queue_kill(struct xe_exec_queue *q) |
| 1749 | { |
| 1750 | trace_xe_exec_queue_kill(q); |
| 1751 | set_exec_queue_killed(q); |
| 1752 | __suspend_fence_signal(q); |
| 1753 | xe_guc_exec_queue_trigger_cleanup(q); |
| 1754 | } |
| 1755 | |
| 1756 | static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, |
| 1757 | u32 opcode) |
| 1758 | { |
| 1759 | xe_pm_runtime_get_noresume(xe: guc_to_xe(guc: exec_queue_to_guc(q))); |
| 1760 | |
| 1761 | INIT_LIST_HEAD(list: &msg->link); |
| 1762 | msg->opcode = opcode & OPCODE_MASK; |
| 1763 | msg->private_data = q; |
| 1764 | |
| 1765 | trace_xe_sched_msg_add(msg); |
| 1766 | if (opcode & MSG_HEAD) |
| 1767 | xe_sched_add_msg_head(sched: &q->guc->sched, msg); |
| 1768 | else if (opcode & MSG_LOCKED) |
| 1769 | xe_sched_add_msg_locked(sched: &q->guc->sched, msg); |
| 1770 | else |
| 1771 | xe_sched_add_msg(sched: &q->guc->sched, msg); |
| 1772 | } |
| 1773 | |
| 1774 | static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q, |
| 1775 | struct xe_sched_msg *msg, |
| 1776 | u32 opcode) |
| 1777 | { |
| 1778 | if (!list_empty(head: &msg->link)) |
| 1779 | return; |
| 1780 | |
| 1781 | guc_exec_queue_add_msg(q, msg, opcode: opcode | MSG_LOCKED | MSG_HEAD); |
| 1782 | } |
| 1783 | |
| 1784 | static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q, |
| 1785 | struct xe_sched_msg *msg, |
| 1786 | u32 opcode) |
| 1787 | { |
| 1788 | if (!list_empty(head: &msg->link)) |
| 1789 | return false; |
| 1790 | |
| 1791 | guc_exec_queue_add_msg(q, msg, opcode: opcode | MSG_LOCKED); |
| 1792 | |
| 1793 | return true; |
| 1794 | } |
| 1795 | |
| 1796 | #define STATIC_MSG_CLEANUP 0 |
| 1797 | #define STATIC_MSG_SUSPEND 1 |
| 1798 | #define STATIC_MSG_RESUME 2 |
| 1799 | static void guc_exec_queue_destroy(struct xe_exec_queue *q) |
| 1800 | { |
| 1801 | struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; |
| 1802 | |
| 1803 | if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q)) |
| 1804 | guc_exec_queue_add_msg(q, msg, CLEANUP); |
| 1805 | else |
| 1806 | __guc_exec_queue_destroy(guc: exec_queue_to_guc(q), q); |
| 1807 | } |
| 1808 | |
| 1809 | static int guc_exec_queue_set_priority(struct xe_exec_queue *q, |
| 1810 | enum xe_exec_queue_priority priority) |
| 1811 | { |
| 1812 | struct xe_sched_msg *msg; |
| 1813 | |
| 1814 | if (q->sched_props.priority == priority || |
| 1815 | exec_queue_killed_or_banned_or_wedged(q)) |
| 1816 | return 0; |
| 1817 | |
| 1818 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); |
| 1819 | if (!msg) |
| 1820 | return -ENOMEM; |
| 1821 | |
| 1822 | q->sched_props.priority = priority; |
| 1823 | guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); |
| 1824 | |
| 1825 | return 0; |
| 1826 | } |
| 1827 | |
| 1828 | static int guc_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us) |
| 1829 | { |
| 1830 | struct xe_sched_msg *msg; |
| 1831 | |
| 1832 | if (q->sched_props.timeslice_us == timeslice_us || |
| 1833 | exec_queue_killed_or_banned_or_wedged(q)) |
| 1834 | return 0; |
| 1835 | |
| 1836 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); |
| 1837 | if (!msg) |
| 1838 | return -ENOMEM; |
| 1839 | |
| 1840 | q->sched_props.timeslice_us = timeslice_us; |
| 1841 | guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); |
| 1842 | |
| 1843 | return 0; |
| 1844 | } |
| 1845 | |
| 1846 | static int guc_exec_queue_set_preempt_timeout(struct xe_exec_queue *q, |
| 1847 | u32 preempt_timeout_us) |
| 1848 | { |
| 1849 | struct xe_sched_msg *msg; |
| 1850 | |
| 1851 | if (q->sched_props.preempt_timeout_us == preempt_timeout_us || |
| 1852 | exec_queue_killed_or_banned_or_wedged(q)) |
| 1853 | return 0; |
| 1854 | |
| 1855 | msg = kmalloc(sizeof(*msg), GFP_KERNEL); |
| 1856 | if (!msg) |
| 1857 | return -ENOMEM; |
| 1858 | |
| 1859 | q->sched_props.preempt_timeout_us = preempt_timeout_us; |
| 1860 | guc_exec_queue_add_msg(q, msg, SET_SCHED_PROPS); |
| 1861 | |
| 1862 | return 0; |
| 1863 | } |
| 1864 | |
| 1865 | static int guc_exec_queue_suspend(struct xe_exec_queue *q) |
| 1866 | { |
| 1867 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 1868 | struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; |
| 1869 | |
| 1870 | if (exec_queue_killed_or_banned_or_wedged(q)) |
| 1871 | return -EINVAL; |
| 1872 | |
| 1873 | xe_sched_msg_lock(sched); |
| 1874 | if (guc_exec_queue_try_add_msg(q, msg, SUSPEND)) |
| 1875 | q->guc->suspend_pending = true; |
| 1876 | xe_sched_msg_unlock(sched); |
| 1877 | |
| 1878 | return 0; |
| 1879 | } |
| 1880 | |
| 1881 | static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q) |
| 1882 | { |
| 1883 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1884 | struct xe_device *xe = guc_to_xe(guc); |
| 1885 | int ret; |
| 1886 | |
| 1887 | /* |
| 1888 | * We likely don't need to check exec_queue_killed() since we clear
| 1889 | * suspend_pending upon kill, but to be paranoid about races in which
| 1890 | * suspend_pending is set after the kill, we also check for kill here.
| 1891 | */ |
| 1892 | #define WAIT_COND \ |
| 1893 | (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \ |
| 1894 | xe_guc_read_stopped(guc)) |
| 1895 | |
| 1896 | retry: |
| 1897 | if (IS_SRIOV_VF(xe)) |
| 1898 | ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND || |
| 1899 | vf_recovery(guc), |
| 1900 | HZ * 5); |
| 1901 | else |
| 1902 | ret = wait_event_interruptible_timeout(q->guc->suspend_wait, |
| 1903 | WAIT_COND, HZ * 5); |
| 1904 | |
| 1905 | if (vf_recovery(guc) && !xe_device_wedged(xe: (guc_to_xe(guc)))) |
| 1906 | return -EAGAIN; |
| 1907 | |
| 1908 | if (!ret) { |
| 1909 | xe_gt_warn(guc_to_gt(guc), |
| 1910 | "Suspend fence, guc_id=%d, failed to respond" , |
| 1911 | q->guc->id); |
| 1912 | /* XXX: Trigger GT reset? */ |
| 1913 | return -ETIME; |
| 1914 | } else if (IS_SRIOV_VF(xe) && !WAIT_COND) { |
| 1915 | /* Corner case on RESFIX DONE where vf_recovery() changes */ |
| 1916 | goto retry; |
| 1917 | } |
| 1918 | |
| 1919 | #undef WAIT_COND |
| 1920 | |
| 1921 | return ret < 0 ? ret : 0; |
| 1922 | } |
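|      | /*
|      | * A reference sketch for the return handling above, hypothetical
|      | * wrapper: wait_event_interruptible_timeout() returns < 0 on signal, 0
|      | * on timeout with the condition still false, and > 0 (remaining
|      | * jiffies) once the condition is true, hence the "ret < 0 ? ret : 0"
|      | * mapping.
|      | */
|      | static int example_wait(wait_queue_head_t *wq, bool *cond)
|      | {
|      | long ret = wait_event_interruptible_timeout(*wq, READ_ONCE(*cond), HZ * 5);
|      |
|      | if (!ret)
|      | return -ETIME; /* timed out */
|      | return ret < 0 ? ret : 0; /* signal or success */
|      | }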
| 1923 | |
| 1924 | static void guc_exec_queue_resume(struct xe_exec_queue *q) |
| 1925 | { |
| 1926 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 1927 | struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; |
| 1928 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 1929 | |
| 1930 | xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); |
| 1931 | |
| 1932 | xe_sched_msg_lock(sched); |
| 1933 | guc_exec_queue_try_add_msg(q, msg, RESUME); |
| 1934 | xe_sched_msg_unlock(sched); |
| 1935 | } |
| 1936 | |
| 1937 | static bool guc_exec_queue_reset_status(struct xe_exec_queue *q) |
| 1938 | { |
| 1939 | return exec_queue_reset(q) || exec_queue_killed_or_banned_or_wedged(q); |
| 1940 | } |
| 1941 | |
| 1942 | /* |
| 1943 | * All of these functions are an abstraction layer which other parts of Xe can |
| 1944 | * use to trap into the GuC backend. All of these functions, aside from init, |
| 1945 | * really shouldn't do much other than trap into the DRM scheduler which |
| 1946 | * synchronizes these operations. |
| 1947 | */ |
| 1948 | static const struct xe_exec_queue_ops guc_exec_queue_ops = { |
| 1949 | .init = guc_exec_queue_init, |
| 1950 | .kill = guc_exec_queue_kill, |
| 1951 | .fini = guc_exec_queue_fini, |
| 1952 | .destroy = guc_exec_queue_destroy, |
| 1953 | .set_priority = guc_exec_queue_set_priority, |
| 1954 | .set_timeslice = guc_exec_queue_set_timeslice, |
| 1955 | .set_preempt_timeout = guc_exec_queue_set_preempt_timeout, |
| 1956 | .suspend = guc_exec_queue_suspend, |
| 1957 | .suspend_wait = guc_exec_queue_suspend_wait, |
| 1958 | .resume = guc_exec_queue_resume, |
| 1959 | .reset_status = guc_exec_queue_reset_status, |
| 1960 | }; |
| 1961 | |
| 1962 | static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) |
| 1963 | { |
| 1964 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 1965 | |
| 1966 | /* Stop scheduling + flush any DRM scheduler operations */ |
| 1967 | xe_sched_submission_stop(sched); |
| 1968 | |
| 1969 | /* Clean up lost G2H + reset engine state */ |
| 1970 | if (exec_queue_registered(q)) { |
| 1971 | if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) |
| 1972 | xe_exec_queue_put(q); |
| 1973 | else if (exec_queue_destroyed(q)) |
| 1974 | __guc_exec_queue_destroy(guc, q); |
| 1975 | } |
| 1976 | if (q->guc->suspend_pending) { |
| 1977 | set_exec_queue_suspended(q); |
| 1978 | suspend_fence_signal(q); |
| 1979 | } |
| 1980 | atomic_and(EXEC_QUEUE_STATE_WEDGED | EXEC_QUEUE_STATE_BANNED | |
| 1981 | EXEC_QUEUE_STATE_KILLED | EXEC_QUEUE_STATE_DESTROYED | |
| 1982 | EXEC_QUEUE_STATE_SUSPENDED, |
| 1983 | v: &q->guc->state); |
| 1984 | q->guc->resume_time = 0; |
| 1985 | trace_xe_exec_queue_stop(q); |
| 1986 | |
| 1987 | /* |
| 1988 | * Ban any engine (aside from kernel and engines used for VM ops) with a |
| 1989 | * started but not completed job, or if a job has gone through a GT reset
| 1990 | * more than twice. |
| 1991 | */ |
| 1992 | if (!(q->flags & (EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_VM))) { |
| 1993 | struct xe_sched_job *job = xe_sched_first_pending_job(sched); |
| 1994 | bool ban = false; |
| 1995 | |
| 1996 | if (job) { |
| 1997 | if ((xe_sched_job_started(job) && |
| 1998 | !xe_sched_job_completed(job)) || |
| 1999 | xe_sched_invalidate_job(job, threshold: 2)) { |
| 2000 | trace_xe_sched_job_ban(job); |
| 2001 | ban = true; |
| 2002 | } |
| 2003 | } else if (xe_exec_queue_is_lr(q) && |
| 2004 | !xe_lrc_ring_is_idle(lrc: q->lrc[0])) { |
| 2005 | ban = true; |
| 2006 | } |
| 2007 | |
| 2008 | if (ban) { |
| 2009 | set_exec_queue_banned(q); |
| 2010 | xe_guc_exec_queue_trigger_cleanup(q); |
| 2011 | } |
| 2012 | } |
| 2013 | } |
| 2014 | |
| 2015 | int xe_guc_submit_reset_prepare(struct xe_guc *guc) |
| 2016 | { |
| 2017 | int ret; |
| 2018 | |
| 2019 | if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc))) |
| 2020 | return 0; |
| 2021 | |
| 2022 | if (!guc->submission_state.initialized) |
| 2023 | return 0; |
| 2024 | |
| 2025 | /* |
| 2026 | * Using an atomic here rather than submission_state.lock as this |
| 2027 | * function can be called while holding the CT lock (engine reset |
| 2028 | * failure). submission_state.lock needs the CT lock to resubmit jobs. |
| 2029 | * An atomic is not ideal, but it works to protect against a concurrent
| 2030 | * reset and to release any TDRs waiting on guc->submission_state.stopped.
| 2031 | */ |
| 2032 | ret = atomic_fetch_or(i: 1, v: &guc->submission_state.stopped); |
| 2033 | smp_wmb(); |
| 2034 | wake_up_all(&guc->ct.wq); |
| 2035 | |
| 2036 | return ret; |
| 2037 | } |
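|      | /*
|      | * A sketch of the one-shot stop above, hypothetical name:
|      | * atomic_fetch_or() both sets the flag and reports its prior value, so
|      | * exactly one caller observes 0 and owns the stop transition without
|      | * taking a lock.
|      | */
|      | static bool example_stop_once(atomic_t *stopped)
|      | {
|      | return atomic_fetch_or(1, stopped) == 0; /* true only for the first caller */
|      | }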
| 2038 | |
| 2039 | void xe_guc_submit_reset_wait(struct xe_guc *guc) |
| 2040 | { |
| 2041 | wait_event(guc->ct.wq, xe_device_wedged(guc_to_xe(guc)) || |
| 2042 | !xe_guc_read_stopped(guc)); |
| 2043 | } |
| 2044 | |
| 2045 | void xe_guc_submit_stop(struct xe_guc *guc) |
| 2046 | { |
| 2047 | struct xe_exec_queue *q; |
| 2048 | unsigned long index; |
| 2049 | |
| 2050 | xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); |
| 2051 | |
| 2052 | mutex_lock(&guc->submission_state.lock); |
| 2053 | |
| 2054 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { |
| 2055 | /* Prevent redundant attempts to stop parallel queues */ |
| 2056 | if (q->guc->id != index) |
| 2057 | continue; |
| 2058 | |
| 2059 | guc_exec_queue_stop(guc, q); |
| 2060 | } |
| 2061 | |
| 2062 | mutex_unlock(lock: &guc->submission_state.lock); |
| 2063 | |
| 2064 | /* |
| 2065 | * No one can enter the backend at this point, aside from new engine |
| 2066 | * creation which is protected by guc->submission_state.lock. |
| 2067 | */ |
| 2069 | }
| 2070 | |
| 2071 | static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc, |
| 2072 | struct xe_exec_queue *q) |
| 2073 | { |
| 2074 | bool pending_enable, pending_disable, pending_resume; |
| 2075 | |
| 2076 | pending_enable = exec_queue_pending_enable(q); |
| 2077 | pending_resume = exec_queue_pending_resume(q); |
| 2078 | |
| 2079 | if (pending_enable && pending_resume) { |
| 2080 | q->guc->needs_resume = true; |
| 2081 | xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d" , |
| 2082 | q->guc->id); |
| 2083 | } |
| 2084 | |
| 2085 | if (pending_enable && !pending_resume && |
| 2086 | !exec_queue_pending_tdr_exit(q)) { |
| 2087 | clear_exec_queue_registered(q); |
| 2088 | if (xe_exec_queue_is_lr(q)) |
| 2089 | xe_exec_queue_put(q); |
| 2090 | xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d" , |
| 2091 | q->guc->id); |
| 2092 | } |
| 2093 | |
| 2094 | if (pending_enable) { |
| 2095 | clear_exec_queue_enabled(q); |
| 2096 | clear_exec_queue_pending_resume(q); |
| 2097 | clear_exec_queue_pending_tdr_exit(q); |
| 2098 | clear_exec_queue_pending_enable(q); |
| 2099 | xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d" , |
| 2100 | q->guc->id); |
| 2101 | } |
| 2102 | |
| 2103 | if (exec_queue_destroyed(q) && exec_queue_registered(q)) { |
| 2104 | clear_exec_queue_destroyed(q); |
| 2105 | if (exec_queue_extra_ref(q)) |
| 2106 | xe_exec_queue_put(q); |
| 2107 | else |
| 2108 | q->guc->needs_cleanup = true; |
| 2109 | clear_exec_queue_extra_ref(q); |
| 2110 | xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d" , |
| 2111 | q->guc->id); |
| 2112 | } |
| 2113 | |
| 2114 | pending_disable = exec_queue_pending_disable(q); |
| 2115 | |
| 2116 | if (pending_disable && exec_queue_suspended(q)) { |
| 2117 | clear_exec_queue_suspended(q); |
| 2118 | q->guc->needs_suspend = true; |
| 2119 | xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d" , |
| 2120 | q->guc->id); |
| 2121 | } |
| 2122 | |
| 2123 | if (pending_disable) { |
| 2124 | if (!pending_enable) |
| 2125 | set_exec_queue_enabled(q); |
| 2126 | clear_exec_queue_pending_disable(q); |
| 2127 | clear_exec_queue_check_timeout(q); |
| 2128 | xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d" , |
| 2129 | q->guc->id); |
| 2130 | } |
| 2131 | |
| 2132 | q->guc->resume_time = 0; |
| 2133 | } |
| 2134 | |
| 2135 | static void lrc_parallel_clear(struct xe_lrc *lrc) |
| 2136 | { |
| 2137 | struct xe_device *xe = gt_to_xe(lrc->gt); |
| 2138 | struct iosys_map map = xe_lrc_parallel_map(lrc); |
| 2139 | int i; |
| 2140 | |
| 2141 | for (i = 0; i < WQ_SIZE / sizeof(u32); ++i) |
| 2142 | parallel_write(xe, map, wq[i], |
| 2143 | FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | |
| 2144 | FIELD_PREP(WQ_LEN_MASK, 0)); |
| 2145 | } |
| 2146 | |
| 2147 | /* |
| 2148 | * This function is quite complex, but it is the only real way to ensure no
| 2149 | * state is lost during VF resume flows. The function scans the queue state,
| 2150 | * makes adjustments as needed, and queues jobs / messages to be replayed upon unpause.
| 2151 | */ |
| 2152 | static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q) |
| 2153 | { |
| 2154 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 2155 | struct xe_sched_job *job; |
| 2156 | int i; |
| 2157 | |
| 2158 | lockdep_assert_held(&guc->submission_state.lock); |
| 2159 | |
| 2160 | /* Stop scheduling + flush any DRM scheduler operations */ |
| 2161 | xe_sched_submission_stop(sched); |
| 2162 | if (xe_exec_queue_is_lr(q)) |
| 2163 | cancel_work_sync(work: &q->guc->lr_tdr); |
| 2164 | else |
| 2165 | cancel_delayed_work_sync(dwork: &sched->base.work_tdr); |
| 2166 | |
| 2167 | guc_exec_queue_revert_pending_state_change(guc, q); |
| 2168 | |
| 2169 | if (xe_exec_queue_is_parallel(q)) { |
| 2170 | /* Pairs with WRITE_ONCE in __xe_exec_queue_init */ |
| 2171 | struct xe_lrc *lrc = READ_ONCE(q->lrc[0]); |
| 2172 | |
| 2173 | /* |
| 2174 | * NOP existing WQ commands that may contain stale GGTT |
| 2175 | * addresses. These will be replayed upon unpause. The hardware |
| 2176 | * seems to get confused if the WQ head/tail pointers are |
| 2177 | * adjusted. |
| 2178 | */ |
| 2179 | if (lrc) |
| 2180 | lrc_parallel_clear(lrc); |
| 2181 | } |
| 2182 | |
| 2183 | job = xe_sched_first_pending_job(sched); |
| 2184 | if (job) { |
| 2185 | job->restore_replay = true; |
| 2186 | |
| 2187 | /* |
| 2188 | * Adjust software tail so jobs submitted overwrite previous |
| 2189 | * position in ring buffer with new GGTT addresses. |
| 2190 | */ |
| 2191 | for (i = 0; i < q->width; ++i) |
| 2192 | q->lrc[i]->ring.tail = job->ptrs[i].head; |
| 2193 | } |
| 2194 | } |
| 2195 | |
| 2196 | /** |
| 2197 | * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC. |
| 2198 | * @guc: the &xe_guc struct instance whose scheduler is to be disabled |
| 2199 | */ |
| 2200 | void xe_guc_submit_pause(struct xe_guc *guc) |
| 2201 | { |
| 2202 | struct xe_exec_queue *q; |
| 2203 | unsigned long index; |
| 2204 | |
| 2205 | xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); |
| 2206 | |
| 2207 | mutex_lock(&guc->submission_state.lock); |
| 2208 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { |
| 2209 | /* Prevent redundant attempts to stop parallel queues */ |
| 2210 | if (q->guc->id != index) |
| 2211 | continue; |
| 2212 | |
| 2213 | guc_exec_queue_pause(guc, q); |
| 2214 | } |
| 2215 | mutex_unlock(lock: &guc->submission_state.lock); |
| 2216 | } |
| 2217 | |
| 2218 | static void guc_exec_queue_start(struct xe_exec_queue *q) |
| 2219 | { |
| 2220 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 2221 | |
| 2222 | if (!exec_queue_killed_or_banned_or_wedged(q)) { |
| 2223 | struct xe_sched_job *job = xe_sched_first_pending_job(sched); |
| 2224 | int i; |
| 2225 | |
| 2226 | trace_xe_exec_queue_resubmit(q); |
| 2227 | if (job) { |
| 2228 | for (i = 0; i < q->width; ++i) { |
| 2229 | /* |
| 2230 | * The GuC context is unregistered at this point in
| 2231 | * time. Adjusting the software ring tail ensures
| 2232 | * jobs are rewritten in their original placement,
| 2233 | * and adjusting the LRC tail ensures the newly
| 2234 | * loaded GuC / contexts only ever see the LRC tail
| 2235 | * increasing as jobs are written out.
| 2236 | */ |
| 2237 | q->lrc[i]->ring.tail = job->ptrs[i].head; |
| 2238 | xe_lrc_set_ring_tail(lrc: q->lrc[i], |
| 2239 | tail: xe_lrc_ring_head(lrc: q->lrc[i])); |
| 2240 | } |
| 2241 | } |
| 2242 | xe_sched_resubmit_jobs(sched); |
| 2243 | } |
| 2244 | |
| 2245 | xe_sched_submission_start(sched); |
| 2246 | xe_sched_submission_resume_tdr(sched); |
| 2247 | } |
| 2248 | |
| 2249 | int xe_guc_submit_start(struct xe_guc *guc) |
| 2250 | { |
| 2251 | struct xe_exec_queue *q; |
| 2252 | unsigned long index; |
| 2253 | |
| 2254 | xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); |
| 2255 | |
| 2256 | mutex_lock(&guc->submission_state.lock); |
| 2257 | atomic_dec(v: &guc->submission_state.stopped); |
| 2258 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { |
| 2259 | /* Prevent redundant attempts to start parallel queues */ |
| 2260 | if (q->guc->id != index) |
| 2261 | continue; |
| 2262 | |
| 2263 | guc_exec_queue_start(q); |
| 2264 | } |
| 2265 | mutex_unlock(lock: &guc->submission_state.lock); |
| 2266 | |
| 2267 | wake_up_all(&guc->ct.wq); |
| 2268 | |
| 2269 | return 0; |
| 2270 | } |
| 2271 | |
| 2272 | static void guc_exec_queue_unpause_prepare(struct xe_guc *guc, |
| 2273 | struct xe_exec_queue *q) |
| 2274 | { |
| 2275 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 2276 | struct xe_sched_job *job = NULL, *__job; |
| 2277 | bool restore_replay = false; |
| 2278 | |
| 2279 | list_for_each_entry(__job, &sched->base.pending_list, drm.list) { |
| 2280 | job = __job; |
| 2281 | restore_replay |= job->restore_replay; |
| 2282 | if (restore_replay) { |
| 2283 | xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d" , |
| 2284 | q->guc->id, xe_sched_job_seqno(job)); |
| 2285 | |
| 2286 | q->ring_ops->emit_job(job); |
| 2287 | job->restore_replay = true; |
| 2288 | } |
| 2289 | } |
| 2290 | |
| 2291 | if (job) |
| 2292 | job->last_replay = true; |
| 2293 | } |
| 2294 | |
| 2295 | /** |
| 2296 | * xe_guc_submit_unpause_prepare - Prepare unpause submission tasks on given GuC. |
| 2297 | * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause |
| 2298 | */ |
| 2299 | void xe_guc_submit_unpause_prepare(struct xe_guc *guc) |
| 2300 | { |
| 2301 | struct xe_exec_queue *q; |
| 2302 | unsigned long index; |
| 2303 | |
| 2304 | xe_gt_assert(guc_to_gt(guc), vf_recovery(guc)); |
| 2305 | |
| 2306 | mutex_lock(&guc->submission_state.lock); |
| 2307 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { |
| 2308 | /* Prevent redundant attempts to stop parallel queues */ |
| 2309 | if (q->guc->id != index) |
| 2310 | continue; |
| 2311 | |
| 2312 | guc_exec_queue_unpause_prepare(guc, q); |
| 2313 | } |
| 2314 | mutex_unlock(lock: &guc->submission_state.lock); |
| 2315 | } |
| 2316 | |
| 2317 | static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q) |
| 2318 | { |
| 2319 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 2320 | struct xe_sched_msg *msg; |
| 2321 | |
| 2322 | if (q->guc->needs_cleanup) { |
| 2323 | msg = q->guc->static_msgs + STATIC_MSG_CLEANUP; |
| 2324 | |
| 2325 | guc_exec_queue_add_msg(q, msg, CLEANUP); |
| 2326 | q->guc->needs_cleanup = false; |
| 2327 | } |
| 2328 | |
| 2329 | if (q->guc->needs_suspend) { |
| 2330 | msg = q->guc->static_msgs + STATIC_MSG_SUSPEND; |
| 2331 | |
| 2332 | xe_sched_msg_lock(sched); |
| 2333 | guc_exec_queue_try_add_msg_head(q, msg, SUSPEND); |
| 2334 | xe_sched_msg_unlock(sched); |
| 2335 | |
| 2336 | q->guc->needs_suspend = false; |
| 2337 | } |
| 2338 | |
| 2339 | /* |
| 2340 | * The resume must be in the message queue before the suspend, as it is
| 2341 | * not possible for a resume to be issued while a suspend is pending, but
| 2342 | * the inverse is possible.
| 2343 | */ |
| 2344 | if (q->guc->needs_resume) { |
| 2345 | msg = q->guc->static_msgs + STATIC_MSG_RESUME; |
| 2346 | |
| 2347 | xe_sched_msg_lock(sched); |
| 2348 | guc_exec_queue_try_add_msg_head(q, msg, RESUME); |
| 2349 | xe_sched_msg_unlock(sched); |
| 2350 | |
| 2351 | q->guc->needs_resume = false; |
| 2352 | } |
| 2353 | } |
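|      | /*
|      | * An ordering sketch for the comment above, standalone example:
|      | * list_add() inserts at the head of a list, so head-inserting SUSPEND
|      | * first and RESUME second leaves RESUME ahead of SUSPEND in the queue.
|      | */
|      | static void example_head_insert_order(struct list_head *queue,
|      | struct list_head *suspend,
|      | struct list_head *resume)
|      | {
|      | list_add(suspend, queue); /* queue: SUSPEND */
|      | list_add(resume, queue); /* queue: RESUME, SUSPEND */
|      | }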
| 2354 | |
| 2355 | static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q) |
| 2356 | { |
| 2357 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 2358 | bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q); |
| 2359 | |
| 2360 | lockdep_assert_held(&guc->submission_state.lock); |
| 2361 | |
| 2362 | xe_sched_resubmit_jobs(sched); |
| 2363 | guc_exec_queue_replay_pending_state_change(q); |
| 2364 | xe_sched_submission_start(sched); |
| 2365 | if (needs_tdr) |
| 2366 | xe_guc_exec_queue_trigger_cleanup(q); |
| 2367 | xe_sched_submission_resume_tdr(sched); |
| 2368 | } |
| 2369 | |
| 2370 | /** |
| 2371 | * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC. |
| 2372 | * @guc: the &xe_guc struct instance whose scheduler is to be enabled |
| 2373 | */ |
| 2374 | void xe_guc_submit_unpause(struct xe_guc *guc) |
| 2375 | { |
| 2376 | struct xe_exec_queue *q; |
| 2377 | unsigned long index; |
| 2378 | |
| 2379 | mutex_lock(&guc->submission_state.lock); |
| 2380 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { |
| 2381 | /* |
| 2382 | * Prevent redundant attempts to stop parallel queues, or queues |
| 2383 | * created after resfix done. |
| 2384 | */ |
| 2385 | if (q->guc->id != index || |
| 2386 | !READ_ONCE(q->guc->sched.base.pause_submit)) |
| 2387 | continue; |
| 2388 | |
| 2389 | guc_exec_queue_unpause(guc, q); |
| 2390 | } |
| 2391 | mutex_unlock(lock: &guc->submission_state.lock); |
| 2392 | } |
| 2393 | |
| 2394 | /** |
| 2395 | * xe_guc_submit_pause_abort - Abort all paused submission task on given GuC. |
| 2396 | * @guc: the &xe_guc struct instance whose scheduler is to be aborted |
| 2397 | */ |
| 2398 | void xe_guc_submit_pause_abort(struct xe_guc *guc) |
| 2399 | { |
| 2400 | struct xe_exec_queue *q; |
| 2401 | unsigned long index; |
| 2402 | |
| 2403 | mutex_lock(&guc->submission_state.lock); |
| 2404 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { |
| 2405 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 2406 | |
| 2407 | /* Prevent redundant attempts to stop parallel queues */ |
| 2408 | if (q->guc->id != index) |
| 2409 | continue; |
| 2410 | |
| 2411 | xe_sched_submission_start(sched); |
| 2412 | if (exec_queue_killed_or_banned_or_wedged(q)) |
| 2413 | xe_guc_exec_queue_trigger_cleanup(q); |
| 2414 | } |
| 2415 | mutex_unlock(lock: &guc->submission_state.lock); |
| 2416 | } |
| 2417 | |
| 2418 | static struct xe_exec_queue * |
| 2419 | g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) |
| 2420 | { |
| 2421 | struct xe_gt *gt = guc_to_gt(guc); |
| 2422 | struct xe_exec_queue *q; |
| 2423 | |
| 2424 | if (unlikely(guc_id >= GUC_ID_MAX)) { |
| 2425 | xe_gt_err(gt, "Invalid guc_id %u\n" , guc_id); |
| 2426 | return NULL; |
| 2427 | } |
| 2428 | |
| 2429 | q = xa_load(&guc->submission_state.exec_queue_lookup, index: guc_id); |
| 2430 | if (unlikely(!q)) { |
| 2431 | xe_gt_err(gt, "No exec queue found for guc_id %u\n" , guc_id); |
| 2432 | return NULL; |
| 2433 | } |
| 2434 | |
| 2435 | xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); |
| 2436 | xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); |
| 2437 | |
| 2438 | return q; |
| 2439 | } |
| 2440 | |
| 2441 | static void deregister_exec_queue(struct xe_guc *guc, struct xe_exec_queue *q) |
| 2442 | { |
| 2443 | u32 action[] = { |
| 2444 | XE_GUC_ACTION_DEREGISTER_CONTEXT, |
| 2445 | q->guc->id, |
| 2446 | }; |
| 2447 | |
| 2448 | xe_gt_assert(guc_to_gt(guc), exec_queue_destroyed(q)); |
| 2449 | xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); |
| 2450 | xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_disable(q)); |
| 2451 | xe_gt_assert(guc_to_gt(guc), !exec_queue_pending_enable(q)); |
| 2452 | |
| 2453 | trace_xe_exec_queue_deregister(q); |
| 2454 | |
| 2455 | xe_guc_ct_send_g2h_handler(ct: &guc->ct, action, ARRAY_SIZE(action)); |
| 2456 | } |
| 2457 | |
| 2458 | static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, |
| 2459 | u32 runnable_state) |
| 2460 | { |
| 2461 | trace_xe_exec_queue_scheduling_done(q); |
| 2462 | |
| 2463 | if (runnable_state == 1) { |
| 2464 | xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q)); |
| 2465 | |
| 2466 | q->guc->resume_time = ktime_get(); |
| 2467 | clear_exec_queue_pending_resume(q); |
| 2468 | clear_exec_queue_pending_tdr_exit(q); |
| 2469 | clear_exec_queue_pending_enable(q); |
| 2470 | smp_wmb(); |
| 2471 | wake_up_all(&guc->ct.wq); |
| 2472 | } else { |
| 2473 | bool check_timeout = exec_queue_check_timeout(q); |
| 2474 | |
| 2475 | xe_gt_assert(guc_to_gt(guc), runnable_state == 0); |
| 2476 | xe_gt_assert(guc_to_gt(guc), exec_queue_pending_disable(q)); |
| 2477 | |
| 2478 | if (q->guc->suspend_pending) { |
| 2479 | suspend_fence_signal(q); |
| 2480 | clear_exec_queue_pending_disable(q); |
| 2481 | } else { |
| 2482 | if (exec_queue_banned(q) || check_timeout) { |
| 2483 | smp_wmb(); |
| 2484 | wake_up_all(&guc->ct.wq); |
| 2485 | } |
| 2486 | if (!check_timeout && exec_queue_destroyed(q)) { |
| 2487 | /* |
| 2488 | * Make sure to clear the pending_disable only |
| 2489 | * after sampling the destroyed state. We want |
| 2490 | * to ensure we don't trigger the unregister too |
| 2491 | * early with something intending to only |
| 2492 | * disable scheduling. The caller doing the |
| 2493 | * destroy must wait for an ongoing |
| 2494 | * pending_disable before marking as destroyed. |
| 2495 | */ |
| 2496 | clear_exec_queue_pending_disable(q); |
| 2497 | deregister_exec_queue(guc, q); |
| 2498 | } else { |
| 2499 | clear_exec_queue_pending_disable(q); |
| 2500 | } |
| 2501 | } |
| 2502 | } |
| 2503 | } |
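|      | /*
|      | * A sketch of the sample-then-clear rule from the comment above, with a
|      | * hypothetical state layout: the destroyed bit must be read before
|      | * pending_disable is cleared, since clearing pending_disable is what
|      | * lets a destroying thread make progress.
|      | */
|      | #define EX_PENDING_DISABLE (1 << 0)
|      | #define EX_DESTROYED (1 << 1)
|      |
|      | static bool example_sched_done_disable(atomic_t *state)
|      | {
|      | bool destroyed = atomic_read(state) & EX_DESTROYED; /* sample first */
|      |
|      | atomic_and(~EX_PENDING_DISABLE, state); /* then release waiters */
|      | return destroyed; /* caller deregisters if true */
|      | }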
| 2504 | |
| 2505 | int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) |
| 2506 | { |
| 2507 | struct xe_exec_queue *q; |
| 2508 | u32 guc_id, runnable_state; |
| 2509 | |
| 2510 | if (unlikely(len < 2)) |
| 2511 | return -EPROTO; |
| 2512 | |
| 2513 | guc_id = msg[0]; |
| 2514 | runnable_state = msg[1]; |
| 2515 | |
| 2516 | q = g2h_exec_queue_lookup(guc, guc_id); |
| 2517 | if (unlikely(!q)) |
| 2518 | return -EPROTO; |
| 2519 | |
| 2520 | if (unlikely(!exec_queue_pending_enable(q) && |
| 2521 | !exec_queue_pending_disable(q))) { |
| 2522 | xe_gt_err(guc_to_gt(guc), |
| 2523 | "SCHED_DONE: Unexpected engine state 0x%04x, guc_id=%d, runnable_state=%u" , |
| 2524 | atomic_read(&q->guc->state), q->guc->id, |
| 2525 | runnable_state); |
| 2526 | return -EPROTO; |
| 2527 | } |
| 2528 | |
| 2529 | handle_sched_done(guc, q, runnable_state); |
| 2530 | |
| 2531 | return 0; |
| 2532 | } |
| 2533 | |
| 2534 | static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) |
| 2535 | { |
| 2536 | trace_xe_exec_queue_deregister_done(q); |
| 2537 | |
| 2538 | clear_exec_queue_registered(q); |
| 2539 | |
| 2540 | if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q)) |
| 2541 | xe_exec_queue_put(q); |
| 2542 | else |
| 2543 | __guc_exec_queue_destroy(guc, q); |
| 2544 | } |
| 2545 | |
| 2546 | int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) |
| 2547 | { |
| 2548 | struct xe_exec_queue *q; |
| 2549 | u32 guc_id; |
| 2550 | |
| 2551 | if (unlikely(len < 1)) |
| 2552 | return -EPROTO; |
| 2553 | |
| 2554 | guc_id = msg[0]; |
| 2555 | |
| 2556 | q = g2h_exec_queue_lookup(guc, guc_id); |
| 2557 | if (unlikely(!q)) |
| 2558 | return -EPROTO; |
| 2559 | |
| 2560 | if (!exec_queue_destroyed(q) || exec_queue_pending_disable(q) || |
| 2561 | exec_queue_pending_enable(q) || exec_queue_enabled(q)) { |
| 2562 | xe_gt_err(guc_to_gt(guc), |
| 2563 | "DEREGISTER_DONE: Unexpected engine state 0x%04x, guc_id=%d" , |
| 2564 | atomic_read(&q->guc->state), q->guc->id); |
| 2565 | return -EPROTO; |
| 2566 | } |
| 2567 | |
| 2568 | handle_deregister_done(guc, q); |
| 2569 | |
| 2570 | return 0; |
| 2571 | } |
| 2572 | |
| 2573 | int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) |
| 2574 | { |
| 2575 | struct xe_gt *gt = guc_to_gt(guc); |
| 2576 | struct xe_exec_queue *q; |
| 2577 | u32 guc_id; |
| 2578 | |
| 2579 | if (unlikely(len < 1)) |
| 2580 | return -EPROTO; |
| 2581 | |
| 2582 | guc_id = msg[0]; |
| 2583 | |
| 2584 | q = g2h_exec_queue_lookup(guc, guc_id); |
| 2585 | if (unlikely(!q)) |
| 2586 | return -EPROTO; |
| 2587 | |
| 2588 | xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d" , |
| 2589 | xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); |
| 2590 | |
| 2591 | trace_xe_exec_queue_reset(q); |
| 2592 | |
| 2593 | /* |
| 2594 | * A banned engine is a NOP at this point (came from |
| 2595 | * guc_exec_queue_timedout_job). Otherwise, kick the DRM scheduler to
| 2596 | * cancel jobs by setting the job's timeout to the minimum value, which
| 2597 | * kicks off guc_exec_queue_timedout_job.
| 2598 | */ |
| 2599 | set_exec_queue_reset(q); |
| 2600 | if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) |
| 2601 | xe_guc_exec_queue_trigger_cleanup(q); |
| 2602 | |
| 2603 | return 0; |
| 2604 | } |
| 2605 | |
| 2606 | /* |
| 2607 | * xe_guc_error_capture_handler - Handler of the GuC capture notification message
| 2608 | * @guc: The GuC object
| 2609 | * @msg: Pointer to the message
| 2610 | * @len: The message length
| 2611 | *
| 2612 | * When the GuC captured data is ready, the GuC sends the
| 2613 | * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION message to the host. This function
| 2614 | * is called first to check the status before processing the accompanying data.
| 2615 | *
| 2616 | * Returns: error code, 0 on success
| 2617 | */ |
| 2618 | int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) |
| 2619 | { |
| 2620 | u32 status; |
| 2621 | |
| 2622 | if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) |
| 2623 | return -EPROTO; |
| 2624 | |
| 2625 | status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; |
| 2626 | if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) |
| 2627 | xe_gt_warn(guc_to_gt(guc), "G2H-Error capture no space" ); |
| 2628 | |
| 2629 | xe_guc_capture_process(guc); |
| 2630 | |
| 2631 | return 0; |
| 2632 | } |
| 2633 | |
| 2634 | int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, |
| 2635 | u32 len) |
| 2636 | { |
| 2637 | struct xe_gt *gt = guc_to_gt(guc); |
| 2638 | struct xe_exec_queue *q; |
| 2639 | u32 guc_id; |
| 2640 | u32 type = XE_GUC_CAT_ERR_TYPE_INVALID; |
| 2641 | |
| 2642 | if (unlikely(!len || len > 2)) |
| 2643 | return -EPROTO; |
| 2644 | |
| 2645 | guc_id = msg[0]; |
| 2646 | |
| 2647 | if (len == 2) |
| 2648 | type = msg[1]; |
| 2649 | |
| 2650 | if (guc_id == GUC_ID_UNKNOWN) { |
| 2651 | /* |
| 2652 | * GuC uses GUC_ID_UNKNOWN if it cannot map the CAT fault to any PF/VF
| 2653 | * context. In such a case, only the PF will be notified about that fault.
| 2654 | */ |
| 2655 | xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n" ); |
| 2656 | return 0; |
| 2657 | } |
| 2658 | |
| 2659 | q = g2h_exec_queue_lookup(guc, guc_id); |
| 2660 | if (unlikely(!q)) |
| 2661 | return -EPROTO; |
| 2662 | |
| 2663 | /* |
| 2664 | * The type is HW-defined and changes based on platform, so we don't |
| 2665 | * decode it in the kernel and only check if it is valid. |
| 2666 | * See bspec 54047 and 72187 for details. |
| 2667 | */ |
| 2668 | if (type != XE_GUC_CAT_ERR_TYPE_INVALID) |
| 2669 | xe_gt_dbg(gt, |
| 2670 | "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d" , |
| 2671 | type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); |
| 2672 | else |
| 2673 | xe_gt_dbg(gt, |
| 2674 | "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d" , |
| 2675 | xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); |
| 2676 | |
| 2677 | trace_xe_exec_queue_memory_cat_error(q); |
| 2678 | |
| 2679 | /* Treat the same as engine reset */ |
| 2680 | set_exec_queue_reset(q); |
| 2681 | if (!exec_queue_banned(q) && !exec_queue_check_timeout(q)) |
| 2682 | xe_guc_exec_queue_trigger_cleanup(q); |
| 2683 | |
| 2684 | return 0; |
| 2685 | } |
| 2686 | |
| 2687 | int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) |
| 2688 | { |
| 2689 | struct xe_gt *gt = guc_to_gt(guc); |
| 2690 | u8 guc_class, instance; |
| 2691 | u32 reason; |
| 2692 | |
| 2693 | if (unlikely(len != 3)) |
| 2694 | return -EPROTO; |
| 2695 | |
| 2696 | guc_class = msg[0]; |
| 2697 | instance = msg[1]; |
| 2698 | reason = msg[2]; |
| 2699 | |
| 2700 | /* Unexpected failure of a hardware feature, log an actual error */ |
| 2701 | xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X" , |
| 2702 | guc_class, instance, reason); |
| 2703 | |
| 2704 | xe_gt_reset_async(gt); |
| 2705 | |
| 2706 | return 0; |
| 2707 | } |
| 2708 | |
| 2709 | static void |
| 2710 | guc_exec_queue_wq_snapshot_capture(struct xe_exec_queue *q, |
| 2711 | struct xe_guc_submit_exec_queue_snapshot *snapshot) |
| 2712 | { |
| 2713 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 2714 | struct xe_device *xe = guc_to_xe(guc); |
| 2715 | struct iosys_map map = xe_lrc_parallel_map(lrc: q->lrc[0]); |
| 2716 | int i; |
| 2717 | |
| 2718 | snapshot->guc.wqi_head = q->guc->wqi_head; |
| 2719 | snapshot->guc.wqi_tail = q->guc->wqi_tail; |
| 2720 | snapshot->parallel.wq_desc.head = parallel_read(xe, map, wq_desc.head); |
| 2721 | snapshot->parallel.wq_desc.tail = parallel_read(xe, map, wq_desc.tail); |
| 2722 | snapshot->parallel.wq_desc.status = parallel_read(xe, map, |
| 2723 | wq_desc.wq_status); |
| 2724 | |
| 2725 | if (snapshot->parallel.wq_desc.head != |
| 2726 | snapshot->parallel.wq_desc.tail) { |
| 2727 | for (i = snapshot->parallel.wq_desc.head; |
| 2728 | i != snapshot->parallel.wq_desc.tail; |
| 2729 | i = (i + sizeof(u32)) % WQ_SIZE) |
| 2730 | snapshot->parallel.wq[i / sizeof(u32)] = |
| 2731 | parallel_read(xe, map, wq[i / sizeof(u32)]); |
| 2732 | } |
| 2733 | } |
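|      | /*
|      | * A standalone sketch of the circular walk above: WQ indices are byte
|      | * offsets into a WQ_SIZE-byte ring of u32 entries, so the iterator steps
|      | * by sizeof(u32) and wraps with a modulo. The visitor callback is
|      | * illustrative.
|      | */
|      | static void example_wq_walk(const u32 *wq, u32 head, u32 tail,
|      | void (*visit)(u32 dword))
|      | {
|      | u32 i;
|      |
|      | for (i = head; i != tail; i = (i + sizeof(u32)) % WQ_SIZE)
|      | visit(wq[i / sizeof(u32)]);
|      | }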
| 2734 | |
| 2735 | static void |
| 2736 | guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, |
| 2737 | struct drm_printer *p) |
| 2738 | { |
| 2739 | int i; |
| 2740 | |
| 2741 | drm_printf(p, f: "\tWQ head: %u (internal), %d (memory)\n" , |
| 2742 | snapshot->guc.wqi_head, snapshot->parallel.wq_desc.head); |
| 2743 | drm_printf(p, f: "\tWQ tail: %u (internal), %d (memory)\n" , |
| 2744 | snapshot->guc.wqi_tail, snapshot->parallel.wq_desc.tail); |
| 2745 | drm_printf(p, f: "\tWQ status: %u\n" , snapshot->parallel.wq_desc.status); |
| 2746 | |
| 2747 | if (snapshot->parallel.wq_desc.head != |
| 2748 | snapshot->parallel.wq_desc.tail) { |
| 2749 | for (i = snapshot->parallel.wq_desc.head; |
| 2750 | i != snapshot->parallel.wq_desc.tail; |
| 2751 | i = (i + sizeof(u32)) % WQ_SIZE) |
| 2752 | drm_printf(p, f: "\tWQ[%zu]: 0x%08x\n" , i / sizeof(u32), |
| 2753 | snapshot->parallel.wq[i / sizeof(u32)]); |
| 2754 | } |
| 2755 | } |
| 2756 | |
| 2757 | /** |
| 2758 | * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. |
| 2759 | * @q: faulty exec queue |
| 2760 | * |
| 2761 | * This can be printed out at a later stage, e.g. during dev_coredump
| 2762 | * analysis. |
| 2763 | * |
| 2764 | * Returns: a GuC Submit Engine snapshot object that must be freed by the |
| 2765 | * caller, using `xe_guc_exec_queue_snapshot_free`. |
| 2766 | */ |
| 2767 | struct xe_guc_submit_exec_queue_snapshot * |
| 2768 | xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q) |
| 2769 | { |
| 2770 | struct xe_gpu_scheduler *sched = &q->guc->sched; |
| 2771 | struct xe_guc_submit_exec_queue_snapshot *snapshot; |
| 2772 | int i; |
| 2773 | |
| 2774 | snapshot = kzalloc(sizeof(*snapshot), GFP_ATOMIC); |
| 2775 | |
| 2776 | if (!snapshot) |
| 2777 | return NULL; |
| 2778 | |
| 2779 | snapshot->guc.id = q->guc->id; |
| 2780 | memcpy(&snapshot->name, &q->name, sizeof(snapshot->name)); |
| 2781 | snapshot->class = q->class; |
| 2782 | snapshot->logical_mask = q->logical_mask; |
| 2783 | snapshot->width = q->width; |
| 2784 | snapshot->refcount = kref_read(kref: &q->refcount); |
| 2785 | snapshot->sched_timeout = sched->base.timeout; |
| 2786 | snapshot->sched_props.timeslice_us = q->sched_props.timeslice_us; |
| 2787 | snapshot->sched_props.preempt_timeout_us = |
| 2788 | q->sched_props.preempt_timeout_us; |
| 2789 | |
| 2790 | snapshot->lrc = kmalloc_array(q->width, sizeof(struct xe_lrc_snapshot *), |
| 2791 | GFP_ATOMIC); |
| 2792 | |
| 2793 | if (snapshot->lrc) { |
| 2794 | for (i = 0; i < q->width; ++i) { |
| 2795 | struct xe_lrc *lrc = q->lrc[i]; |
| 2796 | |
| 2797 | snapshot->lrc[i] = xe_lrc_snapshot_capture(lrc); |
| 2798 | } |
| 2799 | } |
| 2800 | |
| 2801 | snapshot->schedule_state = atomic_read(v: &q->guc->state); |
| 2802 | snapshot->exec_queue_flags = q->flags; |
| 2803 | |
| 2804 | snapshot->parallel_execution = xe_exec_queue_is_parallel(q); |
| 2805 | if (snapshot->parallel_execution) |
| 2806 | guc_exec_queue_wq_snapshot_capture(q, snapshot); |
| 2807 | |
| 2808 | spin_lock(lock: &sched->base.job_list_lock); |
| 2809 | snapshot->pending_list_size = list_count_nodes(head: &sched->base.pending_list); |
| 2810 | snapshot->pending_list = kmalloc_array(snapshot->pending_list_size, |
| 2811 | sizeof(struct pending_list_snapshot), |
| 2812 | GFP_ATOMIC); |
| 2813 | |
| 2814 | if (snapshot->pending_list) { |
| 2815 | struct xe_sched_job *job_iter; |
| 2816 | |
| 2817 | i = 0; |
| 2818 | list_for_each_entry(job_iter, &sched->base.pending_list, drm.list) { |
| 2819 | snapshot->pending_list[i].seqno = |
| 2820 | xe_sched_job_seqno(job: job_iter); |
| 2821 | snapshot->pending_list[i].fence = |
| 2822 | dma_fence_is_signaled(fence: job_iter->fence) ? 1 : 0; |
| 2823 | snapshot->pending_list[i].finished = |
| 2824 | dma_fence_is_signaled(fence: &job_iter->drm.s_fence->finished) |
| 2825 | ? 1 : 0; |
| 2826 | i++; |
| 2827 | } |
| 2828 | } |
| 2829 | |
| 2830 | spin_unlock(lock: &sched->base.job_list_lock); |
| 2831 | |
| 2832 | return snapshot; |
| 2833 | } |
| 2834 | |
| 2835 | /** |
| 2836 | * xe_guc_exec_queue_snapshot_capture_delayed - Take delayed part of snapshot of the GuC Engine. |
| 2837 | * @snapshot: Previously captured snapshot of job. |
| 2838 | * |
| 2839 | * This captures some data that requires taking locks, so it cannot be done in the signaling path.
| 2840 | */ |
| 2841 | void |
| 2842 | xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot) |
| 2843 | { |
| 2844 | int i; |
| 2845 | |
| 2846 | if (!snapshot || !snapshot->lrc) |
| 2847 | return; |
| 2848 | |
| 2849 | for (i = 0; i < snapshot->width; ++i) |
| 2850 | xe_lrc_snapshot_capture_delayed(snapshot: snapshot->lrc[i]); |
| 2851 | } |
| 2852 | |
| 2853 | /** |
| 2854 | * xe_guc_exec_queue_snapshot_print - Print out a given GuC Engine snapshot. |
| 2855 | * @snapshot: GuC Submit Engine snapshot object. |
| 2856 | * @p: drm_printer where it will be printed out. |
| 2857 | * |
| 2858 | * This function prints out a given GuC Submit Engine snapshot object. |
| 2859 | */ |
| 2860 | void |
| 2861 | xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snapshot, |
| 2862 | struct drm_printer *p) |
| 2863 | { |
| 2864 | int i; |
| 2865 | |
| 2866 | if (!snapshot) |
| 2867 | return; |
| 2868 | |
| 2869 | drm_printf(p, f: "GuC ID: %d\n" , snapshot->guc.id); |
| 2870 | drm_printf(p, f: "\tName: %s\n" , snapshot->name); |
| 2871 | drm_printf(p, f: "\tClass: %d\n" , snapshot->class); |
| 2872 | drm_printf(p, f: "\tLogical mask: 0x%x\n" , snapshot->logical_mask); |
| 2873 | drm_printf(p, f: "\tWidth: %d\n" , snapshot->width); |
| 2874 | drm_printf(p, f: "\tRef: %d\n" , snapshot->refcount); |
| 2875 | drm_printf(p, f: "\tTimeout: %ld (ms)\n" , snapshot->sched_timeout); |
| 2876 | drm_printf(p, f: "\tTimeslice: %u (us)\n" , |
| 2877 | snapshot->sched_props.timeslice_us); |
| 2878 | drm_printf(p, f: "\tPreempt timeout: %u (us)\n" , |
| 2879 | snapshot->sched_props.preempt_timeout_us); |
| 2880 | |
| 2881 | for (i = 0; snapshot->lrc && i < snapshot->width; ++i) |
| 2882 | xe_lrc_snapshot_print(snapshot: snapshot->lrc[i], p); |
| 2883 | |
| 2884 | drm_printf(p, f: "\tSchedule State: 0x%x\n" , snapshot->schedule_state); |
| 2885 | drm_printf(p, f: "\tFlags: 0x%lx\n" , snapshot->exec_queue_flags); |
| 2886 | |
| 2887 | if (snapshot->parallel_execution) |
| 2888 | guc_exec_queue_wq_snapshot_print(snapshot, p); |
| 2889 | |
| 2890 | for (i = 0; snapshot->pending_list && i < snapshot->pending_list_size; |
| 2891 | i++) |
| 2892 | drm_printf(p, f: "\tJob: seqno=%d, fence=%d, finished=%d\n" , |
| 2893 | snapshot->pending_list[i].seqno, |
| 2894 | snapshot->pending_list[i].fence, |
| 2895 | snapshot->pending_list[i].finished); |
| 2896 | } |
| 2897 | |
| 2898 | /** |
| 2899 | * xe_guc_exec_queue_snapshot_free - Free all allocated objects for a given |
| 2900 | * snapshot. |
| 2901 | * @snapshot: GuC Submit Engine snapshot object. |
| 2902 | * |
| 2903 | * This function frees all the memory that was allocated at capture
| 2904 | * time. |
| 2905 | */ |
| 2906 | void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot) |
| 2907 | { |
| 2908 | int i; |
| 2909 | |
| 2910 | if (!snapshot) |
| 2911 | return; |
| 2912 | |
| 2913 | if (snapshot->lrc) { |
| 2914 | for (i = 0; i < snapshot->width; i++) |
| 2915 | xe_lrc_snapshot_free(snapshot: snapshot->lrc[i]); |
| 2916 | kfree(objp: snapshot->lrc); |
| 2917 | } |
| 2918 | kfree(objp: snapshot->pending_list); |
| 2919 | kfree(objp: snapshot); |
| 2920 | } |
| 2921 | |
| 2922 | static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) |
| 2923 | { |
| 2924 | struct xe_guc_submit_exec_queue_snapshot *snapshot; |
| 2925 | |
| 2926 | snapshot = xe_guc_exec_queue_snapshot_capture(q); |
| 2927 | xe_guc_exec_queue_snapshot_print(snapshot, p); |
| 2928 | xe_guc_exec_queue_snapshot_free(snapshot); |
| 2929 | } |
| 2930 | |
| 2931 | /** |
| 2932 | * xe_guc_register_vf_exec_queue - Register exec queue for a given context type. |
| 2933 | * @q: Execution queue |
| 2934 | * @ctx_type: Type of the context |
| 2935 | * |
| 2936 | * This function registers the execution queue with the GuC. Special context
| 2937 | * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
| 2938 | * are only applicable to iGPU and only in a VF. The execution queue is
| 2939 | * submitted to the GuC after registration.
| 2940 | *
| 2941 | * Returns: none.
| 2942 | */ |
| 2943 | void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type) |
| 2944 | { |
| 2945 | struct xe_guc *guc = exec_queue_to_guc(q); |
| 2946 | struct xe_device *xe = guc_to_xe(guc); |
| 2947 | struct xe_gt *gt = guc_to_gt(guc); |
| 2948 | |
| 2949 | xe_gt_assert(gt, IS_SRIOV_VF(xe)); |
| 2950 | xe_gt_assert(gt, !IS_DGFX(xe)); |
| 2951 | xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE || |
| 2952 | ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE); |
| 2953 | xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0)); |
| 2954 | |
| 2955 | register_exec_queue(q, ctx_type); |
| 2956 | enable_scheduling(q); |
| 2957 | } |
| 2958 | |
| 2959 | /** |
| 2960 | * xe_guc_submit_print - GuC Submit Print. |
| 2961 | * @guc: GuC. |
| 2962 | * @p: drm_printer where it will be printed out. |
| 2963 | * |
| 2964 | * This function captures and prints snapshots of **all** GuC Engines.
| 2965 | */ |
| 2966 | void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p) |
| 2967 | { |
| 2968 | struct xe_exec_queue *q; |
| 2969 | unsigned long index; |
| 2970 | |
| 2971 | if (!xe_device_uc_enabled(xe: guc_to_xe(guc))) |
| 2972 | return; |
| 2973 | |
| 2974 | mutex_lock(&guc->submission_state.lock); |
| 2975 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) |
| 2976 | guc_exec_queue_print(q, p); |
| 2977 | mutex_unlock(lock: &guc->submission_state.lock); |
| 2978 | } |
| 2979 | |
| 2980 | /** |
| 2981 | * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all |
| 2982 | * exec queues registered to given GuC. |
| 2983 | * @guc: the &xe_guc struct instance |
| 2984 | * @scratch: scratch buffer to be used as temporary storage |
| 2985 | * |
| 2986 | * Returns: zero on success, negative error code on failure. |
| 2987 | */ |
| 2988 | int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch) |
| 2989 | { |
| 2990 | struct xe_exec_queue *q; |
| 2991 | unsigned long index; |
| 2992 | int err = 0; |
| 2993 | |
| 2994 | mutex_lock(&guc->submission_state.lock); |
| 2995 | xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) { |
| 2996 | /* Prevent redundant attempts to stop parallel queues */ |
| 2997 | if (q->guc->id != index) |
| 2998 | continue; |
| 2999 | |
| 3000 | err = xe_exec_queue_contexts_hwsp_rebase(q, scratch); |
| 3001 | if (err) |
| 3002 | break; |
| 3003 | } |
| 3004 | mutex_unlock(lock: &guc->submission_state.lock); |
| 3005 | |
| 3006 | return err; |
| 3007 | } |
| 3008 | |